|
{ |
|
"model_table": [ |
|
{ |
|
"rank": 1, |
|
"provider": "Google", |
|
"model": "Gemini 2.0 Flash 001", |
|
"hf_id": null, |
|
"creation_date": null, |
|
"size": null, |
|
"type": "Commercial", |
|
"license": null, |
|
"average": 0.72, |
|
"classification_accuracy": 0.87, |
|
"language_modeling_chrf": 0.96, |
|
"translation_bleu": 0.45, |
|
"translation_chrf": 0.58 |
|
}, |
|
{ |
|
"rank": 2, |
|
"provider": "Google", |
|
"model": "Gemini 2.0 Flash Lite 001", |
|
"hf_id": null, |
|
"creation_date": null, |
|
"size": null, |
|
"type": "Commercial", |
|
"license": null, |
|
"average": 0.66, |
|
"classification_accuracy": 0.73, |
|
"language_modeling_chrf": 0.97, |
|
"translation_bleu": 0.4, |
|
"translation_chrf": 0.54 |
|
}, |
|
{ |
|
"rank": 3, |
|
"provider": "Google", |
|
"model": "Gemma 3 27b It", |
|
"hf_id": "google/gemma-3-27b-it", |
|
"creation_date": "2025-03-01", |
|
"size": 27432406640.0, |
|
"type": "Open", |
|
"license": "Gemma", |
|
"average": 0.65, |
|
"classification_accuracy": 0.72, |
|
"language_modeling_chrf": 0.96, |
|
"translation_bleu": 0.37, |
|
"translation_chrf": 0.54 |
|
}, |
|
{ |
|
"rank": 4, |
|
"provider": "Meta Llama", |
|
"model": "Llama 3.1 70b Instruct", |
|
"hf_id": "meta-llama/Llama-3.1-70B-Instruct", |
|
"creation_date": "2024-07-16", |
|
"size": 70553706496.0, |
|
"type": "Open", |
|
"license": "Llama3.1", |
|
"average": 0.62, |
|
"classification_accuracy": 0.57, |
|
"language_modeling_chrf": 0.92, |
|
"translation_bleu": 0.43, |
|
"translation_chrf": 0.57 |
|
}, |
|
{ |
|
"rank": 5, |
|
"provider": "Amazon", |
|
"model": "Nova Micro V1", |
|
"hf_id": null, |
|
"creation_date": null, |
|
"size": null, |
|
"type": "Commercial", |
|
"license": null, |
|
"average": 0.61, |
|
"classification_accuracy": 0.52, |
|
"language_modeling_chrf": 0.94, |
|
"translation_bleu": 0.4, |
|
"translation_chrf": 0.56 |
|
}, |
|
{ |
|
"rank": 6, |
|
"provider": "Meta Llama", |
|
"model": "Llama 3 70b Instruct", |
|
"hf_id": null, |
|
"creation_date": null, |
|
"size": null, |
|
"type": "Commercial", |
|
"license": null, |
|
"average": 0.61, |
|
"classification_accuracy": 0.8, |
|
"language_modeling_chrf": 0.95, |
|
"translation_bleu": 0.25, |
|
"translation_chrf": 0.43 |
|
}, |
|
{ |
|
"rank": 7, |
|
"provider": "OpenAI", |
|
"model": "GPT 4o Mini", |
|
"hf_id": null, |
|
"creation_date": null, |
|
"size": null, |
|
"type": "Commercial", |
|
"license": null, |
|
"average": 0.6, |
|
"classification_accuracy": 0.52, |
|
"language_modeling_chrf": 0.95, |
|
"translation_bleu": 0.39, |
|
"translation_chrf": 0.55 |
|
}, |
|
{ |
|
"rank": 8, |
|
"provider": "MistralAI", |
|
"model": "Mistral Small 24b Instruct 2501", |
|
"hf_id": "mistralai/Mistral-Small-24B-Instruct-2501", |
|
"creation_date": "2025-01-28", |
|
"size": 23572403200.0, |
|
"type": "Open", |
|
"license": "Apache 2.0", |
|
"average": 0.58, |
|
"classification_accuracy": 0.55, |
|
"language_modeling_chrf": 0.86, |
|
"translation_bleu": 0.38, |
|
"translation_chrf": 0.52 |
|
}, |
|
{ |
|
"rank": 9, |
|
"provider": "Meta Llama", |
|
"model": "Llama 3.3 70b Instruct", |
|
"hf_id": "meta-llama/Llama-3.3-70B-Instruct", |
|
"creation_date": "2024-11-26", |
|
"size": 70553706496.0, |
|
"type": "Open", |
|
"license": "Llama3.3", |
|
"average": 0.56, |
|
"classification_accuracy": 0.5, |
|
"language_modeling_chrf": 0.94, |
|
"translation_bleu": 0.31, |
|
"translation_chrf": 0.48 |
|
}, |
|
{ |
|
"rank": 10, |
|
"provider": "MistralAI", |
|
"model": "Mistral Nemo", |
|
"hf_id": null, |
|
"creation_date": null, |
|
"size": null, |
|
"type": "Commercial", |
|
"license": null, |
|
"average": 0.55, |
|
"classification_accuracy": 0.5, |
|
"language_modeling_chrf": 0.88, |
|
"translation_bleu": 0.32, |
|
"translation_chrf": 0.49 |
|
}, |
|
{ |
|
"rank": 11, |
|
"provider": "Microsoft", |
|
"model": "Phi 4 Multimodal Instruct", |
|
"hf_id": "microsoft/Phi-4-multimodal-instruct", |
|
"creation_date": "2025-02-24", |
|
"size": 5574460384.0, |
|
"type": "Open", |
|
"license": "Mit", |
|
"average": 0.52, |
|
"classification_accuracy": 0.42, |
|
"language_modeling_chrf": 0.87, |
|
"translation_bleu": 0.32, |
|
"translation_chrf": 0.46 |
|
}, |
|
{ |
|
"rank": 12, |
|
"provider": "Qwen", |
|
"model": "Qwq 32b", |
|
"hf_id": "Qwen/QwQ-32B", |
|
"creation_date": "2025-03-05", |
|
"size": 32763876352.0, |
|
"type": "Open", |
|
"license": "Apache 2.0", |
|
"average": 0.25, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.48, |
|
"translation_bleu": 0.21, |
|
"translation_chrf": 0.3 |
|
} |
|
], |
|
"language_table": [ |
|
{ |
|
"language_name": "English", |
|
"autonym": "English", |
|
"speakers": 1636485840, |
|
"family": "Indo-European", |
|
"average": 0.47, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.58, |
|
"language_modeling_chrf": 0.92, |
|
"translation_bleu": 0.37, |
|
"translation_chrf": 0.49 |
|
}, |
|
{ |
|
"language_name": "Chinese", |
|
"autonym": "中文", |
|
"speakers": 1304678914, |
|
"family": "Sino-Tibetan", |
|
"average": 0.46, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.55, |
|
"language_modeling_chrf": 0.86, |
|
"translation_bleu": 0.35, |
|
"translation_chrf": 0.53 |
|
}, |
|
{ |
|
"language_name": "Hindi", |
|
"autonym": "हिन्दी", |
|
"speakers": 546882144, |
|
"family": "Indo-European", |
|
"average": 0.46, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.53, |
|
"language_modeling_chrf": 0.95, |
|
"translation_bleu": 0.32, |
|
"translation_chrf": 0.49 |
|
}, |
|
{ |
|
"language_name": "Spanish", |
|
"autonym": "Español", |
|
"speakers": 493528077, |
|
"family": "Indo-European", |
|
"average": 0.45, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.53, |
|
"language_modeling_chrf": 0.96, |
|
"translation_bleu": 0.28, |
|
"translation_chrf": 0.46 |
|
}, |
|
{ |
|
"language_name": "Arabic", |
|
"autonym": "العربية", |
|
"speakers": 351664197, |
|
"family": "Afro-Asiatic", |
|
"average": 0.42, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.43, |
|
"language_modeling_chrf": 0.93, |
|
"translation_bleu": 0.28, |
|
"translation_chrf": 0.47 |
|
}, |
|
{ |
|
"language_name": "Urdu", |
|
"autonym": "اردو", |
|
"speakers": 290790290, |
|
"family": "Indo-European", |
|
"average": 0.42, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.5, |
|
"language_modeling_chrf": 0.93, |
|
"translation_bleu": 0.24, |
|
"translation_chrf": 0.42 |
|
}, |
|
{ |
|
"language_name": "French", |
|
"autonym": "Français", |
|
"speakers": 278611507, |
|
"family": "Indo-European", |
|
"average": 0.46, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.53, |
|
"language_modeling_chrf": 0.98, |
|
"translation_bleu": 0.32, |
|
"translation_chrf": 0.49 |
|
}, |
|
{ |
|
"language_name": "Bangla", |
|
"autonym": "বাংলা", |
|
"speakers": 267193288, |
|
"family": "Indo-European", |
|
"average": 0.38, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.4, |
|
"language_modeling_chrf": 0.9, |
|
"translation_bleu": 0.2, |
|
"translation_chrf": 0.41 |
|
}, |
|
{ |
|
"language_name": "Portuguese", |
|
"autonym": "Português", |
|
"speakers": 237496885, |
|
"family": "Indo-European", |
|
"average": 0.46, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.5, |
|
"language_modeling_chrf": 0.97, |
|
"translation_bleu": 0.31, |
|
"translation_chrf": 0.49 |
|
}, |
|
{ |
|
"language_name": "Punjabi", |
|
"autonym": "ਪੰਜਾਬੀ", |
|
"speakers": 203571210, |
|
"family": "Indo-European", |
|
"average": 0.42, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.4, |
|
"language_modeling_chrf": 0.87, |
|
"translation_bleu": 0.34, |
|
"translation_chrf": 0.49 |
|
}, |
|
{ |
|
"language_name": "Russian", |
|
"autonym": "Русский", |
|
"speakers": 195841151, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Swahili", |
|
"autonym": "Kiswahili", |
|
"speakers": 171610296, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Indonesian", |
|
"autonym": "Bahasa Indonesia", |
|
"speakers": 171207687, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "German", |
|
"autonym": "Deutsch", |
|
"speakers": 136350226, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Japanese", |
|
"autonym": "日本語", |
|
"speakers": 119729026, |
|
"family": "Japonic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Telugu", |
|
"autonym": "తెలుగు", |
|
"speakers": 95478480, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Western Panjabi", |
|
"autonym": "لہندا پنجابی", |
|
"speakers": 93433552, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Marathi", |
|
"autonym": "मराठी", |
|
"speakers": 92826300, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Javanese", |
|
"autonym": "Jawa", |
|
"speakers": 91180665, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Vietnamese", |
|
"autonym": "Tiếng Việt", |
|
"speakers": 86222962, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tamil", |
|
"autonym": "தமிழ்", |
|
"speakers": 85616159, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Persian", |
|
"autonym": "فارسی", |
|
"speakers": 84710459, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Wu Chinese", |
|
"autonym": "Wu Chinese", |
|
"speakers": 83641200, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Turkish", |
|
"autonym": "Türkçe", |
|
"speakers": 80360704, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Cantonese", |
|
"autonym": "粵語", |
|
"speakers": 79654759, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Korean", |
|
"autonym": "한국어", |
|
"speakers": 78357046, |
|
"family": "Koreanic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Italian", |
|
"autonym": "Italiano", |
|
"speakers": 70247060, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Filipino", |
|
"autonym": "Filipino", |
|
"speakers": 67471096, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Egyptian Arabic", |
|
"autonym": "Egyptian Arabic", |
|
"speakers": 66639360, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gujarati", |
|
"autonym": "ગુજરાતી", |
|
"speakers": 61721799, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Thai", |
|
"autonym": "ไทย", |
|
"speakers": 55181920, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Pashto", |
|
"autonym": "پښتو", |
|
"speakers": 53542641, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kannada", |
|
"autonym": "ಕನ್ನಡ", |
|
"speakers": 49065330, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nigerian Pidgin", |
|
"autonym": "Naijíriá Píjin", |
|
"speakers": 44945880, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Malayalam", |
|
"autonym": "മലയാളം", |
|
"speakers": 43257484, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Odia", |
|
"autonym": "ଓଡ଼ିଆ", |
|
"speakers": 42434880, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Polish", |
|
"autonym": "Polski", |
|
"speakers": 41077399, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Xiang Chinese", |
|
"autonym": "Xiang Chinese", |
|
"speakers": 40426580, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hausa", |
|
"autonym": "Hausa", |
|
"speakers": 40411882, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sindhi", |
|
"autonym": "سنڌي", |
|
"speakers": 40329510, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "North Levantine Arabic", |
|
"autonym": "العامية", |
|
"speakers": 39031474, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Malay", |
|
"autonym": "Bahasa Malaysia", |
|
"speakers": 38097307, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Burmese", |
|
"autonym": "မြန်မာ", |
|
"speakers": 36559231, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Amharic", |
|
"autonym": "አማርኛ", |
|
"speakers": 35728475, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Algerian Arabic", |
|
"autonym": "Algerian Arabic", |
|
"speakers": 35667507, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Oromo", |
|
"autonym": "Oromoo", |
|
"speakers": 34897121, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bhojpuri", |
|
"autonym": "भोजपुरी", |
|
"speakers": 32934797, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Uzbek", |
|
"autonym": "O‘Zbek", |
|
"speakers": 32792780, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Azerbaijani", |
|
"autonym": "Azərbaycan", |
|
"speakers": 32446682, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hakka Chinese", |
|
"autonym": "Hakka Chinese", |
|
"speakers": 32062460, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sundanese", |
|
"autonym": "Basa Sunda", |
|
"speakers": 32043120, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dutch", |
|
"autonym": "Nederlands", |
|
"speakers": 31765645, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Moroccan Arabic", |
|
"autonym": "Moroccan Arabic", |
|
"speakers": 30938679, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ukrainian", |
|
"autonym": "Українська", |
|
"speakers": 29348975, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Yoruba", |
|
"autonym": "Èdè Yorùbá", |
|
"speakers": 28685568, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Saraiki", |
|
"autonym": "سرائیکی", |
|
"speakers": 28020120, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Igbo", |
|
"autonym": "Igbo", |
|
"speakers": 27823640, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Min Nan Chinese", |
|
"autonym": "Min Nan Chinese", |
|
"speakers": 26486380, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Cebuano", |
|
"autonym": "Cebuano", |
|
"speakers": 26203440, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Awadhi", |
|
"autonym": "Awadhi", |
|
"speakers": 25862924, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Malagasy", |
|
"autonym": "Malagasy", |
|
"speakers": 24260130, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gan Chinese", |
|
"autonym": "Gan Chinese", |
|
"speakers": 23698340, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Romanian", |
|
"autonym": "Română", |
|
"speakers": 22187408, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bavarian", |
|
"autonym": "Bavarian", |
|
"speakers": 22043627, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nepali", |
|
"autonym": "नेपाली", |
|
"speakers": 20903374, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Maithili", |
|
"autonym": "मैथिली", |
|
"speakers": 19249149, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Assamese", |
|
"autonym": "অসমীয়া", |
|
"speakers": 17239170, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nyanja", |
|
"autonym": "Nyanja", |
|
"speakers": 17026781, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Somali", |
|
"autonym": "Soomaali", |
|
"speakers": 16911645, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Madurese", |
|
"autonym": "Madurese", |
|
"speakers": 16822638, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Northeastern Thai", |
|
"autonym": "Northeastern Thai", |
|
"speakers": 16554576, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rangpuri", |
|
"autonym": "Rangpuri", |
|
"speakers": 16274502, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Magahi", |
|
"autonym": "Magahi", |
|
"speakers": 15913080, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Haryanvi", |
|
"autonym": "हरियाणवी", |
|
"speakers": 15913080, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Marwari", |
|
"autonym": "Marwari", |
|
"speakers": 15913080, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Serbian", |
|
"autonym": "Српски", |
|
"speakers": 15602410, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sinhala", |
|
"autonym": "සිංහල", |
|
"speakers": 15564656, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Khmer", |
|
"autonym": "ខ្មែរ", |
|
"speakers": 15065030, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chhattisgarhi", |
|
"autonym": "Chhattisgarhi", |
|
"speakers": 14586990, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nigerian Fulfulde", |
|
"autonym": "Nigerian Fulfulde", |
|
"speakers": 14339876, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Zulu", |
|
"autonym": "Isizulu", |
|
"speakers": 13973830, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kazakh", |
|
"autonym": "Қазақ Тілі", |
|
"speakers": 13637392, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Deccan", |
|
"autonym": "Deccan", |
|
"speakers": 13128291, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Czech", |
|
"autonym": "Čeština", |
|
"speakers": 13045532, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Swedish", |
|
"autonym": "Svenska", |
|
"speakers": 12932871, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hungarian", |
|
"autonym": "Magyar", |
|
"speakers": 12443430, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Greek", |
|
"autonym": "Ελληνικά", |
|
"speakers": 12292242, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Shona", |
|
"autonym": "Chishona", |
|
"speakers": 11782503, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Low German", |
|
"autonym": "Neddersass’Sch", |
|
"speakers": 11520008, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Akan", |
|
"autonym": "Akan", |
|
"speakers": 11442678, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Quechua", |
|
"autonym": "Runasimi", |
|
"speakers": 11385851, |
|
"family": "Quechuan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Central Kurdish", |
|
"autonym": "کوردیی ناوەندی", |
|
"speakers": 11086549, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kinyarwanda", |
|
"autonym": "Kinyarwanda", |
|
"speakers": 11083625, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Wolof", |
|
"autonym": "Wolof", |
|
"speakers": 11025494, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tunisian Arabic", |
|
"autonym": "Tunisian Arabic", |
|
"speakers": 10549080, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Iloko", |
|
"autonym": "Ilokano", |
|
"speakers": 10481376, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Xhosa", |
|
"autonym": "Isixhosa", |
|
"speakers": 10182944, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tigrinya", |
|
"autonym": "ትግርኛ", |
|
"speakers": 10145911, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Belarusian", |
|
"autonym": "Беларуская", |
|
"speakers": 10064517, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Luba-Lulua", |
|
"autonym": "Luba-Lulua", |
|
"speakers": 9770880, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tajik", |
|
"autonym": "Тоҷикӣ", |
|
"speakers": 9644223, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Umbundu", |
|
"autonym": "Umbundu", |
|
"speakers": 9431467, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bambara", |
|
"autonym": "Bamanakan", |
|
"speakers": 9385632, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Afrikaans", |
|
"autonym": "Afrikaans", |
|
"speakers": 9318845, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hiligaynon", |
|
"autonym": "Ilonggo", |
|
"speakers": 9171204, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kikuyu", |
|
"autonym": "Gikuyu", |
|
"speakers": 9099743, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Haitian Creole", |
|
"autonym": "Haitian Creole", |
|
"speakers": 8964918, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Catalan", |
|
"autonym": "Català", |
|
"speakers": 8679139, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hebrew", |
|
"autonym": "עברית", |
|
"speakers": 8675480, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sichuan Yi", |
|
"autonym": "ꆈꌠꉙ", |
|
"speakers": 8364120, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mossi", |
|
"autonym": "Mossi", |
|
"speakers": 8334160, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Baluchi", |
|
"autonym": "بلۆچی", |
|
"speakers": 8227887, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sylheti", |
|
"autonym": "Sylheti", |
|
"speakers": 8132550, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kimbundu", |
|
"autonym": "Kimbundu", |
|
"speakers": 8130575, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Uyghur", |
|
"autonym": "ئۇيغۇرچە", |
|
"speakers": 8052967, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Minangkabau", |
|
"autonym": "Minangkabau", |
|
"speakers": 8010780, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Swiss German", |
|
"autonym": "Schwiizertüütsch", |
|
"speakers": 7956952, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bulgarian", |
|
"autonym": "Български", |
|
"speakers": 7878315, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Standard Moroccan Tamazight", |
|
"autonym": "ⵜⴰⵎⴰⵣⵉⵖⵜ", |
|
"speakers": 7823574, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Fula", |
|
"autonym": "Pulaar", |
|
"speakers": 7788904, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bosnian", |
|
"autonym": "Bosanski", |
|
"speakers": 7594468, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rundi", |
|
"autonym": "Ikirundi", |
|
"speakers": 7475454, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kanauji", |
|
"autonym": "Kanauji", |
|
"speakers": 7426104, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Santali", |
|
"autonym": "ᱥᱟᱱᱛᱟᱲᱤ", |
|
"speakers": 7293495, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Danish", |
|
"autonym": "Dansk", |
|
"speakers": 7072056, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Turkmen", |
|
"autonym": "Türkmen Dili", |
|
"speakers": 6870838, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kurdish", |
|
"autonym": "Kurdî (Kurmancî)", |
|
"speakers": 6866757, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Croatian", |
|
"autonym": "Hrvatski", |
|
"speakers": 6813164, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Albanian", |
|
"autonym": "Shqip", |
|
"speakers": 6791906, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Slovak", |
|
"autonym": "Slovenčina", |
|
"speakers": 6680269, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dyula", |
|
"autonym": "Dyula", |
|
"speakers": 6667328, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Northern Thai", |
|
"autonym": "Northern Thai", |
|
"speakers": 6621830, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mongolian", |
|
"autonym": "Монгол", |
|
"speakers": 6572846, |
|
"family": "Mongolic-Khitan", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Southern Sotho", |
|
"autonym": "Sesotho", |
|
"speakers": 6390567, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Krio", |
|
"autonym": "Krio", |
|
"speakers": 6293684, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tachelhit", |
|
"autonym": "ⵜⴰⵛⵍⵃⵉⵜ", |
|
"speakers": 6187736, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tswana", |
|
"autonym": "Tswana", |
|
"speakers": 6113428, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mewati", |
|
"autonym": "Mewati", |
|
"speakers": 6100014, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Luyia", |
|
"autonym": "Luluhia", |
|
"speakers": 5888069, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Guarani", |
|
"autonym": "Avañe’Ẽ", |
|
"speakers": 5827107, |
|
"family": "Tupian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Finnish", |
|
"autonym": "Suomi", |
|
"speakers": 5736842, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ganda", |
|
"autonym": "Luganda", |
|
"speakers": 5622890, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Betawi", |
|
"autonym": "Betawi", |
|
"speakers": 5607546, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kashmiri", |
|
"autonym": "کٲشُر", |
|
"speakers": 5598085, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Southern Thai", |
|
"autonym": "Southern Thai", |
|
"speakers": 5518192, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Norwegian Bokmål", |
|
"autonym": "Norsk Bokmål", |
|
"speakers": 5468932, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Norwegian", |
|
"autonym": "Norsk", |
|
"speakers": 5467440, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bemba", |
|
"autonym": "Ichibemba", |
|
"speakers": 5402246, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Armenian", |
|
"autonym": "Հայերեն", |
|
"speakers": 5317273, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Northern Sotho", |
|
"autonym": "Northern Sotho", |
|
"speakers": 5307578, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Luo (Kenya and Tanzania)", |
|
"autonym": "Dholuo", |
|
"speakers": 5245734, |
|
"family": "Nilotic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tok Pisin", |
|
"autonym": "Tok Pisin", |
|
"speakers": 5154217, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lao", |
|
"autonym": "ລາວ", |
|
"speakers": 5138706, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sukuma", |
|
"autonym": "Sukuma", |
|
"speakers": 5094094, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Konkani", |
|
"autonym": "कोंकणी", |
|
"speakers": 4906533, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tsonga", |
|
"autonym": "Tsonga", |
|
"speakers": 4880932, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Main-Franconian", |
|
"autonym": "Main-Franconian", |
|
"speakers": 4809582, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Balinese", |
|
"autonym": "Balinese", |
|
"speakers": 4806468, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ewe", |
|
"autonym": "Eʋegbe", |
|
"speakers": 4690857, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Zhuang", |
|
"autonym": "Vahcuengh", |
|
"speakers": 4321462, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Buginese", |
|
"autonym": "Buginese", |
|
"speakers": 4298211, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mazanderani", |
|
"autonym": "مازرونی", |
|
"speakers": 4246165, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Goan Konkani", |
|
"autonym": "Goan Konkani", |
|
"speakers": 4243488, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kamba", |
|
"autonym": "Kikamba", |
|
"speakers": 4068120, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kalenjin", |
|
"autonym": "Kalenjin", |
|
"speakers": 4068120, |
|
"family": "Nilotic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Banjar", |
|
"autonym": "Banjar", |
|
"speakers": 4010288, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Northern Hindko", |
|
"autonym": "Northern Hindko", |
|
"speakers": 3969517, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Makhuwa", |
|
"autonym": "Emakhuwa", |
|
"speakers": 3912766, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gilaki", |
|
"autonym": "Gilaki", |
|
"speakers": 3906472, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lombard", |
|
"autonym": "Lombard", |
|
"speakers": 3901518, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Zarma", |
|
"autonym": "Zarmaciine", |
|
"speakers": 3871308, |
|
"family": "Songhay", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ndau", |
|
"autonym": "Ndau", |
|
"speakers": 3867046, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sidamo", |
|
"autonym": "Sidamo", |
|
"speakers": 3783955, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Achinese", |
|
"autonym": "Achinese", |
|
"speakers": 3738364, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Shekhawati", |
|
"autonym": "Shekhawati", |
|
"speakers": 3713052, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Riffian", |
|
"autonym": "Tarifit", |
|
"speakers": 3692411, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Shan", |
|
"autonym": "တႆး", |
|
"speakers": 3687984, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lambadi", |
|
"autonym": "Lambadi", |
|
"speakers": 3580443, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Garhwali", |
|
"autonym": "Garhwali", |
|
"speakers": 3580443, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Georgian", |
|
"autonym": "ქართული", |
|
"speakers": 3543646, |
|
"family": "Kartvelian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Galician", |
|
"autonym": "Galego", |
|
"speakers": 3515530, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lingala", |
|
"autonym": "Lingála", |
|
"speakers": 3514491, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mandingo", |
|
"autonym": "Mandingo", |
|
"speakers": 3511762, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Central Atlas Tamazight", |
|
"autonym": "Tamaziɣt N Laṭlaṣ", |
|
"speakers": 3485047, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Pattani Malay", |
|
"autonym": "Pattani Malay", |
|
"speakers": 3448870, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tiv", |
|
"autonym": "Tiv", |
|
"speakers": 3424448, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kabyle", |
|
"autonym": "Taqbaylit", |
|
"speakers": 3351886, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kyrgyz", |
|
"autonym": "Кыргызча", |
|
"speakers": 3338267, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bikol", |
|
"autonym": "Bikol", |
|
"speakers": 3275430, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Fon", |
|
"autonym": "Fon", |
|
"speakers": 3216150, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gondi", |
|
"autonym": "Gondi", |
|
"speakers": 3182616, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Waray", |
|
"autonym": "Waray", |
|
"speakers": 3166927, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Southern Kurdish", |
|
"autonym": "کوردی خوارگ", |
|
"speakers": 3142162, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Brahui", |
|
"autonym": "Brahui", |
|
"speakers": 3035513, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Baoulé", |
|
"autonym": "Baoulé", |
|
"speakers": 3022921, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tibetan", |
|
"autonym": "བོད་སྐད་", |
|
"speakers": 3006697, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ibibio", |
|
"autonym": "Ibibio", |
|
"speakers": 2996392, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Efik", |
|
"autonym": "Efik", |
|
"speakers": 2996392, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sango", |
|
"autonym": "Sängö", |
|
"speakers": 2935521, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kumaoni", |
|
"autonym": "Kumaoni", |
|
"speakers": 2917398, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Aymara", |
|
"autonym": "Aymara", |
|
"speakers": 2838620, |
|
"family": "Aymaran", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nyankole", |
|
"autonym": "Runyankore", |
|
"speakers": 2724939, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Jamaican Creole English", |
|
"autonym": "Jamaican Creole English", |
|
"speakers": 2668142, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dogri", |
|
"autonym": "डोगरी", |
|
"speakers": 2652180, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gusii", |
|
"autonym": "Ekegusii", |
|
"speakers": 2622867, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sasak", |
|
"autonym": "Sasak", |
|
"speakers": 2590152, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kurukh", |
|
"autonym": "Kurukh", |
|
"speakers": 2519571, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Pampanga", |
|
"autonym": "Pampanga", |
|
"speakers": 2511163, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "West Albay Bikol", |
|
"autonym": "West Albay Bikol", |
|
"speakers": 2511163, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lithuanian", |
|
"autonym": "Lietuvių", |
|
"speakers": 2488617, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Beja", |
|
"autonym": "Beja", |
|
"speakers": 2460326, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Batak Toba", |
|
"autonym": "Batak Toba", |
|
"speakers": 2456639, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sadri", |
|
"autonym": "Sadri", |
|
"speakers": 2386962, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Waddar", |
|
"autonym": "Waddar", |
|
"speakers": 2386962, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Luba-Katanga", |
|
"autonym": "Tshiluba", |
|
"speakers": 2340940, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chiga", |
|
"autonym": "Rukiga", |
|
"speakers": 2335662, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Soga", |
|
"autonym": "Olusoga", |
|
"speakers": 2292409, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Swati", |
|
"autonym": "Siswati", |
|
"speakers": 2212379, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hazaragi", |
|
"autonym": "Hazaragi", |
|
"speakers": 2161984, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Meru", |
|
"autonym": "Kĩmĩrũ", |
|
"speakers": 2141116, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kangri", |
|
"autonym": "कांगड़ी", |
|
"speakers": 2121744, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Afar", |
|
"autonym": "Afar", |
|
"speakers": 2119663, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Teso", |
|
"autonym": "Kiteso", |
|
"speakers": 2082973, |
|
"family": "Nilotic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lomwe", |
|
"autonym": "Lomwe", |
|
"speakers": 2046678, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Occitan", |
|
"autonym": "Occitan", |
|
"speakers": 2040398, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Western Balochi", |
|
"autonym": "بلوچی (رخشانی)", |
|
"speakers": 2037382, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Northern Luri", |
|
"autonym": "لۊری شومالی", |
|
"speakers": 2020512, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Wagdi", |
|
"autonym": "Wagdi", |
|
"speakers": 1989135, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tulu", |
|
"autonym": "Tulu", |
|
"speakers": 1989135, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Khandesi", |
|
"autonym": "Khandesi", |
|
"speakers": 1989135, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tatar", |
|
"autonym": "Татар", |
|
"speakers": 1984108, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Slovenian", |
|
"autonym": "Slovenščina", |
|
"speakers": 1973181, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Makasar", |
|
"autonym": "Makasar", |
|
"speakers": 1949290, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Wolaytta", |
|
"autonym": "Wolaytta", |
|
"speakers": 1946034, |
|
"family": "Ta-Ne-Omotic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nyamwezi", |
|
"autonym": "Nyamwezi", |
|
"speakers": 1932242, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bodo", |
|
"autonym": "बर’", |
|
"speakers": 1856526, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lampung Api", |
|
"autonym": "Lampung Api", |
|
"speakers": 1842479, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chuvash", |
|
"autonym": "Чӑваш", |
|
"speakers": 1842386, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bashkir", |
|
"autonym": "Башҡорт Теле", |
|
"speakers": 1842386, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rohingya", |
|
"autonym": "𐴌𐴗𐴥𐴝𐴙𐴚𐴒𐴙𐴝", |
|
"speakers": 1824082, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mende", |
|
"autonym": "Mende", |
|
"speakers": 1813083, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tumbuka", |
|
"autonym": "Tumbuka", |
|
"speakers": 1780514, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tamashek", |
|
"autonym": "Tamashek", |
|
"speakers": 1776965, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "North Ndebele", |
|
"autonym": "Isindebele", |
|
"speakers": 1745556, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Masai", |
|
"autonym": "Maa", |
|
"speakers": 1734738, |
|
"family": "Nilotic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Serer", |
|
"autonym": "Serer", |
|
"speakers": 1731004, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nimadi", |
|
"autonym": "Nimadi", |
|
"speakers": 1723917, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Timne", |
|
"autonym": "Timne", |
|
"speakers": 1722482, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Scots", |
|
"autonym": "Scots", |
|
"speakers": 1644028, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lango (Uganda)", |
|
"autonym": "Lango (Uganda)", |
|
"speakers": 1643614, |
|
"family": "Nilotic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Domari", |
|
"autonym": "Domari", |
|
"speakers": 1613543, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Macedonian", |
|
"autonym": "Македонски", |
|
"speakers": 1608565, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Acoli", |
|
"autonym": "Acoli", |
|
"speakers": 1600361, |
|
"family": "Nilotic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Central-Eastern Niger Fulfulde", |
|
"autonym": "Central-Eastern Niger Fulfulde", |
|
"speakers": 1594068, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bhili", |
|
"autonym": "Bhili", |
|
"speakers": 1591308, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Pangasinan", |
|
"autonym": "Pangasinan", |
|
"speakers": 1528534, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kongo", |
|
"autonym": "Kongo", |
|
"speakers": 1526700, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bini", |
|
"autonym": "Bini", |
|
"speakers": 1519599, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Maasina Fulfulde", |
|
"autonym": "Maasina Fulfulde", |
|
"speakers": 1505612, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Manipuri", |
|
"autonym": "মৈতৈলোন্", |
|
"speakers": 1476591, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Abron", |
|
"autonym": "Abron", |
|
"speakers": 1467010, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Makonde", |
|
"autonym": "Chimakonde", |
|
"speakers": 1463820, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gheg Albanian", |
|
"autonym": "Gheg Albanian", |
|
"speakers": 1430250, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Venda", |
|
"autonym": "Tshivenḓa", |
|
"speakers": 1391759, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sena", |
|
"autonym": "Sena", |
|
"speakers": 1384517, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Venetian", |
|
"autonym": "Veneto", |
|
"speakers": 1380829, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Susu", |
|
"autonym": "Susu", |
|
"speakers": 1378014, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Norwegian Nynorsk", |
|
"autonym": "Norsk Nynorsk", |
|
"speakers": 1366860, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Makhuwa-Meetto", |
|
"autonym": "Makua", |
|
"speakers": 1354419, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rajasthani", |
|
"autonym": "राजस्थानी", |
|
"speakers": 1326090, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ho", |
|
"autonym": "Ho", |
|
"speakers": 1312829, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Maguindanaon", |
|
"autonym": "Maguindanaon", |
|
"speakers": 1310172, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mewari", |
|
"autonym": "Mewari", |
|
"speakers": 1286307, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bulu", |
|
"autonym": "Bulu", |
|
"speakers": 1276270, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Masaaba", |
|
"autonym": "Masaaba", |
|
"speakers": 1254337, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mundari", |
|
"autonym": "Mundari", |
|
"speakers": 1252287, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Morisyen", |
|
"autonym": "Kreol Morisien", |
|
"speakers": 1241433, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Irish", |
|
"autonym": "Gaeilge", |
|
"speakers": 1237487, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Fur", |
|
"autonym": "Fur", |
|
"speakers": 1230163, |
|
"family": "Furan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rejang", |
|
"autonym": "Rejang", |
|
"speakers": 1228320, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bhilali", |
|
"autonym": "Bhilali", |
|
"speakers": 1220003, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tausug", |
|
"autonym": "Tausug", |
|
"speakers": 1200991, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kʼicheʼ", |
|
"autonym": "KʼIcheʼ", |
|
"speakers": 1200731, |
|
"family": "Mayan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bakhtiari", |
|
"autonym": "Bakhtiari", |
|
"speakers": 1188926, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kpelle", |
|
"autonym": "Kpɛlɛɛ", |
|
"speakers": 1186303, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Cebaara Senoufo", |
|
"autonym": "Cebaara Senoufo", |
|
"speakers": 1181687, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Northern Khmer", |
|
"autonym": "Northern Khmer", |
|
"speakers": 1172616, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "West Flemish", |
|
"autonym": "West Flemish", |
|
"speakers": 1172070, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Soninke", |
|
"autonym": "Soninke", |
|
"speakers": 1153651, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Zaza", |
|
"autonym": "Zaza", |
|
"speakers": 1148245, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Latvian", |
|
"autonym": "Latviešu", |
|
"speakers": 1147550, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dan", |
|
"autonym": "Dan", |
|
"speakers": 1099244, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gorontalo", |
|
"autonym": "Gorontalo", |
|
"speakers": 1094807, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tigre", |
|
"autonym": "Tigre", |
|
"speakers": 1094616, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Basque", |
|
"autonym": "Euskara", |
|
"speakers": 1088519, |
|
"family": null, |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hadothi", |
|
"autonym": "Hadothi", |
|
"speakers": 1087394, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kabardian", |
|
"autonym": "Kabardian", |
|
"speakers": 1070873, |
|
"family": "Abkhaz-Adyge", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Khasi", |
|
"autonym": "Khasi", |
|
"speakers": 1060872, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sardinian", |
|
"autonym": "Sardu", |
|
"speakers": 1060846, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lozi", |
|
"autonym": "Lozi", |
|
"speakers": 1045596, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Frafra", |
|
"autonym": "Frafra", |
|
"speakers": 1026907, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Najdi Arabic", |
|
"autonym": "Najdi Arabic", |
|
"speakers": 1025205, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ronga", |
|
"autonym": "Ronga", |
|
"speakers": 1023339, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Southern Luri", |
|
"autonym": "Southern Luri", |
|
"speakers": 1019080, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Newari", |
|
"autonym": "Newari", |
|
"speakers": 1000821, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Talysh", |
|
"autonym": "Talysh", |
|
"speakers": 1000168, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Yiddish", |
|
"autonym": "ייִדיש", |
|
"speakers": 997214, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bena", |
|
"autonym": "Hibena", |
|
"speakers": 995398, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Shambala", |
|
"autonym": "Kishambaa", |
|
"speakers": 995398, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kachhi", |
|
"autonym": "Kachhi", |
|
"speakers": 994568, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ngaju", |
|
"autonym": "Ngaju", |
|
"speakers": 987996, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kita Maninkakan", |
|
"autonym": "Kita Maninkakan", |
|
"speakers": 977670, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Jumli", |
|
"autonym": "Jumli", |
|
"speakers": 970493, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Central Okinawan", |
|
"autonym": "Central Okinawan", |
|
"speakers": 966404, |
|
"family": "Japonic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mon", |
|
"autonym": "Mon", |
|
"speakers": 966114, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chimborazo Highland Quichua", |
|
"autonym": "Chimborazo Highland Quichua", |
|
"speakers": 963579, |
|
"family": "Quechuan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kachin", |
|
"autonym": "Kachin", |
|
"speakers": 962032, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Southern Hindko", |
|
"autonym": "Southern Hindko", |
|
"speakers": 957354, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Limburgish", |
|
"autonym": "Limburgish", |
|
"speakers": 950422, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Manyika", |
|
"autonym": "Manyika", |
|
"speakers": 945510, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chechen", |
|
"autonym": "Нохчийн", |
|
"speakers": 935365, |
|
"family": "Nakh-Daghestanian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kuanyama", |
|
"autonym": "Kuanyama", |
|
"speakers": 920524, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "South Ndebele", |
|
"autonym": "South Ndebele", |
|
"speakers": 903418, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Welsh", |
|
"autonym": "Cymraeg", |
|
"speakers": 884910, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Adangme", |
|
"autonym": "Adangme", |
|
"speakers": 880206, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Estonian", |
|
"autonym": "Eesti", |
|
"speakers": 878449, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Parsi-Dari", |
|
"autonym": "Parsi-Dari", |
|
"speakers": 864342, |
|
"family": "Bookkeeping", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Yucateco", |
|
"autonym": "Yucateco", |
|
"speakers": 861955, |
|
"family": "Mayan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ewondo", |
|
"autonym": "Ewondo", |
|
"speakers": 860095, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Komering", |
|
"autonym": "Komering", |
|
"speakers": 854483, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tooro", |
|
"autonym": "Tooro", |
|
"speakers": 821807, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Garo", |
|
"autonym": "Garo", |
|
"speakers": 821563, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ga", |
|
"autonym": "Gã", |
|
"speakers": 821526, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mbunga", |
|
"autonym": "Mbunga", |
|
"speakers": 819739, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tetum", |
|
"autonym": "Tetum", |
|
"speakers": 816395, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Iban", |
|
"autonym": "Iban", |
|
"speakers": 816302, |
|
"family": "Bookkeeping", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Embu", |
|
"autonym": "Kĩembu", |
|
"speakers": 802918, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Swabian", |
|
"autonym": "Swabian", |
|
"speakers": 801597, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hmong Njua", |
|
"autonym": "𞄀𞄄𞄰𞄩𞄍𞄜𞄰", |
|
"speakers": 781687, |
|
"family": "Hmong-Mien", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kalanga", |
|
"autonym": "Kalanga", |
|
"speakers": 770954, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dotyali", |
|
"autonym": "Dotyali", |
|
"speakers": 758198, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Picard", |
|
"autonym": "Picard", |
|
"speakers": 746330, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Western Frisian", |
|
"autonym": "Frysk", |
|
"speakers": 743057, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chakma", |
|
"autonym": "𑄌𑄋𑄴𑄟𑄳𑄦", |
|
"speakers": 729137, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Yao", |
|
"autonym": "Yao", |
|
"speakers": 722357, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Uab Meto", |
|
"autonym": "Uab Meto", |
|
"speakers": 720970, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Capiznon", |
|
"autonym": "Capiznon", |
|
"speakers": 720595, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Asu", |
|
"autonym": "Kipare", |
|
"speakers": 702634, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tai Dam", |
|
"autonym": "ꪼꪕꪒꪾ", |
|
"speakers": 681177, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Walloon", |
|
"autonym": "Walon", |
|
"speakers": 679801, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Koyraboro Senni", |
|
"autonym": "Koyraboro Senni", |
|
"speakers": 664816, |
|
"family": "Songhay", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bagheli", |
|
"autonym": "Bagheli", |
|
"speakers": 654424, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Asturian", |
|
"autonym": "Asturianu", |
|
"speakers": 650205, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Laki", |
|
"autonym": "Laki", |
|
"speakers": 645417, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Munda", |
|
"autonym": "Munda", |
|
"speakers": 636523, |
|
"family": "Bookkeeping", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lisu", |
|
"autonym": "Lisu", |
|
"speakers": 627309, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "N’Ko", |
|
"autonym": "ߒߞߏ", |
|
"speakers": 626370, |
|
"family": "Artificial Language", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gronings", |
|
"autonym": "Gronings", |
|
"speakers": 622094, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mongo", |
|
"autonym": "Mongo", |
|
"speakers": 620858, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dangaura Tharu", |
|
"autonym": "Dangaura Tharu", |
|
"speakers": 606558, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Neapolitan", |
|
"autonym": "Neapolitan", |
|
"speakers": 605306, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nuer", |
|
"autonym": "Thok Nath", |
|
"speakers": 591427, |
|
"family": "Nilotic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Breton", |
|
"autonym": "Brezhoneg", |
|
"speakers": 563140, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Pijin", |
|
"autonym": "Pijin", |
|
"speakers": 561780, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Réunion Creole French", |
|
"autonym": "Réunion Creole French", |
|
"speakers": 559185, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Avaric", |
|
"autonym": "Avaric", |
|
"speakers": 552716, |
|
"family": "Nakh-Daghestanian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ndonga", |
|
"autonym": "Ndonga", |
|
"speakers": 552315, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ossetic", |
|
"autonym": "Ирон", |
|
"speakers": 541444, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Udmurt", |
|
"autonym": "Udmurt", |
|
"speakers": 538544, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ligurian", |
|
"autonym": "Ligure", |
|
"speakers": 536663, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kabuverdianu", |
|
"autonym": "Kabuverdianu", |
|
"speakers": 530762, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rusyn", |
|
"autonym": "Rusyn", |
|
"speakers": 527075, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mari", |
|
"autonym": "Mari", |
|
"speakers": 524371, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sicilian", |
|
"autonym": "Sicilianu", |
|
"speakers": 511702, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Langi", |
|
"autonym": "Kɨlaangi", |
|
"speakers": 509409, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Balti", |
|
"autonym": "Balti", |
|
"speakers": 502520, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Eastern Huasteca Nahuatl", |
|
"autonym": "Eastern Huasteca Nahuatl", |
|
"speakers": 501735, |
|
"family": "Uto-Aztecan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Western Huasteca Nahuatl", |
|
"autonym": "Western Huasteca Nahuatl", |
|
"speakers": 501735, |
|
"family": "Uto-Aztecan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Silesian", |
|
"autonym": "Ślōnski", |
|
"speakers": 497670, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kara-Kalpak", |
|
"autonym": "Kara-Kalpak", |
|
"speakers": 489046, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gujari", |
|
"autonym": "Gujari", |
|
"speakers": 467002, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sirmauri", |
|
"autonym": "Sirmauri", |
|
"speakers": 464132, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Maltese", |
|
"autonym": "Malti", |
|
"speakers": 457267, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bantawa", |
|
"autonym": "Bantawa", |
|
"speakers": 454918, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Yakut", |
|
"autonym": "Саха Тыла", |
|
"speakers": 453510, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Masalit", |
|
"autonym": "Masalit", |
|
"speakers": 451060, |
|
"family": "Maban", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Jju", |
|
"autonym": "Kaje", |
|
"speakers": 449459, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Adyghe", |
|
"autonym": "Adyghe", |
|
"speakers": 444583, |
|
"family": "Abkhaz-Adyge", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Yemba", |
|
"autonym": "Yemba", |
|
"speakers": 443920, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mingrelian", |
|
"autonym": "Mingrelian", |
|
"speakers": 439670, |
|
"family": "Kartvelian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Erzya", |
|
"autonym": "Эрзянь Кель", |
|
"speakers": 439338, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Taita", |
|
"autonym": "Kitaita", |
|
"speakers": 438929, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Central Mazahua", |
|
"autonym": "Central Mazahua", |
|
"speakers": 437410, |
|
"family": "Otomanguean", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Vunjo", |
|
"autonym": "Kyivunjo", |
|
"speakers": 433291, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rombo", |
|
"autonym": "Kihorombo", |
|
"speakers": 433291, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Machame", |
|
"autonym": "Kimachame", |
|
"speakers": 433291, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Khmu", |
|
"autonym": "Khmu", |
|
"speakers": 431949, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Fang", |
|
"autonym": "Fang", |
|
"speakers": 426451, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kinaray-a", |
|
"autonym": "Kinaray-A", |
|
"speakers": 425806, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kuy", |
|
"autonym": "Kuy", |
|
"speakers": 421207, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Luxembourgish", |
|
"autonym": "Lëtzebuergesch", |
|
"speakers": 421015, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sranan Tongo", |
|
"autonym": "Sranan Tongo", |
|
"speakers": 414507, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Jola-Fonyi", |
|
"autonym": "Joola", |
|
"speakers": 409146, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Western Tamang", |
|
"autonym": "Western Tamang", |
|
"speakers": 394263, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Pontic", |
|
"autonym": "Pontic", |
|
"speakers": 392463, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Negeri Sembilan Malay", |
|
"autonym": "Negeri Sembilan Malay", |
|
"speakers": 391825, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ngiemboon", |
|
"autonym": "Shwóŋò Ngiembɔɔn", |
|
"speakers": 388430, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ghomala", |
|
"autonym": "Ghomala", |
|
"speakers": 388430, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Divehi", |
|
"autonym": "Divehi", |
|
"speakers": 388044, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Saurashtra", |
|
"autonym": "Saurashtra", |
|
"speakers": 384566, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Fiji Hindi", |
|
"autonym": "Fiji Hindi", |
|
"speakers": 383749, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nobiin", |
|
"autonym": "Nobiin", |
|
"speakers": 378161, |
|
"family": "Nubian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Balkan Gagauz Turkish", |
|
"autonym": "Balkan Gagauz Turkish", |
|
"speakers": 377280, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kerinci", |
|
"autonym": "Kerinci", |
|
"speakers": 373836, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Parkari Koli", |
|
"autonym": "Parkari Koli", |
|
"speakers": 373602, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dzongkha", |
|
"autonym": "རྫོང་ཁ", |
|
"speakers": 370341, |
|
"family": "Bookkeeping", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Pökoot", |
|
"autonym": "Pökoot", |
|
"speakers": 369343, |
|
"family": "Nilotic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dargwa", |
|
"autonym": "Dargwa", |
|
"speakers": 368477, |
|
"family": "Nakh-Daghestanian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Limbu", |
|
"autonym": "Limbu", |
|
"speakers": 368085, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Fijian", |
|
"autonym": "Fijian", |
|
"speakers": 365030, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rana Tharu", |
|
"autonym": "Rana Tharu", |
|
"speakers": 363935, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kom", |
|
"autonym": "Kom", |
|
"speakers": 360685, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Icelandic", |
|
"autonym": "Íslenska", |
|
"speakers": 350734, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Khowar", |
|
"autonym": "Khowar", |
|
"speakers": 350252, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tolaki", |
|
"autonym": "Tolaki", |
|
"speakers": 347134, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Naxi", |
|
"autonym": "Naxi", |
|
"speakers": 334565, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Eastern Magar", |
|
"autonym": "Eastern Magar", |
|
"speakers": 333607, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bamun", |
|
"autonym": "Bamun", |
|
"speakers": 332940, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Basaa", |
|
"autonym": "Ɓàsàa", |
|
"speakers": 332940, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Koyra Chiini", |
|
"autonym": "Koyra Ciini", |
|
"speakers": 332408, |
|
"family": "Songhay", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Indus Kohistani", |
|
"autonym": "Indus Kohistani", |
|
"speakers": 326901, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gayo", |
|
"autonym": "Gayo", |
|
"speakers": 320431, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ngazidja Comorian", |
|
"autonym": "Ngazidja Comorian", |
|
"speakers": 313124, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Buriat", |
|
"autonym": "Buriat", |
|
"speakers": 311788, |
|
"family": "Mongolic-Khitan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rinconada Bikol", |
|
"autonym": "Rinconada Bikol", |
|
"speakers": 305707, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Medumba", |
|
"autonym": "Medumba", |
|
"speakers": 305195, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ao Naga", |
|
"autonym": "Ao Naga", |
|
"speakers": 305001, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Badaga", |
|
"autonym": "Badaga", |
|
"speakers": 305001, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kochila Tharu", |
|
"autonym": "Kochila Tharu", |
|
"speakers": 303279, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Moksha", |
|
"autonym": "Мокшень Кяль", |
|
"speakers": 297616, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tae'", |
|
"autonym": "Tae'", |
|
"speakers": 293729, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nzima", |
|
"autonym": "Nzima", |
|
"speakers": 293402, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nama", |
|
"autonym": "Khoekhoegowab", |
|
"speakers": 289308, |
|
"family": "Khoe-Kwadi", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ndzwani Comorian", |
|
"autonym": "Ndzwani Comorian", |
|
"speakers": 287736, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kumyk", |
|
"autonym": "Kumyk", |
|
"speakers": 283444, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mundang", |
|
"autonym": "Mundaŋ", |
|
"speakers": 277450, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mapuche", |
|
"autonym": "Mapudungun", |
|
"speakers": 272802, |
|
"family": "Araucanian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Western Cham", |
|
"autonym": "Western Cham", |
|
"speakers": 270832, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bislama", |
|
"autonym": "Bislama", |
|
"speakers": 268500, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lü", |
|
"autonym": "Lü", |
|
"speakers": 264864, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tai Nüa", |
|
"autonym": "Tai Nüa", |
|
"speakers": 264864, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Wadiyara Koli", |
|
"autonym": "Wadiyara Koli", |
|
"speakers": 256851, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kachi Koli", |
|
"autonym": "Kachi Koli", |
|
"speakers": 256851, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lezghian", |
|
"autonym": "Lezghian", |
|
"speakers": 255100, |
|
"family": "Nakh-Daghestanian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Komi", |
|
"autonym": "Komi", |
|
"speakers": 255100, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Samoan", |
|
"autonym": "Samoan", |
|
"speakers": 252717, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Western Magar", |
|
"autonym": "Western Magar", |
|
"speakers": 251722, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Samburu", |
|
"autonym": "Kisampur", |
|
"speakers": 246228, |
|
"family": "Nilotic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Crimean Tatar", |
|
"autonym": "Crimean Tatar", |
|
"speakers": 245968, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mandar", |
|
"autonym": "Mandar", |
|
"speakers": 245664, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sangir", |
|
"autonym": "Sangir", |
|
"speakers": 245664, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Extremaduran", |
|
"autonym": "Extremaduran", |
|
"speakers": 245077, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Central Huasteca Nahuatl", |
|
"autonym": "Central Huasteca Nahuatl", |
|
"speakers": 244435, |
|
"family": "Uto-Aztecan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Zeelandic", |
|
"autonym": "Zeelandic", |
|
"speakers": 241926, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Karachay-Balkar", |
|
"autonym": "Karachay-Balkar", |
|
"speakers": 240927, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Colognian", |
|
"autonym": "Kölsch", |
|
"speakers": 240479, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Herero", |
|
"autonym": "Herero", |
|
"speakers": 239336, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Saafi-Saafi", |
|
"autonym": "Saafi-Saafi", |
|
"speakers": 236046, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Zaghawa", |
|
"autonym": "Zaghawa", |
|
"speakers": 232364, |
|
"family": "Saharan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ingush", |
|
"autonym": "Ingush", |
|
"speakers": 226755, |
|
"family": "Nakh-Daghestanian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Saho", |
|
"autonym": "Saho", |
|
"speakers": 218923, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tomo Kan Dogon", |
|
"autonym": "Tomo Kan Dogon", |
|
"speakers": 215087, |
|
"family": "Dogon", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Papiamento", |
|
"autonym": "Papiamentu", |
|
"speakers": 211640, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Syriac", |
|
"autonym": "ܣܘܪܝܝܐ", |
|
"speakers": 210659, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nyasa Tonga", |
|
"autonym": "Nyasa Tonga", |
|
"speakers": 207727, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mafa", |
|
"autonym": "Mafa", |
|
"speakers": 205313, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Punu", |
|
"autonym": "Punu", |
|
"speakers": 200782, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tyap", |
|
"autonym": "Katab", |
|
"speakers": 199046, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Xaasongaxango", |
|
"autonym": "Xaasongaxango", |
|
"speakers": 195534, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tuvinian", |
|
"autonym": "Tuvinian", |
|
"speakers": 184239, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Central Dusun", |
|
"autonym": "Central Dusun", |
|
"speakers": 182852, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Riang (India)", |
|
"autonym": "Riang (India)", |
|
"speakers": 172392, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Comorian", |
|
"autonym": "Comorian", |
|
"speakers": 170720, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bomu", |
|
"autonym": "Bomu", |
|
"speakers": 168159, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Latgalian", |
|
"autonym": "Latgalian", |
|
"speakers": 167429, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Navajo", |
|
"autonym": "Diné Bizaad", |
|
"speakers": 166320, |
|
"family": "Athabaskan-Eyak-Tlingit", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Jenaama Bozo", |
|
"autonym": "Jenaama Bozo", |
|
"speakers": 166204, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Corsican", |
|
"autonym": "Corsu", |
|
"speakers": 162836, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bafut", |
|
"autonym": "Bafut", |
|
"speakers": 158146, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sherpa", |
|
"autonym": "Sherpa", |
|
"speakers": 157705, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kirmanjki", |
|
"autonym": "Kirmanjki", |
|
"speakers": 155833, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hiri Motu", |
|
"autonym": "Hiri Motu", |
|
"speakers": 152449, |
|
"family": "Pidgin", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kako", |
|
"autonym": "Kakɔ", |
|
"speakers": 149823, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Akoose", |
|
"autonym": "Akoose", |
|
"speakers": 149823, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Selayar", |
|
"autonym": "Selayar", |
|
"speakers": 144194, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Māori", |
|
"autonym": "Māori", |
|
"speakers": 137913, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rajbanshi", |
|
"autonym": "Rajbanshi", |
|
"speakers": 133443, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Duala", |
|
"autonym": "Duálá", |
|
"speakers": 133176, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Wayuu", |
|
"autonym": "Wayuu", |
|
"speakers": 132529, |
|
"family": "Arawakan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Vai", |
|
"autonym": "ꕙꔤ", |
|
"speakers": 131906, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Eastern Tamang", |
|
"autonym": "Eastern Tamang", |
|
"speakers": 130410, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Metaʼ", |
|
"autonym": "Metaʼ", |
|
"speakers": 130401, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Pennsylvania German", |
|
"autonym": "Pennsylvania German", |
|
"speakers": 129729, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rwa", |
|
"autonym": "Kiruwa", |
|
"speakers": 128816, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Torwali", |
|
"autonym": "توروالی", |
|
"speakers": 123756, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mandjak", |
|
"autonym": "Mandjak", |
|
"speakers": 121170, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tshangla", |
|
"autonym": "Tshangla", |
|
"speakers": 117348, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sangu", |
|
"autonym": "Ishisangu", |
|
"speakers": 117106, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Karelian", |
|
"autonym": "Karelian", |
|
"speakers": 116212, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ladino", |
|
"autonym": "Ladino", |
|
"speakers": 112781, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ifè", |
|
"autonym": "Ifè", |
|
"speakers": 111910, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gagauz", |
|
"autonym": "Gagauz", |
|
"speakers": 111028, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lak", |
|
"autonym": "Lak", |
|
"speakers": 110543, |
|
"family": "Nakh-Daghestanian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sassarese Sardinian", |
|
"autonym": "Sassarese Sardinian", |
|
"speakers": 106085, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tongan", |
|
"autonym": "Lea Fakatonga", |
|
"speakers": 100790, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Balanta-Ganja", |
|
"autonym": "Balanta-Ganja", |
|
"speakers": 95992, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ngomba", |
|
"autonym": "Ndaꞌa", |
|
"speakers": 94333, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Seselwa Creole French", |
|
"autonym": "Seselwa Creole French", |
|
"speakers": 94061, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Abkhazian", |
|
"autonym": "Аԥсшәа", |
|
"speakers": 91953, |
|
"family": "Abkhaz-Adyge", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tahitian", |
|
"autonym": "Tahitian", |
|
"speakers": 91488, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Inuktitut", |
|
"autonym": "Inuktitut", |
|
"speakers": 90466, |
|
"family": "Eskimo-Aleut", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Plautdietsch", |
|
"autonym": "Plautdietsch", |
|
"speakers": 90466, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bishnupriya", |
|
"autonym": "Bishnupriya", |
|
"speakers": 90174, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bafia", |
|
"autonym": "Rikpa", |
|
"speakers": 88784, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gurung", |
|
"autonym": "Gurung", |
|
"speakers": 87951, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Eastern Cham", |
|
"autonym": "Eastern Cham", |
|
"speakers": 87862, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Western Lawa", |
|
"autonym": "Western Lawa", |
|
"speakers": 87751, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mankanya", |
|
"autonym": "Mankanya", |
|
"speakers": 83151, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lepcha", |
|
"autonym": "Lepcha", |
|
"speakers": 79743, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Blin", |
|
"autonym": "Blin", |
|
"speakers": 79056, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bateri", |
|
"autonym": "Bateri", |
|
"speakers": 78843, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kathoriya Tharu", |
|
"autonym": "Kathoriya Tharu", |
|
"speakers": 72787, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Scottish Gaelic", |
|
"autonym": "Gàidhlig", |
|
"speakers": 72337, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Faroese", |
|
"autonym": "Føroyskt", |
|
"speakers": 71351, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Võro", |
|
"autonym": "Võro", |
|
"speakers": 70031, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kenyang", |
|
"autonym": "Kɛnyaŋ", |
|
"speakers": 69362, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gilbertese", |
|
"autonym": "Gilbertese", |
|
"speakers": 67078, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bube", |
|
"autonym": "Bube", |
|
"speakers": 66058, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mentawai", |
|
"autonym": "Mentawai", |
|
"speakers": 64086, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Arpitan", |
|
"autonym": "Arpitan", |
|
"speakers": 63777, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Komi-Permyak", |
|
"autonym": "Komi-Permyak", |
|
"speakers": 63775, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Koro", |
|
"autonym": "Koro", |
|
"speakers": 63207, |
|
"family": "Mande", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Marshallese", |
|
"autonym": "Marshallese", |
|
"speakers": 56879, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tornedalen Finnish", |
|
"autonym": "Tornedalen Finnish", |
|
"speakers": 56114, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kalaallisut", |
|
"autonym": "Kalaallisut", |
|
"speakers": 55440, |
|
"family": "Eskimo-Aleut", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Braj", |
|
"autonym": "Braj", |
|
"speakers": 54370, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Guianese Creole French", |
|
"autonym": "Guianese Creole French", |
|
"speakers": 51872, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Northern Sami", |
|
"autonym": "Davvisámegiella", |
|
"speakers": 51530, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Anii", |
|
"autonym": "Anii Kagɩja", |
|
"speakers": 51507, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kaingang", |
|
"autonym": "Kanhgág", |
|
"speakers": 50812, |
|
"family": "Nuclear-Macro-Je", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kashubian", |
|
"autonym": "Kashubian", |
|
"speakers": 49767, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Koro Wachi", |
|
"autonym": "Koro Wachi", |
|
"speakers": 46718, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chamorro", |
|
"autonym": "Chamorro", |
|
"speakers": 46325, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Atsam", |
|
"autonym": "Atsam", |
|
"speakers": 44946, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bushi", |
|
"autonym": "Bushi", |
|
"speakers": 44620, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Romansh", |
|
"autonym": "Rumantsch", |
|
"speakers": 42020, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Aghem", |
|
"autonym": "Aghem", |
|
"speakers": 38843, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kuvi", |
|
"autonym": "Kuvi", |
|
"speakers": 38457, |
|
"family": "Dravidian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Noon", |
|
"autonym": "Noon", |
|
"speakers": 37767, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Friulian", |
|
"autonym": "Furlan", |
|
"speakers": 37442, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Thulung", |
|
"autonym": "Thulung", |
|
"speakers": 36393, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Emilian", |
|
"autonym": "Emilian", |
|
"speakers": 31201, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chuukese", |
|
"autonym": "Chuukese", |
|
"speakers": 30731, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Western Mari", |
|
"autonym": "Western Mari", |
|
"speakers": 29762, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hawaiian", |
|
"autonym": "ʻŌlelo HawaiʻI", |
|
"speakers": 29605, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mru", |
|
"autonym": "Mru", |
|
"speakers": 29277, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Cajun French", |
|
"autonym": "Cajun French", |
|
"speakers": 27942, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nheengatu", |
|
"autonym": "Nheẽgatu", |
|
"speakers": 26171, |
|
"family": "Tupian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Aragonese", |
|
"autonym": "Aragonés", |
|
"speakers": 26008, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Cherokee", |
|
"autonym": "Ꮳꮃꭹ", |
|
"speakers": 25613, |
|
"family": "Iroquoian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sinte Romani", |
|
"autonym": "Sinte Romani", |
|
"speakers": 24372, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ojibwa", |
|
"autonym": "Ojibwa", |
|
"speakers": 23747, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Pohnpeian", |
|
"autonym": "Pohnpeian", |
|
"speakers": 23560, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Laz", |
|
"autonym": "Laz", |
|
"speakers": 22965, |
|
"family": "Kartvelian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Muslim Tat", |
|
"autonym": "Muslim Tat", |
|
"speakers": 22453, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Central Yupik", |
|
"autonym": "Central Yupik", |
|
"speakers": 20956, |
|
"family": "Eskimo-Aleut", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dakota", |
|
"autonym": "Dakota", |
|
"speakers": 20832, |
|
"family": "Siouan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Southern Altai", |
|
"autonym": "Southern Altai", |
|
"speakers": 19841, |
|
"family": "Turkic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Amo", |
|
"autonym": "Amo", |
|
"speakers": 18620, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Guajajára", |
|
"autonym": "Guajajára", |
|
"speakers": 17784, |
|
"family": "Tupian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hanunoo", |
|
"autonym": "Hanunoo", |
|
"speakers": 17469, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tsakhur", |
|
"autonym": "Tsakhur", |
|
"speakers": 16329, |
|
"family": "Nakh-Daghestanian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Palauan", |
|
"autonym": "Palauan", |
|
"speakers": 16047, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Sanskrit", |
|
"autonym": "संस्कृत भाषा", |
|
"speakers": 15913, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Bassari", |
|
"autonym": "Bassari", |
|
"speakers": 15264, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Oji-Cree", |
|
"autonym": "Oji-Cree", |
|
"speakers": 15078, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Khamti", |
|
"autonym": "Khamti", |
|
"speakers": 13527, |
|
"family": "Tai-Kadai", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Upper Sorbian", |
|
"autonym": "Hornjoserbšćina", |
|
"speakers": 12826, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chipewyan", |
|
"autonym": "Chipewyan", |
|
"speakers": 12816, |
|
"family": "Athabaskan-Eyak-Tlingit", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Innu-aimun", |
|
"autonym": "Innu-Aimun", |
|
"speakers": 12062, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lower Silesian", |
|
"autonym": "Lower Silesian", |
|
"speakers": 11868, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Walser", |
|
"autonym": "Walser", |
|
"speakers": 11377, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Choctaw", |
|
"autonym": "Chahta", |
|
"speakers": 10977, |
|
"family": "Muskogean", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tagbanwa", |
|
"autonym": "Tagbanwa", |
|
"speakers": 10045, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Xavánte", |
|
"autonym": "Xavánte", |
|
"speakers": 9951, |
|
"family": "Nuclear-Macro-Je", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tuvalu", |
|
"autonym": "Tuvalu", |
|
"speakers": 9868, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Northern Frisian", |
|
"autonym": "Nordfriisk", |
|
"speakers": 9619, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Roviana", |
|
"autonym": "Roviana", |
|
"speakers": 9591, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Wallisian", |
|
"autonym": "Wallisian", |
|
"speakers": 9512, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tavringer Romani", |
|
"autonym": "Tavringer Romani", |
|
"speakers": 9488, |
|
"family": "Speech Register", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Cree", |
|
"autonym": "Cree", |
|
"speakers": 9047, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kwasio", |
|
"autonym": "Kwasio", |
|
"speakers": 8878, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lakota", |
|
"autonym": "LakȟólʼIyapi", |
|
"speakers": 8316, |
|
"family": "Siouan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kosraean", |
|
"autonym": "Kosraean", |
|
"speakers": 7990, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Inupiaq", |
|
"autonym": "Inupiaq", |
|
"speakers": 7983, |
|
"family": "Eskimo-Aleut", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Zoroastrian Dari", |
|
"autonym": "Zoroastrian Dari", |
|
"speakers": 7983, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tasawaq", |
|
"autonym": "Tasawaq Senni", |
|
"speakers": 7970, |
|
"family": "Songhay", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Buhid", |
|
"autonym": "Buhid", |
|
"speakers": 7970, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mi'kmaw", |
|
"autonym": "LʼNuiʼSuti", |
|
"speakers": 7916, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Hassaniyya", |
|
"autonym": "Hassaniyya", |
|
"speakers": 7239, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lower Sorbian", |
|
"autonym": "Dolnoserbšćina", |
|
"speakers": 6974, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Nauru", |
|
"autonym": "Nauru", |
|
"speakers": 6930, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Eastern Lawa", |
|
"autonym": "Eastern Lawa", |
|
"speakers": 6898, |
|
"family": "Austroasiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Yapese", |
|
"autonym": "Yapese", |
|
"speakers": 6556, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Atikamekw", |
|
"autonym": "Atikamekw", |
|
"speakers": 6408, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Piedmontese", |
|
"autonym": "Piedmontese", |
|
"speakers": 6178, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kalo Finnish Romani", |
|
"autonym": "Kalo Finnish Romani", |
|
"speakers": 5015, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Siksiká", |
|
"autonym": "Siksiká", |
|
"speakers": 4900, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "East Futuna", |
|
"autonym": "East Futuna", |
|
"speakers": 4756, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Taroko", |
|
"autonym": "Patas Taroko", |
|
"speakers": 4721, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Plains Cree", |
|
"autonym": "Plains Cree", |
|
"speakers": 4146, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Safaliba", |
|
"autonym": "Safaliba", |
|
"speakers": 4108, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Muscogee", |
|
"autonym": "Mvskoke", |
|
"speakers": 3992, |
|
"family": "Muskogean", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Veps", |
|
"autonym": "Veps", |
|
"speakers": 3543, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ménik", |
|
"autonym": "Ménik", |
|
"speakers": 3305, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Virgin Islands Creole English", |
|
"autonym": "Virgin Islands Creole English", |
|
"speakers": 3113, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Turoyo", |
|
"autonym": "Turoyo", |
|
"speakers": 3035, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ulithian", |
|
"autonym": "Ulithian", |
|
"speakers": 2971, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Rotuman", |
|
"autonym": "Rotuman", |
|
"speakers": 2527, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Warlpiri", |
|
"autonym": "Warlpiri", |
|
"speakers": 2496, |
|
"family": "Pama-Nyungan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Yangben", |
|
"autonym": "Nuasue", |
|
"speakers": 2303, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Slave", |
|
"autonym": "Slave", |
|
"speakers": 2299, |
|
"family": "Athabaskan-Eyak-Tlingit", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Dogrib", |
|
"autonym": "Dogrib", |
|
"speakers": 2111, |
|
"family": "Athabaskan-Eyak-Tlingit", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Eastern Frisian", |
|
"autonym": "Eastern Frisian", |
|
"speakers": 2004, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Cornish", |
|
"autonym": "Kernewek", |
|
"speakers": 1973, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Swampy Cree", |
|
"autonym": "ᓀᐦᐃᓇᐍᐏᐣ", |
|
"speakers": 1809, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Mohawk", |
|
"autonym": "KanienʼKéha", |
|
"speakers": 1772, |
|
"family": "Iroquoian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Manx", |
|
"autonym": "Gaelg", |
|
"speakers": 1719, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lule Sami", |
|
"autonym": "Julevsámegiella", |
|
"speakers": 1530, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Naskapi", |
|
"autonym": "Naskapi", |
|
"speakers": 1395, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tokelau", |
|
"autonym": "Tokelau", |
|
"speakers": 1285, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Niuean", |
|
"autonym": "Niuean", |
|
"speakers": 1120, |
|
"family": "Austronesian", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Saterland Frisian", |
|
"autonym": "Saterland Frisian", |
|
"speakers": 962, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Seri", |
|
"autonym": "Seri", |
|
"speakers": 901, |
|
"family": null, |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chilcotin", |
|
"autonym": "Chilcotin", |
|
"speakers": 867, |
|
"family": "Athabaskan-Eyak-Tlingit", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Latin", |
|
"autonym": "Lingua Latina", |
|
"speakers": 820, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Halkomelem", |
|
"autonym": "Halkomelem", |
|
"speakers": 716, |
|
"family": "Salishan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Michif", |
|
"autonym": "Michif", |
|
"speakers": 678, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Skolt Sami", |
|
"autonym": "SääʹMǩiõll", |
|
"speakers": 613, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Inari Sami", |
|
"autonym": "Anarâškielâ", |
|
"speakers": 613, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lillooet", |
|
"autonym": "Lillooet", |
|
"speakers": 528, |
|
"family": "Salishan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Okanagan", |
|
"autonym": "Okanagan", |
|
"speakers": 490, |
|
"family": "Salishan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Maliseet-Passamaquoddy", |
|
"autonym": "Maliseet-Passamaquoddy", |
|
"speakers": 490, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Kwakʼwala", |
|
"autonym": "KwakʼWala", |
|
"speakers": 377, |
|
"family": "Wakashan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Northern East Cree", |
|
"autonym": "Northern East Cree", |
|
"speakers": 377, |
|
"family": "Algic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Gwichʼin", |
|
"autonym": "GwichʼIn", |
|
"speakers": 302, |
|
"family": "Athabaskan-Eyak-Tlingit", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Esperanto", |
|
"autonym": "Esperanto", |
|
"speakers": 301, |
|
"family": "Artificial Language", |
|
"average": 0.0, |
|
"in_benchmark": true, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Southern Sami", |
|
"autonym": "Åarjelsaemien Gïele", |
|
"speakers": 296, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Tsakonian", |
|
"autonym": "Tsakonian", |
|
"speakers": 202, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ingrian", |
|
"autonym": "Ingrian", |
|
"speakers": 142, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Interlingua", |
|
"autonym": "Interlingua", |
|
"speakers": 136, |
|
"family": "Artificial Language", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Araona", |
|
"autonym": "Araona", |
|
"speakers": 105, |
|
"family": "Pano-Tacanan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Prussian", |
|
"autonym": "Prūsiskan", |
|
"speakers": 38, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Interlingue", |
|
"autonym": "Interlingue", |
|
"speakers": 1, |
|
"family": "Artificial Language", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Literary Chinese", |
|
"autonym": "Literary Chinese", |
|
"speakers": 0, |
|
"family": "Sino-Tibetan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Ido", |
|
"autonym": "Ido", |
|
"speakers": 0, |
|
"family": "Artificial Language", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Lojban", |
|
"autonym": "La .Lojban.", |
|
"speakers": 0, |
|
"family": "Artificial Language", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Jutish", |
|
"autonym": "Jutish", |
|
"speakers": 0, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Votic", |
|
"autonym": "Votic", |
|
"speakers": 0, |
|
"family": "Uralic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Geez", |
|
"autonym": "Geez", |
|
"speakers": 0, |
|
"family": "Afro-Asiatic", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Osage", |
|
"autonym": "𐓏𐓘𐓻𐓘𐓻𐓟", |
|
"speakers": 0, |
|
"family": "Siouan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Romagnol", |
|
"autonym": "Romagnol", |
|
"speakers": 0, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Church Slavic", |
|
"autonym": "Church Slavic", |
|
"speakers": 0, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Samogitian", |
|
"autonym": "Samogitian", |
|
"speakers": 0, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Obolo", |
|
"autonym": "Obolo", |
|
"speakers": 0, |
|
"family": "Atlantic-Congo", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Chickasaw", |
|
"autonym": "Chikashshanompaʼ", |
|
"speakers": 0, |
|
"family": "Muskogean", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Caddo", |
|
"autonym": "Caddo", |
|
"speakers": 0, |
|
"family": "Caddoan", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
}, |
|
{ |
|
"language_name": "Palatine German", |
|
"autonym": "Palatine German", |
|
"speakers": 0, |
|
"family": "Indo-European", |
|
"average": 0.0, |
|
"in_benchmark": false, |
|
"NaN": 0.0, |
|
"classification_accuracy": 0.0, |
|
"language_modeling_chrf": 0.0, |
|
"translation_bleu": 0.0, |
|
"translation_chrf": 0.0 |
|
} |
|
], |
|
"dataset_table": [ |
|
{ |
|
"name": "FLORES+", |
|
"author": "Meta", |
|
"author_url": "https://ai.meta.com", |
|
"url": "https://huggingface.co/datasets/openlanguagedata/flores_plus", |
|
"n_languages": 200, |
|
"tasks": [ |
|
"translation", |
|
"classification", |
|
"language_modeling" |
|
], |
|
"parallel": 1.0, |
|
"base": "FLORES", |
|
"implemented": 1.0 |
|
}, |
|
{ |
|
"name": "FLEURS", |
|
"author": "Meta", |
|
"author_url": "https://ai.meta.com", |
|
"url": "https://huggingface.co/datasets/google/fleurs", |
|
"n_languages": 102, |
|
"tasks": [ |
|
"speech_recognition" |
|
], |
|
"parallel": 1.0, |
|
"base": "FLORES", |
|
"implemented": 1.0 |
|
}, |
|
{ |
|
"name": "CommonVoice", |
|
"author": "Mozilla", |
|
"author_url": "https://mozilla.ai", |
|
"url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0", |
|
"n_languages": 124, |
|
"tasks": [ |
|
"speech_recognition" |
|
], |
|
"parallel": null, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "MMMLU", |
|
"author": "OpenAI", |
|
"author_url": "https://openai.com", |
|
"url": "https://huggingface.co/datasets/openai/MMMLU", |
|
"n_languages": 14, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": 1.0, |
|
"base": "MMLU", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "AfriMMLU", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/afrimmlu", |
|
"n_languages": 17, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": 1.0, |
|
"base": "MMLU", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Okapi MMLU", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu", |
|
"n_languages": 16, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": 1.0, |
|
"base": "MMLU", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Global MMLU", |
|
"author": "Cohere", |
|
"author_url": "https://cohere.com", |
|
"url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU", |
|
"n_languages": 42, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": 1.0, |
|
"base": "MMLU", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "MGSM", |
|
"author": "Google", |
|
"author_url": "https://google.com", |
|
"url": "https://huggingface.co/datasets/juletxara/mgsm", |
|
"n_languages": 10, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": 1.0, |
|
"base": "MGSM", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "AfriMGSM", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/afrimgsm", |
|
"n_languages": 18, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": 1.0, |
|
"base": "MGSM", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Okapi ARC Challenge", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge", |
|
"n_languages": 31, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": 1.0, |
|
"base": "AI2 ARC", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Uhuru ARC Easy", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy", |
|
"n_languages": 6, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": 1.0, |
|
"base": "AI2 ARC", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Okapi TruthfulQA", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data", |
|
"n_languages": 31, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": 1.0, |
|
"base": "TruthfulQA", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Uhura TruthfulQA", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa", |
|
"n_languages": 6, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": 1.0, |
|
"base": "TruthfulQA", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "XNLI", |
|
"author": "Meta", |
|
"author_url": "https://ai.meta.com", |
|
"url": "https://huggingface.co/datasets/facebook/xnli", |
|
"n_languages": 14, |
|
"tasks": [ |
|
"classification" |
|
], |
|
"parallel": 1.0, |
|
"base": "XNLI", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "AfriXNLI", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/afrixnli", |
|
"n_languages": 18, |
|
"tasks": [ |
|
"classification" |
|
], |
|
"parallel": 1.0, |
|
"base": "XNLI", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Okapi HellaSwag", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag", |
|
"n_languages": 31, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": 1.0, |
|
"base": "HellaSwag", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "WikiANN / PAN-X", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/unimelb-nlp/wikiann", |
|
"n_languages": 176, |
|
"tasks": [ |
|
"ner" |
|
], |
|
"parallel": 0.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "MSVAMP", |
|
"author": "Microsoft", |
|
"author_url": "https://microsoft.com", |
|
"url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP", |
|
"n_languages": 10, |
|
"tasks": [ |
|
"math" |
|
], |
|
"parallel": 1.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "XLSUM", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/csebuetnlp/xlsum", |
|
"n_languages": 45, |
|
"tasks": [ |
|
"summarization" |
|
], |
|
"parallel": 1.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "SEA-IFEVAL", |
|
"author": "AI Singapore", |
|
"author_url": "https://aisingapore.org", |
|
"url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval", |
|
"n_languages": 7, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": 1.0, |
|
"base": "IFEVAL", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "XTREME", |
|
"author": "Google", |
|
"author_url": "https://google.com", |
|
"url": "https://huggingface.co/datasets/google/xtreme", |
|
"n_languages": 40, |
|
"tasks": [ |
|
"translation", |
|
"classification", |
|
"question_answering", |
|
"ner" |
|
], |
|
"parallel": null, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "XGLUE", |
|
"author": "Microsoft", |
|
"author_url": "https://microsoft.com", |
|
"url": "https://huggingface.co/datasets/microsoft/xglue", |
|
"n_languages": 18, |
|
"tasks": [ |
|
"pos" |
|
], |
|
"parallel": null, |
|
"base": "GLUE", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "IndicGLUE", |
|
"author": "AI4Bharat", |
|
"author_url": "https://models.ai4bharat.org", |
|
"url": "https://huggingface.co/datasets/ai4bharat/indic_glue", |
|
"n_languages": 11, |
|
"tasks": [ |
|
"question_answering" |
|
], |
|
"parallel": null, |
|
"base": "GLUE", |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Opus Gnome", |
|
"author": "Helsinki NLP", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome", |
|
"n_languages": 187, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": 1.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Opus Paracrawl", |
|
"author": "Helsinki NLP", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl", |
|
"n_languages": 43, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": 0.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "CCAligned", |
|
"author": "Meta", |
|
"author_url": "https://ai.meta.com", |
|
"url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual", |
|
"n_languages": 137, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": 0.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "OPUS Collection", |
|
"author": "Helsinki NLP", |
|
"author_url": null, |
|
"url": "https://opus.nlpl.eu", |
|
"n_languages": 747, |
|
"tasks": [ |
|
"translation" |
|
], |
|
"parallel": 0.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "MasakhaNER", |
|
"author": "Masakhane", |
|
"author_url": "https://www.masakhane.io", |
|
"url": "https://huggingface.co/datasets/masakhane/masakhaner", |
|
"n_languages": 10, |
|
"tasks": [ |
|
"ner" |
|
], |
|
"parallel": null, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Multilingual Sentiments", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments", |
|
"n_languages": 12, |
|
"tasks": [ |
|
"sentiment_analysis" |
|
], |
|
"parallel": null, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "CulturaX", |
|
"author": "Academic", |
|
"author_url": null, |
|
"url": "https://huggingface.co/datasets/uonlp/CulturaX", |
|
"n_languages": 167, |
|
"tasks": [ |
|
"language_modeling" |
|
], |
|
"parallel": 0.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Tülu 3 SFT Mixture", |
|
"author": "AllenAI", |
|
"author_url": "https://allenai.org", |
|
"url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture", |
|
"n_languages": 70, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": 0.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "xP3", |
|
"author": "BigScience", |
|
"author_url": "https://bigscience.huggingface.co", |
|
"url": "https://huggingface.co/datasets/bigscience/xP3", |
|
"n_languages": 46, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": 0.0, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Aya", |
|
"author": "Cohere", |
|
"author_url": "https://cohere.com", |
|
"url": "https://huggingface.co/datasets/CohereForAI/aya_dataset", |
|
"n_languages": 65, |
|
"tasks": [ |
|
"instruction_following" |
|
], |
|
"parallel": null, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "Lanfrica", |
|
"author": "Lanfrica", |
|
"author_url": "https://lanfrica.com", |
|
"url": "https://lanfrica.com/records?language=yor&task=machine%20translation", |
|
"n_languages": 2200, |
|
"tasks": [ |
|
"datasets" |
|
], |
|
"parallel": null, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "HuggingFace Languages", |
|
"author": "HuggingFace", |
|
"author_url": "https://huggingface.co", |
|
"url": "https://huggingface.co/languages", |
|
"n_languages": 4680, |
|
"tasks": [ |
|
"datasets", |
|
"models" |
|
], |
|
"parallel": null, |
|
"base": null, |
|
"implemented": null |
|
}, |
|
{ |
|
"name": "HuggingFace Multilingual Datasets", |
|
"author": "HuggingFace", |
|
"author_url": "https://huggingface.co", |
|
"url": "https://huggingface.co/datasets?other=multilinguality:multilingual", |
|
"n_languages": 2012, |
|
"tasks": [ |
|
"datasets" |
|
], |
|
"parallel": 0.0, |
|
"base": null, |
|
"implemented": null |
|
} |
|
] |
|
} |