diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -1,4 +1,88 @@ [ + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.1135444374 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.294888625 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0732341445 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.1712917218 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.1518475015 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.4409012481 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2466291513 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.4728505876 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", @@ -76,6 +160,76 @@ "metric":"chrf", "score":0.4094860171 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1108973081 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.3531592039 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1247431931 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.3827364935 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.1672670776 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4344446004 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1673485041 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3498742372 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", @@ -853,6 +1007,132 @@ "metric":"chrf", "score":0.3320956129 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.2497447823 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5162841499 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3139021174 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6108991322 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.235010453 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4387602841 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.0992435005 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.3897491958 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0253971349 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.1941399108 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0169773321 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.1905807428 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", @@ -1350,6 +1630,41 @@ "metric":"chrf", "score":0.3475324071 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.1532167967 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4049784493 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.1825939096 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.4435726767 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", @@ -1511,6 +1826,90 @@ "metric":"chrf", "score":0.3243323503 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.2026417516 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.463337874 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.1217867685 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.2662662886 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2523784543 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.5195328715 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2422451252 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.5264317244 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", @@ -1588,6 +1987,76 @@ "metric":"chrf", "score":0.4448745325 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1557419708 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4249908572 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1551519146 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4046874828 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2524146198 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4953606649 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1826604742 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3887388562 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", @@ -2365,6 +2834,132 @@ "metric":"chrf", "score":0.4918283752 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.2986347511 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5339411724 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3783664491 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6540376697 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.2199531275 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4736214414 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.2571304866 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.5221658577 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0512543236 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2613232237 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0260385015 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.3274101513 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", @@ -2862,6 +3457,41 @@ "metric":"chrf", "score":0.4761547661 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2305488159 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4723700911 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.2058479152 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.4902380763 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", @@ -3023,6 +3653,90 @@ "metric":"chrf", "score":0.3459946232 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.2016755199 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.4502815524 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.1687061726 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.2849440478 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2569252635 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.5329928091 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2423805131 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.5144112629 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", @@ -3100,6 +3814,76 @@ "metric":"chrf", "score":0.4676592617 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1950569484 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4560500844 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1448904562 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4007813245 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2098690628 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4737398201 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1892048942 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3849575805 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", @@ -3877,6 +4661,132 @@ "metric":"chrf", "score":0.4748109447 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.3058774517 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5603224049 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3684068806 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6535736283 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.2253225205 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4798221167 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.2383027705 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.504994716 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0235872225 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2475231508 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0365961569 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.3050512265 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", @@ -4374,6 +5284,41 @@ "metric":"chrf", "score":0.4921734247 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2244630159 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4814457852 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.2007945741 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.4705479648 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", @@ -4535,6 +5480,90 @@ "metric":"chrf", "score":0.3456705882 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.1512958639 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.3039734334 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0801602615 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.1225273024 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.1445854242 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.3358973891 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.1013154049 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.3155908724 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", @@ -4612,6 +5641,76 @@ "metric":"chrf", "score":0.434347868 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.156849916 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.3691380603 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1100740183 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.3479696433 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.1735533986 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.393764966 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1575864364 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3025413929 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", @@ -5389,6 +6488,132 @@ "metric":"chrf", "score":0.4314516197 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.24401684 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.4592926922 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3739586622 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6802015628 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.1237069224 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.3000426144 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.0752750224 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.1972354123 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0257539048 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2236454943 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0203648136 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.2172604464 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", @@ -5886,6 +7111,41 @@ "metric":"chrf", "score":0.4241396601 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.1963447008 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4236533517 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.1559833307 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.3946264183 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", @@ -7559,6 +8819,90 @@ "metric":"chrf", "score":0.3079482476 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.278130538 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.5385292979 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.2384630665 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.3608782934 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.3417210123 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.5899435952 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2808352256 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.5279437548 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", @@ -7636,6 +8980,76 @@ "metric":"chrf", "score":0.4912165901 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.2496588936 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4813242802 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1805770029 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4382698967 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2478040527 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.5055046978 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.2299760176 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.4382930134 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", @@ -8413,6 +9827,132 @@ "metric":"chrf", "score":0.4516145469 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.4099406931 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.6343459464 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.4650961929 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.742377276 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.3081644584 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.567441399 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.2529515223 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.5121166935 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.1546333274 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.3730093916 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0699361494 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.4093787348 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", @@ -8910,6 +10450,41 @@ "metric":"chrf", "score":0.4826415387 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2813736334 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.5350839669 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.3532409319 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.6242374823 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", @@ -9071,6 +10646,90 @@ "metric":"chrf", "score":0.3921364269 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.2720072911 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.5061425801 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.1852281481 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.3131355766 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2661119281 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.5602064313 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.194000916 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.4535351144 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", @@ -9148,6 +10807,76 @@ "metric":"chrf", "score":0.454646328 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.2101918607 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4594084738 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.171792145 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4130816559 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2526226356 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.5007947756 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.2513374937 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.4492954272 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", @@ -9925,6 +11654,132 @@ "metric":"chrf", "score":0.4736891146 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.3362256422 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5937301471 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.4506461939 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.7014575648 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.2814491551 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.5513455988 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.2724659132 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.5135188138 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.1059783758 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.3306681685 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.044447102 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.379164388 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", @@ -10422,6 +12277,41 @@ "metric":"chrf", "score":0.493353195 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2407351505 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4734226532 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.3363563156 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.5891323556 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", @@ -10583,6 +12473,90 @@ "metric":"chrf", "score":0.314329989 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.339375511 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.566407043 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.2692320655 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.3685144367 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.3872368514 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.6253206413 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.3385588073 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.5846751763 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ar", @@ -10660,6 +12634,76 @@ "metric":"chrf", "score":0.4882539682 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.3101513593 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.5253923906 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.216177525 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4555999734 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.3246124272 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.5547058357 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.2300858071 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.4391471287 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"bn", @@ -11437,6 +13481,132 @@ "metric":"chrf", "score":0.5017455952 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.4137133794 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.6312868188 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.4812654706 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.7314221816 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.3696810103 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.5941720939 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.3374140159 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.5598126987 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.1552982789 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.4088650794 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0578542594 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.4227186624 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"or", @@ -11934,6 +14104,41 @@ "metric":"chrf", "score":0.4351160146 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.3488574237 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.5833892626 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.3525318348 + }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.600901419 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"vi", @@ -12095,6 +14300,90 @@ "metric":"chrf", "score":0.3699956681 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.2883756135 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.5452929372 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.2563045907 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.3587997566 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.3529391424 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.6061230642 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.3735935027 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.6118752881 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ar", @@ -12172,6 +14461,76 @@ "metric":"chrf", "score":0.4796679654 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.2402619776 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4816842061 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.2017479595 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4438028104 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.3118354834 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.5301057957 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.2712094702 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.4554148161 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"bn", @@ -12949,6 +15308,132 @@ "metric":"chrf", "score":0.4845716024 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.3746934831 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.6046609636 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.4734156929 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.7284733826 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.3552781219 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.5977013775 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.3211140622 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.544805929 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.1871006972 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.4365658925 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.106229994 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.4277182017 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"or", @@ -13446,6 +15931,41 @@ "metric":"chrf", "score":0.4912965943 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2583582755 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.483302551 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.3579243963 + }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.6083657804 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"vi", @@ -13607,6 +16127,90 @@ "metric":"chrf", "score":0.3358004159 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.2393172056 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.4971254293 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.2089212841 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.3406916002 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2712045148 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.5477096036 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.178052271 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.468064885 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", @@ -13684,6 +16288,76 @@ "metric":"chrf", "score":0.4527968539 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.200456445 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4226152307 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1414132922 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4170843853 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2663307677 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.519985227 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1913577407 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.4064669591 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bn", @@ -14461,6 +17135,132 @@ "metric":"chrf", "score":0.4726429935 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.3104483533 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5705763492 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.4229626959 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6856510383 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.2227645269 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4888582617 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.1869632744 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.4322398057 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0258426139 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2237241232 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0460531144 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.3418147419 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", @@ -14958,6 +17758,41 @@ "metric":"chrf", "score":0.4226865444 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2829644119 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.5194956482 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.2911955464 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.5560139888 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", @@ -15119,6 +17954,90 @@ "metric":"chrf", "score":0.3110461024 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.1906134629 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.4467868389 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.2001643223 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.3225170104 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.1876459632 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.4830875841 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.1830944017 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.4175337587 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", @@ -15196,6 +18115,76 @@ "metric":"chrf", "score":0.4031218248 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1604267099 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4139767864 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1651025864 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.3978212407 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2519150677 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4966963131 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.2057435019 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.4016427491 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", @@ -15973,6 +18962,132 @@ "metric":"chrf", "score":0.447826525 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.2986303081 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5546917725 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3680194341 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6778287705 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.1979480779 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4791457508 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.1536786708 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.4315811907 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0981161875 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.3370208163 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.040931235 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.3615428475 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", @@ -16470,6 +19585,41 @@ "metric":"chrf", "score":0.4066956434 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.203750264 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4979829233 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.2162945849 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.4941278712 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", @@ -16631,6 +19781,90 @@ "metric":"chrf", "score":0.2463326959 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.1913062339 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.4296053228 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.13443556 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.2528930204 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.24265587 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.4918380331 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2104382871 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.456050442 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", @@ -16708,6 +19942,76 @@ "metric":"chrf", "score":0.4315390742 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1880331404 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4234748209 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1517877566 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.423956163 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2430984589 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4969060141 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1699224465 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3964402252 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bn", @@ -17485,6 +20789,132 @@ "metric":"chrf", "score":0.4572688692 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.3794800258 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.6256125923 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3593747877 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.664135376 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.251920694 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4662583176 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.1647980206 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.4166796691 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0417850648 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2509675066 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0312813941 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.2886309955 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", @@ -17982,6 +21412,41 @@ "metric":"chrf", "score":0.429749938 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2595944841 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.5081810113 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.2601189518 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.5225655991 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", @@ -18143,6 +21608,90 @@ "metric":"chrf", "score":0.3237759485 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.0135029462 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.1510010912 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0033288372 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.0389705109 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.0592251547 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.2921276604 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.0366276845 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.2393327958 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", @@ -18220,6 +21769,76 @@ "metric":"chrf", "score":0.2388670431 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.0148972561 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.2336350172 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.0047574121 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.1922357185 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.0581882104 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.2702416532 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.0562052656 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.2181774858 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", @@ -18997,6 +22616,132 @@ "metric":"chrf", "score":0.2211003771 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.1375979502 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.3518293272 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.1462646527 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.3886405702 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.1 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.2238699363 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.1 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.1606469353 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0127402107 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.1651498064 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0088808864 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.1702476721 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", @@ -19494,6 +23239,41 @@ "metric":"chrf", "score":0.2661903898 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.0287030985 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.2124751899 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.0040250398 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.1706379305 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", @@ -19655,6 +23435,90 @@ "metric":"chrf", "score":0.232325354 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.1203676158 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.3241911739 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0165994228 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.1058904177 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2332719546 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.4924788322 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.1544598614 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.403814105 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", @@ -19732,6 +23596,76 @@ "metric":"chrf", "score":0.3635575685 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.130005692 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.3815764307 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.136654027 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.3809883299 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2451140745 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4883780153 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1334730215 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.363000921 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", @@ -20509,6 +24443,132 @@ "metric":"chrf", "score":0.3941079443 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.2763114217 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5353874356 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3315044625 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6241092077 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.2083780287 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4466015977 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.0879797246 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.3441521948 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0206783974 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2005424268 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0171071488 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.239253642 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", @@ -21006,6 +25066,41 @@ "metric":"chrf", "score":0.3855814375 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.177513842 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.386230097 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.2544813414 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.4948262917 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", @@ -21167,6 +25262,90 @@ "metric":"chrf", "score":0.2325846099 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.1375609672 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.3710873948 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0526682247 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.1185738392 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2624553878 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.5118695802 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2175864677 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.4641969296 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", @@ -21244,6 +25423,76 @@ "metric":"chrf", "score":0.43482663 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1767618659 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4088166263 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1381246624 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.3730280956 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2731033294 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.5159041397 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.2075901182 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.4086862509 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", @@ -22021,6 +26270,132 @@ "metric":"chrf", "score":0.4332812085 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.3458633411 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5703964991 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.4050410338 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6535211779 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.2598947984 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4881220333 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.2015903544 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.4513077936 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.024472012 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2143061298 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0100859589 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.2528047704 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", @@ -22518,6 +26893,41 @@ "metric":"chrf", "score":0.4452300688 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2640324297 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.5196545965 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.2748329219 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.5341998684 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", @@ -22679,6 +27089,90 @@ "metric":"chrf", "score":0.3148065512 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.0584274365 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.2032749366 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.0680986525 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.1147231502 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.3942588381 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.1208986545 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.329533412 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ar", @@ -22756,6 +27250,76 @@ "metric":"chrf", "score":0.4040671507 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.090597898 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.3141862136 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.0583354997 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.2066334095 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.1762047672 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4472079773 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1314817408 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3114200678 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"bn", @@ -23533,6 +28097,132 @@ "metric":"chrf", "score":0.2965277611 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.2497767256 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.4824221555 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.1854347311 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.331339842 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.0578909241 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.2614924673 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.1528509146 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.347684852 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0322766368 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.1548467965 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0104672499 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.1495897529 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"or", @@ -24030,6 +28720,41 @@ "metric":"chrf", "score":0.3521086255 }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.1642228534 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.3736012886 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.055748564 + }, + { + "model":"meta-llama\/llama-3.1-8b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.2468016347 + }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"vi", @@ -25031,6 +29756,90 @@ "metric":"chrf", "score":0.0294450289 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.1452228976 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.3699202818 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0630164833 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.1755172285 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2933386948 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.5447211689 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2166524228 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.4496497227 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", @@ -25108,6 +29917,76 @@ "metric":"chrf", "score":0.4546098648 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1576463626 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.3910005157 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1690495289 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.3930640761 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.3061950313 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.5535821276 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1811004213 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3534531968 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", @@ -25885,6 +30764,132 @@ "metric":"chrf", "score":0.4383662593 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.3103894957 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5854645421 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3913336262 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6467989318 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.2392486974 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4920626101 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.2130836675 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.4622075168 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0356994946 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2438060785 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0183592041 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.3076327609 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", @@ -26382,6 +31387,41 @@ "metric":"chrf", "score":0.425306622 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.236632122 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4793696196 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.265421135 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.5116987882 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", @@ -26543,6 +31583,90 @@ "metric":"chrf", "score":0.2841722148 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.278050095 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.5030892659 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.2056413876 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.319475702 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.3412628716 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.6168944847 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2079777284 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.4737326062 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", @@ -26620,6 +31744,76 @@ "metric":"chrf", "score":0.3980584269 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.2078798411 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4443945632 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1864199422 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4220840798 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2239129937 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4981762083 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1952639614 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.40510597 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", @@ -27397,6 +32591,132 @@ "metric":"chrf", "score":0.4569915545 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.345465339 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5897544047 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.4313143535 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.672362003 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.3075903861 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.578349632 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.1897910105 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.4717519215 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0536067183 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2934406231 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0212136796 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.3367052821 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", @@ -27894,6 +33214,41 @@ "metric":"chrf", "score":0.429442787 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2701010494 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.5241051692 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.3181656056 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.5907906511 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", @@ -28055,6 +33410,90 @@ "metric":"chrf", "score":0.3676906362 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.0476085337 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.205897506 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0110136998 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.0688367427 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.0927070911 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.3300356171 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.1909661669 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.4391780261 + }, { "model":"microsoft\/phi-4", "bcp_47":"ar", @@ -28132,6 +33571,76 @@ "metric":"chrf", "score":0.3761797641 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.0298418752 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.2580610439 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.041095966 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.2930729253 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.1569501012 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4039420627 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1151073387 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3126448605 + }, { "model":"microsoft\/phi-4", "bcp_47":"bn", @@ -28909,6 +34418,132 @@ "metric":"chrf", "score":0.3158325956 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.1211248924 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.389246098 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.2367123999 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.5273473365 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.1262296798 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.3480250641 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.104091386 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.3135377948 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0103766134 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.1821363344 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0003856632 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.0985339751 + }, { "model":"microsoft\/phi-4", "bcp_47":"or", @@ -29406,6 +35041,41 @@ "metric":"chrf", "score":0.3158904676 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.0676473408 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.2672641675 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.0166808106 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.2361978954 + }, { "model":"microsoft\/phi-4", "bcp_47":"vi", @@ -29567,6 +35237,90 @@ "metric":"chrf", "score":0.2799124898 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.0143438883 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.0651139855 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.0145699741 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.1077126314 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.3303312588 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.0568249639 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.1985159581 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", @@ -29644,6 +35398,76 @@ "metric":"chrf", "score":0.213092048 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.0805820584 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.2555758551 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.0296286693 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.1319853113 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.0459721625 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.2229551601 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.0233985631 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.1665184954 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", @@ -30421,6 +36245,132 @@ "metric":"chrf", "score":0.2211758055 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.120023798 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.3039131897 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.1137229069 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.3446031673 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.1101780964 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.2424045636 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.0971253665 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.1223804901 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0171568718 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.1561109456 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0097264241 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.0452833915 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", @@ -30918,6 +36868,41 @@ "metric":"chrf", "score":0.2765897266 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.030810794 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.1804383237 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.0151653031 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.0766086067 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", @@ -31079,6 +37064,90 @@ "metric":"chrf", "score":0.1793067232 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.0457848104 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.1635760551 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0043114209 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.0733956093 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.0844832543 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.2899357726 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.0202296618 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.1170466993 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", @@ -31156,6 +37225,76 @@ "metric":"chrf", "score":0.1986373033 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.0700327695 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.2793919522 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.04780178 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.1986318307 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.1421502617 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.3249063292 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.044984749 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.1531327249 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", @@ -31933,6 +38072,132 @@ "metric":"chrf", "score":0.3076980329 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.2051934522 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.4174861616 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.1030714956 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.3069420156 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.1062506996 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.3311804385 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.0661499319 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.2408635082 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0177577979 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.1411822431 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0101970078 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.1613941454 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", @@ -32430,6 +38695,41 @@ "metric":"chrf", "score":0.2859772299 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.0689558305 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.22853185 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.0415402981 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.2227329297 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", @@ -32591,6 +38891,90 @@ "metric":"chrf", "score":0.1769207611 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.0462314764 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.1434072436 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0132821079 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.0456848091 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2054466179 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.4635698598 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2513408047 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.5078766295 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", @@ -32668,6 +39052,76 @@ "metric":"chrf", "score":0.4611825726 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1923090312 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.3973361244 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1398741741 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4023712427 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2246336129 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4406538597 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1192868334 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3094007011 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", @@ -33445,6 +39899,132 @@ "metric":"chrf", "score":0.4555868419 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.3030951939 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5195567075 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3655435175 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6292737269 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.20731642 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.414222781 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.0884087592 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.2678188556 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0200154664 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.1606834413 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0044245595 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.1054952984 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", @@ -33942,6 +40522,41 @@ "metric":"chrf", "score":0.477210689 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.1727284585 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4250344787 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.1817744295 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.4656549066 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", @@ -34103,6 +40718,90 @@ "metric":"chrf", "score":0.3131256963 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.0682401612 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0028802187 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.0506386945 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.1505032551 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.4086092545 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.1882561377 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.3814760125 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", @@ -34180,6 +40879,76 @@ "metric":"chrf", "score":0.3080022567 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.0818689903 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.2636451344 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.0746797577 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.3147526037 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.1908291186 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4048132215 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.0320570973 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.1921106676 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", @@ -34957,6 +41726,132 @@ "metric":"chrf", "score":0.3369477219 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.2392636803 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.4545241599 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3632780792 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6254141203 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.0217828279 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.144054989 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.0351771663 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.2287244941 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0276939955 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.1837469296 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0160221908 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.2144565152 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", @@ -35454,6 +42349,41 @@ "metric":"chrf", "score":0.3685419874 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.1877241389 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4060591516 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.1165785177 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.4074870036 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", @@ -35615,6 +42545,90 @@ "metric":"chrf", "score":0.3045084897 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.1521950168 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.3956387285 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0953768122 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.2497030659 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2413546506 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.5235234652 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2271910382 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.4908497482 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", @@ -35692,6 +42706,76 @@ "metric":"chrf", "score":0.4720007075 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1840798833 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.4151816693 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1349305067 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.397143235 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.2230536146 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.499651958 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.2227847146 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.40695057 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", @@ -36469,6 +43553,132 @@ "metric":"chrf", "score":0.4442281313 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.3443124421 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5824988714 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3763691574 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6619682382 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.2596129619 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.485235691 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.2234699025 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.4866737746 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.1266863364 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.3723937215 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0507341481 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.3372593565 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", @@ -36966,6 +44176,41 @@ "metric":"chrf", "score":0.4485608146 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.2068814622 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4820023997 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.1797290418 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.5053214161 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", @@ -37127,6 +44372,90 @@ "metric":"chrf", "score":0.3349070044 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.0821675771 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.3261780265 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0581954137 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.2133137227 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2038935703 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.4744865332 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.198427289 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.4466553325 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", @@ -37204,6 +44533,76 @@ "metric":"chrf", "score":0.424235974 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1020059939 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.3685311802 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1584050367 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.4078207292 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.1839199068 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.4589371965 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1754477624 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3729187467 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", @@ -37981,6 +45380,132 @@ "metric":"chrf", "score":0.4466569291 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.2391713081 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5284921106 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3851770392 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6778949951 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.1921402736 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.4286234239 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.226941594 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.4545167964 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0470010342 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.3053087334 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0354609608 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.3000471846 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", @@ -38478,6 +46003,41 @@ "metric":"chrf", "score":0.4478960511 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.1939396294 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.4361718347 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.1189904742 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.4347992199 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", @@ -38639,6 +46199,90 @@ "metric":"chrf", "score":0.3136635386 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"translation_from", + "metric":"bleu", + "score":0.080495827 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"translation_from", + "metric":"chrf", + "score":0.287512266 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"translation_to", + "metric":"bleu", + "score":0.0649609212 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"translation_to", + "metric":"chrf", + "score":0.203944936 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"apc", + "task":"translation_from", + "metric":"bleu", + "score":0.2086617902 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"apc", + "task":"translation_from", + "metric":"chrf", + "score":0.4774317011 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"apc", + "task":"translation_to", + "metric":"bleu", + "score":0.2730334942 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"apc", + "task":"translation_to", + "metric":"chrf", + "score":0.5458981435 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", @@ -38716,6 +46360,76 @@ "metric":"chrf", "score":0.4690487202 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"translation_from", + "metric":"bleu", + "score":0.1025818924 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"translation_from", + "metric":"chrf", + "score":0.355623252 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"translation_to", + "metric":"bleu", + "score":0.1290294373 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"translation_to", + "metric":"chrf", + "score":0.3807908275 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"translation_from", + "metric":"bleu", + "score":0.1573424376 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.3813908093 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.1860567167 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.3798747224 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", @@ -39493,6 +47207,132 @@ "metric":"chrf", "score":0.4581322597 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"translation_from", + "metric":"bleu", + "score":0.2993296846 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"translation_from", + "metric":"chrf", + "score":0.5743132494 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"translation_to", + "metric":"bleu", + "score":0.3504238332 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"translation_to", + "metric":"chrf", + "score":0.6154153931 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"translation_from", + "metric":"bleu", + "score":0.1741885177 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"translation_from", + "metric":"chrf", + "score":0.449774491 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"translation_to", + "metric":"bleu", + "score":0.2106778 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"translation_to", + "metric":"chrf", + "score":0.4610458467 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"om", + "task":"translation_from", + "metric":"bleu", + "score":0.0511068522 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"om", + "task":"translation_from", + "metric":"chrf", + "score":0.2702934215 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"om", + "task":"translation_to", + "metric":"bleu", + "score":0.0724907554 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"om", + "task":"translation_to", + "metric":"chrf", + "score":0.345324531 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", @@ -39990,6 +47830,41 @@ "metric":"chrf", "score":0.4734427307 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"translation_from", + "metric":"bleu", + "score":0.1411472616 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"translation_from", + "metric":"chrf", + "score":0.3855156193 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"translation_to", + "metric":"bleu", + "score":0.2021458884 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"translation_to", + "metric":"chrf", + "score":0.4930438511 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi",