Spaces:
Running
Running
commit
Browse files- model_results.json +158 -169
model_results.json
CHANGED
@@ -21,9 +21,7 @@
|
|
21 |
"score": 15.0
|
22 |
}
|
23 |
]
|
24 |
-
}
|
25 |
-
"mmlu_results": NaN,
|
26 |
-
"unified_exam_results": NaN
|
27 |
},
|
28 |
{
|
29 |
"model_name": "claude-3-5-sonnet-20241022",
|
@@ -108,9 +106,7 @@
|
|
108 |
"score": 12.75
|
109 |
}
|
110 |
]
|
111 |
-
}
|
112 |
-
"mmlu_results": NaN,
|
113 |
-
"unified_exam_results": NaN
|
114 |
},
|
115 |
{
|
116 |
"model_name": "gemini-2.0-flash",
|
@@ -195,9 +191,7 @@
|
|
195 |
"score": 17.25
|
196 |
}
|
197 |
]
|
198 |
-
}
|
199 |
-
"mmlu_results": NaN,
|
200 |
-
"unified_exam_results": NaN
|
201 |
},
|
202 |
{
|
203 |
"model_name": "gpt-4o",
|
@@ -282,9 +276,7 @@
|
|
282 |
"score": 13.25
|
283 |
}
|
284 |
]
|
285 |
-
}
|
286 |
-
"mmlu_results": NaN,
|
287 |
-
"unified_exam_results": NaN
|
288 |
},
|
289 |
{
|
290 |
"model_name": "qwen-max-2025-01-25",
|
@@ -308,9 +300,7 @@
|
|
308 |
"score": 14.25
|
309 |
}
|
310 |
]
|
311 |
-
}
|
312 |
-
"mmlu_results": NaN,
|
313 |
-
"unified_exam_results": NaN
|
314 |
},
|
315 |
{
|
316 |
"model_name": "gemini-1.5-flash",
|
@@ -395,9 +385,7 @@
|
|
395 |
"score": 15.0
|
396 |
}
|
397 |
]
|
398 |
-
}
|
399 |
-
"mmlu_results": NaN,
|
400 |
-
"unified_exam_results": NaN
|
401 |
},
|
402 |
{
|
403 |
"model_name": "DeepSeek-V3",
|
@@ -482,9 +470,7 @@
|
|
482 |
"score": 12.25
|
483 |
}
|
484 |
]
|
485 |
-
}
|
486 |
-
"mmlu_results": NaN,
|
487 |
-
"unified_exam_results": NaN
|
488 |
},
|
489 |
{
|
490 |
"model_name": "Meta-Llama-3.3-70B-Instruct",
|
@@ -569,9 +555,7 @@
|
|
569 |
"score": 11.5
|
570 |
}
|
571 |
]
|
572 |
-
}
|
573 |
-
"mmlu_results": NaN,
|
574 |
-
"unified_exam_results": NaN
|
575 |
},
|
576 |
{
|
577 |
"model_name": "claude-3-5-haiku-20241022",
|
@@ -656,169 +640,174 @@
|
|
656 |
"score": 10.75
|
657 |
}
|
658 |
]
|
659 |
-
}
|
660 |
-
"mmlu_results": NaN,
|
661 |
-
"unified_exam_results": NaN
|
662 |
},
|
663 |
{
|
664 |
"model_name": "Gen2B/HyGPT-10b-it",
|
665 |
-
"results":
|
666 |
-
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
-
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
|
|
|
685 |
},
|
686 |
{
|
687 |
"model_name": "google/gemma-2-9b-it",
|
688 |
-
"results":
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
699 |
-
|
700 |
-
|
701 |
-
|
702 |
-
|
703 |
-
|
704 |
-
|
705 |
-
|
706 |
-
|
707 |
-
|
|
|
708 |
},
|
709 |
{
|
710 |
-
"model_name": "google/gemma-
|
711 |
-
"results":
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
|
|
731 |
},
|
732 |
{
|
733 |
-
"model_name": "google/gemma-3n-
|
734 |
-
"results":
|
735 |
-
|
736 |
-
|
737 |
-
|
738 |
-
|
739 |
-
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
|
744 |
-
|
745 |
-
|
746 |
-
|
747 |
-
|
748 |
-
|
749 |
-
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
|
|
|
754 |
},
|
755 |
{
|
756 |
"model_name": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
|
757 |
-
"results":
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
-
|
765 |
-
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
|
|
777 |
},
|
778 |
{
|
779 |
"model_name": "Qwen/Qwen3-32B",
|
780 |
-
"results":
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
|
788 |
-
|
789 |
-
|
790 |
-
|
791 |
-
|
792 |
-
|
793 |
-
|
794 |
-
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
|
|
800 |
},
|
801 |
{
|
802 |
"model_name": "Qwen/QwQ-32B",
|
803 |
-
"results":
|
804 |
-
|
805 |
-
|
806 |
-
|
807 |
-
|
808 |
-
|
809 |
-
|
810 |
-
|
811 |
-
|
812 |
-
|
813 |
-
|
814 |
-
|
815 |
-
|
816 |
-
|
817 |
-
|
818 |
-
|
819 |
-
|
820 |
-
|
821 |
-
|
822 |
-
|
|
|
823 |
}
|
824 |
]
|
|
|
21 |
"score": 15.0
|
22 |
}
|
23 |
]
|
24 |
+
}
|
|
|
|
|
25 |
},
|
26 |
{
|
27 |
"model_name": "claude-3-5-sonnet-20241022",
|
|
|
106 |
"score": 12.75
|
107 |
}
|
108 |
]
|
109 |
+
}
|
|
|
|
|
110 |
},
|
111 |
{
|
112 |
"model_name": "gemini-2.0-flash",
|
|
|
191 |
"score": 17.25
|
192 |
}
|
193 |
]
|
194 |
+
}
|
|
|
|
|
195 |
},
|
196 |
{
|
197 |
"model_name": "gpt-4o",
|
|
|
276 |
"score": 13.25
|
277 |
}
|
278 |
]
|
279 |
+
}
|
|
|
|
|
280 |
},
|
281 |
{
|
282 |
"model_name": "qwen-max-2025-01-25",
|
|
|
300 |
"score": 14.25
|
301 |
}
|
302 |
]
|
303 |
+
}
|
|
|
|
|
304 |
},
|
305 |
{
|
306 |
"model_name": "gemini-1.5-flash",
|
|
|
385 |
"score": 15.0
|
386 |
}
|
387 |
]
|
388 |
+
}
|
|
|
|
|
389 |
},
|
390 |
{
|
391 |
"model_name": "DeepSeek-V3",
|
|
|
470 |
"score": 12.25
|
471 |
}
|
472 |
]
|
473 |
+
}
|
|
|
|
|
474 |
},
|
475 |
{
|
476 |
"model_name": "Meta-Llama-3.3-70B-Instruct",
|
|
|
555 |
"score": 11.5
|
556 |
}
|
557 |
]
|
558 |
+
}
|
|
|
|
|
559 |
},
|
560 |
{
|
561 |
"model_name": "claude-3-5-haiku-20241022",
|
|
|
640 |
"score": 10.75
|
641 |
}
|
642 |
]
|
643 |
+
}
|
|
|
|
|
644 |
},
|
645 |
{
|
646 |
"model_name": "Gen2B/HyGPT-10b-it",
|
647 |
+
"results": {
|
648 |
+
"mmlu_results": [],
|
649 |
+
"unified_exam_results": [
|
650 |
+
{
|
651 |
+
"category": "Armenian language and literature",
|
652 |
+
"score": 4.5
|
653 |
+
},
|
654 |
+
{
|
655 |
+
"category": "Armenian history",
|
656 |
+
"score": 4.25
|
657 |
+
},
|
658 |
+
{
|
659 |
+
"category": "Mathematics",
|
660 |
+
"score": 3.0
|
661 |
+
},
|
662 |
+
{
|
663 |
+
"category": "Average",
|
664 |
+
"score": 3.9167
|
665 |
+
}
|
666 |
+
]
|
667 |
+
}
|
668 |
},
|
669 |
{
|
670 |
"model_name": "google/gemma-2-9b-it",
|
671 |
+
"results": {
|
672 |
+
"mmlu_results": [],
|
673 |
+
"unified_exam_results": [
|
674 |
+
{
|
675 |
+
"category": "Armenian language and literature",
|
676 |
+
"score": 3.25
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"category": "Armenian history",
|
680 |
+
"score": 1.75
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"category": "Mathematics",
|
684 |
+
"score": 2.0
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"category": "Average",
|
688 |
+
"score": 2.3333
|
689 |
+
}
|
690 |
+
]
|
691 |
+
}
|
692 |
},
|
693 |
{
|
694 |
+
"model_name": "google/gemma-3n-E2B-it",
|
695 |
+
"results": {
|
696 |
+
"mmlu_results": [],
|
697 |
+
"unified_exam_results": [
|
698 |
+
{
|
699 |
+
"category": "Armenian language and literature",
|
700 |
+
"score": 2.25
|
701 |
+
},
|
702 |
+
{
|
703 |
+
"category": "Armenian history",
|
704 |
+
"score": 1.5
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"category": "Mathematics",
|
708 |
+
"score": 4.25
|
709 |
+
},
|
710 |
+
{
|
711 |
+
"category": "Average",
|
712 |
+
"score": 2.6667
|
713 |
+
}
|
714 |
+
]
|
715 |
+
}
|
716 |
},
|
717 |
{
|
718 |
+
"model_name": "google/gemma-3n-E4B-it",
|
719 |
+
"results": {
|
720 |
+
"mmlu_results": [],
|
721 |
+
"unified_exam_results": [
|
722 |
+
{
|
723 |
+
"category": "Armenian language and literature",
|
724 |
+
"score": 2.75
|
725 |
+
},
|
726 |
+
{
|
727 |
+
"category": "Armenian history",
|
728 |
+
"score": 2.0
|
729 |
+
},
|
730 |
+
{
|
731 |
+
"category": "Mathematics",
|
732 |
+
"score": 5.5
|
733 |
+
},
|
734 |
+
{
|
735 |
+
"category": "Average",
|
736 |
+
"score": 3.4167
|
737 |
+
}
|
738 |
+
]
|
739 |
+
}
|
740 |
},
|
741 |
{
|
742 |
"model_name": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
|
743 |
+
"results": {
|
744 |
+
"mmlu_results": [],
|
745 |
+
"unified_exam_results": [
|
746 |
+
{
|
747 |
+
"category": "Armenian language and literature",
|
748 |
+
"score": 6.25
|
749 |
+
},
|
750 |
+
{
|
751 |
+
"category": "Armenian history",
|
752 |
+
"score": 5.0
|
753 |
+
},
|
754 |
+
{
|
755 |
+
"category": "Mathematics",
|
756 |
+
"score": 12.5
|
757 |
+
},
|
758 |
+
{
|
759 |
+
"category": "Average",
|
760 |
+
"score": 7.9167
|
761 |
+
}
|
762 |
+
]
|
763 |
+
}
|
764 |
},
|
765 |
{
|
766 |
"model_name": "Qwen/Qwen3-32B",
|
767 |
+
"results": {
|
768 |
+
"mmlu_results": [],
|
769 |
+
"unified_exam_results": [
|
770 |
+
{
|
771 |
+
"category": "Armenian language and literature",
|
772 |
+
"score": 4.75
|
773 |
+
},
|
774 |
+
{
|
775 |
+
"category": "Armenian history",
|
776 |
+
"score": 3.5
|
777 |
+
},
|
778 |
+
{
|
779 |
+
"category": "Mathematics",
|
780 |
+
"score": 14.0
|
781 |
+
},
|
782 |
+
{
|
783 |
+
"category": "Average",
|
784 |
+
"score": 7.4167
|
785 |
+
}
|
786 |
+
]
|
787 |
+
}
|
788 |
},
|
789 |
{
|
790 |
"model_name": "Qwen/QwQ-32B",
|
791 |
+
"results": {
|
792 |
+
"mmlu_results": [],
|
793 |
+
"unified_exam_results": [
|
794 |
+
{
|
795 |
+
"category": "Armenian language and literature",
|
796 |
+
"score": 2.5
|
797 |
+
},
|
798 |
+
{
|
799 |
+
"category": "Armenian history",
|
800 |
+
"score": 2.5
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"category": "Mathematics",
|
804 |
+
"score": 10.5
|
805 |
+
},
|
806 |
+
{
|
807 |
+
"category": "Average",
|
808 |
+
"score": 5.1667
|
809 |
+
}
|
810 |
+
]
|
811 |
+
}
|
812 |
}
|
813 |
]
|