Spaces:
Running
Running
commit
Browse files- model_results.json +62 -1
model_results.json
CHANGED
@@ -813,7 +813,68 @@
|
|
813 |
{
|
814 |
"model_name": "gemini-2.5-flash",
|
815 |
"results": {
|
816 |
-
"mmlu_results": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
817 |
"unified_exam_results": [
|
818 |
{
|
819 |
"category": "Average",
|
|
|
813 |
{
|
814 |
"model_name": "gemini-2.5-flash",
|
815 |
"results": {
|
816 |
+
"mmlu_results": [
|
817 |
+
{
|
818 |
+
"category": "Average",
|
819 |
+
"score": 0.7519
|
820 |
+
},
|
821 |
+
{
|
822 |
+
"category": "Biology",
|
823 |
+
"score": 0.8333
|
824 |
+
},
|
825 |
+
{
|
826 |
+
"category": "Business",
|
827 |
+
"score": 0.8939
|
828 |
+
},
|
829 |
+
{
|
830 |
+
"category": "Chemistry",
|
831 |
+
"score": 0.7579
|
832 |
+
},
|
833 |
+
{
|
834 |
+
"category": "Computer Science",
|
835 |
+
"score": 0.8529
|
836 |
+
},
|
837 |
+
{
|
838 |
+
"category": "Economics",
|
839 |
+
"score": 0.831
|
840 |
+
},
|
841 |
+
{
|
842 |
+
"category": "Engineering",
|
843 |
+
"score": 0.5875
|
844 |
+
},
|
845 |
+
{
|
846 |
+
"category": "Health",
|
847 |
+
"score": 0.7941
|
848 |
+
},
|
849 |
+
{
|
850 |
+
"category": "History",
|
851 |
+
"score": 0.5862
|
852 |
+
},
|
853 |
+
{
|
854 |
+
"category": "Law",
|
855 |
+
"score": 0.6742
|
856 |
+
},
|
857 |
+
{
|
858 |
+
"category": "Math",
|
859 |
+
"score": 0.7168
|
860 |
+
},
|
861 |
+
{
|
862 |
+
"category": "Other",
|
863 |
+
"score": 0.7273
|
864 |
+
},
|
865 |
+
{
|
866 |
+
"category": "Philosophy",
|
867 |
+
"score": 0.7857
|
868 |
+
},
|
869 |
+
{
|
870 |
+
"category": "Physics",
|
871 |
+
"score": 0.7248
|
872 |
+
},
|
873 |
+
{
|
874 |
+
"category": "Psychology",
|
875 |
+
"score": 0.7612
|
876 |
+
}
|
877 |
+
],
|
878 |
"unified_exam_results": [
|
879 |
{
|
880 |
"category": "Average",
|