Spaces:
Running
Running
api model results on unified exam appending
Browse files- model_results.json +48 -0
model_results.json
CHANGED
@@ -809,5 +809,53 @@
|
|
809 |
}
|
810 |
]
|
811 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
812 |
}
|
813 |
]
|
|
|
809 |
}
|
810 |
]
|
811 |
}
|
812 |
+
},
|
813 |
+
{
|
814 |
+
"model_name": "gemini-2.5-flash",
|
815 |
+
"results": {
|
816 |
+
"mmlu_results": [],
|
817 |
+
"unified_exam_results": [
|
818 |
+
{
|
819 |
+
"category": "Average",
|
820 |
+
"score": 9.5
|
821 |
+
}
|
822 |
+
]
|
823 |
+
}
|
824 |
+
},
|
825 |
+
{
|
826 |
+
"model_name": "gemini-2.5-pro",
|
827 |
+
"results": {
|
828 |
+
"mmlu_results": [],
|
829 |
+
"unified_exam_results": [
|
830 |
+
{
|
831 |
+
"category": "Average",
|
832 |
+
"score": 11.25
|
833 |
+
}
|
834 |
+
]
|
835 |
+
}
|
836 |
+
},
|
837 |
+
{
|
838 |
+
"model_name": "gpt-4.1-2025-04-14",
|
839 |
+
"results": {
|
840 |
+
"mmlu_results": [],
|
841 |
+
"unified_exam_results": [
|
842 |
+
{
|
843 |
+
"category": "Average",
|
844 |
+
"score": 8.0
|
845 |
+
}
|
846 |
+
]
|
847 |
+
}
|
848 |
+
},
|
849 |
+
{
|
850 |
+
"model_name": "claude-sonnet-4-20250514",
|
851 |
+
"results": {
|
852 |
+
"mmlu_results": [],
|
853 |
+
"unified_exam_results": [
|
854 |
+
{
|
855 |
+
"category": "Average",
|
856 |
+
"score": 7.0
|
857 |
+
}
|
858 |
+
]
|
859 |
+
}
|
860 |
}
|
861 |
]
|