Bagratuni committed on
Commit
b566964
·
1 Parent(s): e6e4a80

Append API model results for the unified exam

Browse files
Files changed (1) hide show
  1. model_results.json +48 -0
model_results.json CHANGED
@@ -809,5 +809,53 @@
809
  }
810
  ]
811
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
812
  }
813
  ]
 
809
  }
810
  ]
811
  }
812
+ },
813
+ {
814
+ "model_name": "gemini-2.5-flash",
815
+ "results": {
816
+ "mmlu_results": [],
817
+ "unified_exam_results": [
818
+ {
819
+ "category": "Average",
820
+ "score": 9.5
821
+ }
822
+ ]
823
+ }
824
+ },
825
+ {
826
+ "model_name": "gemini-2.5-pro",
827
+ "results": {
828
+ "mmlu_results": [],
829
+ "unified_exam_results": [
830
+ {
831
+ "category": "Average",
832
+ "score": 11.25
833
+ }
834
+ ]
835
+ }
836
+ },
837
+ {
838
+ "model_name": "gpt-4.1-2025-04-14",
839
+ "results": {
840
+ "mmlu_results": [],
841
+ "unified_exam_results": [
842
+ {
843
+ "category": "Average",
844
+ "score": 8.0
845
+ }
846
+ ]
847
+ }
848
+ },
849
+ {
850
+ "model_name": "claude-sonnet-4-20250514",
851
+ "results": {
852
+ "mmlu_results": [],
853
+ "unified_exam_results": [
854
+ {
855
+ "category": "Average",
856
+ "score": 7.0
857
+ }
858
+ ]
859
+ }
860
  }
861
  ]