open-r1-eval-leaderboard
/
eval_results
/HuggingFaceH4
/mistral-7b-ift
/v48.106
/winogrande
/results_2024-03-13T09-55-40.901840.json
edbeeching
HF Staff
Upload eval_results/HuggingFaceH4/mistral-7b-ift/v48.106/winogrande/results_2024-03-13T09-55-40.901840.json with huggingface_hub
903a220
verified
| { | |
| "config_general": { | |
| "lighteval_sha": "?", | |
| "num_fewshot_seeds": 1, | |
| "override_batch_size": 1, | |
| "max_samples": null, | |
| "job_id": "", | |
| "start_time": 367055.995126485, | |
| "end_time": 367121.855298492, | |
| "total_evaluation_time_secondes": "65.86017200700007", | |
| "model_name": "HuggingFaceH4/mistral-7b-ift", | |
| "model_sha": "686e4ee0f7b1359c9565801c5919f825a34ae9c6", | |
| "model_dtype": "torch.bfloat16", | |
| "model_size": "13.99 GB", | |
| "config": null | |
| }, | |
| "results": { | |
| "lighteval|winogrande|5": { | |
| "acc": 0.681136543014996, | |
| "acc_stderr": 0.013097928420088771 | |
| } | |
| }, | |
| "versions": { | |
| "lighteval|winogrande|5": 0 | |
| }, | |
| "config_tasks": { | |
| "lighteval|winogrande": { | |
| "name": "winogrande", | |
| "prompt_function": "winogrande", | |
| "hf_repo": "winogrande", | |
| "hf_subset": "winogrande_xl", | |
| "metric": [ | |
| "loglikelihood_acc" | |
| ], | |
| "hf_avail_splits": [ | |
| "train", | |
| "test", | |
| "validation" | |
| ], | |
| "evaluation_splits": [ | |
| "validation" | |
| ], | |
| "few_shots_split": null, | |
| "few_shots_select": "random_sampling", | |
| "generation_size": -1, | |
| "stop_sequence": [ | |
| "\n" | |
| ], | |
| "output_regex": null, | |
| "frozen": false, | |
| "suite": [ | |
| "lighteval" | |
| ], | |
| "original_num_docs": 1267, | |
| "effective_num_docs": 1267 | |
| } | |
| }, | |
| "summary_tasks": { | |
| "lighteval|winogrande|5": { | |
| "hashes": { | |
| "hash_examples": "087d5d1a1afd4c7b", | |
| "hash_full_prompts": "68a2321c6687b7fb", | |
| "hash_input_tokens": "4ee3b4a10c000d4e", | |
| "hash_cont_tokens": "b62ff56a2b8ad70e" | |
| }, | |
| "truncated": 0, | |
| "non_truncated": 1267, | |
| "padded": 2403, | |
| "non_padded": 131, | |
| "effective_few_shots": 5.0, | |
| "num_truncated_few_shots": 0 | |
| } | |
| }, | |
| "summary_general": { | |
| "hashes": { | |
| "hash_examples": "b9a49975cc41fab7", | |
| "hash_full_prompts": "61d22a7151c380da", | |
| "hash_input_tokens": "be615631b41f228f", | |
| "hash_cont_tokens": "02fbe23ec37d1582" | |
| }, | |
| "truncated": 0, | |
| "non_truncated": 1267, | |
| "padded": 2403, | |
| "non_padded": 131, | |
| "num_truncated_few_shots": 0 | |
| } | |
| } |