open-r1-eval-leaderboard
/
eval_results
/HuggingFaceH4
/zephyr-7b-beta-ift
/v0.1
/gsm8k
/results_2024-03-13T22-43-28.269257.json
| { | |
| "config_general": { | |
| "lighteval_sha": "?", | |
| "num_fewshot_seeds": 1, | |
| "override_batch_size": 1, | |
| "max_samples": null, | |
| "job_id": "", | |
| "start_time": 649557.228585748, | |
| "end_time": 650708.143174138, | |
| "total_evaluation_time_secondes": "1150.9145883900346", | |
| "model_name": "HuggingFaceH4/zephyr-7b-beta-ift", | |
| "model_sha": "af9e6d786eb016993160afda7a8616953c114095", | |
| "model_dtype": "torch.bfloat16", | |
| "model_size": "13.99 GB", | |
| "config": null | |
| }, | |
| "results": { | |
| "lighteval|gsm8k|5": { | |
| "qem": 0.2699014404852161, | |
| "qem_stderr": 0.012227442856468897 | |
| } | |
| }, | |
| "versions": { | |
| "lighteval|gsm8k|5": 0 | |
| }, | |
| "config_tasks": { | |
| "lighteval|gsm8k": { | |
| "name": "gsm8k", | |
| "prompt_function": "gsm8k", | |
| "hf_repo": "gsm8k", | |
| "hf_subset": "main", | |
| "metric": [ | |
| "quasi_exact_match_gsm8k" | |
| ], | |
| "hf_avail_splits": [ | |
| "train", | |
| "test" | |
| ], | |
| "evaluation_splits": [ | |
| "test" | |
| ], | |
| "few_shots_split": null, | |
| "few_shots_select": "random_sampling_from_train", | |
| "generation_size": 256, | |
| "stop_sequence": [ | |
| "Question:" | |
| ], | |
| "output_regex": null, | |
| "frozen": false, | |
| "suite": [ | |
| "lighteval" | |
| ], | |
| "original_num_docs": 1319, | |
| "effective_num_docs": 1319, | |
| "trust_dataset": true | |
| } | |
| }, | |
| "summary_tasks": { | |
| "lighteval|gsm8k|5": { | |
| "hashes": { | |
| "hash_examples": "0ed016e24e7512fd", | |
| "hash_full_prompts": "faa09535944c3187", | |
| "hash_input_tokens": "b6842acfc19a5637", | |
| "hash_cont_tokens": "b45a9e2f68543216" | |
| }, | |
| "truncated": 0, | |
| "non_truncated": 1319, | |
| "padded": 0, | |
| "non_padded": 1319, | |
| "effective_few_shots": 5.0, | |
| "num_truncated_few_shots": 0 | |
| } | |
| }, | |
| "summary_general": { | |
| "hashes": { | |
| "hash_examples": "bc71463e88551d0e", | |
| "hash_full_prompts": "24555d96d1b06364", | |
| "hash_input_tokens": "2efc02ed9cc6bfb4", | |
| "hash_cont_tokens": "8ba2268dc13955c1" | |
| }, | |
| "truncated": 0, | |
| "non_truncated": 1319, | |
| "padded": 0, | |
| "non_padded": 1319, | |
| "num_truncated_few_shots": 0 | |
| } | |
| } |