{
  "train_batch_size": 128,
  "fp16": {
    "enabled": true,
    "min_loss_scale": 1
  },
  "zero_optimization": {
    "stage": 2,
    "offload_optimizer": {
      "device": "cpu"
    },
    "allgather_partitions": true,
    "allgather_bucket_size": 500000000,
    "contiguous_gradients": true
  },
  "optimizer": {
    "type": "AdamW",
    "params": {
      "lr": 1e-05,
      "betas": [
        0.9,
        0.95
      ],
      "eps": 1e-08
    }
  },
  "scheduler": {
    "type": "WarmupLR",
    "params": {
      "warmup_min_lr": 0,
      "warmup_max_lr": 1e-05,
      "warmup_num_steps": "auto"
    }
  }
}