global_batch_size: 256; micro_batch_size: 2
scripts/pretrain_core_model.yaml (changed)
```diff
@@ -58,7 +58,7 @@ train:
   log_interval: 1
 
   # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
-  global_batch_size:
+  global_batch_size: 256
 
   # Number of samples per data-parallel rank (type: int, default: 4)
   micro_batch_size: 2
```
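
The two settings work together: `global_batch_size` fixes how many samples contribute to each optimizer step across all data-parallel ranks, while `micro_batch_size` caps how many samples a single rank pushes through one forward/backward pass; the gap between them is closed by gradient accumulation. A minimal sketch of that arithmetic, assuming an 8-GPU data-parallel run (the device count and the divisibility check are illustrative assumptions, not stated in the commit):

```python
# Batch-size arithmetic implied by this config. The 8-GPU world size is
# a hypothetical example; trainers consuming configs like this typically
# derive gradient accumulation from these three values.

global_batch_size = 256  # samples per optimizer step, summed across all data-parallel ranks
micro_batch_size = 2     # samples each rank processes per forward/backward pass
num_devices = 8          # hypothetical data-parallel world size

# Each rank's share of the global batch.
batch_size_per_rank = global_batch_size // num_devices  # 32

# Micro-batches each rank accumulates before the optimizer steps;
# global_batch_size must divide evenly by micro_batch_size * num_devices.
assert global_batch_size % (micro_batch_size * num_devices) == 0
gradient_accumulation_steps = batch_size_per_rank // micro_batch_size

print(gradient_accumulation_steps)  # 16
```

At `micro_batch_size: 2`, a global batch of 256 (half the default of 512) divides evenly for any power-of-two world size up to 128 ranks, with the accumulation count shrinking as ranks are added.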