jwkirchenbauer committed
Commit 9004b7b · verified · 1 Parent(s): a4a17d6

Upload folder using huggingface_hub

Files changed (5)
  1. .gitattributes +1 -0
  2. config.hq_cd.yaml +116 -0
  3. config.yaml +131 -0
  4. metrics.hq_cd.jsonl +0 -0
  5. metrics.jsonl +3 -0
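
The commit message indicates these five files were pushed with huggingface_hub's upload_folder. A minimal sketch of such a call follows; the repo_id and local folder path are illustrative placeholders, not values taken from this commit.

# Minimal sketch (assumed, not from this repo): pushing a local run folder
# with huggingface_hub. Only upload_folder itself is implied by the commit
# message; repo_id and folder_path are hypothetical placeholders.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="output/prod_lingua_7B_2T_lin_hq_cd_128N",  # local dump dir (placeholder)
    repo_id="some-org/some-model",                           # placeholder repo id
    commit_message="Upload folder using huggingface_hub",
)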
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ metrics.jsonl filter=lfs diff=lfs merge=lfs -text
config.hq_cd.yaml ADDED
@@ -0,0 +1,116 @@
+ name: prod_lingua_7B_2T_lin_hq_cd_128N
+ dump_dir: /p/lustre5/kirchenb/common-pile-root/lingua/output/prod_lingua_7B_2T_lin_hq_cd_128N
+ seed: 777
+ grad_acc_steps: 1
+ gc_collect_freq: 1000
+ probe_freq: null
+ steps: 239000
+ data:
+   root_dir: /p/vast1/pretrain/datasets/common_pile/common-pile-chunked
+   sources:
+     stackv2_edu: 0.170363004622478
+     doab: 0.160392905980941
+     wikimedia: 0.15321084473113
+     stackexchange: 0.149577338855193
+     peS2o: 0.121808388328754
+     cccc: 0.116267077616518
+     arxiv_papers: 0.0649775722614287
+     data_provenance_initiative: 0.0455351865398593
+     pressbooks: 0.00768988231013495
+     libretexts: 0.00481335361292812
+     news: 0.00331307394000609
+     foodista: 0.00111435553323384
+     oercommons: 0.000692183554680895
+     python_enhancement_proposals: 0.000151098755264962
+     public_domain_review: 9.37333574480524e-05
+   batch_size: 4
+   seq_len: 4096
+   n_views: 2
+   seed: 42
+   add_bos: true
+   add_eos: true
+   load_async: true
+   prefetch_size: 4096
+   tokenizer:
+     name: tiktoken
+     path: /p/vast1/pretrain/datasets/common_pile/common-pile-chunked/tokenizer/common-pile-tokenizer.tiktoken
+ optim:
+   lr: 0.002
+   weight_decay: 0.2
+   epsilon: 1.0e-08
+   beta1: 0.9
+   beta2: 0.95
+   clip: 1.0
+   scheduler: linear
+   warmup: -324821
+   lr_min_ratio: 0.0
+   cycle_length: 1.0
+   cosine_theta: 1.0
+   annealing_step: 1000
+   decay_fraction: 0.1
+   exp_factor: 0.5
+ model:
+   dim: 4096
+   n_layers: 32
+   head_dim: null
+   n_heads: 32
+   n_kv_heads: null
+   ffn_dim_multiplier: 1.0
+   multiple_of: 256
+   norm_eps: 1.0e-05
+   rope_theta: 100000.0
+   init_base_std: null
+   init_std_factor: disabled
+   max_seqlen: 4096
+   seed: 42
+   vocab_size: 64256
+   weight_tying: false
+   sliding_window: null
+ distributed:
+   dp_shard: 1
+   dp_replicate: 512
+   tp_size: 1
+   selective_activation_checkpointing: false
+   compile: true
+   fsdp_type: full_shard
+   model_dtype: bf16
+   float8_recipe: null
+   float8_filter: layers\.[0-9]+\.
+   matmul_allow_tf32: false
+   detect_anomaly: false
+   compile_cache_size_limit: 8
+   spawn_method: forkserver
+ env:
+   MKL_SERVICE_FORCE_INTEL: GNU
+   OMP_NUM_THREADS: '1'
+   MKL_NUM_THREADS: '1'
+   ENABLE_INTRA_NODE_COMM: '1'
+   TORCH_NCCL_AVOID_RECORD_STREAMS: '1'
+   NCCL_IB_TIMEOUT: '22'
+   NCCL_DEBUG: INFO
+   TORCH_NCCL_ASYNC_ERROR_HANDLING: '1'
+ checkpoint:
+   dump:
+     every: 1000
+     keep: -1
+   eval:
+     every: 900
+     keep: 11
+   path: /p/lustre5/kirchenb/common-pile-root/lingua/output/prod_lingua_7B_2T_lin_hq_cd_128N/checkpoints
+   init_ckpt_path: null
+   continue_training_from_init: false
+   ignore_data_loader_state: true
+   ignore_lr_scheduler_state: true
+ profiling:
+   run: false
+   trace_folder: profiling
+   mem_warmup: 0
+   mem_steps: 4
+   profile_warmup: 100
+   profile_steps: 4
+ logging:
+   freq: 1
+   acc_freq: null
+   wandb: null
+ async_eval_gpus: null
+ eval: null
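
A quick consistency check on the run name (7B model, roughly 2T tokens): each optimizer step consumes batch_size * seq_len tokens per data-parallel rank, and this config sets dp_replicate: 512 with grad_acc_steps: 1. A small arithmetic sketch, assuming every replica draws distinct data each step:

# Token budget implied by config.hq_cd.yaml, assuming all 512
# data-parallel replicas contribute fresh tokens every step
# (grad_acc_steps is 1, so no extra factor).
batch_size = 4
seq_len = 4096
dp_replicate = 512
steps = 239_000

tokens_per_step = batch_size * seq_len * dp_replicate  # 8,388,608
total_tokens = tokens_per_step * steps                 # about 2.0e12
print(f"{total_tokens / 1e12:.2f}T tokens")            # ~2.00T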
config.yaml ADDED
@@ -0,0 +1,131 @@
+ name: prod_lingua_7B_2T_128N
+ dump_dir: /p/lustre5/kirchenb/common-pile-root/lingua/output/prod_lingua_7B_2T_128N
+ seed: 777
+ grad_acc_steps: 1
+ gc_collect_freq: 1000
+ probe_freq: null
+ steps: 250000
+ data:
+   root_dir: /p/vast1/pretrain/datasets/common_pile/common-pile-chunked
+   sources:
+     peS2o: 0.274065475510351
+     stackexchange: 0.134617935796937
+     stackv2_edu: 0.127770669195666
+     cccc: 0.0871992270000557
+     wikimedia: 0.0861800315862719
+     github_archive: 0.0606452345122248
+     uspto: 0.0413469377516883
+     pubmed: 0.0367902799837971
+     arxiv_papers: 0.0292395449667613
+     caselaw_access_project: 0.0193875362722656
+     wikiteam: 0.0137485410839637
+     doab: 0.0180439781895451
+     uk_hansard: 0.0144498535570883
+     pre_1929_books: 0.0115755547988338
+     ubuntu_irc: 0.00794254267719456
+     regulations: 0.00762583706405442
+     data_provenance_initiative: 0.00512264496834867
+     project_gutenberg: 0.00502100654070129
+     youtube: 0.00465917165839394
+     arxiv_abstracts: 0.00359635066160403
+     stackv2_html: 0.00225924255952781
+     usgpo: 0.00226024581728848
+     library_of_congress: 0.00222469340783564
+     biodiversity_heritage_library: 0.00221737524370278
+     pressbooks: 0.000865101033213598
+     libretexts: 0.00054149556727006
+     news: 0.000372716196818104
+     foodista: 0.000125363443065615
+     oercommons: 7.78696843693821e-05
+     python_enhancement_proposals: 1.69983991984805e-05
+     public_domain_review: 1.05448719635173e-05
+   batch_size: 4
+   seq_len: 4096
+   n_views: 2
+   seed: 42
+   add_bos: true
+   add_eos: true
+   load_async: true
+   prefetch_size: 4096
+   tokenizer:
+     name: tiktoken
+     path: /p/vast1/pretrain/datasets/common_pile/common-pile-chunked/tokenizer/common-pile-tokenizer.tiktoken
+ optim:
+   lr: 0.002
+   weight_decay: 0.2
+   epsilon: 1.0e-08
+   beta1: 0.9
+   beta2: 0.95
+   clip: 1.0
+   scheduler: cosine
+   warmup: 2000
+   lr_min_ratio: 1.0e-06
+   cycle_length: 1.0
+   cosine_theta: 1.0
+   annealing_step: 1000
+   decay_fraction: 0.1
+   exp_factor: 0.5
+ model:
+   dim: 4096
+   n_layers: 32
+   head_dim: null
+   n_heads: 32
+   n_kv_heads: null
+   ffn_dim_multiplier: 1.0
+   multiple_of: 256
+   norm_eps: 1.0e-05
+   rope_theta: 100000.0
+   init_base_std: null
+   init_std_factor: disabled
+   max_seqlen: 4096
+   seed: 42
+   vocab_size: 64256
+   weight_tying: false
+   sliding_window: null
+ distributed:
+   dp_shard: 1
+   dp_replicate: 512
+   tp_size: 1
+   selective_activation_checkpointing: false
+   compile: true
+   fsdp_type: full_shard
+   model_dtype: bf16
+   float8_recipe: null
+   float8_filter: layers\.[0-9]+\.
+   matmul_allow_tf32: false
+   detect_anomaly: false
+   compile_cache_size_limit: 8
+   spawn_method: forkserver
+ env:
+   MKL_SERVICE_FORCE_INTEL: GNU
+   OMP_NUM_THREADS: '1'
+   MKL_NUM_THREADS: '1'
+   ENABLE_INTRA_NODE_COMM: '1'
+   TORCH_NCCL_AVOID_RECORD_STREAMS: '1'
+   NCCL_IB_TIMEOUT: '22'
+   NCCL_DEBUG: INFO
+   TORCH_NCCL_ASYNC_ERROR_HANDLING: '1'
+ checkpoint:
+   dump:
+     every: 10000
+     keep: -1
+   eval:
+     every: 2000
+     keep: 3
+   path: /p/lustre5/kirchenb/common-pile-root/lingua/output/prod_lingua_7B_2T_128N/checkpoints
+   init_ckpt_path: null
+   continue_training_from_init: false
+   ignore_data_loader_state: false
+ profiling:
+   run: false
+   trace_folder: profiling
+   mem_warmup: 0
+   mem_steps: 4
+   profile_warmup: 100
+   profile_steps: 4
+ logging:
+   freq: 1
+   acc_freq: null
+   wandb: null
+ async_eval_gpus: null
+ eval: null
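
The two configs share the same 7B model and distributed setup but differ in the data source mixture, the step count (250000 here vs. 239000), and the optimizer schedule (cosine with warmup: 2000 here vs. linear in config.hq_cd.yaml). A short sketch that lists every key on which the two uploaded files disagree, assuming only PyYAML and that both files sit in the working directory:

# Sketch: surface the keys on which the two uploaded configs disagree.
# Uses only PyYAML; file names match the ones added in this commit.
import yaml

def flatten(d, prefix=""):
    # Recursively flatten nested dicts into dotted keys.
    out = {}
    for k, v in d.items():
        key = f"{prefix}{k}"
        if isinstance(v, dict):
            out.update(flatten(v, key + "."))
        else:
            out[key] = v
    return out

base = flatten(yaml.safe_load(open("config.yaml")))
hq_cd = flatten(yaml.safe_load(open("config.hq_cd.yaml")))

for key in sorted(set(base) | set(hq_cd)):
    if base.get(key) != hq_cd.get(key):
        print(f"{key}: {base.get(key)!r} -> {hq_cd.get(key)!r}")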
metrics.hq_cd.jsonl ADDED
The diff for this file is too large to render.
 
metrics.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6494d76109eb6ee1c0564dc902a8bb8f711e599afb545e9f49f28628fc00328a
+ size 225744751
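
The metrics files are .jsonl logs; metrics.jsonl is tracked through Git LFS, so the three lines above are only the LFS pointer (the real file is roughly 225 MB, per the size field). A minimal reading sketch once the actual file has been fetched, assuming each line is one JSON object per logged step; the field names used are illustrative, not confirmed from this log.

# Minimal sketch: stream a .jsonl training log after the real file has
# been pulled (e.g. via git lfs or hf_hub_download). Each line is assumed
# to be one JSON object; the field names below are illustrative only.
import json

with open("metrics.jsonl") as f:
    for line in f:
        record = json.loads(line)
        # Inspect record.keys() to see what this particular log contains.
        step = record.get("global_step")
        loss = record.get("loss")
        if step is not None and step % 1000 == 0:
            print(step, loss)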