PJMixers-Dev
/

Gemma-3-Earthen-v0.1-4B-QLoRA

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d58e8b70cae140c27869d506ce1fd1f11fba30e54691e644d41e79743eee8cff
 size 1907432232

 version https://git-lfs.github.com/spec/v1
+oid sha256:badd47000be379b28f59dad18021a7b6074d3deaff0246e3db26f6e121fdd9c3
 size 1907432232

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.008485812781755503,
   "eval_steps": 10,
-  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -640,6 +640,552 @@
       "eval_samples_per_second": 1.888,
       "eval_steps_per_second": 0.472,
       "step": 80
     }
   ],
   "logging_steps": 1,
@@ -659,7 +1205,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2900468007108608e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.015910898965791568,
   "eval_steps": 10,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1.888,
       "eval_steps_per_second": 0.472,
       "step": 80
+    },
+    {
+      "epoch": 0.008591885441527447,
+      "grad_norm": 1.0189088582992554,
+      "learning_rate": 4.995886830373846e-07,
+      "loss": 1.5961,
+      "step": 81
+    },
+    {
+      "epoch": 0.00869795810129939,
+      "grad_norm": 0.7427169680595398,
+      "learning_rate": 4.995835649831073e-07,
+      "loss": 1.6456,
+      "step": 82
+    },
+    {
+      "epoch": 0.008804030761071334,
+      "grad_norm": 0.8337876796722412,
+      "learning_rate": 4.995784459437785e-07,
+      "loss": 1.2846,
+      "step": 83
+    },
+    {
+      "epoch": 0.008910103420843278,
+      "grad_norm": 0.6561726331710815,
+      "learning_rate": 4.995733259191137e-07,
+      "loss": 1.5769,
+      "step": 84
+    },
+    {
+      "epoch": 0.009016176080615222,
+      "grad_norm": 0.7599915862083435,
+      "learning_rate": 4.995682049088284e-07,
+      "loss": 1.7303,
+      "step": 85
+    },
+    {
+      "epoch": 0.009122248740387166,
+      "grad_norm": 0.7153452038764954,
+      "learning_rate": 4.995630829126379e-07,
+      "loss": 1.5249,
+      "step": 86
+    },
+    {
+      "epoch": 0.009228321400159108,
+      "grad_norm": 0.7663532495498657,
+      "learning_rate": 4.995579599302577e-07,
+      "loss": 1.4317,
+      "step": 87
+    },
+    {
+      "epoch": 0.009334394059931053,
+      "grad_norm": 0.8668680191040039,
+      "learning_rate": 4.995528359614027e-07,
+      "loss": 1.8046,
+      "step": 88
+    },
+    {
+      "epoch": 0.009440466719702997,
+      "grad_norm": 0.684675931930542,
+      "learning_rate": 4.99547711005788e-07,
+      "loss": 1.393,
+      "step": 89
+    },
+    {
+      "epoch": 0.00954653937947494,
+      "grad_norm": 0.8086925745010376,
+      "learning_rate": 4.995425850631287e-07,
+      "loss": 1.5898,
+      "step": 90
+    },
+    {
+      "epoch": 0.00954653937947494,
+      "eval_loss": 1.4867264032363892,
+      "eval_runtime": 68.2886,
+      "eval_samples_per_second": 1.874,
+      "eval_steps_per_second": 0.469,
+      "step": 90
+    },
+    {
+      "epoch": 0.009652612039246885,
+      "grad_norm": 0.6806846261024475,
+      "learning_rate": 4.995374581331393e-07,
+      "loss": 1.5126,
+      "step": 91
+    },
+    {
+      "epoch": 0.009758684699018827,
+      "grad_norm": 0.7875713109970093,
+      "learning_rate": 4.995323302155347e-07,
+      "loss": 1.5119,
+      "step": 92
+    },
+    {
+      "epoch": 0.009864757358790771,
+      "grad_norm": 0.6719956398010254,
+      "learning_rate": 4.995272013100296e-07,
+      "loss": 1.611,
+      "step": 93
+    },
+    {
+      "epoch": 0.009970830018562716,
+      "grad_norm": 0.7365944385528564,
+      "learning_rate": 4.995220714163384e-07,
+      "loss": 1.2394,
+      "step": 94
+    },
+    {
+      "epoch": 0.01007690267833466,
+      "grad_norm": 0.6620836853981018,
+      "learning_rate": 4.995169405341754e-07,
+      "loss": 1.6405,
+      "step": 95
+    },
+    {
+      "epoch": 0.010182975338106604,
+      "grad_norm": 0.7277278900146484,
+      "learning_rate": 4.995118086632551e-07,
+      "loss": 1.7809,
+      "step": 96
+    },
+    {
+      "epoch": 0.010289047997878546,
+      "grad_norm": 0.7061654925346375,
+      "learning_rate": 4.995066758032913e-07,
+      "loss": 1.361,
+      "step": 97
+    },
+    {
+      "epoch": 0.01039512065765049,
+      "grad_norm": 0.7945475578308105,
+      "learning_rate": 4.995015419539983e-07,
+      "loss": 1.608,
+      "step": 98
+    },
+    {
+      "epoch": 0.010501193317422435,
+      "grad_norm": 0.7080848813056946,
+      "learning_rate": 4.994964071150901e-07,
+      "loss": 1.6869,
+      "step": 99
+    },
+    {
+      "epoch": 0.010607265977194379,
+      "grad_norm": 0.6934227347373962,
+      "learning_rate": 4.994912712862803e-07,
+      "loss": 1.4698,
+      "step": 100
+    },
+    {
+      "epoch": 0.010607265977194379,
+      "eval_loss": 1.4758180379867554,
+      "eval_runtime": 67.7252,
+      "eval_samples_per_second": 1.89,
+      "eval_steps_per_second": 0.472,
+      "step": 100
+    },
+    {
+      "epoch": 0.010713338636966323,
+      "grad_norm": 0.6977412700653076,
+      "learning_rate": 4.994861344672828e-07,
+      "loss": 1.6276,
+      "step": 101
+    },
+    {
+      "epoch": 0.010819411296738265,
+      "grad_norm": 0.8750130534172058,
+      "learning_rate": 4.994809966578113e-07,
+      "loss": 1.5425,
+      "step": 102
+    },
+    {
+      "epoch": 0.01092548395651021,
+      "grad_norm": 1.6952922344207764,
+      "learning_rate": 4.99475857857579e-07,
+      "loss": 1.6159,
+      "step": 103
+    },
+    {
+      "epoch": 0.011031556616282153,
+      "grad_norm": 0.7001510858535767,
+      "learning_rate": 4.994707180662995e-07,
+      "loss": 1.5937,
+      "step": 104
+    },
+    {
+      "epoch": 0.011137629276054098,
+      "grad_norm": 0.7474836707115173,
+      "learning_rate": 4.99465577283686e-07,
+      "loss": 1.4775,
+      "step": 105
+    },
+    {
+      "epoch": 0.011243701935826042,
+      "grad_norm": 1.114769458770752,
+      "learning_rate": 4.994604355094518e-07,
+      "loss": 1.4304,
+      "step": 106
+    },
+    {
+      "epoch": 0.011349774595597984,
+      "grad_norm": 0.7222145199775696,
+      "learning_rate": 4.994552927433097e-07,
+      "loss": 1.2972,
+      "step": 107
+    },
+    {
+      "epoch": 0.011455847255369928,
+      "grad_norm": 0.7787733674049377,
+      "learning_rate": 4.994501489849728e-07,
+      "loss": 1.8544,
+      "step": 108
+    },
+    {
+      "epoch": 0.011561919915141872,
+      "grad_norm": 0.6402618288993835,
+      "learning_rate": 4.994450042341541e-07,
+      "loss": 1.5189,
+      "step": 109
+    },
+    {
+      "epoch": 0.011667992574913817,
+      "grad_norm": 0.6818183064460754,
+      "learning_rate": 4.99439858490566e-07,
+      "loss": 1.3971,
+      "step": 110
+    },
+    {
+      "epoch": 0.011667992574913817,
+      "eval_loss": 1.4671616554260254,
+      "eval_runtime": 68.8225,
+      "eval_samples_per_second": 1.86,
+      "eval_steps_per_second": 0.465,
+      "step": 110
+    },
+    {
+      "epoch": 0.011774065234685759,
+      "grad_norm": 0.8468677997589111,
+      "learning_rate": 4.994347117539214e-07,
+      "loss": 1.6674,
+      "step": 111
+    },
+    {
+      "epoch": 0.011880137894457703,
+      "grad_norm": 0.9757254123687744,
+      "learning_rate": 4.994295640239325e-07,
+      "loss": 1.5847,
+      "step": 112
+    },
+    {
+      "epoch": 0.011986210554229647,
+      "grad_norm": 0.9481499195098877,
+      "learning_rate": 4.99424415300312e-07,
+      "loss": 1.3539,
+      "step": 113
+    },
+    {
+      "epoch": 0.012092283214001591,
+      "grad_norm": 0.6789958477020264,
+      "learning_rate": 4.99419265582772e-07,
+      "loss": 1.5099,
+      "step": 114
+    },
+    {
+      "epoch": 0.012198355873773535,
+      "grad_norm": 0.6501567959785461,
+      "learning_rate": 4.994141148710247e-07,
+      "loss": 1.5429,
+      "step": 115
+    },
+    {
+      "epoch": 0.012304428533545478,
+      "grad_norm": 1.262799859046936,
+      "learning_rate": 4.994089631647824e-07,
+      "loss": 1.1193,
+      "step": 116
+    },
+    {
+      "epoch": 0.012410501193317422,
+      "grad_norm": 0.685874342918396,
+      "learning_rate": 4.994038104637567e-07,
+      "loss": 1.728,
+      "step": 117
+    },
+    {
+      "epoch": 0.012516573853089366,
+      "grad_norm": 0.7375260591506958,
+      "learning_rate": 4.993986567676594e-07,
+      "loss": 1.6958,
+      "step": 118
+    },
+    {
+      "epoch": 0.01262264651286131,
+      "grad_norm": 0.7215054631233215,
+      "learning_rate": 4.993935020762025e-07,
+      "loss": 1.3697,
+      "step": 119
+    },
+    {
+      "epoch": 0.012728719172633254,
+      "grad_norm": 0.7148920297622681,
+      "learning_rate": 4.993883463890975e-07,
+      "loss": 1.6451,
+      "step": 120
+    },
+    {
+      "epoch": 0.012728719172633254,
+      "eval_loss": 1.459830403327942,
+      "eval_runtime": 68.6763,
+      "eval_samples_per_second": 1.864,
+      "eval_steps_per_second": 0.466,
+      "step": 120
+    },
+    {
+      "epoch": 0.012834791832405197,
+      "grad_norm": 0.6565997004508972,
+      "learning_rate": 4.993831897060559e-07,
+      "loss": 1.4405,
+      "step": 121
+    },
+    {
+      "epoch": 0.012940864492177141,
+      "grad_norm": 0.7842444181442261,
+      "learning_rate": 4.993780320267891e-07,
+      "loss": 1.659,
+      "step": 122
+    },
+    {
+      "epoch": 0.013046937151949085,
+      "grad_norm": 0.7806555032730103,
+      "learning_rate": 4.993728733510084e-07,
+      "loss": 1.6056,
+      "step": 123
+    },
+    {
+      "epoch": 0.01315300981172103,
+      "grad_norm": 0.8176814317703247,
+      "learning_rate": 4.993677136784249e-07,
+      "loss": 1.6902,
+      "step": 124
+    },
+    {
+      "epoch": 0.013259082471492973,
+      "grad_norm": 0.7916125059127808,
+      "learning_rate": 4.993625530087498e-07,
+      "loss": 1.4324,
+      "step": 125
+    },
+    {
+      "epoch": 0.013365155131264916,
+      "grad_norm": 0.6581283211708069,
+      "learning_rate": 4.993573913416939e-07,
+      "loss": 1.753,
+      "step": 126
+    },
+    {
+      "epoch": 0.01347122779103686,
+      "grad_norm": 0.6454209685325623,
+      "learning_rate": 4.99352228676968e-07,
+      "loss": 1.5184,
+      "step": 127
+    },
+    {
+      "epoch": 0.013577300450808804,
+      "grad_norm": 0.746411919593811,
+      "learning_rate": 4.99347065014283e-07,
+      "loss": 1.5564,
+      "step": 128
+    },
+    {
+      "epoch": 0.013683373110580748,
+      "grad_norm": 0.710649847984314,
+      "learning_rate": 4.993419003533493e-07,
+      "loss": 1.46,
+      "step": 129
+    },
+    {
+      "epoch": 0.013789445770352692,
+      "grad_norm": 0.7483745217323303,
+      "learning_rate": 4.993367346938775e-07,
+      "loss": 1.4491,
+      "step": 130
+    },
+    {
+      "epoch": 0.013789445770352692,
+      "eval_loss": 1.4538627862930298,
+      "eval_runtime": 68.3516,
+      "eval_samples_per_second": 1.873,
+      "eval_steps_per_second": 0.468,
+      "step": 130
+    },
+    {
+      "epoch": 0.013895518430124635,
+      "grad_norm": 0.7695822715759277,
+      "learning_rate": 4.993315680355781e-07,
+      "loss": 1.5002,
+      "step": 131
+    },
+    {
+      "epoch": 0.014001591089896579,
+      "grad_norm": 0.7457824945449829,
+      "learning_rate": 4.993264003781611e-07,
+      "loss": 1.4464,
+      "step": 132
+    },
+    {
+      "epoch": 0.014107663749668523,
+      "grad_norm": 0.6966177821159363,
+      "learning_rate": 4.99321231721337e-07,
+      "loss": 1.5493,
+      "step": 133
+    },
+    {
+      "epoch": 0.014213736409440467,
+      "grad_norm": 0.7250157594680786,
+      "learning_rate": 4.993160620648156e-07,
+      "loss": 1.6838,
+      "step": 134
+    },
+    {
+      "epoch": 0.014319809069212411,
+      "grad_norm": 0.6885997653007507,
+      "learning_rate": 4.993108914083069e-07,
+      "loss": 1.3327,
+      "step": 135
+    },
+    {
+      "epoch": 0.014425881728984354,
+      "grad_norm": 0.6850383877754211,
+      "learning_rate": 4.993057197515208e-07,
+      "loss": 1.373,
+      "step": 136
+    },
+    {
+      "epoch": 0.014531954388756298,
+      "grad_norm": 0.8969720005989075,
+      "learning_rate": 4.993005470941668e-07,
+      "loss": 1.6318,
+      "step": 137
+    },
+    {
+      "epoch": 0.014638027048528242,
+      "grad_norm": 0.6809049844741821,
+      "learning_rate": 4.992953734359548e-07,
+      "loss": 1.3728,
+      "step": 138
+    },
+    {
+      "epoch": 0.014744099708300186,
+      "grad_norm": 0.6609801054000854,
+      "learning_rate": 4.992901987765941e-07,
+      "loss": 1.4302,
+      "step": 139
+    },
+    {
+      "epoch": 0.01485017236807213,
+      "grad_norm": 0.6714500188827515,
+      "learning_rate": 4.99285023115794e-07,
+      "loss": 1.3048,
+      "step": 140
+    },
+    {
+      "epoch": 0.01485017236807213,
+      "eval_loss": 1.4487165212631226,
+      "eval_runtime": 68.3673,
+      "eval_samples_per_second": 1.872,
+      "eval_steps_per_second": 0.468,
+      "step": 140
+    },
+    {
+      "epoch": 0.014956245027844072,
+      "grad_norm": 0.7616133689880371,
+      "learning_rate": 4.992798464532639e-07,
+      "loss": 1.6308,
+      "step": 141
+    },
+    {
+      "epoch": 0.015062317687616017,
+      "grad_norm": 0.7023948431015015,
+      "learning_rate": 4.99274668788713e-07,
+      "loss": 1.4595,
+      "step": 142
+    },
+    {
+      "epoch": 0.01516839034738796,
+      "grad_norm": 1.1847342252731323,
+      "learning_rate": 4.992694901218502e-07,
+      "loss": 1.892,
+      "step": 143
+    },
+    {
+      "epoch": 0.015274463007159905,
+      "grad_norm": 0.6449259519577026,
+      "learning_rate": 4.992643104523846e-07,
+      "loss": 1.4526,
+      "step": 144
+    },
+    {
+      "epoch": 0.015380535666931849,
+      "grad_norm": 0.8200846910476685,
+      "learning_rate": 4.992591297800247e-07,
+      "loss": 1.6277,
+      "step": 145
+    },
+    {
+      "epoch": 0.015486608326703791,
+      "grad_norm": 0.627193808555603,
+      "learning_rate": 4.992539481044796e-07,
+      "loss": 1.2767,
+      "step": 146
+    },
+    {
+      "epoch": 0.015592680986475736,
+      "grad_norm": 0.6754332780838013,
+      "learning_rate": 4.992487654254575e-07,
+      "loss": 1.7414,
+      "step": 147
+    },
+    {
+      "epoch": 0.01569875364624768,
+      "grad_norm": 0.8303399682044983,
+      "learning_rate": 4.992435817426671e-07,
+      "loss": 1.5313,
+      "step": 148
+    },
+    {
+      "epoch": 0.015804826306019624,
+      "grad_norm": 0.7653703093528748,
+      "learning_rate": 4.992383970558168e-07,
+      "loss": 1.6444,
+      "step": 149
+    },
+    {
+      "epoch": 0.015910898965791568,
+      "grad_norm": 0.756195068359375,
+      "learning_rate": 4.992332113646148e-07,
+      "loss": 1.5063,
+      "step": 150
+    },
+    {
+      "epoch": 0.015910898965791568,
+      "eval_loss": 1.4446443319320679,
+      "eval_runtime": 67.8607,
+      "eval_samples_per_second": 1.886,
+      "eval_steps_per_second": 0.472,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.418837751332864e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null