xzuyn committed (verified)
Commit: ae95232
Parent: 9f6e902

Upload Step 80/9427

adapter_config.json CHANGED
@@ -6,7 +6,7 @@
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": null,
- "inference_mode": false,
+ "inference_mode": true,
   "init_lora_weights": true,
   "layer_replication": null,
   "layers_pattern": null,
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d58e8b70cae140c27869d506ce1fd1f11fba30e54691e644d41e79743eee8cff
+ size 1907432232
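The adapter weights themselves live in Git LFS, so the diff only adds a pointer file carrying the blob's SHA-256 and byte size. A small self-contained check of a local download against that pointer, assuming the file was saved as `adapter_model.safetensors` (hypothetical path):

```python
# Verify a downloaded adapter_model.safetensors against the LFS pointer above.
# The local filename is an assumption; adjust it to wherever the file was saved.
import hashlib

EXPECTED_OID = "d58e8b70cae140c27869d506ce1fd1f11fba30e54691e644d41e79743eee8cff"
EXPECTED_SIZE = 1907432232  # bytes, roughly 1.9 GB

sha = hashlib.sha256()
size = 0
with open("adapter_model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        sha.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size} != {EXPECTED_SIZE}"
assert sha.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("download matches the git-lfs pointer")
```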
tokenizer_config.json CHANGED
@@ -51325,7 +51325,6 @@
   },
   "boi_token": "<start_of_image>",
   "bos_token": "<bos>",
- "chat_template": "{{- bos_token }}{% for message in messages %}{{ '<start_of_turn>' + ('model' if message['role'] == 'assistant' else message['role']) + '\n' + message['content'] | trim + '<end_of_turn>' }}{% if not loop.last %}{{ '\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '\n<start_of_turn>model\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eoi_token": "<end_of_image>",
   "eos_token": "<end_of_turn>",
trainer_state.json ADDED
@@ -0,0 +1,666 @@
+ {
+   "best_global_step": null,
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.008485812781755503,
+   "eval_steps": 10,
+   "global_step": 80,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {"epoch": 0.00010607265977194378, "grad_norm": 1.647079586982727, "learning_rate": 4.99994960800331e-07, "loss": 2.3252, "step": 1},
+     {"epoch": 0.00010607265977194378, "eval_loss": 2.388094663619995, "eval_runtime": 67.0533, "eval_samples_per_second": 1.909, "eval_steps_per_second": 0.477, "step": 1},
+     {"epoch": 0.00021214531954388756, "grad_norm": 1.7374595403671265, "learning_rate": 4.999899206382888e-07, "loss": 2.2091, "step": 2},
+     {"epoch": 0.00031821797931583137, "grad_norm": 1.4541484117507935, "learning_rate": 4.999848795135978e-07, "loss": 2.4903, "step": 3},
+     {"epoch": 0.0004242906390877751, "grad_norm": 1.3729519844055176, "learning_rate": 4.999798374259822e-07, "loss": 2.3478, "step": 4},
+     {"epoch": 0.0005303632988597189, "grad_norm": 2.059835433959961, "learning_rate": 4.999747943751658e-07, "loss": 2.2294, "step": 5},
+     {"epoch": 0.0006364359586316627, "grad_norm": 1.4303361177444458, "learning_rate": 4.999697503608729e-07, "loss": 2.3702, "step": 6},
+     {"epoch": 0.0007425086184036064, "grad_norm": 1.506787657737732, "learning_rate": 4.999647053828272e-07, "loss": 2.2794, "step": 7},
+     {"epoch": 0.0008485812781755502, "grad_norm": 1.1569323539733887, "learning_rate": 4.999596594407525e-07, "loss": 2.2048, "step": 8},
+     {"epoch": 0.0009546539379474941, "grad_norm": 1.0063825845718384, "learning_rate": 4.999546125343724e-07, "loss": 2.0392, "step": 9},
+     {"epoch": 0.0010607265977194379, "grad_norm": 1.2020900249481201, "learning_rate": 4.999495646634105e-07, "loss": 2.1371, "step": 10},
+     {"epoch": 0.0010607265977194379, "eval_loss": 1.957827091217041, "eval_runtime": 70.5284, "eval_samples_per_second": 1.815, "eval_steps_per_second": 0.454, "step": 10},
+     {"epoch": 0.0011667992574913816, "grad_norm": 0.9959272742271423, "learning_rate": 4.999445158275902e-07, "loss": 2.0418, "step": 11},
+     {"epoch": 0.0012728719172633255, "grad_norm": 0.8702138662338257, "learning_rate": 4.999394660266349e-07, "loss": 1.97, "step": 12},
+     {"epoch": 0.0013789445770352692, "grad_norm": 0.6772508025169373, "learning_rate": 4.999344152602678e-07, "loss": 1.6033, "step": 13},
+     {"epoch": 0.0014850172368072129, "grad_norm": 1.0128448009490967, "learning_rate": 4.99929363528212e-07, "loss": 2.1025, "step": 14},
+     {"epoch": 0.0015910898965791568, "grad_norm": 0.9335213899612427, "learning_rate": 4.999243108301906e-07, "loss": 1.9303, "step": 15},
+     {"epoch": 0.0016971625563511005, "grad_norm": 0.9497168660163879, "learning_rate": 4.999192571659265e-07, "loss": 1.8389, "step": 16},
+     {"epoch": 0.0018032352161230442, "grad_norm": 0.8370901346206665, "learning_rate": 4.999142025351424e-07, "loss": 1.8423, "step": 17},
+     {"epoch": 0.0019093078758949881, "grad_norm": 1.1051160097122192, "learning_rate": 4.999091469375611e-07, "loss": 1.9193, "step": 18},
+     {"epoch": 0.002015380535666932, "grad_norm": 1.2687193155288696, "learning_rate": 4.999040903729051e-07, "loss": 2.1474, "step": 19},
+     {"epoch": 0.0021214531954388757, "grad_norm": 0.9469927549362183, "learning_rate": 4.99899032840897e-07, "loss": 1.6744, "step": 20},
+     {"epoch": 0.0021214531954388757, "eval_loss": 1.7858046293258667, "eval_runtime": 68.1877, "eval_samples_per_second": 1.877, "eval_steps_per_second": 0.469, "step": 20},
+     {"epoch": 0.0022275258552108194, "grad_norm": 0.9844200015068054, "learning_rate": 4.998939743412591e-07, "loss": 1.9173, "step": 21},
+     {"epoch": 0.002333598514982763, "grad_norm": 1.1347674131393433, "learning_rate": 4.998889148737137e-07, "loss": 1.8158, "step": 22},
+     {"epoch": 0.002439671174754707, "grad_norm": 1.0652062892913818, "learning_rate": 4.99883854437983e-07, "loss": 1.9189, "step": 23},
+     {"epoch": 0.002545743834526651, "grad_norm": 1.247801661491394, "learning_rate": 4.998787930337891e-07, "loss": 1.6044, "step": 24},
+     {"epoch": 0.0026518164942985947, "grad_norm": 0.856572151184082, "learning_rate": 4.998737306608538e-07, "loss": 1.9368, "step": 25},
+     {"epoch": 0.0027578891540705384, "grad_norm": 0.8902915716171265, "learning_rate": 4.998686673188991e-07, "loss": 1.7691, "step": 26},
+     {"epoch": 0.002863961813842482, "grad_norm": 0.9449676871299744, "learning_rate": 4.998636030076468e-07, "loss": 1.7605, "step": 27},
+     {"epoch": 0.0029700344736144258, "grad_norm": 0.7392516136169434, "learning_rate": 4.998585377268183e-07, "loss": 1.7255, "step": 28},
+     {"epoch": 0.0030761071333863695, "grad_norm": 0.8038751482963562, "learning_rate": 4.998534714761353e-07, "loss": 1.7689, "step": 29},
+     {"epoch": 0.0031821797931583136, "grad_norm": 0.8447410464286804, "learning_rate": 4.998484042553191e-07, "loss": 2.0482, "step": 30},
+     {"epoch": 0.0031821797931583136, "eval_loss": 1.6817430257797241, "eval_runtime": 67.9443, "eval_samples_per_second": 1.884, "eval_steps_per_second": 0.471, "step": 30},
+     {"epoch": 0.0032882524529302573, "grad_norm": 0.8365817070007324, "learning_rate": 4.998433360640912e-07, "loss": 1.8075, "step": 31},
+     {"epoch": 0.003394325112702201, "grad_norm": 0.7525676488876343, "learning_rate": 4.998382669021727e-07, "loss": 1.6026, "step": 32},
+     {"epoch": 0.0035003977724741447, "grad_norm": 0.9553101658821106, "learning_rate": 4.998331967692847e-07, "loss": 1.743, "step": 33},
+     {"epoch": 0.0036064704322460884, "grad_norm": 0.966307520866394, "learning_rate": 4.998281256651483e-07, "loss": 1.7075, "step": 34},
+     {"epoch": 0.0037125430920180325, "grad_norm": 0.8701184988021851, "learning_rate": 4.998230535894843e-07, "loss": 1.6257, "step": 35},
+     {"epoch": 0.0038186157517899762, "grad_norm": 0.8450726866722107, "learning_rate": 4.998179805420135e-07, "loss": 1.8127, "step": 36},
+     {"epoch": 0.00392468841156192, "grad_norm": 0.8065881133079529, "learning_rate": 4.998129065224565e-07, "loss": 1.7574, "step": 37},
+     {"epoch": 0.004030761071333864, "grad_norm": 0.7674804925918579, "learning_rate": 4.99807831530534e-07, "loss": 1.6796, "step": 38},
+     {"epoch": 0.004136833731105807, "grad_norm": 0.8442147970199585, "learning_rate": 4.998027555659665e-07, "loss": 1.5551, "step": 39},
+     {"epoch": 0.0042429063908777515, "grad_norm": 0.7327367663383484, "learning_rate": 4.99797678628474e-07, "loss": 1.075, "step": 40},
+     {"epoch": 0.0042429063908777515, "eval_loss": 1.6100257635116577, "eval_runtime": 67.9157, "eval_samples_per_second": 1.885, "eval_steps_per_second": 0.471, "step": 40},
+     {"epoch": 0.004348979050649695, "grad_norm": 0.8727586269378662, "learning_rate": 4.997926007177772e-07, "loss": 1.6814, "step": 41},
+     {"epoch": 0.004455051710421639, "grad_norm": 1.0420920848846436, "learning_rate": 4.99787521833596e-07, "loss": 1.6025, "step": 42},
+     {"epoch": 0.004561124370193583, "grad_norm": 0.757056713104248, "learning_rate": 4.997824419756506e-07, "loss": 1.7756, "step": 43},
+     {"epoch": 0.004667197029965526, "grad_norm": 0.9350019693374634, "learning_rate": 4.997773611436606e-07, "loss": 1.6165, "step": 44},
+     {"epoch": 0.00477326968973747, "grad_norm": 0.7474361062049866, "learning_rate": 4.997722793373462e-07, "loss": 1.8263, "step": 45},
+     {"epoch": 0.004879342349509414, "grad_norm": 0.6356221437454224, "learning_rate": 4.997671965564268e-07, "loss": 1.7313, "step": 46},
+     {"epoch": 0.004985415009281358, "grad_norm": 0.7225522398948669, "learning_rate": 4.997621128006223e-07, "loss": 1.8336, "step": 47},
+     {"epoch": 0.005091487669053302, "grad_norm": 0.7901801466941833, "learning_rate": 4.997570280696519e-07, "loss": 1.5573, "step": 48},
+     {"epoch": 0.005197560328825245, "grad_norm": 0.7901318073272705, "learning_rate": 4.997519423632353e-07, "loss": 1.5356, "step": 49},
+     {"epoch": 0.005303632988597189, "grad_norm": 0.7905575633049011, "learning_rate": 4.997468556810914e-07, "loss": 1.5592, "step": 50},
+     {"epoch": 0.005303632988597189, "eval_loss": 1.565526008605957, "eval_runtime": 68.1883, "eval_samples_per_second": 1.877, "eval_steps_per_second": 0.469, "step": 50},
+     {"epoch": 0.005409705648369133, "grad_norm": 0.7927577495574951, "learning_rate": 4.997417680229397e-07, "loss": 1.8136, "step": 51},
+     {"epoch": 0.005515778308141077, "grad_norm": 0.6780912280082703, "learning_rate": 4.997366793884992e-07, "loss": 1.635, "step": 52},
+     {"epoch": 0.005621850967913021, "grad_norm": 0.6629224419593811, "learning_rate": 4.997315897774888e-07, "loss": 1.6814, "step": 53},
+     {"epoch": 0.005727923627684964, "grad_norm": 0.6568951606750488, "learning_rate": 4.997264991896272e-07, "loss": 1.7656, "step": 54},
+     {"epoch": 0.005833996287456908, "grad_norm": 0.9639095664024353, "learning_rate": 4.997214076246334e-07, "loss": 1.6255, "step": 55},
+     {"epoch": 0.0059400689472288515, "grad_norm": 1.7226521968841553, "learning_rate": 4.99716315082226e-07, "loss": 1.6617, "step": 56},
+     {"epoch": 0.006046141607000796, "grad_norm": 0.7537712454795837, "learning_rate": 4.997112215621234e-07, "loss": 1.7756, "step": 57},
+     {"epoch": 0.006152214266772739, "grad_norm": 0.5914387702941895, "learning_rate": 4.99706127064044e-07, "loss": 1.4576, "step": 58},
+     {"epoch": 0.006258286926544683, "grad_norm": 0.5612177848815918, "learning_rate": 4.997010315877063e-07, "loss": 1.6828, "step": 59},
+     {"epoch": 0.006364359586316627, "grad_norm": 0.6786366701126099, "learning_rate": 4.996959351328284e-07, "loss": 1.7288, "step": 60},
+     {"epoch": 0.006364359586316627, "eval_loss": 1.537359356880188, "eval_runtime": 67.7518, "eval_samples_per_second": 1.889, "eval_steps_per_second": 0.472, "step": 60},
+     {"epoch": 0.0064704322460885704, "grad_norm": 0.678156316280365, "learning_rate": 4.996908376991283e-07, "loss": 1.5268, "step": 61},
+     {"epoch": 0.006576504905860515, "grad_norm": 0.6791099905967712, "learning_rate": 4.99685739286324e-07, "loss": 1.5498, "step": 62},
+     {"epoch": 0.006682577565632458, "grad_norm": 0.702700674533844, "learning_rate": 4.996806398941335e-07, "loss": 1.6741, "step": 63},
+     {"epoch": 0.006788650225404402, "grad_norm": 0.8344963788986206, "learning_rate": 4.996755395222746e-07, "loss": 1.6074, "step": 64},
+     {"epoch": 0.006894722885176346, "grad_norm": 3.9447405338287354, "learning_rate": 4.996704381704648e-07, "loss": 1.5762, "step": 65},
+     {"epoch": 0.007000795544948289, "grad_norm": 0.870587170124054, "learning_rate": 4.996653358384218e-07, "loss": 1.6515, "step": 66},
+     {"epoch": 0.0071068682047202335, "grad_norm": 0.5930059552192688, "learning_rate": 4.996602325258629e-07, "loss": 1.7334, "step": 67},
+     {"epoch": 0.007212940864492177, "grad_norm": 1.2227728366851807, "learning_rate": 4.996551282325055e-07, "loss": 1.3723, "step": 68},
+     {"epoch": 0.007319013524264121, "grad_norm": 1.1175763607025146, "learning_rate": 4.996500229580668e-07, "loss": 1.4476, "step": 69},
+     {"epoch": 0.007425086184036065, "grad_norm": 0.591778039932251, "learning_rate": 4.99644916702264e-07, "loss": 1.2612, "step": 70},
+     {"epoch": 0.007425086184036065, "eval_loss": 1.5133857727050781, "eval_runtime": 68.2036, "eval_samples_per_second": 1.877, "eval_steps_per_second": 0.469, "step": 70},
+     {"epoch": 0.007531158843808008, "grad_norm": 0.62836754322052, "learning_rate": 4.99639809464814e-07, "loss": 1.4586, "step": 71},
+     {"epoch": 0.0076372315035799524, "grad_norm": 0.779591977596283, "learning_rate": 4.996347012454338e-07, "loss": 1.4937, "step": 72},
+     {"epoch": 0.007743304163351896, "grad_norm": 0.7092106342315674, "learning_rate": 4.9962959204384e-07, "loss": 1.74, "step": 73},
+     {"epoch": 0.00784937682312384, "grad_norm": 0.5990781188011169, "learning_rate": 4.996244818597496e-07, "loss": 1.3733, "step": 74},
+     {"epoch": 0.007955449482895784, "grad_norm": 0.7790846824645996, "learning_rate": 4.996193706928789e-07, "loss": 1.5198, "step": 75},
+     {"epoch": 0.008061522142667728, "grad_norm": 0.746094286441803, "learning_rate": 4.996142585429444e-07, "loss": 1.3825, "step": 76},
+     {"epoch": 0.00816759480243967, "grad_norm": 1.385066032409668, "learning_rate": 4.996091454096626e-07, "loss": 1.473, "step": 77},
+     {"epoch": 0.008273667462211615, "grad_norm": 0.6657389402389526, "learning_rate": 4.996040312927497e-07, "loss": 1.6375, "step": 78},
+     {"epoch": 0.008379740121983559, "grad_norm": 1.1722540855407715, "learning_rate": 4.995989161919216e-07, "loss": 1.5355, "step": 79},
+     {"epoch": 0.008485812781755503, "grad_norm": 0.7716183066368103, "learning_rate": 4.995938001068947e-07, "loss": 1.6588, "step": 80},
+     {"epoch": 0.008485812781755503, "eval_loss": 1.4981476068496704, "eval_runtime": 67.8094, "eval_samples_per_second": 1.888, "eval_steps_per_second": 0.472, "step": 80}
+   ],
+   "logging_steps": 1,
+   "max_steps": 9427,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 1,
+   "save_steps": 10,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1.2900468007108608e+17,
+   "train_batch_size": 4,
+   "trial_name": null,
+   "trial_params": null
+ }
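The added trainer_state.json is the Hugging Face Trainer checkpoint state for this upload: 80 of 9427 planned steps, logging every step, with evaluation and saving every 10 steps; train loss falls from about 2.33 to 1.66 and eval loss from 2.39 to 1.50 over these 80 steps. A short sketch (assuming the file is available locally at a hypothetical path) that separates the train and eval records in `log_history`:

```python
# Summarise the training curve stored in trainer_state.json.
# "trainer_state.json" is assumed to sit in the current directory (hypothetical path).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train = [e for e in state["log_history"] if "loss" in e]        # per-step training logs
evals = [e for e in state["log_history"] if "eval_loss" in e]   # every eval_steps=10

print(f"global_step={state['global_step']} / max_steps={state['max_steps']}")
print(f"first/last train loss: {train[0]['loss']:.4f} -> {train[-1]['loss']:.4f}")
for e in evals:
    print(f"step {e['step']:>3}: eval_loss={e['eval_loss']:.4f}")
```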