sedrickkeh commited on
Commit
3bf5c3e
·
verified ·
1 Parent(s): 859c8a5

Training in progress, epoch 1

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65dfae7d6cd56dc695bb5a15750239eb2d6f592c9d17beab8c499b82aeee37dc
3
  size 4949453792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:962a692fca7dd9f825121148d96963130579be8df7ba20135886c390311b4402
3
  size 4949453792
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90f16865e1e58c7ee2701e3c94950addeb4faa0ed9e6283fa38f23b480448927
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1e6421f91e7659fea7cf9b1dc1469c66f5eefd3e1c9de3121d2ae01e58b5e33
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0709bfc4202829070211614a950e58daa004488796bdf2be00fd7879e7342e00
3
  size 4546807800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20271ce4945ef4dedddca69a9849f35afe7625c67e85fe139688627f12a06264
3
  size 4546807800
trainer_log.jsonl CHANGED
@@ -66,3 +66,70 @@
66
  {"current_steps": 660, "total_steps": 2001, "loss": 0.4596, "lr": 5e-06, "epoch": 0.9893198426082068, "percentage": 32.98, "elapsed_time": "5:46:07", "remaining_time": "11:43:16"}
67
  {"current_steps": 667, "total_steps": 2001, "eval_loss": 0.05758751183748245, "epoch": 0.9998126288176878, "percentage": 33.33, "elapsed_time": "5:57:26", "remaining_time": "11:54:53"}
68
  {"current_steps": 670, "total_steps": 2001, "loss": 0.4348, "lr": 5e-06, "epoch": 1.0043095371931796, "percentage": 33.48, "elapsed_time": "6:00:02", "remaining_time": "11:55:14"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  {"current_steps": 660, "total_steps": 2001, "loss": 0.4596, "lr": 5e-06, "epoch": 0.9893198426082068, "percentage": 32.98, "elapsed_time": "5:46:07", "remaining_time": "11:43:16"}
67
  {"current_steps": 667, "total_steps": 2001, "eval_loss": 0.05758751183748245, "epoch": 0.9998126288176878, "percentage": 33.33, "elapsed_time": "5:57:26", "remaining_time": "11:54:53"}
68
  {"current_steps": 670, "total_steps": 2001, "loss": 0.4348, "lr": 5e-06, "epoch": 1.0043095371931796, "percentage": 33.48, "elapsed_time": "6:00:02", "remaining_time": "11:55:14"}
69
+ {"current_steps": 680, "total_steps": 2001, "loss": 0.364, "lr": 5e-06, "epoch": 1.0192992317781526, "percentage": 33.98, "elapsed_time": "6:05:16", "remaining_time": "11:49:35"}
70
+ {"current_steps": 690, "total_steps": 2001, "loss": 0.3558, "lr": 5e-06, "epoch": 1.0342889263631254, "percentage": 34.48, "elapsed_time": "6:10:30", "remaining_time": "11:43:58"}
71
+ {"current_steps": 700, "total_steps": 2001, "loss": 0.355, "lr": 5e-06, "epoch": 1.0492786209480982, "percentage": 34.98, "elapsed_time": "6:15:47", "remaining_time": "11:38:25"}
72
+ {"current_steps": 710, "total_steps": 2001, "loss": 0.3519, "lr": 5e-06, "epoch": 1.064268315533071, "percentage": 35.48, "elapsed_time": "6:21:01", "remaining_time": "11:32:49"}
73
+ {"current_steps": 720, "total_steps": 2001, "loss": 0.3545, "lr": 5e-06, "epoch": 1.0792580101180438, "percentage": 35.98, "elapsed_time": "6:26:17", "remaining_time": "11:27:17"}
74
+ {"current_steps": 730, "total_steps": 2001, "loss": 0.3583, "lr": 5e-06, "epoch": 1.0942477047030166, "percentage": 36.48, "elapsed_time": "6:31:33", "remaining_time": "11:21:43"}
75
+ {"current_steps": 740, "total_steps": 2001, "loss": 0.3558, "lr": 5e-06, "epoch": 1.1092373992879896, "percentage": 36.98, "elapsed_time": "6:36:49", "remaining_time": "11:16:12"}
76
+ {"current_steps": 750, "total_steps": 2001, "loss": 0.3563, "lr": 5e-06, "epoch": 1.1242270938729624, "percentage": 37.48, "elapsed_time": "6:42:04", "remaining_time": "11:10:40"}
77
+ {"current_steps": 760, "total_steps": 2001, "loss": 0.358, "lr": 5e-06, "epoch": 1.1392167884579352, "percentage": 37.98, "elapsed_time": "6:47:18", "remaining_time": "11:05:05"}
78
+ {"current_steps": 770, "total_steps": 2001, "loss": 0.3574, "lr": 5e-06, "epoch": 1.154206483042908, "percentage": 38.48, "elapsed_time": "6:52:31", "remaining_time": "10:59:30"}
79
+ {"current_steps": 780, "total_steps": 2001, "loss": 0.3584, "lr": 5e-06, "epoch": 1.1691961776278808, "percentage": 38.98, "elapsed_time": "6:57:47", "remaining_time": "10:53:59"}
80
+ {"current_steps": 790, "total_steps": 2001, "loss": 0.3564, "lr": 5e-06, "epoch": 1.1841858722128538, "percentage": 39.48, "elapsed_time": "7:03:01", "remaining_time": "10:48:27"}
81
+ {"current_steps": 800, "total_steps": 2001, "loss": 0.3527, "lr": 5e-06, "epoch": 1.1991755667978266, "percentage": 39.98, "elapsed_time": "7:08:16", "remaining_time": "10:42:56"}
82
+ {"current_steps": 810, "total_steps": 2001, "loss": 0.3566, "lr": 5e-06, "epoch": 1.2141652613827993, "percentage": 40.48, "elapsed_time": "7:13:31", "remaining_time": "10:37:26"}
83
+ {"current_steps": 820, "total_steps": 2001, "loss": 0.3601, "lr": 5e-06, "epoch": 1.2291549559677721, "percentage": 40.98, "elapsed_time": "7:18:44", "remaining_time": "10:31:53"}
84
+ {"current_steps": 830, "total_steps": 2001, "loss": 0.3583, "lr": 5e-06, "epoch": 1.244144650552745, "percentage": 41.48, "elapsed_time": "7:23:57", "remaining_time": "10:26:20"}
85
+ {"current_steps": 840, "total_steps": 2001, "loss": 0.3542, "lr": 5e-06, "epoch": 1.259134345137718, "percentage": 41.98, "elapsed_time": "7:29:11", "remaining_time": "10:20:51"}
86
+ {"current_steps": 850, "total_steps": 2001, "loss": 0.3575, "lr": 5e-06, "epoch": 1.2741240397226907, "percentage": 42.48, "elapsed_time": "7:34:27", "remaining_time": "10:15:23"}
87
+ {"current_steps": 860, "total_steps": 2001, "loss": 0.3586, "lr": 5e-06, "epoch": 1.2891137343076635, "percentage": 42.98, "elapsed_time": "7:39:44", "remaining_time": "10:09:57"}
88
+ {"current_steps": 870, "total_steps": 2001, "loss": 0.3622, "lr": 5e-06, "epoch": 1.3041034288926363, "percentage": 43.48, "elapsed_time": "7:45:00", "remaining_time": "10:04:30"}
89
+ {"current_steps": 880, "total_steps": 2001, "loss": 0.3588, "lr": 5e-06, "epoch": 1.319093123477609, "percentage": 43.98, "elapsed_time": "7:50:17", "remaining_time": "9:59:04"}
90
+ {"current_steps": 890, "total_steps": 2001, "loss": 0.3634, "lr": 5e-06, "epoch": 1.334082818062582, "percentage": 44.48, "elapsed_time": "7:55:32", "remaining_time": "9:53:37"}
91
+ {"current_steps": 900, "total_steps": 2001, "loss": 0.3539, "lr": 5e-06, "epoch": 1.3490725126475547, "percentage": 44.98, "elapsed_time": "8:00:44", "remaining_time": "9:48:06"}
92
+ {"current_steps": 910, "total_steps": 2001, "loss": 0.3632, "lr": 5e-06, "epoch": 1.3640622072325277, "percentage": 45.48, "elapsed_time": "8:06:00", "remaining_time": "9:42:40"}
93
+ {"current_steps": 920, "total_steps": 2001, "loss": 0.3625, "lr": 5e-06, "epoch": 1.3790519018175005, "percentage": 45.98, "elapsed_time": "8:11:16", "remaining_time": "9:37:15"}
94
+ {"current_steps": 930, "total_steps": 2001, "loss": 0.3595, "lr": 5e-06, "epoch": 1.3940415964024733, "percentage": 46.48, "elapsed_time": "8:16:32", "remaining_time": "9:31:49"}
95
+ {"current_steps": 940, "total_steps": 2001, "loss": 0.3597, "lr": 5e-06, "epoch": 1.409031290987446, "percentage": 46.98, "elapsed_time": "8:21:50", "remaining_time": "9:26:26"}
96
+ {"current_steps": 950, "total_steps": 2001, "loss": 0.3611, "lr": 5e-06, "epoch": 1.4240209855724189, "percentage": 47.48, "elapsed_time": "8:27:07", "remaining_time": "9:21:02"}
97
+ {"current_steps": 960, "total_steps": 2001, "loss": 0.3603, "lr": 5e-06, "epoch": 1.4390106801573919, "percentage": 47.98, "elapsed_time": "8:32:25", "remaining_time": "9:15:39"}
98
+ {"current_steps": 970, "total_steps": 2001, "loss": 0.3631, "lr": 5e-06, "epoch": 1.4540003747423647, "percentage": 48.48, "elapsed_time": "8:37:42", "remaining_time": "9:10:15"}
99
+ {"current_steps": 980, "total_steps": 2001, "loss": 0.3609, "lr": 5e-06, "epoch": 1.4689900693273374, "percentage": 48.98, "elapsed_time": "8:42:59", "remaining_time": "9:04:51"}
100
+ {"current_steps": 990, "total_steps": 2001, "loss": 0.3625, "lr": 5e-06, "epoch": 1.4839797639123102, "percentage": 49.48, "elapsed_time": "8:48:15", "remaining_time": "8:59:27"}
101
+ {"current_steps": 1000, "total_steps": 2001, "loss": 0.3618, "lr": 5e-06, "epoch": 1.498969458497283, "percentage": 49.98, "elapsed_time": "8:53:32", "remaining_time": "8:54:04"}
102
+ {"current_steps": 1010, "total_steps": 2001, "loss": 0.3612, "lr": 5e-06, "epoch": 1.513959153082256, "percentage": 50.47, "elapsed_time": "8:58:48", "remaining_time": "8:48:40"}
103
+ {"current_steps": 1020, "total_steps": 2001, "loss": 0.3628, "lr": 5e-06, "epoch": 1.5289488476672288, "percentage": 50.97, "elapsed_time": "9:04:05", "remaining_time": "8:43:17"}
104
+ {"current_steps": 1030, "total_steps": 2001, "loss": 0.3626, "lr": 5e-06, "epoch": 1.5439385422522016, "percentage": 51.47, "elapsed_time": "9:09:21", "remaining_time": "8:37:53"}
105
+ {"current_steps": 1040, "total_steps": 2001, "loss": 0.3618, "lr": 5e-06, "epoch": 1.5589282368371744, "percentage": 51.97, "elapsed_time": "9:14:37", "remaining_time": "8:32:29"}
106
+ {"current_steps": 1050, "total_steps": 2001, "loss": 0.3615, "lr": 5e-06, "epoch": 1.5739179314221472, "percentage": 52.47, "elapsed_time": "9:19:52", "remaining_time": "8:27:05"}
107
+ {"current_steps": 1060, "total_steps": 2001, "loss": 0.3618, "lr": 5e-06, "epoch": 1.5889076260071202, "percentage": 52.97, "elapsed_time": "9:25:08", "remaining_time": "8:21:41"}
108
+ {"current_steps": 1070, "total_steps": 2001, "loss": 0.365, "lr": 5e-06, "epoch": 1.6038973205920928, "percentage": 53.47, "elapsed_time": "9:30:26", "remaining_time": "8:16:19"}
109
+ {"current_steps": 1080, "total_steps": 2001, "loss": 0.3632, "lr": 5e-06, "epoch": 1.6188870151770658, "percentage": 53.97, "elapsed_time": "9:35:43", "remaining_time": "8:10:57"}
110
+ {"current_steps": 1090, "total_steps": 2001, "loss": 0.3643, "lr": 5e-06, "epoch": 1.6338767097620386, "percentage": 54.47, "elapsed_time": "9:40:59", "remaining_time": "8:05:35"}
111
+ {"current_steps": 1100, "total_steps": 2001, "loss": 0.3649, "lr": 5e-06, "epoch": 1.6488664043470114, "percentage": 54.97, "elapsed_time": "9:46:16", "remaining_time": "8:00:12"}
112
+ {"current_steps": 1110, "total_steps": 2001, "loss": 0.3612, "lr": 5e-06, "epoch": 1.6638560989319844, "percentage": 55.47, "elapsed_time": "9:51:31", "remaining_time": "7:54:49"}
113
+ {"current_steps": 1120, "total_steps": 2001, "loss": 0.3614, "lr": 5e-06, "epoch": 1.678845793516957, "percentage": 55.97, "elapsed_time": "9:56:48", "remaining_time": "7:49:27"}
114
+ {"current_steps": 1130, "total_steps": 2001, "loss": 0.3601, "lr": 5e-06, "epoch": 1.69383548810193, "percentage": 56.47, "elapsed_time": "10:02:04", "remaining_time": "7:44:04"}
115
+ {"current_steps": 1140, "total_steps": 2001, "loss": 0.3627, "lr": 5e-06, "epoch": 1.7088251826869028, "percentage": 56.97, "elapsed_time": "10:07:21", "remaining_time": "7:38:42"}
116
+ {"current_steps": 1150, "total_steps": 2001, "loss": 0.3609, "lr": 5e-06, "epoch": 1.7238148772718755, "percentage": 57.47, "elapsed_time": "10:12:38", "remaining_time": "7:33:20"}
117
+ {"current_steps": 1160, "total_steps": 2001, "loss": 0.3601, "lr": 5e-06, "epoch": 1.7388045718568486, "percentage": 57.97, "elapsed_time": "10:17:54", "remaining_time": "7:27:58"}
118
+ {"current_steps": 1170, "total_steps": 2001, "loss": 0.3596, "lr": 5e-06, "epoch": 1.7537942664418211, "percentage": 58.47, "elapsed_time": "10:23:10", "remaining_time": "7:22:36"}
119
+ {"current_steps": 1180, "total_steps": 2001, "loss": 0.3614, "lr": 5e-06, "epoch": 1.7687839610267941, "percentage": 58.97, "elapsed_time": "10:28:27", "remaining_time": "7:17:15"}
120
+ {"current_steps": 1190, "total_steps": 2001, "loss": 0.3629, "lr": 5e-06, "epoch": 1.783773655611767, "percentage": 59.47, "elapsed_time": "10:33:42", "remaining_time": "7:11:52"}
121
+ {"current_steps": 1200, "total_steps": 2001, "loss": 0.364, "lr": 5e-06, "epoch": 1.7987633501967397, "percentage": 59.97, "elapsed_time": "10:38:58", "remaining_time": "7:06:30"}
122
+ {"current_steps": 1210, "total_steps": 2001, "loss": 0.3623, "lr": 5e-06, "epoch": 1.8137530447817127, "percentage": 60.47, "elapsed_time": "10:44:15", "remaining_time": "7:01:09"}
123
+ {"current_steps": 1220, "total_steps": 2001, "loss": 0.3629, "lr": 5e-06, "epoch": 1.8287427393666853, "percentage": 60.97, "elapsed_time": "10:49:30", "remaining_time": "6:55:47"}
124
+ {"current_steps": 1230, "total_steps": 2001, "loss": 0.3639, "lr": 5e-06, "epoch": 1.8437324339516583, "percentage": 61.47, "elapsed_time": "10:54:46", "remaining_time": "6:50:26"}
125
+ {"current_steps": 1240, "total_steps": 2001, "loss": 0.3614, "lr": 5e-06, "epoch": 1.858722128536631, "percentage": 61.97, "elapsed_time": "11:00:03", "remaining_time": "6:45:05"}
126
+ {"current_steps": 1250, "total_steps": 2001, "loss": 0.3628, "lr": 5e-06, "epoch": 1.873711823121604, "percentage": 62.47, "elapsed_time": "11:05:19", "remaining_time": "6:39:43"}
127
+ {"current_steps": 1260, "total_steps": 2001, "loss": 0.3624, "lr": 5e-06, "epoch": 1.8887015177065767, "percentage": 62.97, "elapsed_time": "11:10:34", "remaining_time": "6:34:21"}
128
+ {"current_steps": 1270, "total_steps": 2001, "loss": 0.362, "lr": 5e-06, "epoch": 1.9036912122915495, "percentage": 63.47, "elapsed_time": "11:15:50", "remaining_time": "6:29:00"}
129
+ {"current_steps": 1280, "total_steps": 2001, "loss": 0.363, "lr": 5e-06, "epoch": 1.9186809068765225, "percentage": 63.97, "elapsed_time": "11:21:06", "remaining_time": "6:23:39"}
130
+ {"current_steps": 1290, "total_steps": 2001, "loss": 0.3587, "lr": 5e-06, "epoch": 1.9336706014614953, "percentage": 64.47, "elapsed_time": "11:26:21", "remaining_time": "6:18:17"}
131
+ {"current_steps": 1300, "total_steps": 2001, "loss": 0.3608, "lr": 5e-06, "epoch": 1.948660296046468, "percentage": 64.97, "elapsed_time": "11:31:37", "remaining_time": "6:12:56"}
132
+ {"current_steps": 1310, "total_steps": 2001, "loss": 0.3621, "lr": 5e-06, "epoch": 1.9636499906314409, "percentage": 65.47, "elapsed_time": "11:36:52", "remaining_time": "6:07:35"}
133
+ {"current_steps": 1320, "total_steps": 2001, "loss": 0.3632, "lr": 5e-06, "epoch": 1.9786396852164136, "percentage": 65.97, "elapsed_time": "11:42:08", "remaining_time": "6:02:14"}
134
+ {"current_steps": 1330, "total_steps": 2001, "loss": 0.3617, "lr": 5e-06, "epoch": 1.9936293798013867, "percentage": 66.47, "elapsed_time": "11:47:24", "remaining_time": "5:56:53"}
135
+ {"current_steps": 1334, "total_steps": 2001, "eval_loss": 0.056735917925834656, "epoch": 1.9996252576353757, "percentage": 66.67, "elapsed_time": "11:57:08", "remaining_time": "5:58:34"}