sedrickkeh commited on
Commit
89b6420
·
verified ·
1 Parent(s): 3bf5c3e

Training in progress, epoch 2

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:962a692fca7dd9f825121148d96963130579be8df7ba20135886c390311b4402
3
  size 4949453792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b96fe72d1b7c1debfdbff65766ffc06508128037787d434068e48e513ac74afc
3
  size 4949453792
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1e6421f91e7659fea7cf9b1dc1469c66f5eefd3e1c9de3121d2ae01e58b5e33
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd05b1edb0d999ce359f981c82f3c49eac3af57d9dd14e58b1a7497b7df6044
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20271ce4945ef4dedddca69a9849f35afe7625c67e85fe139688627f12a06264
3
  size 4546807800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d56635848b56b60911ccb4057d5f38546274e0ac01a5aeeb7c5998c6d047a459
3
  size 4546807800
trainer_log.jsonl CHANGED
@@ -133,3 +133,70 @@
133
  {"current_steps": 1320, "total_steps": 2001, "loss": 0.3632, "lr": 5e-06, "epoch": 1.9786396852164136, "percentage": 65.97, "elapsed_time": "11:42:08", "remaining_time": "6:02:14"}
134
  {"current_steps": 1330, "total_steps": 2001, "loss": 0.3617, "lr": 5e-06, "epoch": 1.9936293798013867, "percentage": 66.47, "elapsed_time": "11:47:24", "remaining_time": "5:56:53"}
135
  {"current_steps": 1334, "total_steps": 2001, "eval_loss": 0.056735917925834656, "epoch": 1.9996252576353757, "percentage": 66.67, "elapsed_time": "11:57:08", "remaining_time": "5:58:34"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  {"current_steps": 1320, "total_steps": 2001, "loss": 0.3632, "lr": 5e-06, "epoch": 1.9786396852164136, "percentage": 65.97, "elapsed_time": "11:42:08", "remaining_time": "6:02:14"}
134
  {"current_steps": 1330, "total_steps": 2001, "loss": 0.3617, "lr": 5e-06, "epoch": 1.9936293798013867, "percentage": 66.47, "elapsed_time": "11:47:24", "remaining_time": "5:56:53"}
135
  {"current_steps": 1334, "total_steps": 2001, "eval_loss": 0.056735917925834656, "epoch": 1.9996252576353757, "percentage": 66.67, "elapsed_time": "11:57:08", "remaining_time": "5:58:34"}
136
+ {"current_steps": 1340, "total_steps": 2001, "loss": 0.3009, "lr": 5e-06, "epoch": 2.0086190743863592, "percentage": 66.97, "elapsed_time": "12:01:19", "remaining_time": "5:55:49"}
137
+ {"current_steps": 1350, "total_steps": 2001, "loss": 0.2445, "lr": 5e-06, "epoch": 2.0236087689713322, "percentage": 67.47, "elapsed_time": "12:06:34", "remaining_time": "5:50:22"}
138
+ {"current_steps": 1360, "total_steps": 2001, "loss": 0.2404, "lr": 5e-06, "epoch": 2.0385984635563053, "percentage": 67.97, "elapsed_time": "12:11:49", "remaining_time": "5:44:55"}
139
+ {"current_steps": 1370, "total_steps": 2001, "loss": 0.2396, "lr": 5e-06, "epoch": 2.053588158141278, "percentage": 68.47, "elapsed_time": "12:17:05", "remaining_time": "5:39:29"}
140
+ {"current_steps": 1380, "total_steps": 2001, "loss": 0.2399, "lr": 5e-06, "epoch": 2.068577852726251, "percentage": 68.97, "elapsed_time": "12:22:19", "remaining_time": "5:34:02"}
141
+ {"current_steps": 1390, "total_steps": 2001, "loss": 0.2398, "lr": 5e-06, "epoch": 2.0835675473112234, "percentage": 69.47, "elapsed_time": "12:27:34", "remaining_time": "5:28:36"}
142
+ {"current_steps": 1400, "total_steps": 2001, "loss": 0.2404, "lr": 5e-06, "epoch": 2.0985572418961964, "percentage": 69.97, "elapsed_time": "12:32:47", "remaining_time": "5:23:09"}
143
+ {"current_steps": 1410, "total_steps": 2001, "loss": 0.2406, "lr": 5e-06, "epoch": 2.1135469364811694, "percentage": 70.46, "elapsed_time": "12:38:01", "remaining_time": "5:17:43"}
144
+ {"current_steps": 1420, "total_steps": 2001, "loss": 0.2445, "lr": 5e-06, "epoch": 2.128536631066142, "percentage": 70.96, "elapsed_time": "12:43:16", "remaining_time": "5:12:17"}
145
+ {"current_steps": 1430, "total_steps": 2001, "loss": 0.2428, "lr": 5e-06, "epoch": 2.143526325651115, "percentage": 71.46, "elapsed_time": "12:48:32", "remaining_time": "5:06:52"}
146
+ {"current_steps": 1440, "total_steps": 2001, "loss": 0.2443, "lr": 5e-06, "epoch": 2.1585160202360876, "percentage": 71.96, "elapsed_time": "12:53:48", "remaining_time": "5:01:27"}
147
+ {"current_steps": 1450, "total_steps": 2001, "loss": 0.2431, "lr": 5e-06, "epoch": 2.1735057148210606, "percentage": 72.46, "elapsed_time": "12:59:05", "remaining_time": "4:56:03"}
148
+ {"current_steps": 1460, "total_steps": 2001, "loss": 0.2448, "lr": 5e-06, "epoch": 2.188495409406033, "percentage": 72.96, "elapsed_time": "13:04:18", "remaining_time": "4:50:37"}
149
+ {"current_steps": 1470, "total_steps": 2001, "loss": 0.247, "lr": 5e-06, "epoch": 2.203485103991006, "percentage": 73.46, "elapsed_time": "13:09:32", "remaining_time": "4:45:12"}
150
+ {"current_steps": 1480, "total_steps": 2001, "loss": 0.2491, "lr": 5e-06, "epoch": 2.218474798575979, "percentage": 73.96, "elapsed_time": "13:14:46", "remaining_time": "4:39:46"}
151
+ {"current_steps": 1490, "total_steps": 2001, "loss": 0.2485, "lr": 5e-06, "epoch": 2.2334644931609517, "percentage": 74.46, "elapsed_time": "13:20:02", "remaining_time": "4:34:22"}
152
+ {"current_steps": 1500, "total_steps": 2001, "loss": 0.2463, "lr": 5e-06, "epoch": 2.2484541877459248, "percentage": 74.96, "elapsed_time": "13:25:16", "remaining_time": "4:28:57"}
153
+ {"current_steps": 1510, "total_steps": 2001, "loss": 0.2486, "lr": 5e-06, "epoch": 2.2634438823308973, "percentage": 75.46, "elapsed_time": "13:30:31", "remaining_time": "4:23:33"}
154
+ {"current_steps": 1520, "total_steps": 2001, "loss": 0.2497, "lr": 5e-06, "epoch": 2.2784335769158703, "percentage": 75.96, "elapsed_time": "13:35:45", "remaining_time": "4:18:08"}
155
+ {"current_steps": 1530, "total_steps": 2001, "loss": 0.2492, "lr": 5e-06, "epoch": 2.2934232715008434, "percentage": 76.46, "elapsed_time": "13:41:00", "remaining_time": "4:12:44"}
156
+ {"current_steps": 1540, "total_steps": 2001, "loss": 0.2503, "lr": 5e-06, "epoch": 2.308412966085816, "percentage": 76.96, "elapsed_time": "13:46:18", "remaining_time": "4:07:21"}
157
+ {"current_steps": 1550, "total_steps": 2001, "loss": 0.2505, "lr": 5e-06, "epoch": 2.323402660670789, "percentage": 77.46, "elapsed_time": "13:51:34", "remaining_time": "4:01:57"}
158
+ {"current_steps": 1560, "total_steps": 2001, "loss": 0.2518, "lr": 5e-06, "epoch": 2.3383923552557615, "percentage": 77.96, "elapsed_time": "13:56:51", "remaining_time": "3:56:34"}
159
+ {"current_steps": 1570, "total_steps": 2001, "loss": 0.2504, "lr": 5e-06, "epoch": 2.3533820498407345, "percentage": 78.46, "elapsed_time": "14:02:06", "remaining_time": "3:51:10"}
160
+ {"current_steps": 1580, "total_steps": 2001, "loss": 0.2511, "lr": 5e-06, "epoch": 2.3683717444257075, "percentage": 78.96, "elapsed_time": "14:07:23", "remaining_time": "3:45:47"}
161
+ {"current_steps": 1590, "total_steps": 2001, "loss": 0.2517, "lr": 5e-06, "epoch": 2.38336143901068, "percentage": 79.46, "elapsed_time": "14:12:38", "remaining_time": "3:40:24"}
162
+ {"current_steps": 1600, "total_steps": 2001, "loss": 0.2519, "lr": 5e-06, "epoch": 2.398351133595653, "percentage": 79.96, "elapsed_time": "14:17:53", "remaining_time": "3:35:00"}
163
+ {"current_steps": 1610, "total_steps": 2001, "loss": 0.2537, "lr": 5e-06, "epoch": 2.4133408281806257, "percentage": 80.46, "elapsed_time": "14:23:10", "remaining_time": "3:29:37"}
164
+ {"current_steps": 1620, "total_steps": 2001, "loss": 0.2525, "lr": 5e-06, "epoch": 2.4283305227655987, "percentage": 80.96, "elapsed_time": "14:28:23", "remaining_time": "3:24:13"}
165
+ {"current_steps": 1630, "total_steps": 2001, "loss": 0.2519, "lr": 5e-06, "epoch": 2.4433202173505713, "percentage": 81.46, "elapsed_time": "14:33:39", "remaining_time": "3:18:51"}
166
+ {"current_steps": 1640, "total_steps": 2001, "loss": 0.2542, "lr": 5e-06, "epoch": 2.4583099119355443, "percentage": 81.96, "elapsed_time": "14:38:55", "remaining_time": "3:13:28"}
167
+ {"current_steps": 1650, "total_steps": 2001, "loss": 0.2523, "lr": 5e-06, "epoch": 2.4732996065205173, "percentage": 82.46, "elapsed_time": "14:44:11", "remaining_time": "3:08:05"}
168
+ {"current_steps": 1660, "total_steps": 2001, "loss": 0.2525, "lr": 5e-06, "epoch": 2.48828930110549, "percentage": 82.96, "elapsed_time": "14:49:27", "remaining_time": "3:02:42"}
169
+ {"current_steps": 1670, "total_steps": 2001, "loss": 0.2512, "lr": 5e-06, "epoch": 2.503278995690463, "percentage": 83.46, "elapsed_time": "14:54:43", "remaining_time": "2:57:20"}
170
+ {"current_steps": 1680, "total_steps": 2001, "loss": 0.2543, "lr": 5e-06, "epoch": 2.518268690275436, "percentage": 83.96, "elapsed_time": "14:59:59", "remaining_time": "2:51:57"}
171
+ {"current_steps": 1690, "total_steps": 2001, "loss": 0.2542, "lr": 5e-06, "epoch": 2.5332583848604084, "percentage": 84.46, "elapsed_time": "15:05:15", "remaining_time": "2:46:35"}
172
+ {"current_steps": 1700, "total_steps": 2001, "loss": 0.2543, "lr": 5e-06, "epoch": 2.5482480794453815, "percentage": 84.96, "elapsed_time": "15:10:30", "remaining_time": "2:41:12"}
173
+ {"current_steps": 1710, "total_steps": 2001, "loss": 0.2547, "lr": 5e-06, "epoch": 2.563237774030354, "percentage": 85.46, "elapsed_time": "15:15:45", "remaining_time": "2:35:50"}
174
+ {"current_steps": 1720, "total_steps": 2001, "loss": 0.2577, "lr": 5e-06, "epoch": 2.578227468615327, "percentage": 85.96, "elapsed_time": "15:21:01", "remaining_time": "2:30:28"}
175
+ {"current_steps": 1730, "total_steps": 2001, "loss": 0.2585, "lr": 5e-06, "epoch": 2.5932171632002996, "percentage": 86.46, "elapsed_time": "15:26:15", "remaining_time": "2:25:05"}
176
+ {"current_steps": 1740, "total_steps": 2001, "loss": 0.2566, "lr": 5e-06, "epoch": 2.6082068577852726, "percentage": 86.96, "elapsed_time": "15:31:32", "remaining_time": "2:19:43"}
177
+ {"current_steps": 1750, "total_steps": 2001, "loss": 0.2577, "lr": 5e-06, "epoch": 2.6231965523702456, "percentage": 87.46, "elapsed_time": "15:36:48", "remaining_time": "2:14:21"}
178
+ {"current_steps": 1760, "total_steps": 2001, "loss": 0.2584, "lr": 5e-06, "epoch": 2.638186246955218, "percentage": 87.96, "elapsed_time": "15:42:02", "remaining_time": "2:08:59"}
179
+ {"current_steps": 1770, "total_steps": 2001, "loss": 0.258, "lr": 5e-06, "epoch": 2.653175941540191, "percentage": 88.46, "elapsed_time": "15:47:17", "remaining_time": "2:03:37"}
180
+ {"current_steps": 1780, "total_steps": 2001, "loss": 0.2596, "lr": 5e-06, "epoch": 2.668165636125164, "percentage": 88.96, "elapsed_time": "15:52:31", "remaining_time": "1:58:15"}
181
+ {"current_steps": 1790, "total_steps": 2001, "loss": 0.259, "lr": 5e-06, "epoch": 2.683155330710137, "percentage": 89.46, "elapsed_time": "15:57:46", "remaining_time": "1:52:53"}
182
+ {"current_steps": 1800, "total_steps": 2001, "loss": 0.2613, "lr": 5e-06, "epoch": 2.6981450252951094, "percentage": 89.96, "elapsed_time": "16:03:02", "remaining_time": "1:47:32"}
183
+ {"current_steps": 1810, "total_steps": 2001, "loss": 0.2585, "lr": 5e-06, "epoch": 2.7131347198800824, "percentage": 90.45, "elapsed_time": "16:08:17", "remaining_time": "1:42:10"}
184
+ {"current_steps": 1820, "total_steps": 2001, "loss": 0.2597, "lr": 5e-06, "epoch": 2.7281244144650554, "percentage": 90.95, "elapsed_time": "16:13:29", "remaining_time": "1:36:48"}
185
+ {"current_steps": 1830, "total_steps": 2001, "loss": 0.2571, "lr": 5e-06, "epoch": 2.743114109050028, "percentage": 91.45, "elapsed_time": "16:18:44", "remaining_time": "1:31:27"}
186
+ {"current_steps": 1840, "total_steps": 2001, "loss": 0.263, "lr": 5e-06, "epoch": 2.758103803635001, "percentage": 91.95, "elapsed_time": "16:23:56", "remaining_time": "1:26:05"}
187
+ {"current_steps": 1850, "total_steps": 2001, "loss": 0.2608, "lr": 5e-06, "epoch": 2.773093498219974, "percentage": 92.45, "elapsed_time": "16:29:11", "remaining_time": "1:20:44"}
188
+ {"current_steps": 1860, "total_steps": 2001, "loss": 0.2613, "lr": 5e-06, "epoch": 2.7880831928049465, "percentage": 92.95, "elapsed_time": "16:34:24", "remaining_time": "1:15:22"}
189
+ {"current_steps": 1870, "total_steps": 2001, "loss": 0.2611, "lr": 5e-06, "epoch": 2.8030728873899196, "percentage": 93.45, "elapsed_time": "16:39:39", "remaining_time": "1:10:01"}
190
+ {"current_steps": 1880, "total_steps": 2001, "loss": 0.2622, "lr": 5e-06, "epoch": 2.818062581974892, "percentage": 93.95, "elapsed_time": "16:44:52", "remaining_time": "1:04:40"}
191
+ {"current_steps": 1890, "total_steps": 2001, "loss": 0.2615, "lr": 5e-06, "epoch": 2.833052276559865, "percentage": 94.45, "elapsed_time": "16:50:04", "remaining_time": "0:59:19"}
192
+ {"current_steps": 1900, "total_steps": 2001, "loss": 0.263, "lr": 5e-06, "epoch": 2.8480419711448377, "percentage": 94.95, "elapsed_time": "16:55:17", "remaining_time": "0:53:58"}
193
+ {"current_steps": 1910, "total_steps": 2001, "loss": 0.2649, "lr": 5e-06, "epoch": 2.8630316657298107, "percentage": 95.45, "elapsed_time": "17:00:29", "remaining_time": "0:48:37"}
194
+ {"current_steps": 1920, "total_steps": 2001, "loss": 0.2611, "lr": 5e-06, "epoch": 2.8780213603147837, "percentage": 95.95, "elapsed_time": "17:05:41", "remaining_time": "0:43:16"}
195
+ {"current_steps": 1930, "total_steps": 2001, "loss": 0.2626, "lr": 5e-06, "epoch": 2.8930110548997563, "percentage": 96.45, "elapsed_time": "17:10:55", "remaining_time": "0:37:55"}
196
+ {"current_steps": 1940, "total_steps": 2001, "loss": 0.2627, "lr": 5e-06, "epoch": 2.9080007494847293, "percentage": 96.95, "elapsed_time": "17:16:12", "remaining_time": "0:32:34"}
197
+ {"current_steps": 1950, "total_steps": 2001, "loss": 0.2628, "lr": 5e-06, "epoch": 2.9229904440697023, "percentage": 97.45, "elapsed_time": "17:21:27", "remaining_time": "0:27:14"}
198
+ {"current_steps": 1960, "total_steps": 2001, "loss": 0.2627, "lr": 5e-06, "epoch": 2.937980138654675, "percentage": 97.95, "elapsed_time": "17:26:43", "remaining_time": "0:21:53"}
199
+ {"current_steps": 1970, "total_steps": 2001, "loss": 0.2646, "lr": 5e-06, "epoch": 2.952969833239648, "percentage": 98.45, "elapsed_time": "17:31:57", "remaining_time": "0:16:33"}
200
+ {"current_steps": 1980, "total_steps": 2001, "loss": 0.2637, "lr": 5e-06, "epoch": 2.9679595278246205, "percentage": 98.95, "elapsed_time": "17:37:10", "remaining_time": "0:11:12"}
201
+ {"current_steps": 1990, "total_steps": 2001, "loss": 0.2637, "lr": 5e-06, "epoch": 2.9829492224095935, "percentage": 99.45, "elapsed_time": "17:42:22", "remaining_time": "0:05:52"}
202
+ {"current_steps": 2000, "total_steps": 2001, "loss": 0.2629, "lr": 5e-06, "epoch": 2.997938916994566, "percentage": 99.95, "elapsed_time": "17:47:34", "remaining_time": "0:00:32"}