ihanif commited on
Commit
d0476ba
1 Parent(s): e778dac

End of training

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_cer": 0.19657237249638654,
4
- "eval_loss": 0.9153681993484497,
5
- "eval_runtime": 45.7458,
6
  "eval_samples": 481,
7
- "eval_samples_per_second": 10.515,
8
- "eval_steps_per_second": 1.333,
9
- "eval_wer": 0.5137278308321964,
10
- "train_loss": 2.643854008807412,
11
- "train_runtime": 15714.4593,
12
  "train_samples": 2528,
13
- "train_samples_per_second": 8.044,
14
- "train_steps_per_second": 0.251
15
  }
 
1
  {
2
+ "epoch": 50.63,
3
+ "eval_cer": 0.1969102547256584,
4
+ "eval_loss": 0.9162458181381226,
5
+ "eval_runtime": 46.1646,
6
  "eval_samples": 481,
7
+ "eval_samples_per_second": 10.419,
8
+ "eval_steps_per_second": 1.321,
9
+ "eval_wer": 0.5156036834924966,
10
+ "train_loss": 0.07298430502414703,
11
+ "train_runtime": 2125.6437,
12
  "train_samples": 2528,
13
+ "train_samples_per_second": 60.217,
14
+ "train_steps_per_second": 1.882
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_cer": 0.19657237249638654,
4
- "eval_loss": 0.9153681993484497,
5
- "eval_runtime": 45.7458,
6
  "eval_samples": 481,
7
- "eval_samples_per_second": 10.515,
8
- "eval_steps_per_second": 1.333,
9
- "eval_wer": 0.5137278308321964
10
  }
 
1
  {
2
+ "epoch": 50.63,
3
+ "eval_cer": 0.1969102547256584,
4
+ "eval_loss": 0.9162458181381226,
5
+ "eval_runtime": 46.1646,
6
  "eval_samples": 481,
7
+ "eval_samples_per_second": 10.419,
8
+ "eval_steps_per_second": 1.321,
9
+ "eval_wer": 0.5156036834924966
10
  }
runs/Dec16_13-55-02_129-146-104-29/events.out.tfevents.1671201437.129-146-104-29.128095.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08206f99f4f7b3fbfa384e19ec6ae571dfe46052f6556baacc5e7735d060a873
3
+ size 405
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "train_loss": 2.643854008807412,
4
- "train_runtime": 15714.4593,
5
  "train_samples": 2528,
6
- "train_samples_per_second": 8.044,
7
- "train_steps_per_second": 0.251
8
  }
 
1
  {
2
+ "epoch": 50.63,
3
+ "train_loss": 0.07298430502414703,
4
+ "train_runtime": 2125.6437,
5
  "train_samples": 2528,
6
+ "train_samples_per_second": 60.217,
7
+ "train_steps_per_second": 1.882
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 50.0,
5
- "global_step": 3950,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2179,287 +2179,327 @@
2179
  },
2180
  {
2181
  "epoch": 44.43,
2182
- "learning_rate": 1.696153846153846e-05,
2183
- "loss": 0.6001,
2184
  "step": 3510
2185
  },
2186
  {
2187
  "epoch": 44.56,
2188
- "learning_rate": 1.6576923076923074e-05,
2189
- "loss": 0.6187,
2190
  "step": 3520
2191
  },
2192
  {
2193
  "epoch": 44.68,
2194
- "learning_rate": 1.6192307692307692e-05,
2195
- "loss": 0.5808,
2196
  "step": 3530
2197
  },
2198
  {
2199
  "epoch": 44.81,
2200
- "learning_rate": 1.5807692307692307e-05,
2201
- "loss": 0.6285,
2202
  "step": 3540
2203
  },
2204
  {
2205
  "epoch": 44.94,
2206
- "learning_rate": 1.542307692307692e-05,
2207
- "loss": 0.5612,
2208
  "step": 3550
2209
  },
2210
  {
2211
  "epoch": 45.06,
2212
- "learning_rate": 1.5038461538461538e-05,
2213
- "loss": 0.617,
2214
  "step": 3560
2215
  },
2216
  {
2217
  "epoch": 45.19,
2218
- "learning_rate": 1.4653846153846151e-05,
2219
- "loss": 0.5858,
2220
  "step": 3570
2221
  },
2222
  {
2223
  "epoch": 45.32,
2224
- "learning_rate": 1.426923076923077e-05,
2225
- "loss": 0.5933,
2226
  "step": 3580
2227
  },
2228
  {
2229
  "epoch": 45.44,
2230
- "learning_rate": 1.3884615384615383e-05,
2231
- "loss": 0.5971,
2232
  "step": 3590
2233
  },
2234
  {
2235
  "epoch": 45.57,
2236
- "learning_rate": 1.3499999999999998e-05,
2237
- "loss": 0.5997,
2238
  "step": 3600
2239
  },
2240
  {
2241
  "epoch": 45.7,
2242
- "learning_rate": 1.3115384615384614e-05,
2243
- "loss": 0.5872,
2244
  "step": 3610
2245
  },
2246
  {
2247
  "epoch": 45.82,
2248
- "learning_rate": 1.273076923076923e-05,
2249
- "loss": 0.5875,
2250
  "step": 3620
2251
  },
2252
  {
2253
  "epoch": 45.95,
2254
- "learning_rate": 1.2346153846153844e-05,
2255
- "loss": 0.5623,
2256
  "step": 3630
2257
  },
2258
  {
2259
  "epoch": 46.08,
2260
- "learning_rate": 1.196153846153846e-05,
2261
- "loss": 0.6336,
2262
  "step": 3640
2263
  },
2264
  {
2265
  "epoch": 46.2,
2266
- "learning_rate": 1.1576923076923076e-05,
2267
- "loss": 0.5559,
2268
  "step": 3650
2269
  },
2270
  {
2271
  "epoch": 46.33,
2272
- "learning_rate": 1.119230769230769e-05,
2273
- "loss": 0.6232,
2274
  "step": 3660
2275
  },
2276
  {
2277
  "epoch": 46.46,
2278
- "learning_rate": 1.0807692307692307e-05,
2279
- "loss": 0.5881,
2280
  "step": 3670
2281
  },
2282
  {
2283
  "epoch": 46.58,
2284
- "learning_rate": 1.0423076923076922e-05,
2285
- "loss": 0.5716,
2286
  "step": 3680
2287
  },
2288
  {
2289
  "epoch": 46.71,
2290
- "learning_rate": 1.0038461538461537e-05,
2291
- "loss": 0.549,
2292
  "step": 3690
2293
  },
2294
  {
2295
  "epoch": 46.84,
2296
- "learning_rate": 9.653846153846154e-06,
2297
- "loss": 0.5987,
2298
  "step": 3700
2299
  },
2300
  {
2301
  "epoch": 46.96,
2302
- "learning_rate": 9.269230769230768e-06,
2303
- "loss": 0.5751,
2304
  "step": 3710
2305
  },
2306
  {
2307
  "epoch": 47.09,
2308
- "learning_rate": 8.884615384615383e-06,
2309
- "loss": 0.607,
2310
  "step": 3720
2311
  },
2312
  {
2313
  "epoch": 47.22,
2314
- "learning_rate": 8.499999999999998e-06,
2315
- "loss": 0.5768,
2316
  "step": 3730
2317
  },
2318
  {
2319
  "epoch": 47.34,
2320
- "learning_rate": 8.115384615384615e-06,
2321
- "loss": 0.5738,
2322
  "step": 3740
2323
  },
2324
  {
2325
  "epoch": 47.47,
2326
- "learning_rate": 7.73076923076923e-06,
2327
- "loss": 0.5304,
2328
  "step": 3750
2329
  },
2330
  {
2331
  "epoch": 47.59,
2332
- "learning_rate": 7.346153846153845e-06,
2333
- "loss": 0.628,
2334
  "step": 3760
2335
  },
2336
  {
2337
  "epoch": 47.72,
2338
- "learning_rate": 6.96153846153846e-06,
2339
- "loss": 0.5455,
2340
  "step": 3770
2341
  },
2342
  {
2343
  "epoch": 47.85,
2344
- "learning_rate": 6.576923076923076e-06,
2345
- "loss": 0.6422,
2346
  "step": 3780
2347
  },
2348
  {
2349
  "epoch": 47.97,
2350
- "learning_rate": 6.192307692307692e-06,
2351
- "loss": 0.5657,
2352
  "step": 3790
2353
  },
2354
  {
2355
  "epoch": 48.1,
2356
- "learning_rate": 5.807692307692307e-06,
2357
- "loss": 0.5906,
2358
  "step": 3800
2359
  },
2360
  {
2361
  "epoch": 48.23,
2362
- "learning_rate": 5.4230769230769225e-06,
2363
- "loss": 0.5559,
2364
  "step": 3810
2365
  },
2366
  {
2367
  "epoch": 48.35,
2368
- "learning_rate": 5.038461538461538e-06,
2369
- "loss": 0.6286,
2370
  "step": 3820
2371
  },
2372
  {
2373
  "epoch": 48.48,
2374
- "learning_rate": 4.653846153846154e-06,
2375
- "loss": 0.5632,
2376
  "step": 3830
2377
  },
2378
  {
2379
  "epoch": 48.61,
2380
- "learning_rate": 4.269230769230769e-06,
2381
- "loss": 0.6231,
2382
  "step": 3840
2383
  },
2384
  {
2385
  "epoch": 48.73,
2386
- "learning_rate": 3.884615384615384e-06,
2387
- "loss": 0.559,
2388
  "step": 3850
2389
  },
2390
  {
2391
  "epoch": 48.86,
2392
- "learning_rate": 3.5e-06,
2393
- "loss": 0.5731,
2394
  "step": 3860
2395
  },
2396
  {
2397
  "epoch": 48.99,
2398
- "learning_rate": 3.1153846153846153e-06,
2399
- "loss": 0.5573,
2400
  "step": 3870
2401
  },
2402
  {
2403
  "epoch": 49.11,
2404
- "learning_rate": 2.73076923076923e-06,
2405
- "loss": 0.6044,
2406
  "step": 3880
2407
  },
2408
  {
2409
  "epoch": 49.24,
2410
- "learning_rate": 2.346153846153846e-06,
2411
- "loss": 0.5496,
2412
  "step": 3890
2413
  },
2414
  {
2415
  "epoch": 49.37,
2416
- "learning_rate": 1.9615384615384612e-06,
2417
- "loss": 0.5895,
2418
  "step": 3900
2419
  },
2420
  {
2421
  "epoch": 49.49,
2422
- "learning_rate": 1.5769230769230768e-06,
2423
- "loss": 0.5815,
2424
  "step": 3910
2425
  },
2426
  {
2427
  "epoch": 49.62,
2428
- "learning_rate": 1.1923076923076923e-06,
2429
- "loss": 0.5782,
2430
  "step": 3920
2431
  },
2432
  {
2433
  "epoch": 49.75,
2434
- "learning_rate": 8.076923076923076e-07,
2435
- "loss": 0.5605,
2436
  "step": 3930
2437
  },
2438
  {
2439
  "epoch": 49.87,
2440
- "learning_rate": 4.230769230769231e-07,
2441
- "loss": 0.5849,
2442
  "step": 3940
2443
  },
2444
  {
2445
  "epoch": 50.0,
2446
- "learning_rate": 3.846153846153846e-08,
2447
- "loss": 0.5558,
2448
  "step": 3950
2449
  },
2450
  {
2451
- "epoch": 50.0,
2452
- "step": 3950,
2453
- "total_flos": 4.545011460667043e+19,
2454
- "train_loss": 2.643854008807412,
2455
- "train_runtime": 15714.4593,
2456
- "train_samples_per_second": 8.044,
2457
- "train_steps_per_second": 0.251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2458
  }
2459
  ],
2460
- "max_steps": 3950,
2461
- "num_train_epochs": 50,
2462
- "total_flos": 4.545011460667043e+19,
2463
  "trial_name": null,
2464
  "trial_params": null
2465
  }
 
1
  {
2
+ "best_metric": 0.9162458181381226,
3
+ "best_model_checkpoint": "./checkpoint-4000",
4
+ "epoch": 50.63291139240506,
5
+ "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2179
  },
2180
  {
2181
  "epoch": 44.43,
2182
+ "learning_rate": 1.8412499999999997e-05,
2183
+ "loss": 0.6002,
2184
  "step": 3510
2185
  },
2186
  {
2187
  "epoch": 44.56,
2188
+ "learning_rate": 1.8037499999999998e-05,
2189
+ "loss": 0.6191,
2190
  "step": 3520
2191
  },
2192
  {
2193
  "epoch": 44.68,
2194
+ "learning_rate": 1.76625e-05,
2195
+ "loss": 0.5811,
2196
  "step": 3530
2197
  },
2198
  {
2199
  "epoch": 44.81,
2200
+ "learning_rate": 1.72875e-05,
2201
+ "loss": 0.6299,
2202
  "step": 3540
2203
  },
2204
  {
2205
  "epoch": 44.94,
2206
+ "learning_rate": 1.6912499999999998e-05,
2207
+ "loss": 0.5605,
2208
  "step": 3550
2209
  },
2210
  {
2211
  "epoch": 45.06,
2212
+ "learning_rate": 1.65375e-05,
2213
+ "loss": 0.6183,
2214
  "step": 3560
2215
  },
2216
  {
2217
  "epoch": 45.19,
2218
+ "learning_rate": 1.61625e-05,
2219
+ "loss": 0.5852,
2220
  "step": 3570
2221
  },
2222
  {
2223
  "epoch": 45.32,
2224
+ "learning_rate": 1.5787499999999997e-05,
2225
+ "loss": 0.594,
2226
  "step": 3580
2227
  },
2228
  {
2229
  "epoch": 45.44,
2230
+ "learning_rate": 1.54125e-05,
2231
+ "loss": 0.5965,
2232
  "step": 3590
2233
  },
2234
  {
2235
  "epoch": 45.57,
2236
+ "learning_rate": 1.50375e-05,
2237
+ "loss": 0.6005,
2238
  "step": 3600
2239
  },
2240
  {
2241
  "epoch": 45.7,
2242
+ "learning_rate": 1.4662499999999999e-05,
2243
+ "loss": 0.5884,
2244
  "step": 3610
2245
  },
2246
  {
2247
  "epoch": 45.82,
2248
+ "learning_rate": 1.4287499999999998e-05,
2249
+ "loss": 0.5884,
2250
  "step": 3620
2251
  },
2252
  {
2253
  "epoch": 45.95,
2254
+ "learning_rate": 1.39125e-05,
2255
+ "loss": 0.5628,
2256
  "step": 3630
2257
  },
2258
  {
2259
  "epoch": 46.08,
2260
+ "learning_rate": 1.3537499999999999e-05,
2261
+ "loss": 0.6339,
2262
  "step": 3640
2263
  },
2264
  {
2265
  "epoch": 46.2,
2266
+ "learning_rate": 1.3162499999999998e-05,
2267
+ "loss": 0.5578,
2268
  "step": 3650
2269
  },
2270
  {
2271
  "epoch": 46.33,
2272
+ "learning_rate": 1.2787499999999999e-05,
2273
+ "loss": 0.6239,
2274
  "step": 3660
2275
  },
2276
  {
2277
  "epoch": 46.46,
2278
+ "learning_rate": 1.24125e-05,
2279
+ "loss": 0.5872,
2280
  "step": 3670
2281
  },
2282
  {
2283
  "epoch": 46.58,
2284
+ "learning_rate": 1.20375e-05,
2285
+ "loss": 0.5697,
2286
  "step": 3680
2287
  },
2288
  {
2289
  "epoch": 46.71,
2290
+ "learning_rate": 1.1662499999999999e-05,
2291
+ "loss": 0.5475,
2292
  "step": 3690
2293
  },
2294
  {
2295
  "epoch": 46.84,
2296
+ "learning_rate": 1.1287499999999998e-05,
2297
+ "loss": 0.5979,
2298
  "step": 3700
2299
  },
2300
  {
2301
  "epoch": 46.96,
2302
+ "learning_rate": 1.0912499999999998e-05,
2303
+ "loss": 0.5742,
2304
  "step": 3710
2305
  },
2306
  {
2307
  "epoch": 47.09,
2308
+ "learning_rate": 1.05375e-05,
2309
+ "loss": 0.6054,
2310
  "step": 3720
2311
  },
2312
  {
2313
  "epoch": 47.22,
2314
+ "learning_rate": 1.01625e-05,
2315
+ "loss": 0.5777,
2316
  "step": 3730
2317
  },
2318
  {
2319
  "epoch": 47.34,
2320
+ "learning_rate": 9.787499999999999e-06,
2321
+ "loss": 0.5734,
2322
  "step": 3740
2323
  },
2324
  {
2325
  "epoch": 47.47,
2326
+ "learning_rate": 9.412499999999998e-06,
2327
+ "loss": 0.5322,
2328
  "step": 3750
2329
  },
2330
  {
2331
  "epoch": 47.59,
2332
+ "learning_rate": 9.0375e-06,
2333
+ "loss": 0.6287,
2334
  "step": 3760
2335
  },
2336
  {
2337
  "epoch": 47.72,
2338
+ "learning_rate": 8.6625e-06,
2339
+ "loss": 0.547,
2340
  "step": 3770
2341
  },
2342
  {
2343
  "epoch": 47.85,
2344
+ "learning_rate": 8.2875e-06,
2345
+ "loss": 0.6414,
2346
  "step": 3780
2347
  },
2348
  {
2349
  "epoch": 47.97,
2350
+ "learning_rate": 7.9125e-06,
2351
+ "loss": 0.5661,
2352
  "step": 3790
2353
  },
2354
  {
2355
  "epoch": 48.1,
2356
+ "learning_rate": 7.537499999999999e-06,
2357
+ "loss": 0.5893,
2358
  "step": 3800
2359
  },
2360
  {
2361
  "epoch": 48.23,
2362
+ "learning_rate": 7.1625e-06,
2363
+ "loss": 0.556,
2364
  "step": 3810
2365
  },
2366
  {
2367
  "epoch": 48.35,
2368
+ "learning_rate": 6.787499999999999e-06,
2369
+ "loss": 0.6265,
2370
  "step": 3820
2371
  },
2372
  {
2373
  "epoch": 48.48,
2374
+ "learning_rate": 6.4125e-06,
2375
+ "loss": 0.5644,
2376
  "step": 3830
2377
  },
2378
  {
2379
  "epoch": 48.61,
2380
+ "learning_rate": 6.037499999999999e-06,
2381
+ "loss": 0.6202,
2382
  "step": 3840
2383
  },
2384
  {
2385
  "epoch": 48.73,
2386
+ "learning_rate": 5.6624999999999996e-06,
2387
+ "loss": 0.5581,
2388
  "step": 3850
2389
  },
2390
  {
2391
  "epoch": 48.86,
2392
+ "learning_rate": 5.287499999999999e-06,
2393
+ "loss": 0.572,
2394
  "step": 3860
2395
  },
2396
  {
2397
  "epoch": 48.99,
2398
+ "learning_rate": 4.9125e-06,
2399
+ "loss": 0.5559,
2400
  "step": 3870
2401
  },
2402
  {
2403
  "epoch": 49.11,
2404
+ "learning_rate": 4.537499999999999e-06,
2405
+ "loss": 0.6013,
2406
  "step": 3880
2407
  },
2408
  {
2409
  "epoch": 49.24,
2410
+ "learning_rate": 4.1624999999999995e-06,
2411
+ "loss": 0.5498,
2412
  "step": 3890
2413
  },
2414
  {
2415
  "epoch": 49.37,
2416
+ "learning_rate": 3.7874999999999997e-06,
2417
+ "loss": 0.5883,
2418
  "step": 3900
2419
  },
2420
  {
2421
  "epoch": 49.49,
2422
+ "learning_rate": 3.4124999999999995e-06,
2423
+ "loss": 0.5777,
2424
  "step": 3910
2425
  },
2426
  {
2427
  "epoch": 49.62,
2428
+ "learning_rate": 3.0374999999999997e-06,
2429
+ "loss": 0.5768,
2430
  "step": 3920
2431
  },
2432
  {
2433
  "epoch": 49.75,
2434
+ "learning_rate": 2.6624999999999995e-06,
2435
+ "loss": 0.5603,
2436
  "step": 3930
2437
  },
2438
  {
2439
  "epoch": 49.87,
2440
+ "learning_rate": 2.2874999999999997e-06,
2441
+ "loss": 0.5814,
2442
  "step": 3940
2443
  },
2444
  {
2445
  "epoch": 50.0,
2446
+ "learning_rate": 1.9125e-06,
2447
+ "loss": 0.5562,
2448
  "step": 3950
2449
  },
2450
  {
2451
+ "epoch": 50.13,
2452
+ "learning_rate": 1.5374999999999999e-06,
2453
+ "loss": 0.5858,
2454
+ "step": 3960
2455
+ },
2456
+ {
2457
+ "epoch": 50.25,
2458
+ "learning_rate": 1.1624999999999999e-06,
2459
+ "loss": 0.5279,
2460
+ "step": 3970
2461
+ },
2462
+ {
2463
+ "epoch": 50.38,
2464
+ "learning_rate": 7.875e-07,
2465
+ "loss": 0.5734,
2466
+ "step": 3980
2467
+ },
2468
+ {
2469
+ "epoch": 50.51,
2470
+ "learning_rate": 4.124999999999999e-07,
2471
+ "loss": 0.5895,
2472
+ "step": 3990
2473
+ },
2474
+ {
2475
+ "epoch": 50.63,
2476
+ "learning_rate": 3.75e-08,
2477
+ "loss": 0.5935,
2478
+ "step": 4000
2479
+ },
2480
+ {
2481
+ "epoch": 50.63,
2482
+ "eval_cer": 0.1969102547256584,
2483
+ "eval_loss": 0.9162458181381226,
2484
+ "eval_runtime": 44.8405,
2485
+ "eval_samples_per_second": 10.727,
2486
+ "eval_steps_per_second": 1.36,
2487
+ "eval_wer": 0.5156036834924966,
2488
+ "step": 4000
2489
+ },
2490
+ {
2491
+ "epoch": 50.63,
2492
+ "step": 4000,
2493
+ "total_flos": 4.6049855774374625e+19,
2494
+ "train_loss": 0.07298430502414703,
2495
+ "train_runtime": 2125.6437,
2496
+ "train_samples_per_second": 60.217,
2497
+ "train_steps_per_second": 1.882
2498
  }
2499
  ],
2500
+ "max_steps": 4000,
2501
+ "num_train_epochs": 51,
2502
+ "total_flos": 4.6049855774374625e+19,
2503
  "trial_name": null,
2504
  "trial_params": null
2505
  }