Gizachew committed on
Commit
3a401ac
1 Parent(s): ee466fb

Upload 5 files

Browse files
Files changed (3) hide show
  1. rng_state.pth +3 -0
  2. scheduler.pt +3 -0
  3. trainer_state.json +3 -26
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4f3ee24726555e5e1086c23c28334dc67c9900546d600d694f59a4bd1120eca
3
+ size 14308
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b054a82daf1dc51ea4726a8b05eded8ded0cdd8f7d3601daf669357ade792cd5
3
+ size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.17145082354545593,
3
  "best_model_checkpoint": "/kaggle/working/ckpts/checkpoint-3500",
4
- "epoch": 14.969696969696969,
5
  "eval_steps": 500,
6
- "global_step": 3705,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -315,29 +315,6 @@
315
  "eval_samples_per_second": 21.179,
316
  "eval_steps_per_second": 5.305,
317
  "step": 3500
318
- },
319
- {
320
- "epoch": 14.55,
321
- "grad_norm": 17.49441146850586,
322
- "learning_rate": 2.968960863697706e-07,
323
- "loss": 0.0978,
324
- "step": 3600
325
- },
326
- {
327
- "epoch": 14.95,
328
- "grad_norm": 0.08890076726675034,
329
- "learning_rate": 2.6990553306342783e-08,
330
- "loss": 0.0463,
331
- "step": 3700
332
- },
333
- {
334
- "epoch": 14.97,
335
- "step": 3705,
336
- "total_flos": 9.492004863262944e+17,
337
- "train_loss": 0.2598070231043858,
338
- "train_runtime": 2003.5076,
339
- "train_samples_per_second": 14.817,
340
- "train_steps_per_second": 1.849
341
  }
342
  ],
343
  "logging_steps": 100,
@@ -345,7 +322,7 @@
345
  "num_input_tokens_seen": 0,
346
  "num_train_epochs": 15,
347
  "save_steps": 500,
348
- "total_flos": 9.492004863262944e+17,
349
  "train_batch_size": 4,
350
  "trial_name": null,
351
  "trial_params": null
 
1
  {
2
  "best_metric": 0.17145082354545593,
3
  "best_model_checkpoint": "/kaggle/working/ckpts/checkpoint-3500",
4
+ "epoch": 14.141414141414142,
5
  "eval_steps": 500,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
315
  "eval_samples_per_second": 21.179,
316
  "eval_steps_per_second": 5.305,
317
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  }
319
  ],
320
  "logging_steps": 100,
 
322
  "num_input_tokens_seen": 0,
323
  "num_train_epochs": 15,
324
  "save_steps": 500,
325
+ "total_flos": 8.971183151896032e+17,
326
  "train_batch_size": 4,
327
  "trial_name": null,
328
  "trial_params": null