DorinSht committed on
Commit
25b9edb
1 Parent(s): d63804b

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:733197a61ea376aac61a951bb50d1ca3020638327455f8dec4a0ff679cecc0b1
3
  size 272123144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f6aef3a65b706883648a40261c469821b28ad715e937813acd70959ef272f12
3
  size 272123144
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:684dc2452fd203426d3639b34576b775f954fd7cb3c2337d837769d9331de7b5
3
  size 544259743
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:754050cd8531a1ec4be7c92439fadfa8bb9b0a1187abc0ab8979a2df096fbe8a
3
  size 544259743
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:147289ff04041b1e9043e727828828c698ed512128acb68e5fd4bf30ee75b6c5
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74ace5f16e09466618cf956d695538d60bba6bf21c0003f9e08e546fc6acbe93
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5288207297726071,
5
  "eval_steps": 1000,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -53,6 +53,29 @@
53
  "eval_samples_per_second": 25.237,
54
  "eval_steps_per_second": 0.535,
55
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  }
57
  ],
58
  "logging_steps": 500,
@@ -72,7 +95,7 @@
72
  "attributes": {}
73
  }
74
  },
75
- "total_flos": 2.5630334779392e+16,
76
  "train_batch_size": 24,
77
  "trial_name": null,
78
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7932310946589106,
5
  "eval_steps": 1000,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
53
  "eval_samples_per_second": 25.237,
54
  "eval_steps_per_second": 0.535,
55
  "step": 2000
56
+ },
57
+ {
58
+ "epoch": 0.6610259122157589,
59
+ "grad_norm": 1.4154396057128906,
60
+ "learning_rate": 8.206313191070773e-05,
61
+ "loss": 2.686,
62
+ "step": 2500
63
+ },
64
+ {
65
+ "epoch": 0.7932310946589106,
66
+ "grad_norm": 1.821349024772644,
67
+ "learning_rate": 7.742312671390191e-05,
68
+ "loss": 2.607,
69
+ "step": 3000
70
+ },
71
+ {
72
+ "epoch": 0.7932310946589106,
73
+ "eval_accuracy": 0.5497897240925214,
74
+ "eval_loss": 2.657219886779785,
75
+ "eval_runtime": 73.4297,
76
+ "eval_samples_per_second": 25.058,
77
+ "eval_steps_per_second": 0.531,
78
+ "step": 3000
79
  }
80
  ],
81
  "logging_steps": 500,
 
95
  "attributes": {}
96
  }
97
  },
98
+ "total_flos": 3.8445502169088e+16,
99
  "train_batch_size": 24,
100
  "trial_name": null,
101
  "trial_params": null