DorinSht commited on
Commit
d7537dd
1 Parent(s): 2651627

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6aef3a65b706883648a40261c469821b28ad715e937813acd70959ef272f12
3
  size 272123144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baf7620b0c51ef17a030b63cfe26c514df5d88602a1b8140fb12c4968dfa6ff4
3
  size 272123144
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:754050cd8531a1ec4be7c92439fadfa8bb9b0a1187abc0ab8979a2df096fbe8a
3
  size 544259743
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:908d9d7ed41d479f7f47a9fd0646de3f7800df94e052115c9815ea463d99e70d
3
  size 544259743
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74ace5f16e09466618cf956d695538d60bba6bf21c0003f9e08e546fc6acbe93
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394be853393fcf0db07e5bdfe4c0d7e15ce8f5fac5bdbb2ad1b413385499af51
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7932310946589106,
5
  "eval_steps": 1000,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -76,6 +76,29 @@
76
  "eval_samples_per_second": 25.058,
77
  "eval_steps_per_second": 0.531,
78
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  }
80
  ],
81
  "logging_steps": 500,
@@ -95,7 +118,7 @@
95
  "attributes": {}
96
  }
97
  },
98
- "total_flos": 3.8445502169088e+16,
99
  "train_batch_size": 24,
100
  "trial_name": null,
101
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0576414595452142,
5
  "eval_steps": 1000,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
76
  "eval_samples_per_second": 25.058,
77
  "eval_steps_per_second": 0.531,
78
  "step": 3000
79
+ },
80
+ {
81
+ "epoch": 0.9254362771020624,
82
+ "grad_norm": 2.0297396183013916,
83
+ "learning_rate": 7.278312151709609e-05,
84
+ "loss": 2.5642,
85
+ "step": 3500
86
+ },
87
+ {
88
+ "epoch": 1.0576414595452142,
89
+ "grad_norm": 2.8318285942077637,
90
+ "learning_rate": 6.814311632029027e-05,
91
+ "loss": 2.4734,
92
+ "step": 4000
93
+ },
94
+ {
95
+ "epoch": 1.0576414595452142,
96
+ "eval_accuracy": 0.5582058048894458,
97
+ "eval_loss": 2.5735702514648438,
98
+ "eval_runtime": 73.4679,
99
+ "eval_samples_per_second": 25.045,
100
+ "eval_steps_per_second": 0.531,
101
+ "step": 4000
102
  }
103
  ],
104
  "logging_steps": 500,
 
118
  "attributes": {}
119
  }
120
  },
121
+ "total_flos": 5.124838835670221e+16,
122
  "train_batch_size": 24,
123
  "trial_name": null,
124
  "trial_params": null