iamnguyen commited on
Commit
68149c6
1 Parent(s): a2032bc

Training in progress, step 24, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0ee9d038fdfb39e9bd9e55f4c8f788bb92151e68f72b9d6367ea82855ee2e2f
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89f79cb14b5c754d30bef8e66c4a5617b00fa5ef1da8dd3dca5f51741b0c0aa4
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2f6f1b0a1f8f871f48a36f3d02a2520916fee6a6ee88edb89172de597c386e1
3
  size 148462004
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9789bb0f9d64c03fc14747851aa7f69a43a796a88d09e83e6312c8d8405b4f83
3
  size 148462004
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b32b2639b16b6624de4cc86c5b2874e9893bb8c25e458e5870ebfbb551ffdf86
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b0bfc3ae73ee0f6b8148dc70b49d099bfebc01dc53150f62977f94e0000d7f7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad5893d6157dd23b3c59b0fbd477b43e2b7bc277c23d1bb43b6558d4e0634ada
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dedb9dc405b830072407e634dd4d0c1b7c5c7dcee6352d24ec89b9bcf38e60ff
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.001937368508680319,
5
  "eval_steps": 500,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -147,6 +147,34 @@
147
  "learning_rate": 3.846153846153846e-05,
148
  "loss": 1.9203,
149
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  }
151
  ],
152
  "logging_steps": 1.0,
@@ -166,7 +194,7 @@
166
  "attributes": {}
167
  }
168
  },
169
- "total_flos": 6822886326681600.0,
170
  "train_batch_size": 2,
171
  "trial_name": null,
172
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.002324842210416383,
5
  "eval_steps": 500,
6
+ "global_step": 24,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
147
  "learning_rate": 3.846153846153846e-05,
148
  "loss": 1.9203,
149
  "step": 20
150
+ },
151
+ {
152
+ "epoch": 0.002034236934114335,
153
+ "grad_norm": 0.15928150713443756,
154
+ "learning_rate": 4.038461538461539e-05,
155
+ "loss": 1.8459,
156
+ "step": 21
157
+ },
158
+ {
159
+ "epoch": 0.002131105359548351,
160
+ "grad_norm": 0.18540354073047638,
161
+ "learning_rate": 4.230769230769231e-05,
162
+ "loss": 1.7937,
163
+ "step": 22
164
+ },
165
+ {
166
+ "epoch": 0.0022279737849823667,
167
+ "grad_norm": 0.1321619600057602,
168
+ "learning_rate": 4.423076923076923e-05,
169
+ "loss": 1.8011,
170
+ "step": 23
171
+ },
172
+ {
173
+ "epoch": 0.002324842210416383,
174
+ "grad_norm": 0.1503838449716568,
175
+ "learning_rate": 4.615384615384616e-05,
176
+ "loss": 1.8784,
177
+ "step": 24
178
  }
179
  ],
180
  "logging_steps": 1.0,
 
194
  "attributes": {}
195
  }
196
  },
197
+ "total_flos": 8301560403240960.0,
198
  "train_batch_size": 2,
199
  "trial_name": null,
200
  "trial_params": null