AlekseyKorshuk commited on
Commit
50d3bb6
1 Parent(s): 8422a17

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/6ix9ine")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/23c4qor5/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 6ix9ine's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/8askquwf) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/8askquwf/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/6ix9ine")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/eqmcaj0r/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 6ix9ine's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/s5dpg3h2) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/s5dpg3h2/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 2.566208600997925, "eval_runtime": 2.6338, "eval_samples_per_second": 20.882, "eval_steps_per_second": 2.658, "epoch": 4.0}
 
1
+ {"eval_loss": 2.4751908779144287, "eval_runtime": 3.052, "eval_samples_per_second": 20.97, "eval_steps_per_second": 2.621, "epoch": 5.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1828e0611eb3dd2b0e31ca17d4310d4d7489f09c72ac60fd8ea14bbf161796e
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f1fd139ad3512f861c6cd5592c2ace1fac98271efc42a42e36236dd6c48b59
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a41f3f5703dac31ae7a5401c632b68029d736e86bfffe760b46a0f472c96db60
3
  size 995603825
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aceace990143d629f3db6198ece7069e41e3adcbd2ede64c0751815e4e703dab
3
  size 995603825
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b8d9d316dc2aba5868d7f29d4e845259d1702edafe69690c706ad4d698822e2
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd7892196cf4da7ad9d98e0b4363158ba3daf1bd31982643189b086d83d48268
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21430a2736694d7f20ccf719d4c7704a6abda2499225d98352d67c327ad5baa9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2571a29377ff85a1a70c33edf6c6403f4d3f4301398dc6bfae9afd8ec5e8db
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81c6de287d0aeee2c682cd2a01e5e01657f181e757ee594a86aa0945f540d0b1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:851e454e65c7fab7dc2d566df885ced9c351f3fb626f4834f3f43f9a6d1bcacf
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.566208600997925,
3
- "best_model_checkpoint": "output/6ix9ine/checkpoint-168",
4
- "epoch": 4.0,
5
- "global_step": 168,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -236,11 +236,67 @@
236
  "eval_samples_per_second": 20.835,
237
  "eval_steps_per_second": 2.652,
238
  "step": 168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  }
240
  ],
241
- "max_steps": 168,
242
- "num_train_epochs": 4,
243
- "total_flos": 172714033152000.0,
244
  "trial_name": null,
245
  "trial_params": null
246
  }
 
1
  {
2
+ "best_metric": 2.4751908779144287,
3
+ "best_model_checkpoint": "output/6ix9ine/checkpoint-205",
4
+ "epoch": 5.0,
5
+ "global_step": 205,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
236
  "eval_samples_per_second": 20.835,
237
  "eval_steps_per_second": 2.652,
238
  "step": 168
239
+ },
240
+ {
241
+ "epoch": 4.15,
242
+ "learning_rate": 9.63352496067802e-06,
243
+ "loss": 2.2438,
244
+ "step": 170
245
+ },
246
+ {
247
+ "epoch": 4.27,
248
+ "learning_rate": 2.7012936807688713e-05,
249
+ "loss": 2.4722,
250
+ "step": 175
251
+ },
252
+ {
253
+ "epoch": 4.39,
254
+ "learning_rate": 5.042226894930893e-05,
255
+ "loss": 2.5159,
256
+ "step": 180
257
+ },
258
+ {
259
+ "epoch": 4.51,
260
+ "learning_rate": 7.646728298233023e-05,
261
+ "loss": 2.2916,
262
+ "step": 185
263
+ },
264
+ {
265
+ "epoch": 4.63,
266
+ "learning_rate": 0.00010137157954994105,
267
+ "loss": 2.3591,
268
+ "step": 190
269
+ },
270
+ {
271
+ "epoch": 4.76,
272
+ "learning_rate": 0.00012152415773578518,
273
+ "loss": 2.7456,
274
+ "step": 195
275
+ },
276
+ {
277
+ "epoch": 4.88,
278
+ "learning_rate": 0.0001340029924949682,
279
+ "loss": 2.4208,
280
+ "step": 200
281
+ },
282
+ {
283
+ "epoch": 5.0,
284
+ "learning_rate": 0.0001369987139612046,
285
+ "loss": 2.3052,
286
+ "step": 205
287
+ },
288
+ {
289
+ "epoch": 5.0,
290
+ "eval_loss": 2.4751908779144287,
291
+ "eval_runtime": 3.015,
292
+ "eval_samples_per_second": 21.227,
293
+ "eval_steps_per_second": 2.653,
294
+ "step": 205
295
  }
296
  ],
297
+ "max_steps": 205,
298
+ "num_train_epochs": 5,
299
+ "total_flos": 210470731776000.0,
300
  "trial_name": null,
301
  "trial_params": null
302
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecac0d19b2e1f0f637a36c6ba81af1b5f98eeb060b14910c27f5bc5242b68a0d
3
  size 2863
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60e6f694485954cbb3712355594581b180503eb9abe9206152317c41a5b1ee51
3
  size 2863