AlekseyKorshuk committed on
Commit
9f48c13
1 Parent(s): b3c48cd

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/face")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1dmct9sw/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on FACE's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3otnah8o) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3otnah8o/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/face")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/xtozoqtm/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on FACE's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/knkqp5iy) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/knkqp5iy/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
1
  {
2
+ "_name_or_path": "huggingartists/face",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.7059965133666992, "eval_runtime": 8.746, "eval_samples_per_second": 22.296, "eval_steps_per_second": 2.858, "epoch": 2.0}
 
1
+ {"eval_loss": 1.536595344543457, "eval_runtime": 8.7966, "eval_samples_per_second": 22.281, "eval_steps_per_second": 2.842, "epoch": 3.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f694909ce939b2187156705b4fd647b0b2a00ad2041941dfde0b0712d3f44637
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a352dc2cb017f7537f484bbbd8674249ff6c9a5d0b9731ae3ead2468a57e2ed9
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0da00690b0b7fc97b4a072f81e5b4b37ad550b9ade55e57f69b85532cd94a81b
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22b89f9cd6657751dd2e461644ea376c8ac45cdaa75772ffdfea8283df3c3796
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59bad929cfa9ab4b603c5eafc65f453980979aef78d90b079446a704b2fa1c96
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83af0ad5904348f48de9db72cfdbe286540f30f4acb0c0e959c897788b30f4fa
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5a2448bee8c48598a482e8f0244dbeea82e24f9583900c96093b61edc578387
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c56094291dd7460e0ba6cf9174a82a82f05fa0dfbe9f60c1626e93605a150c
3
+ size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d4c9151b782acfc3e2cb37cc47d07656048381fd905bdb28d282ab7d8f1a8a1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:956abbf6e3c8b654d3a4e4b974a1c0bdb11f8a34b79cc43dfbd9f599fb3a3905
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
 
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/face", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.7059965133666992,
3
- "best_model_checkpoint": "output/face/checkpoint-268",
4
- "epoch": 2.0,
5
- "global_step": 268,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -340,11 +340,181 @@
340
  "eval_samples_per_second": 22.706,
341
  "eval_steps_per_second": 2.911,
342
  "step": 268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  }
344
  ],
345
- "max_steps": 268,
346
- "num_train_epochs": 2,
347
- "total_flos": 278798598144000.0,
348
  "trial_name": null,
349
  "trial_params": null
350
  }
 
1
  {
2
+ "best_metric": 1.536595344543457,
3
+ "best_model_checkpoint": "output/face/checkpoint-402",
4
+ "epoch": 3.0,
5
+ "global_step": 402,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
340
  "eval_samples_per_second": 22.706,
341
  "eval_steps_per_second": 2.911,
342
  "step": 268
343
+ },
344
+ {
345
+ "epoch": 2.01,
346
+ "learning_rate": 0.0001371246011559198,
347
+ "loss": 1.731,
348
+ "step": 270
349
+ },
350
+ {
351
+ "epoch": 2.05,
352
+ "learning_rate": 0.0001362782664645371,
353
+ "loss": 1.6293,
354
+ "step": 275
355
+ },
356
+ {
357
+ "epoch": 2.09,
358
+ "learning_rate": 0.00013450300386777127,
359
+ "loss": 1.5935,
360
+ "step": 280
361
+ },
362
+ {
363
+ "epoch": 2.13,
364
+ "learning_rate": 0.00013182317999100535,
365
+ "loss": 1.5613,
366
+ "step": 285
367
+ },
368
+ {
369
+ "epoch": 2.16,
370
+ "learning_rate": 0.00012827557714724304,
371
+ "loss": 1.8451,
372
+ "step": 290
373
+ },
374
+ {
375
+ "epoch": 2.2,
376
+ "learning_rate": 0.000123908888476113,
377
+ "loss": 1.7444,
378
+ "step": 295
379
+ },
380
+ {
381
+ "epoch": 2.24,
382
+ "learning_rate": 0.00011878304959908774,
383
+ "loss": 1.6749,
384
+ "step": 300
385
+ },
386
+ {
387
+ "epoch": 2.28,
388
+ "learning_rate": 0.00011296841596437982,
389
+ "loss": 1.5253,
390
+ "step": 305
391
+ },
392
+ {
393
+ "epoch": 2.31,
394
+ "learning_rate": 0.00010654479717298873,
395
+ "loss": 1.6357,
396
+ "step": 310
397
+ },
398
+ {
399
+ "epoch": 2.35,
400
+ "learning_rate": 9.96003615403955e-05,
401
+ "loss": 1.6636,
402
+ "step": 315
403
+ },
404
+ {
405
+ "epoch": 2.39,
406
+ "learning_rate": 9.223042592950526e-05,
407
+ "loss": 1.646,
408
+ "step": 320
409
+ },
410
+ {
411
+ "epoch": 2.43,
412
+ "learning_rate": 8.453614746515988e-05,
413
+ "loss": 1.7026,
414
+ "step": 325
415
+ },
416
+ {
417
+ "epoch": 2.46,
418
+ "learning_rate": 7.662313508728495e-05,
419
+ "loss": 1.6289,
420
+ "step": 330
421
+ },
422
+ {
423
+ "epoch": 2.5,
424
+ "learning_rate": 6.860000000000001e-05,
425
+ "loss": 1.4981,
426
+ "step": 335
427
+ },
428
+ {
429
+ "epoch": 2.54,
430
+ "learning_rate": 6.0576864912715095e-05,
431
+ "loss": 1.5564,
432
+ "step": 340
433
+ },
434
+ {
435
+ "epoch": 2.57,
436
+ "learning_rate": 5.266385253484016e-05,
437
+ "loss": 1.6555,
438
+ "step": 345
439
+ },
440
+ {
441
+ "epoch": 2.61,
442
+ "learning_rate": 4.496957407049471e-05,
443
+ "loss": 1.5796,
444
+ "step": 350
445
+ },
446
+ {
447
+ "epoch": 2.65,
448
+ "learning_rate": 3.759963845960453e-05,
449
+ "loss": 1.7131,
450
+ "step": 355
451
+ },
452
+ {
453
+ "epoch": 2.69,
454
+ "learning_rate": 3.065520282701126e-05,
455
+ "loss": 1.61,
456
+ "step": 360
457
+ },
458
+ {
459
+ "epoch": 2.72,
460
+ "learning_rate": 2.423158403562016e-05,
461
+ "loss": 1.6306,
462
+ "step": 365
463
+ },
464
+ {
465
+ "epoch": 2.76,
466
+ "learning_rate": 1.8416950400912332e-05,
467
+ "loss": 1.5807,
468
+ "step": 370
469
+ },
470
+ {
471
+ "epoch": 2.8,
472
+ "learning_rate": 1.3291111523887019e-05,
473
+ "loss": 1.5766,
474
+ "step": 375
475
+ },
476
+ {
477
+ "epoch": 2.84,
478
+ "learning_rate": 8.924422852757e-06,
479
+ "loss": 1.6478,
480
+ "step": 380
481
+ },
482
+ {
483
+ "epoch": 2.87,
484
+ "learning_rate": 5.376820008994686e-06,
485
+ "loss": 1.6091,
486
+ "step": 385
487
+ },
488
+ {
489
+ "epoch": 2.91,
490
+ "learning_rate": 2.6969961322287634e-06,
491
+ "loss": 1.512,
492
+ "step": 390
493
+ },
494
+ {
495
+ "epoch": 2.95,
496
+ "learning_rate": 9.21733535462923e-07,
497
+ "loss": 1.57,
498
+ "step": 395
499
+ },
500
+ {
501
+ "epoch": 2.99,
502
+ "learning_rate": 7.53988440801922e-08,
503
+ "loss": 1.5282,
504
+ "step": 400
505
+ },
506
+ {
507
+ "epoch": 3.0,
508
+ "eval_loss": 1.536595344543457,
509
+ "eval_runtime": 8.6245,
510
+ "eval_samples_per_second": 22.726,
511
+ "eval_steps_per_second": 2.899,
512
+ "step": 402
513
  }
514
  ],
515
+ "max_steps": 402,
516
+ "num_train_epochs": 3,
517
+ "total_flos": 418067251200000.0,
518
  "trial_name": null,
519
  "trial_params": null
520
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ff20da1bf81cef2f82cade2d05f960ab574b870297377eb2bc6103395307a13
3
  size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2d4b33a781b12e1276cd897224ebf9c500b46f888821b120698cf8724651f6
3
  size 2671