AlekseyKorshuk committed
Commit 0054227
1 Parent(s): 0a0d468

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
  dataset = load_dataset("huggingartists/the-king-and-the-jester")
  ```
 
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2f1nnkss/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/16ab6u68/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
  ## Training procedure
 
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Король и Шут (The King and the Jester)'s lyrics.
 
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1gxge02f) for full transparency and reproducibility.
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/158p257u) for full transparency and reproducibility.
 
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1gxge02f/artifacts) is logged and versioned.
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/158p257u/artifacts) is logged and versioned.
 
  ## How to use
 
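For context on the README's "How to use" section, a minimal sketch of loading the dataset and generating lyrics with this checkpoint, assuming the model id mirrors the dataset id above (the snippet is illustrative, not taken verbatim from the commit):

```python
from datasets import load_dataset
from transformers import pipeline

# Lyrics dataset referenced in the README above.
dataset = load_dataset("huggingartists/the-king-and-the-jester")

# Assumed model id (mirrors the dataset id); sample a few lyric continuations.
generator = pipeline("text-generation", model="huggingartists/the-king-and-the-jester")
outputs = generator("I want", do_sample=True, max_length=80, num_return_sequences=3)
for out in outputs:
    print(out["generated_text"])
```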
config.json CHANGED
@@ -35,7 +35,7 @@
  }
  },
  "torch_dtype": "float32",
- "transformers_version": "4.10.0",
+ "transformers_version": "4.10.2",
  "use_cache": true,
  "vocab_size": 50257
  }
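The only change here is the `transformers_version` metadata stamp (4.10.0 to 4.10.2). A small sketch of how that field can be checked after loading the config, assuming the repo id above (attribute access follows the JSON key):

```python
from transformers import AutoConfig

# Assumed repo id; the stamp records which transformers release wrote this config.
config = AutoConfig.from_pretrained("huggingartists/the-king-and-the-jester")
print(config.transformers_version)  # "4.10.2" after this commit
print(config.vocab_size)            # 50257, unchanged
```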
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 1.4257222414016724, "eval_runtime": 3.0996, "eval_samples_per_second": 21.615, "eval_steps_per_second": 2.904, "epoch": 12.0}
+ {"eval_loss": 1.288225769996643, "eval_runtime": 2.8321, "eval_samples_per_second": 21.186, "eval_steps_per_second": 2.825, "epoch": 13.0}
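The eval loss drops from about 1.426 (epoch 12) to about 1.288 (epoch 13). Assuming `eval_loss` is the usual mean token-level cross-entropy in nats, perplexity follows directly; a minimal sketch for reading the committed file:

```python
import json
import math

# Read the evaluation summary committed alongside the model.
with open("evaluation.txt") as f:
    metrics = json.load(f)

# Perplexity = exp(mean cross-entropy), assuming eval_loss is per-token cross-entropy.
print(metrics["eval_loss"])            # 1.2882...
print(math.exp(metrics["eval_loss"]))  # about 3.63 (was about 4.16 before this commit)
```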
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cfd6a25a8c031e4819a807021a5be3c88bc39519a7c4003bf0f250e4247ad861
+ oid sha256:4d241c36b5aa52d0c79f7737e2286ef56c0dffe0755b34d493daf245fdc82740
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:175e35917280372eea5a3d2e3edc4b64bcfe04e6ed50b99f5217e6155acc8fa8
+ oid sha256:f9ca153f8d1424a1a93900af418ce79188c701fbff88f35335f8042f20018620
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e4f739f680d0907e02a0173d0fad6e2035b40d9b44ba1b0094eefd6da77ff5ac
+ oid sha256:4e0f741fd5524d9f61397df6adb94ac42bef8e536986693a38bd05b52690dde6
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:deef1051e7951574d9993f8369a780937388fa1a3d1bc42bbbf4e9abb0e4c0c5
- size 14567
+ oid sha256:ac404797c51c1607e03dfe53ce4d3c8fb76bbf9ab8955d467d072579420e39a5
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e96d1457ab0358c99b5f49944e41e58487c80b3297320ad0dc4c7e31fc35c7d6
+ oid sha256:4e6da00c6cb356e2a465c855ef99274a47db666c8c8a892ef5823e047b99391f
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
- "best_metric": 1.4257222414016724,
- "best_model_checkpoint": "output/the-king-and-the-jester/checkpoint-480",
- "epoch": 12.0,
- "global_step": 480,
+ "best_metric": 1.288225769996643,
+ "best_model_checkpoint": "output/the-king-and-the-jester/checkpoint-533",
+ "epoch": 13.0,
+ "global_step": 533,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -686,11 +686,87 @@
  "eval_samples_per_second": 22.688,
  "eval_steps_per_second": 3.048,
  "step": 480
+ },
+ {
+ "epoch": 11.83,
+ "learning_rate": 0.00012756647503932202,
+ "loss": 1.532,
+ "step": 485
+ },
+ {
+ "epoch": 11.95,
+ "learning_rate": 0.0001363960370713319,
+ "loss": 1.6289,
+ "step": 490
+ },
+ {
+ "epoch": 12.0,
+ "eval_loss": 1.3016469478607178,
+ "eval_runtime": 2.8272,
+ "eval_samples_per_second": 21.222,
+ "eval_steps_per_second": 2.83,
+ "step": 492
+ },
+ {
+ "epoch": 12.07,
+ "learning_rate": 0.00013539550607801572,
+ "loss": 1.5711,
+ "step": 495
+ },
+ {
+ "epoch": 12.2,
+ "learning_rate": 0.00012470995414859683,
+ "loss": 1.5507,
+ "step": 500
+ },
+ {
+ "epoch": 12.32,
+ "learning_rate": 0.00010588873393008394,
+ "loss": 1.5444,
+ "step": 505
+ },
+ {
+ "epoch": 12.44,
+ "learning_rate": 8.166083008869614e-05,
+ "loss": 1.5625,
+ "step": 510
+ },
+ {
+ "epoch": 12.56,
+ "learning_rate": 5.553916991130382e-05,
+ "loss": 1.523,
+ "step": 515
+ },
+ {
+ "epoch": 12.68,
+ "learning_rate": 3.131126606991604e-05,
+ "loss": 1.5342,
+ "step": 520
+ },
+ {
+ "epoch": 12.8,
+ "learning_rate": 1.2490045851403148e-05,
+ "loss": 1.4935,
+ "step": 525
+ },
+ {
+ "epoch": 12.93,
+ "learning_rate": 1.8044939219843934e-06,
+ "loss": 1.5076,
+ "step": 530
+ },
+ {
+ "epoch": 13.0,
+ "eval_loss": 1.288225769996643,
+ "eval_runtime": 2.8008,
+ "eval_samples_per_second": 21.423,
+ "eval_steps_per_second": 2.856,
+ "step": 533
  }
  ],
- "max_steps": 480,
- "num_train_epochs": 12,
- "total_flos": 491359666176000.0,
+ "max_steps": 533,
+ "num_train_epochs": 13,
+ "total_flos": 545185824768000.0,
  "trial_name": null,
  "trial_params": null
  }
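The new log entries above cover epochs 11.83 through 13.0, with the best eval loss (1.2882) reached at step 533. A rough sketch for pulling those numbers back out of the file, assuming the standard transformers `TrainerState` layout with a `log_history` array:

```python
import json

# Assumes a local copy of trainer_state.json from this repo.
with open("trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 1.2882...
print(state["best_model_checkpoint"])  # output/the-king-and-the-jester/checkpoint-533

# Print the eval loss recorded at the end of each epoch.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(entry["epoch"], entry["eval_loss"])
```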
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8c06aa8f0c58fedbe03b96b8daaeb3d54e7aa76b4c2fcf0f29356d693d750809
+ oid sha256:3d8c30d78c8ee7f99177924d05de07cbfd5aa02ba0f02675400bca9a15406b4a
  size 2735
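`training_args.bin` is the pickled `TrainingArguments` object the `Trainer` saves next to its checkpoints; this commit only swaps its hash. A hedged sketch for inspecting it locally (the printed fields are standard `TrainingArguments` attributes, not values taken from this commit):

```python
import torch

# Unpickling requires transformers to be installed; on recent PyTorch versions
# pass weights_only=False, since this is a full pickled object rather than tensors.
args = torch.load("training_args.bin")
print(args.learning_rate)
print(args.num_train_epochs)
print(args.per_device_train_batch_size)
```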