AlekseyKorshuk committed
Commit 095717a
1 Parent(s): 4fe63a3

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/skillet")
 ```
 
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3dfttz1c/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1wmbkzn8/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Skillet's lyrics.
 
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3j4hok8c) for full transparency and reproducibility.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3jke6b6i) for full transparency and reproducibility.
 
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3j4hok8c/artifacts) is logged and versioned.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3jke6b6i/artifacts) is logged and versioned.
 
 ## How to use
 
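For readers following the README excerpt above, a minimal sketch of loading the dataset and sampling from the fine-tuned model; the prompt and generation settings are illustrative, not taken from this repo:

```python
# Minimal sketch: load the lyrics dataset and sample from the fine-tuned model.
from datasets import load_dataset
from transformers import pipeline

dataset = load_dataset("huggingartists/skillet")          # dataset named in the README
generator = pipeline("text-generation", model="huggingartists/skillet")

# Prompt and generation settings below are illustrative only.
print(generator("I feel", max_length=50, num_return_sequences=1)[0]["generated_text"])
```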
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "gpt2",
+  "_name_or_path": "huggingartists/skillet",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
-{"eval_loss": 2.3767073154449463, "eval_runtime": 1.8608, "eval_samples_per_second": 20.421, "eval_steps_per_second": 2.687, "epoch": 1.0}
+{"eval_loss": 2.182462215423584, "eval_runtime": 1.8241, "eval_samples_per_second": 20.832, "eval_steps_per_second": 2.741, "epoch": 2.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f9e4df6148aa13358b5fc0d1d7732ebcd76c703202383ab402bbfbde4f40cde
+oid sha256:494a261cf329c21f2d5a20fd1bf77b6cfdc9e0eecaf3e3e2d5bd414d242c9214
 size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:632a74218c8c93865446ae287401c967c6edac289d04a2e1fde9b9d4ae55f633
+oid sha256:6818a328c91aa8ee84d14bcca87646876943f9475d1bdea2558b2e344f1c0d70
 size 995603825
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:024c63bfd1728ea1606e9f49b45693548baf611c1bd3a441894dc76ee40d2877
+oid sha256:419ae6d97900e1cf2c292ef2ac616408e51bde90741b105c130919afb01354dc
 size 510403817
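The weight and optimizer files above are stored as Git LFS pointers, so these diffs only swap the sha256 oid of the underlying object. A minimal sketch of checking a downloaded pytorch_model.bin against the new oid from this commit; the local path is an assumption:

```python
# Minimal sketch: verify a downloaded pytorch_model.bin against the sha256 oid
# recorded in its Git LFS pointer (the "+" oid from this commit).
import hashlib

EXPECTED = "419ae6d97900e1cf2c292ef2ac616408e51bde90741b105c130919afb01354dc"

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:          # path assumes a local download
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "checksum mismatch"
```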
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5d4ce71149472c91ed8113acdfa27f14f794e25946ca3416a5a2bf011d07a2d
-size 14567
+oid sha256:ce33455ae2ddcb7e4c137d33b3eb3d2db5d53e470ac3f0db274f1725db8326c8
+size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b187631ec6033019fb457e7347eb2d1876fbc061592b2f57081baca0156b529a
+oid sha256:8d5f77e4998f6148c66889b4624d4dea2feb622ab4d3aec5f547ee5d95d99502
 size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
-{"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
+{"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/skillet", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 2.3767073154449463,
-  "best_model_checkpoint": "output/skillet/checkpoint-28",
-  "epoch": 1.0,
-  "global_step": 28,
+  "best_metric": 2.182462215423584,
+  "best_model_checkpoint": "output/skillet/checkpoint-56",
+  "epoch": 2.0,
+  "global_step": 56,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -44,11 +44,55 @@
       "eval_samples_per_second": 22.256,
       "eval_steps_per_second": 2.928,
       "step": 28
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 1.7199452243268996e-06,
+      "loss": 2.2694,
+      "step": 30
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 2.009247481060283e-05,
+      "loss": 2.3714,
+      "step": 35
+    },
+    {
+      "epoch": 1.43,
+      "learning_rate": 5.333506393059682e-05,
+      "loss": 2.3801,
+      "step": 40
+    },
+    {
+      "epoch": 1.61,
+      "learning_rate": 9.125714365012444e-05,
+      "loss": 2.3492,
+      "step": 45
+    },
+    {
+      "epoch": 1.79,
+      "learning_rate": 0.00012223363969730684,
+      "loss": 2.3538,
+      "step": 50
+    },
+    {
+      "epoch": 1.96,
+      "learning_rate": 0.00013676865759867644,
+      "loss": 2.1417,
+      "step": 55
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.182462215423584,
+      "eval_runtime": 1.7149,
+      "eval_samples_per_second": 22.159,
+      "eval_steps_per_second": 2.916,
+      "step": 56
     }
   ],
-  "max_steps": 28,
-  "num_train_epochs": 1,
-  "total_flos": 29264707584000.0,
+  "max_steps": 56,
+  "num_train_epochs": 2,
+  "total_flos": 58398769152000.0,
   "trial_name": null,
   "trial_params": null
 }
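trainer_state.json now carries two epochs of logged steps (up to step 56). A minimal sketch of printing the loss curve from a local clone; the `log_history` key is the standard `Trainer` field these entries belong to, though the key name itself is outside the excerpt shown:

```python
# Minimal sketch: print the logged training/eval losses from trainer_state.json.
import json

with open("trainer_state.json") as f:               # path assumes a local clone
    state = json.load(f)

for entry in state["log_history"]:                  # standard Trainer field (assumed key name)
    if "loss" in entry:
        print(entry["step"], "train", entry["loss"])
    elif "eval_loss" in entry:
        print(entry["step"], "eval", entry["eval_loss"])

print("best:", state["best_metric"], "at", state["best_model_checkpoint"])
```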
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:482e59b3a88c369d967bb53579ab0fd7ddcbb147e0c764e2dae093c8d67fc4a8
+oid sha256:26460a0f39fd310edfb607ce3aaecafe8e443c5d2dae6b1db65bfc5f8444ab1c
 size 2671