AlekseyKorshuk commited on
Commit
ca0671f
1 Parent(s): d87ec48

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/lady-gaga")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/s6109nue/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Lady Gaga's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3g2fq2s0) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3g2fq2s0/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/lady-gaga")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/17c0d4ej/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Lady Gaga's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2j7yp9qd) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2j7yp9qd/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -18,7 +18,9 @@
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
 
21
  "resid_pdrop": 0.1,
 
22
  "scale_attn_weights": true,
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.10.0",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
1
  {
2
+ "_name_or_path": "lady-gaga",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
21
+ "reorder_and_upcast_attn": false,
22
  "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
  "scale_attn_weights": true,
25
  "summary_activation": null,
26
  "summary_first_dropout": 0.1,
 
37
  }
38
  },
39
  "torch_dtype": "float32",
40
+ "transformers_version": "4.17.0",
41
  "use_cache": true,
42
  "vocab_size": 50257
43
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.7604948282241821, "eval_runtime": 8.1984, "eval_samples_per_second": 22.078, "eval_steps_per_second": 2.805, "epoch": 20.0}
 
1
+ {"eval_loss": 1.1818922758102417, "eval_runtime": 4.5799, "eval_samples_per_second": 42.359, "eval_steps_per_second": 5.459, "epoch": 10.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7afa87885c4237efdbd54f14a90c394a7789cee5d9dc805cefa09922257eca9
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99dd1e512d6d5a8bb64ae4522bfb7b135369d388d25a4d9301825fcb20cba277
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e975e394301ef8deef76b9ee8c42cf177aa52fc2de166eb69e8a52fa5f03089
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:260937b68e76f08ab05abc4034e8a26d999dba3a07d5dd085f85eafddea8716b
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:075245bdc5c5f7cd7a9cdac5907183e6b2f805ffe1d84da4d6b08b01b13cdde7
3
- size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b8b67c493edd3b41282c01b0aeeb6ec02fb37cddd445842e671281f8fe93481
3
+ size 510404393
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03a2d5e765e5cf5ecb55bf1d575b6d6e34d62c85ee9fecda0b266d1418fa375b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:543fe6be835f78a08147e71f220ab369938164ecea602c709f497ffb01da0814
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e3679dd478a770c99d8cf2c99947149cd3de03554a50244d19b822d7b456455
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19b8ed0e0974b735985e6aeb8c7bbf0ef2968c7807776eb9b1a23504cdae1948
3
  size 623
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
 
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/lady-gaga", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.7604948282241821,
3
- "best_model_checkpoint": "output/lady-gaga/checkpoint-822",
4
- "epoch": 6.0,
5
- "global_step": 822,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1038,11 +1038,169 @@
1038
  "eval_samples_per_second": 22.462,
1039
  "eval_steps_per_second": 2.854,
1040
  "step": 822
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1041
  }
1042
  ],
1043
- "max_steps": 2740,
1044
- "num_train_epochs": 20,
1045
- "total_flos": 855992696832000.0,
1046
  "trial_name": null,
1047
  "trial_params": null
1048
  }
 
1
  {
2
+ "best_metric": 1.1818922758102417,
3
+ "best_model_checkpoint": "output/lady-gaga/checkpoint-945",
4
+ "epoch": 7.0,
5
+ "global_step": 945,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1038
  "eval_samples_per_second": 22.462,
1039
  "eval_steps_per_second": 2.854,
1040
  "step": 822
1041
+ },
1042
+ {
1043
+ "epoch": 6.11,
1044
+ "learning_rate": 0.00013306291378591335,
1045
+ "loss": 1.3683,
1046
+ "step": 825
1047
+ },
1048
+ {
1049
+ "epoch": 6.15,
1050
+ "learning_rate": 0.00012990319912618617,
1051
+ "loss": 1.3883,
1052
+ "step": 830
1053
+ },
1054
+ {
1055
+ "epoch": 6.19,
1056
+ "learning_rate": 0.00012591446386292745,
1057
+ "loss": 1.289,
1058
+ "step": 835
1059
+ },
1060
+ {
1061
+ "epoch": 6.22,
1062
+ "learning_rate": 0.00012115064879796196,
1063
+ "loss": 1.3834,
1064
+ "step": 840
1065
+ },
1066
+ {
1067
+ "epoch": 6.26,
1068
+ "learning_rate": 0.00011567617635779509,
1069
+ "loss": 1.2739,
1070
+ "step": 845
1071
+ },
1072
+ {
1073
+ "epoch": 6.3,
1074
+ "learning_rate": 0.00010956507939081116,
1075
+ "loss": 1.0868,
1076
+ "step": 850
1077
+ },
1078
+ {
1079
+ "epoch": 6.33,
1080
+ "learning_rate": 0.00010290000000000012,
1081
+ "loss": 1.3412,
1082
+ "step": 855
1083
+ },
1084
+ {
1085
+ "epoch": 6.37,
1086
+ "learning_rate": 9.577107195028614e-05,
1087
+ "loss": 1.2163,
1088
+ "step": 860
1089
+ },
1090
+ {
1091
+ "epoch": 6.41,
1092
+ "learning_rate": 8.827470176398086e-05,
1093
+ "loss": 1.1091,
1094
+ "step": 865
1095
+ },
1096
+ {
1097
+ "epoch": 6.44,
1098
+ "learning_rate": 8.051226498795135e-05,
1099
+ "loss": 1.2886,
1100
+ "step": 870
1101
+ },
1102
+ {
1103
+ "epoch": 6.48,
1104
+ "learning_rate": 7.258873526325866e-05,
1105
+ "loss": 1.5192,
1106
+ "step": 875
1107
+ },
1108
+ {
1109
+ "epoch": 6.52,
1110
+ "learning_rate": 6.461126473674146e-05,
1111
+ "loss": 1.3007,
1112
+ "step": 880
1113
+ },
1114
+ {
1115
+ "epoch": 6.56,
1116
+ "learning_rate": 5.6687735012048764e-05,
1117
+ "loss": 1.0916,
1118
+ "step": 885
1119
+ },
1120
+ {
1121
+ "epoch": 6.59,
1122
+ "learning_rate": 4.892529823601925e-05,
1123
+ "loss": 1.1481,
1124
+ "step": 890
1125
+ },
1126
+ {
1127
+ "epoch": 6.63,
1128
+ "learning_rate": 4.1428928049713965e-05,
1129
+ "loss": 1.2328,
1130
+ "step": 895
1131
+ },
1132
+ {
1133
+ "epoch": 6.67,
1134
+ "learning_rate": 3.429999999999998e-05,
1135
+ "loss": 1.0816,
1136
+ "step": 900
1137
+ },
1138
+ {
1139
+ "epoch": 6.7,
1140
+ "learning_rate": 2.763492060918892e-05,
1141
+ "loss": 1.2086,
1142
+ "step": 905
1143
+ },
1144
+ {
1145
+ "epoch": 6.74,
1146
+ "learning_rate": 2.152382364220499e-05,
1147
+ "loss": 0.9535,
1148
+ "step": 910
1149
+ },
1150
+ {
1151
+ "epoch": 6.78,
1152
+ "learning_rate": 1.604935120203811e-05,
1153
+ "loss": 1.3744,
1154
+ "step": 915
1155
+ },
1156
+ {
1157
+ "epoch": 6.81,
1158
+ "learning_rate": 1.128553613707261e-05,
1159
+ "loss": 1.2614,
1160
+ "step": 920
1161
+ },
1162
+ {
1163
+ "epoch": 6.85,
1164
+ "learning_rate": 7.296800873813895e-06,
1165
+ "loss": 1.2024,
1166
+ "step": 925
1167
+ },
1168
+ {
1169
+ "epoch": 6.89,
1170
+ "learning_rate": 4.137086214086697e-06,
1171
+ "loss": 1.2731,
1172
+ "step": 930
1173
+ },
1174
+ {
1175
+ "epoch": 6.93,
1176
+ "learning_rate": 1.8491218782241098e-06,
1177
+ "loss": 1.1451,
1178
+ "step": 935
1179
+ },
1180
+ {
1181
+ "epoch": 6.96,
1182
+ "learning_rate": 4.638486589027391e-07,
1183
+ "loss": 1.2302,
1184
+ "step": 940
1185
+ },
1186
+ {
1187
+ "epoch": 7.0,
1188
+ "learning_rate": 0.0,
1189
+ "loss": 1.2435,
1190
+ "step": 945
1191
+ },
1192
+ {
1193
+ "epoch": 7.0,
1194
+ "eval_loss": 1.1818922758102417,
1195
+ "eval_runtime": 4.3047,
1196
+ "eval_samples_per_second": 45.067,
1197
+ "eval_steps_per_second": 5.808,
1198
+ "step": 945
1199
  }
1200
  ],
1201
+ "max_steps": 1350,
1202
+ "num_train_epochs": 10,
1203
+ "total_flos": 984287084544000.0,
1204
  "trial_name": null,
1205
  "trial_params": null
1206
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e887f9fc06cce3b63f585b7c5b8b706b54fff3e04c1880ad0fa1b0ea356cea0
3
- size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4756263e940a9553814d86ac90b5fae14148f687aa8b2a4a1fdc0adfe160f9a8
3
+ size 3055