AlekseyKorshuk commited on
Commit
ca0dbb4
1 Parent(s): 580b2bc

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/obladaet")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3isuq1nm/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OBLADAET's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3g99ogdv) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3g99ogdv/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/obladaet")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1mtsuuwr/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OBLADAET's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1s9epb35) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1s9epb35/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -18,7 +18,9 @@
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
 
21
  "resid_pdrop": 0.1,
 
22
  "scale_attn_weights": true,
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.10.0",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
1
  {
2
+ "_name_or_path": "obladaet",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
21
+ "reorder_and_upcast_attn": false,
22
  "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
  "scale_attn_weights": true,
25
  "summary_activation": null,
26
  "summary_first_dropout": 0.1,
 
37
  }
38
  },
39
  "torch_dtype": "float32",
40
+ "transformers_version": "4.15.0",
41
  "use_cache": true,
42
  "vocab_size": 50257
43
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 2.009047269821167, "eval_runtime": 6.0992, "eval_samples_per_second": 22.134, "eval_steps_per_second": 2.787, "epoch": 1.0}
 
1
+ {"eval_loss": 1.7732337713241577, "eval_runtime": 7.0986, "eval_samples_per_second": 21.694, "eval_steps_per_second": 2.817, "epoch": 4.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc113917090121b56fa09a1d5257122677d68e090396d6600587905fb2be06b8
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c52ab931279f47e28b6847e5d38086105967cafe3ede3d42e5bd29def29ab03
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82c57e12d2c2364e7669f9582d82b64c47557acc70fd25834e906c5a5f1bffc5
3
- size 995603825
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f793de0686cb2318496c05ad7e9eb84507b50597f633e6c6930423baa2c8acc
3
+ size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f267b7b47f873182799d927cb61b8855ba2e9a923a454f30069efa3b2edd9aa
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41ad58f7eaf8d9ccb1108b2de48617723e0257dfd854bfe83d33c8b497618eb0
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5fddc434b6543bcf9fb934d13798d2854524eb692171ad66973ff4ff84d2dc0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16ee4b3c5a3fcf7b5bbeda9aae6e07b48bec52cb057e0b87294a70edc4a51800
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:210db157b55dd747a9007cf5cf1259093551c56de3cafadcc170777b60990c64
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54fe9d32a9c27c0412ac2e6ca93dfcc8ff0c17501d50cf60b9047a8d1c51b1c3
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
 
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/obladaet", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.009047269821167,
3
- "best_model_checkpoint": "output/obladaet/checkpoint-103",
4
- "epoch": 1.0,
5
- "global_step": 103,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -134,11 +134,395 @@
134
  "eval_samples_per_second": 22.775,
135
  "eval_steps_per_second": 2.868,
136
  "step": 103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  }
138
  ],
139
- "max_steps": 103,
140
- "num_train_epochs": 1,
141
- "total_flos": 107129733120000.0,
142
  "trial_name": null,
143
  "trial_params": null
144
  }
 
1
  {
2
+ "best_metric": 1.7732337713241577,
3
+ "best_model_checkpoint": "output/obladaet/checkpoint-404",
4
+ "epoch": 4.0,
5
+ "global_step": 404,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
134
  "eval_samples_per_second": 22.775,
135
  "eval_steps_per_second": 2.868,
136
  "step": 103
137
+ },
138
+ {
139
+ "epoch": 1.04,
140
+ "learning_rate": 5.302867558792348e-07,
141
+ "loss": 1.9242,
142
+ "step": 105
143
+ },
144
+ {
145
+ "epoch": 1.09,
146
+ "learning_rate": 2.6705334886474966e-06,
147
+ "loss": 2.0213,
148
+ "step": 110
149
+ },
150
+ {
151
+ "epoch": 1.14,
152
+ "learning_rate": 6.402259559252805e-06,
153
+ "loss": 1.998,
154
+ "step": 115
155
+ },
156
+ {
157
+ "epoch": 1.19,
158
+ "learning_rate": 1.1635384382334882e-05,
159
+ "loss": 2.0452,
160
+ "step": 120
161
+ },
162
+ {
163
+ "epoch": 1.24,
164
+ "learning_rate": 1.8243584927101022e-05,
165
+ "loss": 2.0056,
166
+ "step": 125
167
+ },
168
+ {
169
+ "epoch": 1.29,
170
+ "learning_rate": 2.6067345044190824e-05,
171
+ "loss": 1.8585,
172
+ "step": 130
173
+ },
174
+ {
175
+ "epoch": 1.34,
176
+ "learning_rate": 3.491780604523009e-05,
177
+ "loss": 1.9956,
178
+ "step": 135
179
+ },
180
+ {
181
+ "epoch": 1.39,
182
+ "learning_rate": 4.4581325585470584e-05,
183
+ "loss": 1.9927,
184
+ "step": 140
185
+ },
186
+ {
187
+ "epoch": 1.44,
188
+ "learning_rate": 5.48246348020977e-05,
189
+ "loss": 1.9939,
190
+ "step": 145
191
+ },
192
+ {
193
+ "epoch": 1.49,
194
+ "learning_rate": 6.540046921942313e-05,
195
+ "loss": 2.0247,
196
+ "step": 150
197
+ },
198
+ {
199
+ "epoch": 1.53,
200
+ "learning_rate": 7.605353749586352e-05,
201
+ "loss": 2.0324,
202
+ "step": 155
203
+ },
204
+ {
205
+ "epoch": 1.58,
206
+ "learning_rate": 8.652668393240227e-05,
207
+ "loss": 1.9751,
208
+ "step": 160
209
+ },
210
+ {
211
+ "epoch": 1.63,
212
+ "learning_rate": 9.656709598499981e-05,
213
+ "loss": 2.0028,
214
+ "step": 165
215
+ },
216
+ {
217
+ "epoch": 1.68,
218
+ "learning_rate": 0.00010593240693706343,
219
+ "loss": 1.9151,
220
+ "step": 170
221
+ },
222
+ {
223
+ "epoch": 1.73,
224
+ "learning_rate": 0.00011439654641882815,
225
+ "loss": 1.9662,
226
+ "step": 175
227
+ },
228
+ {
229
+ "epoch": 1.78,
230
+ "learning_rate": 0.00012175519754725823,
231
+ "loss": 1.9375,
232
+ "step": 180
233
+ },
234
+ {
235
+ "epoch": 1.83,
236
+ "learning_rate": 0.00012783072895591112,
237
+ "loss": 1.9174,
238
+ "step": 185
239
+ },
240
+ {
241
+ "epoch": 1.88,
242
+ "learning_rate": 0.00013247648265989943,
243
+ "loss": 1.9634,
244
+ "step": 190
245
+ },
246
+ {
247
+ "epoch": 1.93,
248
+ "learning_rate": 0.00013558031425066341,
249
+ "loss": 1.8422,
250
+ "step": 195
251
+ },
252
+ {
253
+ "epoch": 1.98,
254
+ "learning_rate": 0.00013706729996336458,
255
+ "loss": 1.8405,
256
+ "step": 200
257
+ },
258
+ {
259
+ "epoch": 2.0,
260
+ "eval_loss": 1.860771894454956,
261
+ "eval_runtime": 6.9634,
262
+ "eval_samples_per_second": 22.115,
263
+ "eval_steps_per_second": 2.872,
264
+ "step": 202
265
+ },
266
+ {
267
+ "epoch": 2.03,
268
+ "learning_rate": 0.00013690154527067468,
269
+ "loss": 1.9028,
270
+ "step": 205
271
+ },
272
+ {
273
+ "epoch": 2.08,
274
+ "learning_rate": 0.0001350870513450971,
275
+ "loss": 1.9087,
276
+ "step": 210
277
+ },
278
+ {
279
+ "epoch": 2.13,
280
+ "learning_rate": 0.0001316676184741814,
281
+ "loss": 1.9086,
282
+ "step": 215
283
+ },
284
+ {
285
+ "epoch": 2.18,
286
+ "learning_rate": 0.00012672578876010354,
287
+ "loss": 1.9232,
288
+ "step": 220
289
+ },
290
+ {
291
+ "epoch": 2.23,
292
+ "learning_rate": 0.00012038085362591339,
293
+ "loss": 1.8881,
294
+ "step": 225
295
+ },
296
+ {
297
+ "epoch": 2.28,
298
+ "learning_rate": 0.00011278597422549735,
299
+ "loss": 1.8032,
300
+ "step": 230
301
+ },
302
+ {
303
+ "epoch": 2.33,
304
+ "learning_rate": 0.00010412448426802514,
305
+ "loss": 1.8736,
306
+ "step": 235
307
+ },
308
+ {
309
+ "epoch": 2.38,
310
+ "learning_rate": 9.460546450345005e-05,
311
+ "loss": 1.9298,
312
+ "step": 240
313
+ },
314
+ {
315
+ "epoch": 2.43,
316
+ "learning_rate": 8.445869569708696e-05,
317
+ "loss": 1.882,
318
+ "step": 245
319
+ },
320
+ {
321
+ "epoch": 2.48,
322
+ "learning_rate": 7.392911192402588e-05,
323
+ "loss": 1.8485,
324
+ "step": 250
325
+ },
326
+ {
327
+ "epoch": 2.52,
328
+ "learning_rate": 6.327088807597416e-05,
329
+ "loss": 1.8129,
330
+ "step": 255
331
+ },
332
+ {
333
+ "epoch": 2.57,
334
+ "learning_rate": 5.2741304302913084e-05,
335
+ "loss": 1.7189,
336
+ "step": 260
337
+ },
338
+ {
339
+ "epoch": 2.62,
340
+ "learning_rate": 4.259453549654993e-05,
341
+ "loss": 1.7814,
342
+ "step": 265
343
+ },
344
+ {
345
+ "epoch": 2.67,
346
+ "learning_rate": 3.307551573197489e-05,
347
+ "loss": 1.892,
348
+ "step": 270
349
+ },
350
+ {
351
+ "epoch": 2.72,
352
+ "learning_rate": 2.4414025774502687e-05,
353
+ "loss": 1.8749,
354
+ "step": 275
355
+ },
356
+ {
357
+ "epoch": 2.77,
358
+ "learning_rate": 1.681914637408659e-05,
359
+ "loss": 1.8049,
360
+ "step": 280
361
+ },
362
+ {
363
+ "epoch": 2.82,
364
+ "learning_rate": 1.0474211239896518e-05,
365
+ "loss": 1.7877,
366
+ "step": 285
367
+ },
368
+ {
369
+ "epoch": 2.87,
370
+ "learning_rate": 5.532381525818605e-06,
371
+ "loss": 1.8053,
372
+ "step": 290
373
+ },
374
+ {
375
+ "epoch": 2.92,
376
+ "learning_rate": 2.112948654902904e-06,
377
+ "loss": 1.7789,
378
+ "step": 295
379
+ },
380
+ {
381
+ "epoch": 2.97,
382
+ "learning_rate": 2.984547293253313e-07,
383
+ "loss": 1.7656,
384
+ "step": 300
385
+ },
386
+ {
387
+ "epoch": 3.0,
388
+ "eval_loss": 1.7856239080429077,
389
+ "eval_runtime": 7.037,
390
+ "eval_samples_per_second": 21.884,
391
+ "eval_steps_per_second": 2.842,
392
+ "step": 303
393
+ },
394
+ {
395
+ "epoch": 3.02,
396
+ "learning_rate": 1.3270003663542904e-07,
397
+ "loss": 1.8334,
398
+ "step": 305
399
+ },
400
+ {
401
+ "epoch": 3.07,
402
+ "learning_rate": 1.6196857493365444e-06,
403
+ "loss": 1.8375,
404
+ "step": 310
405
+ },
406
+ {
407
+ "epoch": 3.12,
408
+ "learning_rate": 4.7235173401005474e-06,
409
+ "loss": 1.8418,
410
+ "step": 315
411
+ },
412
+ {
413
+ "epoch": 3.17,
414
+ "learning_rate": 9.369271044088901e-06,
415
+ "loss": 1.8346,
416
+ "step": 320
417
+ },
418
+ {
419
+ "epoch": 3.22,
420
+ "learning_rate": 1.5444802452741737e-05,
421
+ "loss": 1.7677,
422
+ "step": 325
423
+ },
424
+ {
425
+ "epoch": 3.27,
426
+ "learning_rate": 2.2803453581171793e-05,
427
+ "loss": 1.8371,
428
+ "step": 330
429
+ },
430
+ {
431
+ "epoch": 3.32,
432
+ "learning_rate": 3.1267593062936485e-05,
433
+ "loss": 1.7753,
434
+ "step": 335
435
+ },
436
+ {
437
+ "epoch": 3.37,
438
+ "learning_rate": 4.063290401500004e-05,
439
+ "loss": 1.8018,
440
+ "step": 340
441
+ },
442
+ {
443
+ "epoch": 3.42,
444
+ "learning_rate": 5.06733160675977e-05,
445
+ "loss": 1.741,
446
+ "step": 345
447
+ },
448
+ {
449
+ "epoch": 3.47,
450
+ "learning_rate": 6.114646250413644e-05,
451
+ "loss": 1.729,
452
+ "step": 350
453
+ },
454
+ {
455
+ "epoch": 3.51,
456
+ "learning_rate": 7.179953078057677e-05,
457
+ "loss": 1.7717,
458
+ "step": 355
459
+ },
460
+ {
461
+ "epoch": 3.56,
462
+ "learning_rate": 8.237536519790226e-05,
463
+ "loss": 1.7441,
464
+ "step": 360
465
+ },
466
+ {
467
+ "epoch": 3.61,
468
+ "learning_rate": 9.261867441452943e-05,
469
+ "loss": 1.7007,
470
+ "step": 365
471
+ },
472
+ {
473
+ "epoch": 3.66,
474
+ "learning_rate": 0.00010228219395476987,
475
+ "loss": 1.8014,
476
+ "step": 370
477
+ },
478
+ {
479
+ "epoch": 3.71,
480
+ "learning_rate": 0.0001111326549558092,
481
+ "loss": 1.6888,
482
+ "step": 375
483
+ },
484
+ {
485
+ "epoch": 3.76,
486
+ "learning_rate": 0.00011895641507289893,
487
+ "loss": 1.8129,
488
+ "step": 380
489
+ },
490
+ {
491
+ "epoch": 3.81,
492
+ "learning_rate": 0.00012556461561766505,
493
+ "loss": 1.7059,
494
+ "step": 385
495
+ },
496
+ {
497
+ "epoch": 3.86,
498
+ "learning_rate": 0.00013079774044074714,
499
+ "loss": 1.8051,
500
+ "step": 390
501
+ },
502
+ {
503
+ "epoch": 3.91,
504
+ "learning_rate": 0.0001345294665113525,
505
+ "loss": 1.7285,
506
+ "step": 395
507
+ },
508
+ {
509
+ "epoch": 3.96,
510
+ "learning_rate": 0.00013666971324412076,
511
+ "loss": 1.8123,
512
+ "step": 400
513
+ },
514
+ {
515
+ "epoch": 4.0,
516
+ "eval_loss": 1.7732337713241577,
517
+ "eval_runtime": 7.0198,
518
+ "eval_samples_per_second": 21.938,
519
+ "eval_steps_per_second": 2.849,
520
+ "step": 404
521
  }
522
  ],
523
+ "max_steps": 404,
524
+ "num_train_epochs": 4,
525
+ "total_flos": 418981773312000.0,
526
  "trial_name": null,
527
  "trial_params": null
528
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54901e110d42694df30e93d3fdcc273603a1cafca63450c9377093e7e9638050
3
- size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61edd5ee9ac3511fa8aafd6320733cca851923fdf79ca86f83532841c0bd6b0d
3
+ size 2991