AlekseyKorshuk
committed on
Commit
•
9f48c13
1
Parent(s):
b3c48cd
huggingartists
Browse files
- README.md +3 -3
- config.json +1 -1
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- tokenizer_config.json +1 -1
- trainer_state.json +177 -7
- training_args.bin +1 -1
README.md
CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/face")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on FACE's lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
45 |
dataset = load_dataset("huggingartists/face")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/xtozoqtm/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on FACE's lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/knkqp5iy) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/knkqp5iy/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "huggingartists/face",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss": 1.
|
|
|
1 |
+
{"eval_loss": 1.536595344543457, "eval_runtime": 8.7966, "eval_samples_per_second": 22.281, "eval_steps_per_second": 2.842, "epoch": 3.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a352dc2cb017f7537f484bbbd8674249ff6c9a5d0b9731ae3ead2468a57e2ed9
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22b89f9cd6657751dd2e461644ea376c8ac45cdaa75772ffdfea8283df3c3796
|
3 |
size 995604017
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510403817
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83af0ad5904348f48de9db72cfdbe286540f30f4acb0c0e959c897788b30f4fa
|
3 |
size 510403817
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34c56094291dd7460e0ba6cf9174a82a82f05fa0dfbe9f60c1626e93605a150c
|
3 |
+
size 14567
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:956abbf6e3c8b654d3a4e4b974a1c0bdb11f8a34b79cc43dfbd9f599fb3a3905
|
3 |
size 623
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "
|
|
|
1 |
+
{"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/face", "tokenizer_class": "GPT2Tokenizer"}
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "output/face/checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -340,11 +340,181 @@
|
|
340 |
"eval_samples_per_second": 22.706,
|
341 |
"eval_steps_per_second": 2.911,
|
342 |
"step": 268
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
}
|
344 |
],
|
345 |
-
"max_steps":
|
346 |
-
"num_train_epochs":
|
347 |
-
"total_flos":
|
348 |
"trial_name": null,
|
349 |
"trial_params": null
|
350 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.536595344543457,
|
3 |
+
"best_model_checkpoint": "output/face/checkpoint-402",
|
4 |
+
"epoch": 3.0,
|
5 |
+
"global_step": 402,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
340 |
"eval_samples_per_second": 22.706,
|
341 |
"eval_steps_per_second": 2.911,
|
342 |
"step": 268
|
343 |
+
},
|
344 |
+
{
|
345 |
+
"epoch": 2.01,
|
346 |
+
"learning_rate": 0.0001371246011559198,
|
347 |
+
"loss": 1.731,
|
348 |
+
"step": 270
|
349 |
+
},
|
350 |
+
{
|
351 |
+
"epoch": 2.05,
|
352 |
+
"learning_rate": 0.0001362782664645371,
|
353 |
+
"loss": 1.6293,
|
354 |
+
"step": 275
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 2.09,
|
358 |
+
"learning_rate": 0.00013450300386777127,
|
359 |
+
"loss": 1.5935,
|
360 |
+
"step": 280
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"epoch": 2.13,
|
364 |
+
"learning_rate": 0.00013182317999100535,
|
365 |
+
"loss": 1.5613,
|
366 |
+
"step": 285
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"epoch": 2.16,
|
370 |
+
"learning_rate": 0.00012827557714724304,
|
371 |
+
"loss": 1.8451,
|
372 |
+
"step": 290
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"epoch": 2.2,
|
376 |
+
"learning_rate": 0.000123908888476113,
|
377 |
+
"loss": 1.7444,
|
378 |
+
"step": 295
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"epoch": 2.24,
|
382 |
+
"learning_rate": 0.00011878304959908774,
|
383 |
+
"loss": 1.6749,
|
384 |
+
"step": 300
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"epoch": 2.28,
|
388 |
+
"learning_rate": 0.00011296841596437982,
|
389 |
+
"loss": 1.5253,
|
390 |
+
"step": 305
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"epoch": 2.31,
|
394 |
+
"learning_rate": 0.00010654479717298873,
|
395 |
+
"loss": 1.6357,
|
396 |
+
"step": 310
|
397 |
+
},
|
398 |
+
{
|
399 |
+
"epoch": 2.35,
|
400 |
+
"learning_rate": 9.96003615403955e-05,
|
401 |
+
"loss": 1.6636,
|
402 |
+
"step": 315
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"epoch": 2.39,
|
406 |
+
"learning_rate": 9.223042592950526e-05,
|
407 |
+
"loss": 1.646,
|
408 |
+
"step": 320
|
409 |
+
},
|
410 |
+
{
|
411 |
+
"epoch": 2.43,
|
412 |
+
"learning_rate": 8.453614746515988e-05,
|
413 |
+
"loss": 1.7026,
|
414 |
+
"step": 325
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"epoch": 2.46,
|
418 |
+
"learning_rate": 7.662313508728495e-05,
|
419 |
+
"loss": 1.6289,
|
420 |
+
"step": 330
|
421 |
+
},
|
422 |
+
{
|
423 |
+
"epoch": 2.5,
|
424 |
+
"learning_rate": 6.860000000000001e-05,
|
425 |
+
"loss": 1.4981,
|
426 |
+
"step": 335
|
427 |
+
},
|
428 |
+
{
|
429 |
+
"epoch": 2.54,
|
430 |
+
"learning_rate": 6.0576864912715095e-05,
|
431 |
+
"loss": 1.5564,
|
432 |
+
"step": 340
|
433 |
+
},
|
434 |
+
{
|
435 |
+
"epoch": 2.57,
|
436 |
+
"learning_rate": 5.266385253484016e-05,
|
437 |
+
"loss": 1.6555,
|
438 |
+
"step": 345
|
439 |
+
},
|
440 |
+
{
|
441 |
+
"epoch": 2.61,
|
442 |
+
"learning_rate": 4.496957407049471e-05,
|
443 |
+
"loss": 1.5796,
|
444 |
+
"step": 350
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 2.65,
|
448 |
+
"learning_rate": 3.759963845960453e-05,
|
449 |
+
"loss": 1.7131,
|
450 |
+
"step": 355
|
451 |
+
},
|
452 |
+
{
|
453 |
+
"epoch": 2.69,
|
454 |
+
"learning_rate": 3.065520282701126e-05,
|
455 |
+
"loss": 1.61,
|
456 |
+
"step": 360
|
457 |
+
},
|
458 |
+
{
|
459 |
+
"epoch": 2.72,
|
460 |
+
"learning_rate": 2.423158403562016e-05,
|
461 |
+
"loss": 1.6306,
|
462 |
+
"step": 365
|
463 |
+
},
|
464 |
+
{
|
465 |
+
"epoch": 2.76,
|
466 |
+
"learning_rate": 1.8416950400912332e-05,
|
467 |
+
"loss": 1.5807,
|
468 |
+
"step": 370
|
469 |
+
},
|
470 |
+
{
|
471 |
+
"epoch": 2.8,
|
472 |
+
"learning_rate": 1.3291111523887019e-05,
|
473 |
+
"loss": 1.5766,
|
474 |
+
"step": 375
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"epoch": 2.84,
|
478 |
+
"learning_rate": 8.924422852757e-06,
|
479 |
+
"loss": 1.6478,
|
480 |
+
"step": 380
|
481 |
+
},
|
482 |
+
{
|
483 |
+
"epoch": 2.87,
|
484 |
+
"learning_rate": 5.376820008994686e-06,
|
485 |
+
"loss": 1.6091,
|
486 |
+
"step": 385
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"epoch": 2.91,
|
490 |
+
"learning_rate": 2.6969961322287634e-06,
|
491 |
+
"loss": 1.512,
|
492 |
+
"step": 390
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"epoch": 2.95,
|
496 |
+
"learning_rate": 9.21733535462923e-07,
|
497 |
+
"loss": 1.57,
|
498 |
+
"step": 395
|
499 |
+
},
|
500 |
+
{
|
501 |
+
"epoch": 2.99,
|
502 |
+
"learning_rate": 7.53988440801922e-08,
|
503 |
+
"loss": 1.5282,
|
504 |
+
"step": 400
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"epoch": 3.0,
|
508 |
+
"eval_loss": 1.536595344543457,
|
509 |
+
"eval_runtime": 8.6245,
|
510 |
+
"eval_samples_per_second": 22.726,
|
511 |
+
"eval_steps_per_second": 2.899,
|
512 |
+
"step": 402
|
513 |
}
|
514 |
],
|
515 |
+
"max_steps": 402,
|
516 |
+
"num_train_epochs": 3,
|
517 |
+
"total_flos": 418067251200000.0,
|
518 |
"trial_name": null,
|
519 |
"trial_params": null
|
520 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2671
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed2d4b33a781b12e1276cd897224ebf9c500b46f888821b120698cf8724651f6
|
3 |
size 2671
|