huggingartists

Browse files

Files changed (10) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +2 -2
scheduler.pt +1 -1
trainer_state.json +83 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/the-king-and-the-jester")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2f1nnkss/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Король и Шут (The King and the Jester)'s lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1gxge02f) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1gxge02f/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/the-king-and-the-jester")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/16ab6u68/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Король и Шут (The King and the Jester)'s lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/158p257u) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/158p257u/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -35,7 +35,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.10.0",
   "use_cache": true,
   "vocab_size": 50257
 }

     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.10.2",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.4257222414016724, "eval_runtime": 3.0996, "eval_samples_per_second": 21.615, "eval_steps_per_second": 2.904, "epoch": 12.0}


1	+ {"eval_loss": 1.288225769996643, "eval_runtime": 2.8321, "eval_samples_per_second": 21.186, "eval_steps_per_second": 2.825, "epoch": 13.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cfd6a25a8c031e4819a807021a5be3c88bc39519a7c4003bf0f250e4247ad861
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d241c36b5aa52d0c79f7737e2286ef56c0dffe0755b34d493daf245fdc82740
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:175e35917280372eea5a3d2e3edc4b64bcfe04e6ed50b99f5217e6155acc8fa8
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9ca153f8d1424a1a93900af418ce79188c701fbff88f35335f8042f20018620
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4f739f680d0907e02a0173d0fad6e2035b40d9b44ba1b0094eefd6da77ff5ac
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e0f741fd5524d9f61397df6adb94ac42bef8e536986693a38bd05b52690dde6
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:deef1051e7951574d9993f8369a780937388fa1a3d1bc42bbbf4e9abb0e4c0c5
-size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac404797c51c1607e03dfe53ce4d3c8fb76bbf9ab8955d467d072579420e39a5
+size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e96d1457ab0358c99b5f49944e41e58487c80b3297320ad0dc4c7e31fc35c7d6
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e6da00c6cb356e2a465c855ef99274a47db666c8c8a892ef5823e047b99391f
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.4257222414016724,
-  "best_model_checkpoint": "output/the-king-and-the-jester/checkpoint-480",
-  "epoch": 12.0,
-  "global_step": 480,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -686,11 +686,87 @@
       "eval_samples_per_second": 22.688,
       "eval_steps_per_second": 3.048,
       "step": 480
     }
   ],
-  "max_steps": 480,
-  "num_train_epochs": 12,
-  "total_flos": 491359666176000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.288225769996643,
+  "best_model_checkpoint": "output/the-king-and-the-jester/checkpoint-533",
+  "epoch": 13.0,
+  "global_step": 533,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.688,
       "eval_steps_per_second": 3.048,
       "step": 480
+    },
+    {
+      "epoch": 11.83,
+      "learning_rate": 0.00012756647503932202,
+      "loss": 1.532,
+      "step": 485
+    },
+    {
+      "epoch": 11.95,
+      "learning_rate": 0.0001363960370713319,
+      "loss": 1.6289,
+      "step": 490
+    },
+    {
+      "epoch": 12.0,
+      "eval_loss": 1.3016469478607178,
+      "eval_runtime": 2.8272,
+      "eval_samples_per_second": 21.222,
+      "eval_steps_per_second": 2.83,
+      "step": 492
+    },
+    {
+      "epoch": 12.07,
+      "learning_rate": 0.00013539550607801572,
+      "loss": 1.5711,
+      "step": 495
+    },
+    {
+      "epoch": 12.2,
+      "learning_rate": 0.00012470995414859683,
+      "loss": 1.5507,
+      "step": 500
+    },
+    {
+      "epoch": 12.32,
+      "learning_rate": 0.00010588873393008394,
+      "loss": 1.5444,
+      "step": 505
+    },
+    {
+      "epoch": 12.44,
+      "learning_rate": 8.166083008869614e-05,
+      "loss": 1.5625,
+      "step": 510
+    },
+    {
+      "epoch": 12.56,
+      "learning_rate": 5.553916991130382e-05,
+      "loss": 1.523,
+      "step": 515
+    },
+    {
+      "epoch": 12.68,
+      "learning_rate": 3.131126606991604e-05,
+      "loss": 1.5342,
+      "step": 520
+    },
+    {
+      "epoch": 12.8,
+      "learning_rate": 1.2490045851403148e-05,
+      "loss": 1.4935,
+      "step": 525
+    },
+    {
+      "epoch": 12.93,
+      "learning_rate": 1.8044939219843934e-06,
+      "loss": 1.5076,
+      "step": 530
+    },
+    {
+      "epoch": 13.0,
+      "eval_loss": 1.288225769996643,
+      "eval_runtime": 2.8008,
+      "eval_samples_per_second": 21.423,
+      "eval_steps_per_second": 2.856,
+      "step": 533
     }
   ],
+  "max_steps": 533,
+  "num_train_epochs": 13,
+  "total_flos": 545185824768000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c06aa8f0c58fedbe03b96b8daaeb3d54e7aa76b4c2fcf0f29356d693d750809
 size 2735

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d8c30d78c8ee7f99177924d05de07cbfd5aa02ba0f02675400bca9a15406b4a
 size 2735