diogopaes10 committed on Jul 22, 2023

Commit

11288fe

•

1 Parent(s): b37b8ce

Training in progress, epoch 9

Browse files

Files changed (26) hide show

{checkpoint-1000 → checkpoint-2000}/added_tokens.json +0 -0
{checkpoint-1000 → checkpoint-2000}/config.json +0 -0
{checkpoint-1000 → checkpoint-2000}/optimizer.pt +1 -1
{checkpoint-1250 → checkpoint-2000}/pytorch_model.bin +1 -1
{checkpoint-1250 → checkpoint-2000}/rng_state.pth +1 -1
{checkpoint-1250 → checkpoint-2000}/scheduler.pt +1 -1
{checkpoint-1000 → checkpoint-2000}/special_tokens_map.json +0 -0
{checkpoint-1000 → checkpoint-2000}/spm.model +0 -0
{checkpoint-1000 → checkpoint-2000}/tokenizer.json +0 -0
{checkpoint-1000 → checkpoint-2000}/tokenizer_config.json +0 -0
{checkpoint-1250 → checkpoint-2000}/trainer_state.json +107 -3
{checkpoint-1000 → checkpoint-2000}/training_args.bin +0 -0
{checkpoint-1250 → checkpoint-2250}/added_tokens.json +0 -0
{checkpoint-1250 → checkpoint-2250}/config.json +0 -0
{checkpoint-1250 → checkpoint-2250}/optimizer.pt +1 -1
{checkpoint-1000 → checkpoint-2250}/pytorch_model.bin +1 -1
{checkpoint-1000 → checkpoint-2250}/rng_state.pth +1 -1
{checkpoint-1000 → checkpoint-2250}/scheduler.pt +1 -1
{checkpoint-1250 → checkpoint-2250}/special_tokens_map.json +0 -0
{checkpoint-1250 → checkpoint-2250}/spm.model +0 -0
{checkpoint-1250 → checkpoint-2250}/tokenizer.json +0 -0
{checkpoint-1250 → checkpoint-2250}/tokenizer_config.json +0 -0
{checkpoint-1000 → checkpoint-2250}/trainer_state.json +159 -3
{checkpoint-1250 → checkpoint-2250}/training_args.bin +0 -0
pytorch_model.bin +1 -1
runs/Jul22_21-19-20_ab4276e44fca/events.out.tfevents.1690060770.ab4276e44fca.659.0 +2 -2

{checkpoint-1000 → checkpoint-2000}/added_tokens.json RENAMED Viewed

File without changes

{checkpoint-1000 → checkpoint-2000}/config.json RENAMED Viewed

File without changes

{checkpoint-1000 → checkpoint-2000}/optimizer.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8933160b3231a03ba857db1c0b9a44a2a04c1b49248d1f369c856ad4887c531
 size 1475557125

 version https://git-lfs.github.com/spec/v1
+oid sha256:81f3216586a58d2085a7f37ca95117d6c10fbcd3dc9c70f034b94e35fd6e9149
 size 1475557125

{checkpoint-1250 → checkpoint-2000}/pytorch_model.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c37c9b2cf82643ccb441b9f2e76d6a70dccdc2fd29cd95cea2aeac5989dcc044
 size 737788917

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b71e7c7798b6760f8be8c551b4e8951ebe3dfa93da41d1460b9cb33fd0d6f86
 size 737788917

{checkpoint-1250 → checkpoint-2000}/rng_state.pth RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c5494a563819f3435c3619722f6c611dd7e04ea13d2a4561f1cd488ab67b296
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:96ffa685fc4010a50b57e506b086e0167e48b18c5d9de223e06893b4aa16c22a
 size 14575

{checkpoint-1250 → checkpoint-2000}/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29851d63b5f6f5b0335901be112937603be45604b6b61b539240790a7047e5b9
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:626c1ac2df61838775a74a9749f04b4d515724a3ccd7da9a914227a50ae26d2d
 size 627

{checkpoint-1000 → checkpoint-2000}/special_tokens_map.json RENAMED Viewed

File without changes

{checkpoint-1000 → checkpoint-2000}/spm.model RENAMED Viewed

File without changes

{checkpoint-1000 → checkpoint-2000}/tokenizer.json RENAMED Viewed

File without changes

{checkpoint-1000 → checkpoint-2000}/tokenizer_config.json RENAMED Viewed

File without changes

{checkpoint-1250 → checkpoint-2000}/trainer_state.json RENAMED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.0,
-  "global_step": 1250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -168,11 +168,115 @@
       "eval_system_ram_total": 83.48074722290039,
       "eval_system_ram_used": 4.176631927490234,
       "step": 1128
     }
   ],
   "max_steps": 3750,
   "num_train_epochs": 15,
-  "total_flos": 2407649365164288.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 8.0,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_system_ram_total": 83.48074722290039,
       "eval_system_ram_used": 4.176631927490234,
       "step": 1128
+    },
+    {
+      "epoch": 5.26,
+      "learning_rate": 1.2992e-05,
+      "loss": 0.4555,
+      "step": 1316
+    },
+    {
+      "epoch": 5.26,
+      "eval_accuracy": 0.7075,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.76519775390625,
+      "eval_f1": 0.7086283787248422,
+      "eval_gpu_ram_allocated": 2.089810371398926,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 46,
+      "eval_loss": 1.117536187171936,
+      "eval_precision": 0.71510102752271,
+      "eval_recall": 0.7075,
+      "eval_runtime": 2.5545,
+      "eval_samples_per_second": 782.936,
+      "eval_steps_per_second": 24.662,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.225734710693359,
+      "step": 1316
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 1.1989333333333336e-05,
+      "loss": 0.3535,
+      "step": 1504
+    },
+    {
+      "epoch": 6.02,
+      "eval_accuracy": 0.708,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.76530456542969,
+      "eval_f1": 0.7032209621498534,
+      "eval_gpu_ram_allocated": 2.0898032188415527,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 50,
+      "eval_loss": 1.1748836040496826,
+      "eval_precision": 0.7076659711678004,
+      "eval_recall": 0.708,
+      "eval_runtime": 2.3932,
+      "eval_samples_per_second": 835.715,
+      "eval_steps_per_second": 26.325,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.23016357421875,
+      "step": 1504
+    },
+    {
+      "epoch": 6.77,
+      "learning_rate": 1.0986666666666668e-05,
+      "loss": 0.2614,
+      "step": 1692
+    },
+    {
+      "epoch": 6.77,
+      "eval_accuracy": 0.709,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.76542282104492,
+      "eval_f1": 0.7056311006074188,
+      "eval_gpu_ram_allocated": 2.089783191680908,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 49,
+      "eval_loss": 1.2027860879898071,
+      "eval_precision": 0.7079398723985221,
+      "eval_recall": 0.709,
+      "eval_runtime": 2.3888,
+      "eval_samples_per_second": 837.234,
+      "eval_steps_per_second": 26.373,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.237628936767578,
+      "step": 1692
+    },
+    {
+      "epoch": 7.52,
+      "learning_rate": 9.984e-06,
+      "loss": 0.2321,
+      "step": 1880
+    },
+    {
+      "epoch": 7.52,
+      "eval_accuracy": 0.698,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.7656135559082,
+      "eval_f1": 0.7018556265437493,
+      "eval_gpu_ram_allocated": 2.089846134185791,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 49,
+      "eval_loss": 1.2960551977157593,
+      "eval_precision": 0.708462957552084,
+      "eval_recall": 0.698,
+      "eval_runtime": 2.391,
+      "eval_samples_per_second": 836.478,
+      "eval_steps_per_second": 26.349,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.224781036376953,
+      "step": 1880
     }
   ],
   "max_steps": 3750,
   "num_train_epochs": 15,
+  "total_flos": 3850219425648384.0,
   "trial_name": null,
   "trial_params": null
 }

{checkpoint-1000 → checkpoint-2000}/training_args.bin RENAMED Viewed

File without changes

{checkpoint-1250 → checkpoint-2250}/added_tokens.json RENAMED Viewed

File without changes

{checkpoint-1250 → checkpoint-2250}/config.json RENAMED Viewed

File without changes

{checkpoint-1250 → checkpoint-2250}/optimizer.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a110d5b7bd9065a6e2aea20e66039c3ad94a36fb9ab062f65ed04c8052bc467
 size 1475557125

 version https://git-lfs.github.com/spec/v1
+oid sha256:d092da6a86e9d3fddf667c0e8cc73377daca3a3d8630e425531f34eb77545eea
 size 1475557125

{checkpoint-1000 → checkpoint-2250}/pytorch_model.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f8aadd399eb31e54990d00c74b732c0319b275fdd1b9ad592f63b3d4dfae103
 size 737788917

 version https://git-lfs.github.com/spec/v1
+oid sha256:627070829ce397286a4c2e3016e3ade6fbe191d90b64283878e08c51320b3b27
 size 737788917

{checkpoint-1000 → checkpoint-2250}/rng_state.pth RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da22004a4944e63e1dbfe1405f93c1d28411a5132dc640d3ac3c6859c3579dde
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5e45bb5a3782239e1b011180d84487acde3e428af09ac96bbf315bcb54dd611
 size 14575

{checkpoint-1000 → checkpoint-2250}/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72009b915e4ec650d135b73c75f2c43ff27dde3a223cbdb31d0e7105432803cc
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a4113c70550b7a4ade899ac1f4a91272bb6bdc8dc785c5c4e5e342583232813
 size 627

{checkpoint-1250 → checkpoint-2250}/special_tokens_map.json RENAMED Viewed

File without changes

{checkpoint-1250 → checkpoint-2250}/spm.model RENAMED Viewed

File without changes

{checkpoint-1250 → checkpoint-2250}/tokenizer.json RENAMED Viewed

File without changes

{checkpoint-1250 → checkpoint-2250}/tokenizer_config.json RENAMED Viewed

File without changes

{checkpoint-1000 → checkpoint-2250}/trainer_state.json RENAMED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.0,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -142,11 +142,167 @@
       "eval_system_ram_total": 83.48074722290039,
       "eval_system_ram_used": 4.188880920410156,
       "step": 940
     }
   ],
   "max_steps": 3750,
   "num_train_epochs": 15,
-  "total_flos": 1921836975330048.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 9.0,
+  "global_step": 2250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_system_ram_total": 83.48074722290039,
       "eval_system_ram_used": 4.188880920410156,
       "step": 940
+    },
+    {
+      "epoch": 4.51,
+      "learning_rate": 1.3994666666666668e-05,
+      "loss": 0.4972,
+      "step": 1128
+    },
+    {
+      "epoch": 4.51,
+      "eval_accuracy": 0.714,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 29.642135620117188,
+      "eval_f1": 0.7109995031569997,
+      "eval_gpu_ram_allocated": 2.089801788330078,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 47,
+      "eval_loss": 1.0585097074508667,
+      "eval_precision": 0.7129473752365556,
+      "eval_recall": 0.714,
+      "eval_runtime": 2.3843,
+      "eval_samples_per_second": 838.824,
+      "eval_steps_per_second": 26.423,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.176631927490234,
+      "step": 1128
+    },
+    {
+      "epoch": 5.26,
+      "learning_rate": 1.2992e-05,
+      "loss": 0.4555,
+      "step": 1316
+    },
+    {
+      "epoch": 5.26,
+      "eval_accuracy": 0.7075,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.76519775390625,
+      "eval_f1": 0.7086283787248422,
+      "eval_gpu_ram_allocated": 2.089810371398926,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 46,
+      "eval_loss": 1.117536187171936,
+      "eval_precision": 0.71510102752271,
+      "eval_recall": 0.7075,
+      "eval_runtime": 2.5545,
+      "eval_samples_per_second": 782.936,
+      "eval_steps_per_second": 24.662,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.225734710693359,
+      "step": 1316
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 1.1989333333333336e-05,
+      "loss": 0.3535,
+      "step": 1504
+    },
+    {
+      "epoch": 6.02,
+      "eval_accuracy": 0.708,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.76530456542969,
+      "eval_f1": 0.7032209621498534,
+      "eval_gpu_ram_allocated": 2.0898032188415527,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 50,
+      "eval_loss": 1.1748836040496826,
+      "eval_precision": 0.7076659711678004,
+      "eval_recall": 0.708,
+      "eval_runtime": 2.3932,
+      "eval_samples_per_second": 835.715,
+      "eval_steps_per_second": 26.325,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.23016357421875,
+      "step": 1504
+    },
+    {
+      "epoch": 6.77,
+      "learning_rate": 1.0986666666666668e-05,
+      "loss": 0.2614,
+      "step": 1692
+    },
+    {
+      "epoch": 6.77,
+      "eval_accuracy": 0.709,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.76542282104492,
+      "eval_f1": 0.7056311006074188,
+      "eval_gpu_ram_allocated": 2.089783191680908,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 49,
+      "eval_loss": 1.2027860879898071,
+      "eval_precision": 0.7079398723985221,
+      "eval_recall": 0.709,
+      "eval_runtime": 2.3888,
+      "eval_samples_per_second": 837.234,
+      "eval_steps_per_second": 26.373,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.237628936767578,
+      "step": 1692
+    },
+    {
+      "epoch": 7.52,
+      "learning_rate": 9.984e-06,
+      "loss": 0.2321,
+      "step": 1880
+    },
+    {
+      "epoch": 7.52,
+      "eval_accuracy": 0.698,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.7656135559082,
+      "eval_f1": 0.7018556265437493,
+      "eval_gpu_ram_allocated": 2.089846134185791,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 49,
+      "eval_loss": 1.2960551977157593,
+      "eval_precision": 0.708462957552084,
+      "eval_recall": 0.698,
+      "eval_runtime": 2.391,
+      "eval_samples_per_second": 836.478,
+      "eval_steps_per_second": 26.349,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.224781036376953,
+      "step": 1880
+    },
+    {
+      "epoch": 8.27,
+      "learning_rate": 8.981333333333333e-06,
+      "loss": 0.197,
+      "step": 2068
+    },
+    {
+      "epoch": 8.27,
+      "eval_accuracy": 0.712,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.7657470703125,
+      "eval_f1": 0.7097931257647566,
+      "eval_gpu_ram_allocated": 2.0897903442382812,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 45,
+      "eval_loss": 1.3960117101669312,
+      "eval_precision": 0.7137187449926237,
+      "eval_recall": 0.712,
+      "eval_runtime": 2.3878,
+      "eval_samples_per_second": 837.604,
+      "eval_steps_per_second": 26.385,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.219398498535156,
+      "step": 2068
     }
   ],
   "max_steps": 3750,
   "num_train_epochs": 15,
+  "total_flos": 4334831263537536.0,
   "trial_name": null,
   "trial_params": null
 }

{checkpoint-1250 → checkpoint-2250}/training_args.bin RENAMED Viewed

File without changes

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c37c9b2cf82643ccb441b9f2e76d6a70dccdc2fd29cd95cea2aeac5989dcc044
 size 737788917

 version https://git-lfs.github.com/spec/v1
+oid sha256:627070829ce397286a4c2e3016e3ade6fbe191d90b64283878e08c51320b3b27
 size 737788917

runs/Jul22_21-19-20_ab4276e44fca/events.out.tfevents.1690060770.ab4276e44fca.659.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91c406716727b3f9c785733bad33bcc6f1db69b8352ddbcb0f44eb1f3dc7eb58
-size 11732

 version https://git-lfs.github.com/spec/v1
+oid sha256:9480f7c1ae50fb4afe8f904897ca584d0ecad3a73621bc2257b9e47e566bf68b
+size 17242