End of training

Browse files

Files changed (11) hide show

README.md +4 -4
all_results.json +10 -4
eval_results.json +9 -0
runs/Mar27_19-04-58_hf-dgx-01/events.out.tfevents.1711579662.hf-dgx-01.1894903.1 +3 -0
train_results.json +4 -4
trainer_state.json +394 -394
wandb/debug-internal.log +0 -0
wandb/run-20240327_190513-7p2x8a0l/files/output.log +119 -0
wandb/run-20240327_190513-7p2x8a0l/files/wandb-summary.json +1 -1
wandb/run-20240327_190513-7p2x8a0l/logs/debug-internal.log +0 -0
wandb/run-20240327_190513-7p2x8a0l/run-7p2x8a0l.wandb +2 -2

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ base_model: distil-whisper/distil-large-v3
 tags:
 - generated_from_trainer
 datasets:
-- common_voice_16_1
 metrics:
 - wer
 model-index:
@@ -14,8 +14,8 @@ model-index:
       name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
-      name: common_voice_16_1
-      type: common_voice_16_1
       config: hi
       split: test
       args: hi
@@ -30,7 +30,7 @@ should probably proofread and complete it, then remove this comment. -->
 # distil-whisper/distil-large-v3
-This model is a fine-tuned version of [distil-whisper/distil-large-v3](https://huggingface.co/distil-whisper/distil-large-v3) on the common_voice_16_1 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.3749
 - Wer: 0.2664

 tags:
 - generated_from_trainer
 datasets:
+- mozilla-foundation/common_voice_16_1
 metrics:
 - wer
 model-index:
       name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
+      name: mozilla-foundation/common_voice_16_1 hi
+      type: mozilla-foundation/common_voice_16_1
       config: hi
       split: test
       args: hi
 # distil-whisper/distil-large-v3
+This model is a fine-tuned version of [distil-whisper/distil-large-v3](https://huggingface.co/distil-whisper/distil-large-v3) on the mozilla-foundation/common_voice_16_1 hi dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.3749
 - Wer: 0.2664

all_results.json CHANGED Viewed

@@ -1,8 +1,14 @@
 {
     "epoch": 22.52,
-    "train_loss": 0.17524469082718716,
-    "train_runtime": 15083.6622,
     "train_samples": 7099,
-    "train_samples_per_second": 10.608,
-    "train_steps_per_second": 0.331
 }

 {
     "epoch": 22.52,
+    "eval_loss": 0.37487614154815674,
+    "eval_runtime": 1345.0244,
+    "eval_samples": 3123,
+    "eval_samples_per_second": 2.322,
+    "eval_steps_per_second": 0.073,
+    "eval_wer": 0.26639882562002626,
+    "train_loss": 0.1750582966186106,
+    "train_runtime": 15499.9794,
     "train_samples": 7099,
+    "train_samples_per_second": 10.323,
+    "train_steps_per_second": 0.323
 }

eval_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 22.52,
+    "eval_loss": 0.37487614154815674,
+    "eval_runtime": 1345.0244,
+    "eval_samples": 3123,
+    "eval_samples_per_second": 2.322,
+    "eval_steps_per_second": 0.073,
+    "eval_wer": 0.26639882562002626
+}

runs/Mar27_19-04-58_hf-dgx-01/events.out.tfevents.1711579662.hf-dgx-01.1894903.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d61f0e8201847fd3d90c3dcb9e37b4a3ce389e21a4f5c2a051e7fd7def8bcd93
+size 406

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 22.52,
-    "train_loss": 0.17524469082718716,
-    "train_runtime": 15083.6622,
     "train_samples": 7099,
-    "train_samples_per_second": 10.608,
-    "train_steps_per_second": 0.331
 }

 {
     "epoch": 22.52,
+    "train_loss": 0.1750582966186106,
+    "train_runtime": 15499.9794,
     "train_samples": 7099,
+    "train_samples_per_second": 10.323,
+    "train_steps_per_second": 0.323
 }

trainer_state.json CHANGED Viewed

@@ -10,1457 +10,1457 @@
   "log_history": [
     {
       "epoch": 0.11,
-      "grad_norm": 29.428333282470703,
       "learning_rate": 4.000000000000001e-06,
       "loss": 11.9112,
       "step": 25
     },
     {
       "epoch": 0.23,
-      "grad_norm": 12.572431564331055,
       "learning_rate": 9e-06,
-      "loss": 5.9607,
       "step": 50
     },
     {
       "epoch": 0.34,
-      "grad_norm": 6.247668743133545,
       "learning_rate": 1.4000000000000001e-05,
       "loss": 2.7899,
       "step": 75
     },
     {
       "epoch": 0.45,
-      "grad_norm": 5.499792098999023,
       "learning_rate": 1.9e-05,
-      "loss": 1.934,
       "step": 100
     },
     {
       "epoch": 0.56,
-      "grad_norm": 10.862707138061523,
       "learning_rate": 2.4e-05,
-      "loss": 1.1845,
       "step": 125
     },
     {
       "epoch": 0.68,
-      "grad_norm": 6.8538055419921875,
       "learning_rate": 2.9e-05,
-      "loss": 0.7883,
       "step": 150
     },
     {
       "epoch": 0.79,
-      "grad_norm": 8.127602577209473,
       "learning_rate": 3.4000000000000007e-05,
-      "loss": 0.6147,
       "step": 175
     },
     {
       "epoch": 0.9,
-      "grad_norm": 4.003240585327148,
       "learning_rate": 3.9000000000000006e-05,
-      "loss": 0.5233,
       "step": 200
     },
     {
       "epoch": 1.01,
-      "grad_norm": 3.650707483291626,
       "learning_rate": 4.4000000000000006e-05,
       "loss": 0.453,
       "step": 225
     },
     {
       "epoch": 1.13,
-      "grad_norm": 4.5928239822387695,
       "learning_rate": 4.9e-05,
       "loss": 0.3913,
       "step": 250
     },
     {
       "epoch": 1.24,
-      "grad_norm": 4.008325576782227,
       "learning_rate": 5.4000000000000005e-05,
       "loss": 0.3729,
       "step": 275
     },
     {
       "epoch": 1.35,
-      "grad_norm": 4.239988327026367,
       "learning_rate": 5.9e-05,
       "loss": 0.3544,
       "step": 300
     },
     {
       "epoch": 1.46,
-      "grad_norm": 3.8822410106658936,
       "learning_rate": 6.400000000000001e-05,
-      "loss": 0.3229,
       "step": 325
     },
     {
       "epoch": 1.58,
-      "grad_norm": 3.0306766033172607,
       "learning_rate": 6.9e-05,
-      "loss": 0.3357,
       "step": 350
     },
     {
       "epoch": 1.69,
-      "grad_norm": 2.7435803413391113,
       "learning_rate": 7.4e-05,
-      "loss": 0.3148,
       "step": 375
     },
     {
       "epoch": 1.8,
-      "grad_norm": 3.684567928314209,
       "learning_rate": 7.900000000000001e-05,
-      "loss": 0.2912,
       "step": 400
     },
     {
       "epoch": 1.91,
-      "grad_norm": 2.486985206604004,
       "learning_rate": 8.4e-05,
-      "loss": 0.3058,
       "step": 425
     },
     {
       "epoch": 2.03,
-      "grad_norm": 2.5083959102630615,
       "learning_rate": 8.900000000000001e-05,
-      "loss": 0.2651,
       "step": 450
     },
     {
       "epoch": 2.14,
-      "grad_norm": 4.557464599609375,
       "learning_rate": 9.4e-05,
-      "loss": 0.2339,
       "step": 475
     },
     {
       "epoch": 2.25,
-      "grad_norm": 3.3180325031280518,
       "learning_rate": 9.900000000000001e-05,
-      "loss": 0.2337,
       "step": 500
     },
     {
       "epoch": 2.36,
-      "grad_norm": 2.496147632598877,
       "learning_rate": 9.955555555555556e-05,
-      "loss": 0.2372,
       "step": 525
     },
     {
       "epoch": 2.48,
-      "grad_norm": 2.2330338954925537,
       "learning_rate": 9.900000000000001e-05,
-      "loss": 0.2219,
       "step": 550
     },
     {
       "epoch": 2.59,
-      "grad_norm": 3.0495846271514893,
       "learning_rate": 9.844444444444444e-05,
-      "loss": 0.2323,
       "step": 575
     },
     {
       "epoch": 2.7,
-      "grad_norm": 2.3662843704223633,
       "learning_rate": 9.78888888888889e-05,
-      "loss": 0.2324,
       "step": 600
     },
     {
       "epoch": 2.82,
-      "grad_norm": 1.981231451034546,
       "learning_rate": 9.733333333333335e-05,
-      "loss": 0.2088,
       "step": 625
     },
     {
       "epoch": 2.93,
-      "grad_norm": 2.484710454940796,
       "learning_rate": 9.677777777777778e-05,
-      "loss": 0.2195,
       "step": 650
     },
     {
       "epoch": 3.04,
-      "grad_norm": 1.7488161325454712,
       "learning_rate": 9.622222222222222e-05,
-      "loss": 0.1868,
       "step": 675
     },
     {
       "epoch": 3.15,
-      "grad_norm": 2.266071081161499,
       "learning_rate": 9.566666666666667e-05,
-      "loss": 0.1537,
       "step": 700
     },
     {
       "epoch": 3.27,
-      "grad_norm": 1.6045178174972534,
       "learning_rate": 9.511111111111112e-05,
-      "loss": 0.157,
       "step": 725
     },
     {
       "epoch": 3.38,
-      "grad_norm": 1.8283653259277344,
       "learning_rate": 9.455555555555556e-05,
-      "loss": 0.1516,
       "step": 750
     },
     {
       "epoch": 3.49,
-      "grad_norm": 2.1718389987945557,
       "learning_rate": 9.4e-05,
-      "loss": 0.1657,
       "step": 775
     },
     {
       "epoch": 3.6,
-      "grad_norm": 2.778785467147827,
       "learning_rate": 9.344444444444444e-05,
-      "loss": 0.1529,
       "step": 800
     },
     {
       "epoch": 3.72,
-      "grad_norm": 2.0423874855041504,
       "learning_rate": 9.28888888888889e-05,
-      "loss": 0.153,
       "step": 825
     },
     {
       "epoch": 3.83,
-      "grad_norm": 1.7835185527801514,
       "learning_rate": 9.233333333333333e-05,
-      "loss": 0.1514,
       "step": 850
     },
     {
       "epoch": 3.94,
-      "grad_norm": 2.091015100479126,
       "learning_rate": 9.177777777777778e-05,
-      "loss": 0.151,
       "step": 875
     },
     {
       "epoch": 4.05,
-      "grad_norm": 1.47210693359375,
       "learning_rate": 9.122222222222223e-05,
-      "loss": 0.1248,
       "step": 900
     },
     {
       "epoch": 4.17,
-      "grad_norm": 1.5700939893722534,
       "learning_rate": 9.066666666666667e-05,
-      "loss": 0.0955,
       "step": 925
     },
     {
       "epoch": 4.28,
-      "grad_norm": 1.0798161029815674,
       "learning_rate": 9.011111111111111e-05,
-      "loss": 0.0965,
       "step": 950
     },
     {
       "epoch": 4.39,
-      "grad_norm": 1.250017523765564,
       "learning_rate": 8.955555555555556e-05,
-      "loss": 0.1029,
       "step": 975
     },
     {
       "epoch": 4.5,
-      "grad_norm": 1.3333516120910645,
       "learning_rate": 8.900000000000001e-05,
-      "loss": 0.1015,
       "step": 1000
     },
     {
       "epoch": 4.5,
-      "eval_loss": 0.3065292239189148,
-      "eval_runtime": 1302.6648,
-      "eval_samples_per_second": 2.397,
-      "eval_steps_per_second": 0.075,
-      "eval_wer": 0.3243838368229931,
       "step": 1000
     },
     {
       "epoch": 4.62,
-      "grad_norm": 2.2534544467926025,
       "learning_rate": 8.844444444444445e-05,
-      "loss": 0.1098,
       "step": 1025
     },
     {
       "epoch": 4.73,
-      "grad_norm": 1.6706323623657227,
       "learning_rate": 8.78888888888889e-05,
-      "loss": 0.1066,
       "step": 1050
     },
     {
       "epoch": 4.84,
-      "grad_norm": 1.9353983402252197,
       "learning_rate": 8.733333333333333e-05,
-      "loss": 0.1033,
       "step": 1075
     },
     {
       "epoch": 4.95,
-      "grad_norm": 1.833392858505249,
       "learning_rate": 8.677777777777778e-05,
-      "loss": 0.1041,
       "step": 1100
     },
     {
       "epoch": 5.07,
-      "grad_norm": 1.094043254852295,
       "learning_rate": 8.622222222222222e-05,
-      "loss": 0.0782,
       "step": 1125
     },
     {
       "epoch": 5.18,
-      "grad_norm": 1.6280676126480103,
       "learning_rate": 8.566666666666667e-05,
-      "loss": 0.0604,
       "step": 1150
     },
     {
       "epoch": 5.29,
-      "grad_norm": 1.2326525449752808,
       "learning_rate": 8.511111111111112e-05,
-      "loss": 0.0665,
       "step": 1175
     },
     {
       "epoch": 5.41,
-      "grad_norm": 1.186036467552185,
       "learning_rate": 8.455555555555556e-05,
-      "loss": 0.0679,
       "step": 1200
     },
     {
       "epoch": 5.52,
-      "grad_norm": 1.3472570180892944,
       "learning_rate": 8.4e-05,
-      "loss": 0.0656,
       "step": 1225
     },
     {
       "epoch": 5.63,
-      "grad_norm": 2.1403074264526367,
       "learning_rate": 8.344444444444445e-05,
-      "loss": 0.0674,
       "step": 1250
     },
     {
       "epoch": 5.74,
-      "grad_norm": 1.0580947399139404,
       "learning_rate": 8.28888888888889e-05,
-      "loss": 0.0713,
       "step": 1275
     },
     {
       "epoch": 5.86,
-      "grad_norm": 1.0808650255203247,
       "learning_rate": 8.233333333333333e-05,
-      "loss": 0.0713,
       "step": 1300
     },
     {
       "epoch": 5.97,
-      "grad_norm": 1.0721344947814941,
       "learning_rate": 8.177777777777778e-05,
-      "loss": 0.0707,
       "step": 1325
     },
     {
       "epoch": 6.08,
-      "grad_norm": 1.7433174848556519,
       "learning_rate": 8.122222222222222e-05,
-      "loss": 0.0492,
       "step": 1350
     },
     {
       "epoch": 6.19,
-      "grad_norm": 0.9549305438995361,
       "learning_rate": 8.066666666666667e-05,
-      "loss": 0.0418,
       "step": 1375
     },
     {
       "epoch": 6.31,
-      "grad_norm": 1.4030609130859375,
       "learning_rate": 8.011111111111111e-05,
-      "loss": 0.0382,
       "step": 1400
     },
     {
       "epoch": 6.42,
-      "grad_norm": 0.9085283279418945,
       "learning_rate": 7.955555555555556e-05,
-      "loss": 0.0369,
       "step": 1425
     },
     {
       "epoch": 6.53,
-      "grad_norm": 1.0393314361572266,
       "learning_rate": 7.900000000000001e-05,
-      "loss": 0.0403,
       "step": 1450
     },
     {
       "epoch": 6.64,
-      "grad_norm": 0.675774872303009,
       "learning_rate": 7.844444444444446e-05,
-      "loss": 0.0414,
       "step": 1475
     },
     {
       "epoch": 6.76,
-      "grad_norm": 0.8051535487174988,
       "learning_rate": 7.788888888888888e-05,
-      "loss": 0.0426,
       "step": 1500
     },
     {
       "epoch": 6.87,
-      "grad_norm": 1.4626388549804688,
       "learning_rate": 7.733333333333333e-05,
-      "loss": 0.0436,
       "step": 1525
     },
     {
       "epoch": 6.98,
-      "grad_norm": 0.8418045043945312,
       "learning_rate": 7.677777777777778e-05,
-      "loss": 0.0442,
       "step": 1550
     },
     {
       "epoch": 7.09,
-      "grad_norm": 1.3747352361679077,
       "learning_rate": 7.622222222222223e-05,
-      "loss": 0.0281,
       "step": 1575
     },
     {
       "epoch": 7.21,
-      "grad_norm": 0.5290963649749756,
       "learning_rate": 7.566666666666667e-05,
-      "loss": 0.0237,
       "step": 1600
     },
     {
       "epoch": 7.32,
-      "grad_norm": 1.2137552499771118,
       "learning_rate": 7.511111111111111e-05,
-      "loss": 0.0249,
       "step": 1625
     },
     {
       "epoch": 7.43,
-      "grad_norm": 0.7687398791313171,
       "learning_rate": 7.455555555555556e-05,
-      "loss": 0.0261,
       "step": 1650
     },
     {
       "epoch": 7.55,
-      "grad_norm": 1.1545344591140747,
       "learning_rate": 7.4e-05,
-      "loss": 0.0249,
       "step": 1675
     },
     {
       "epoch": 7.66,
-      "grad_norm": 0.7673143148422241,
       "learning_rate": 7.344444444444445e-05,
-      "loss": 0.0248,
       "step": 1700
     },
     {
       "epoch": 7.77,
-      "grad_norm": 0.9905190467834473,
       "learning_rate": 7.28888888888889e-05,
-      "loss": 0.0254,
       "step": 1725
     },
     {
       "epoch": 7.88,
-      "grad_norm": 1.764397382736206,
       "learning_rate": 7.233333333333335e-05,
-      "loss": 0.0297,
       "step": 1750
     },
     {
       "epoch": 8.0,
-      "grad_norm": 0.9069448709487915,
       "learning_rate": 7.177777777777777e-05,
-      "loss": 0.0275,
       "step": 1775
     },
     {
       "epoch": 8.11,
-      "grad_norm": 1.1385760307312012,
       "learning_rate": 7.122222222222222e-05,
-      "loss": 0.0162,
       "step": 1800
     },
     {
       "epoch": 8.22,
-      "grad_norm": 0.5694571733474731,
       "learning_rate": 7.066666666666667e-05,
-      "loss": 0.0149,
       "step": 1825
     },
     {
       "epoch": 8.33,
-      "grad_norm": 1.0839495658874512,
       "learning_rate": 7.011111111111112e-05,
-      "loss": 0.0175,
       "step": 1850
     },
     {
       "epoch": 8.45,
-      "grad_norm": 0.7086426019668579,
       "learning_rate": 6.955555555555556e-05,
-      "loss": 0.0189,
       "step": 1875
     },
     {
       "epoch": 8.56,
-      "grad_norm": 0.9548362493515015,
       "learning_rate": 6.9e-05,
-      "loss": 0.0193,
       "step": 1900
     },
     {
       "epoch": 8.67,
-      "grad_norm": 0.9621508717536926,
       "learning_rate": 6.844444444444445e-05,
-      "loss": 0.0186,
       "step": 1925
     },
     {
       "epoch": 8.78,
-      "grad_norm": 0.6629220843315125,
       "learning_rate": 6.788888888888888e-05,
-      "loss": 0.0171,
       "step": 1950
     },
     {
       "epoch": 8.9,
-      "grad_norm": 0.7981088161468506,
       "learning_rate": 6.733333333333333e-05,
-      "loss": 0.0175,
       "step": 1975
     },
     {
       "epoch": 9.01,
-      "grad_norm": 0.45495709776878357,
       "learning_rate": 6.677777777777779e-05,
-      "loss": 0.0167,
       "step": 2000
     },
     {
       "epoch": 9.01,
-      "eval_loss": 0.3443203866481781,
-      "eval_runtime": 1269.8219,
-      "eval_samples_per_second": 2.459,
-      "eval_steps_per_second": 0.077,
-      "eval_wer": 0.2994668933013984,
       "step": 2000
     },
     {
       "epoch": 9.12,
-      "grad_norm": 1.0250108242034912,
       "learning_rate": 6.622222222222224e-05,
       "loss": 0.0124,
       "step": 2025
     },
     {
       "epoch": 9.23,
-      "grad_norm": 0.533909022808075,
       "learning_rate": 6.566666666666666e-05,
-      "loss": 0.0128,
       "step": 2050
     },
     {
       "epoch": 9.35,
-      "grad_norm": 0.5022910237312317,
       "learning_rate": 6.511111111111111e-05,
-      "loss": 0.0127,
       "step": 2075
     },
     {
       "epoch": 9.46,
-      "grad_norm": 1.3371328115463257,
       "learning_rate": 6.455555555555556e-05,
-      "loss": 0.0116,
       "step": 2100
     },
     {
       "epoch": 9.57,
-      "grad_norm": 1.2396471500396729,
       "learning_rate": 6.400000000000001e-05,
-      "loss": 0.0112,
       "step": 2125
     },
     {
       "epoch": 9.68,
-      "grad_norm": 1.2121708393096924,
       "learning_rate": 6.344444444444445e-05,
-      "loss": 0.0107,
       "step": 2150
     },
     {
       "epoch": 9.8,
-      "grad_norm": 1.3228121995925903,
       "learning_rate": 6.28888888888889e-05,
-      "loss": 0.0108,
       "step": 2175
     },
     {
       "epoch": 9.91,
-      "grad_norm": 0.6204155683517456,
       "learning_rate": 6.233333333333334e-05,
-      "loss": 0.0123,
       "step": 2200
     },
     {
       "epoch": 10.02,
-      "grad_norm": 0.4221612811088562,
       "learning_rate": 6.177777777777779e-05,
-      "loss": 0.0117,
       "step": 2225
     },
     {
       "epoch": 10.14,
-      "grad_norm": 0.8225328922271729,
       "learning_rate": 6.122222222222222e-05,
-      "loss": 0.008,
       "step": 2250
     },
     {
       "epoch": 10.25,
-      "grad_norm": 0.22648921608924866,
       "learning_rate": 6.066666666666667e-05,
-      "loss": 0.0075,
       "step": 2275
     },
     {
       "epoch": 10.36,
-      "grad_norm": 1.0620574951171875,
       "learning_rate": 6.011111111111112e-05,
-      "loss": 0.0077,
       "step": 2300
     },
     {
       "epoch": 10.47,
-      "grad_norm": 0.5009572505950928,
       "learning_rate": 5.9555555555555554e-05,
-      "loss": 0.008,
       "step": 2325
     },
     {
       "epoch": 10.59,
-      "grad_norm": 0.6466513872146606,
       "learning_rate": 5.9e-05,
-      "loss": 0.0098,
       "step": 2350
     },
     {
       "epoch": 10.7,
-      "grad_norm": 0.2255641371011734,
       "learning_rate": 5.844444444444445e-05,
-      "loss": 0.0094,
       "step": 2375
     },
     {
       "epoch": 10.81,
-      "grad_norm": 0.838545560836792,
       "learning_rate": 5.788888888888889e-05,
-      "loss": 0.0089,
       "step": 2400
     },
     {
       "epoch": 10.92,
-      "grad_norm": 0.6793853044509888,
       "learning_rate": 5.7333333333333336e-05,
-      "loss": 0.0087,
       "step": 2425
     },
     {
       "epoch": 11.04,
-      "grad_norm": 0.548841655254364,
       "learning_rate": 5.6777777777777786e-05,
-      "loss": 0.0069,
       "step": 2450
     },
     {
       "epoch": 11.15,
-      "grad_norm": 0.22741466760635376,
       "learning_rate": 5.622222222222222e-05,
-      "loss": 0.0065,
       "step": 2475
     },
     {
       "epoch": 11.26,
-      "grad_norm": 0.4155316650867462,
       "learning_rate": 5.566666666666667e-05,
-      "loss": 0.0058,
       "step": 2500
     },
     {
       "epoch": 11.37,
-      "grad_norm": 0.48344260454177856,
       "learning_rate": 5.511111111111111e-05,
-      "loss": 0.005,
       "step": 2525
     },
     {
       "epoch": 11.49,
-      "grad_norm": 0.9006750583648682,
       "learning_rate": 5.455555555555556e-05,
-      "loss": 0.0045,
       "step": 2550
     },
     {
       "epoch": 11.6,
-      "grad_norm": 0.9966240525245667,
       "learning_rate": 5.4000000000000005e-05,
-      "loss": 0.0047,
       "step": 2575
     },
     {
       "epoch": 11.71,
-      "grad_norm": 0.39858147501945496,
       "learning_rate": 5.3444444444444455e-05,
-      "loss": 0.0053,
       "step": 2600
     },
     {
       "epoch": 11.82,
-      "grad_norm": 0.6118489503860474,
       "learning_rate": 5.2888888888888885e-05,
-      "loss": 0.0053,
       "step": 2625
     },
     {
       "epoch": 11.94,
-      "grad_norm": 0.5074841976165771,
       "learning_rate": 5.2333333333333336e-05,
-      "loss": 0.0057,
       "step": 2650
     },
     {
       "epoch": 12.05,
-      "grad_norm": 0.6888458728790283,
       "learning_rate": 5.177777777777778e-05,
-      "loss": 0.0053,
       "step": 2675
     },
     {
       "epoch": 12.16,
-      "grad_norm": 0.7311161160469055,
       "learning_rate": 5.122222222222223e-05,
-      "loss": 0.006,
       "step": 2700
     },
     {
       "epoch": 12.27,
-      "grad_norm": 0.47264620661735535,
       "learning_rate": 5.0666666666666674e-05,
-      "loss": 0.0058,
       "step": 2725
     },
     {
       "epoch": 12.39,
-      "grad_norm": 0.6639235019683838,
       "learning_rate": 5.011111111111111e-05,
-      "loss": 0.0052,
       "step": 2750
     },
     {
       "epoch": 12.5,
-      "grad_norm": 0.1161256805062294,
       "learning_rate": 4.955555555555556e-05,
-      "loss": 0.0038,
       "step": 2775
     },
     {
       "epoch": 12.61,
-      "grad_norm": 0.4923400580883026,
       "learning_rate": 4.9e-05,
-      "loss": 0.0036,
       "step": 2800
     },
     {
       "epoch": 12.73,
-      "grad_norm": 0.6149506568908691,
       "learning_rate": 4.844444444444445e-05,
-      "loss": 0.0046,
       "step": 2825
     },
     {
       "epoch": 12.84,
-      "grad_norm": 0.16888651251792908,
       "learning_rate": 4.7888888888888886e-05,
-      "loss": 0.0041,
       "step": 2850
     },
     {
       "epoch": 12.95,
-      "grad_norm": 1.0652014017105103,
       "learning_rate": 4.7333333333333336e-05,
-      "loss": 0.0041,
       "step": 2875
     },
     {
       "epoch": 13.06,
-      "grad_norm": 0.21759897470474243,
       "learning_rate": 4.677777777777778e-05,
-      "loss": 0.003,
       "step": 2900
     },
     {
       "epoch": 13.18,
-      "grad_norm": 0.23394200205802917,
       "learning_rate": 4.6222222222222224e-05,
-      "loss": 0.0034,
       "step": 2925
     },
     {
       "epoch": 13.29,
-      "grad_norm": 0.05768038332462311,
       "learning_rate": 4.566666666666667e-05,
-      "loss": 0.0037,
       "step": 2950
     },
     {
       "epoch": 13.4,
-      "grad_norm": 0.08611828088760376,
       "learning_rate": 4.511111111111112e-05,
-      "loss": 0.0034,
       "step": 2975
     },
     {
       "epoch": 13.51,
-      "grad_norm": 0.1028035581111908,
       "learning_rate": 4.4555555555555555e-05,
-      "loss": 0.0032,
       "step": 3000
     },
     {
       "epoch": 13.51,
-      "eval_loss": 0.3575945198535919,
-      "eval_runtime": 1297.8448,
-      "eval_samples_per_second": 2.406,
-      "eval_steps_per_second": 0.076,
-      "eval_wer": 0.27779494707563934,
       "step": 3000
     },
     {
       "epoch": 13.63,
-      "grad_norm": 0.23182912170886993,
       "learning_rate": 4.4000000000000006e-05,
-      "loss": 0.0027,
       "step": 3025
     },
     {
       "epoch": 13.74,
-      "grad_norm": 0.100206658244133,
       "learning_rate": 4.344444444444445e-05,
-      "loss": 0.0027,
       "step": 3050
     },
     {
       "epoch": 13.85,
-      "grad_norm": 0.9118719100952148,
       "learning_rate": 4.2888888888888886e-05,
-      "loss": 0.003,
       "step": 3075
     },
     {
       "epoch": 13.96,
-      "grad_norm": 0.06793611496686935,
       "learning_rate": 4.233333333333334e-05,
-      "loss": 0.003,
       "step": 3100
     },
     {
       "epoch": 14.08,
-      "grad_norm": 0.0683990940451622,
       "learning_rate": 4.177777777777778e-05,
-      "loss": 0.0021,
       "step": 3125
     },
     {
       "epoch": 14.19,
-      "grad_norm": 0.19087089598178864,
       "learning_rate": 4.1222222222222224e-05,
-      "loss": 0.0028,
       "step": 3150
     },
     {
       "epoch": 14.3,
-      "grad_norm": 0.14526407420635223,
       "learning_rate": 4.066666666666667e-05,
-      "loss": 0.0025,
       "step": 3175
     },
     {
       "epoch": 14.41,
-      "grad_norm": 0.5902572870254517,
       "learning_rate": 4.011111111111111e-05,
-      "loss": 0.0031,
       "step": 3200
     },
     {
       "epoch": 14.53,
-      "grad_norm": 0.1988796442747116,
       "learning_rate": 3.9555555555555556e-05,
-      "loss": 0.0021,
       "step": 3225
     },
     {
       "epoch": 14.64,
-      "grad_norm": 0.178738534450531,
       "learning_rate": 3.9000000000000006e-05,
-      "loss": 0.0031,
       "step": 3250
     },
     {
       "epoch": 14.75,
-      "grad_norm": 0.03732344135642052,
       "learning_rate": 3.844444444444444e-05,
-      "loss": 0.0026,
       "step": 3275
     },
     {
       "epoch": 14.86,
-      "grad_norm": 0.047354888170957565,
       "learning_rate": 3.7888888888888894e-05,
-      "loss": 0.0016,
       "step": 3300
     },
     {
       "epoch": 14.98,
-      "grad_norm": 0.058274924755096436,
       "learning_rate": 3.733333333333334e-05,
-      "loss": 0.0019,
       "step": 3325
     },
     {
       "epoch": 15.09,
-      "grad_norm": 1.4180477857589722,
       "learning_rate": 3.677777777777778e-05,
-      "loss": 0.0016,
       "step": 3350
     },
     {
       "epoch": 15.2,
-      "grad_norm": 0.03281378000974655,
       "learning_rate": 3.6222222222222225e-05,
-      "loss": 0.0016,
       "step": 3375
     },
     {
       "epoch": 15.32,
-      "grad_norm": 0.2159404158592224,
       "learning_rate": 3.566666666666667e-05,
-      "loss": 0.0026,
       "step": 3400
     },
     {
       "epoch": 15.43,
-      "grad_norm": 0.18890638649463654,
       "learning_rate": 3.511111111111111e-05,
-      "loss": 0.0016,
       "step": 3425
     },
     {
       "epoch": 15.54,
-      "grad_norm": 0.022921651601791382,
       "learning_rate": 3.4555555555555556e-05,
-      "loss": 0.0012,
       "step": 3450
     },
     {
       "epoch": 15.65,
-      "grad_norm": 0.02838265895843506,
       "learning_rate": 3.4000000000000007e-05,
-      "loss": 0.0014,
       "step": 3475
     },
     {
       "epoch": 15.77,
-      "grad_norm": 0.04957688972353935,
       "learning_rate": 3.3444444444444443e-05,
-      "loss": 0.0012,
       "step": 3500
     },
     {
       "epoch": 15.88,
-      "grad_norm": 0.03910296410322189,
       "learning_rate": 3.2888888888888894e-05,
-      "loss": 0.0008,
       "step": 3525
     },
     {
       "epoch": 15.99,
-      "grad_norm": 0.3031899034976959,
       "learning_rate": 3.233333333333333e-05,
-      "loss": 0.0015,
       "step": 3550
     },
     {
       "epoch": 16.1,
-      "grad_norm": 0.026370937004685402,
       "learning_rate": 3.177777777777778e-05,
-      "loss": 0.0009,
       "step": 3575
     },
     {
       "epoch": 16.22,
-      "grad_norm": 0.04645024240016937,
       "learning_rate": 3.1222222222222225e-05,
-      "loss": 0.0014,
       "step": 3600
     },
     {
       "epoch": 16.33,
-      "grad_norm": 0.03346904739737511,
       "learning_rate": 3.066666666666667e-05,
-      "loss": 0.001,
       "step": 3625
     },
     {
       "epoch": 16.44,
-      "grad_norm": 0.41791531443595886,
       "learning_rate": 3.0111111111111113e-05,
-      "loss": 0.0019,
       "step": 3650
     },
     {
       "epoch": 16.55,
-      "grad_norm": 0.023621816188097,
       "learning_rate": 2.955555555555556e-05,
-      "loss": 0.0009,
       "step": 3675
     },
     {
       "epoch": 16.67,
-      "grad_norm": 0.020701350644230843,
       "learning_rate": 2.9e-05,
-      "loss": 0.0009,
       "step": 3700
     },
     {
       "epoch": 16.78,
-      "grad_norm": 0.018095409497618675,
       "learning_rate": 2.8444444444444447e-05,
       "loss": 0.0007,
       "step": 3725
     },
     {
       "epoch": 16.89,
-      "grad_norm": 0.03800148516893387,
       "learning_rate": 2.788888888888889e-05,
-      "loss": 0.001,
       "step": 3750
     },
     {
       "epoch": 17.0,
-      "grad_norm": 0.0219491608440876,
       "learning_rate": 2.733333333333333e-05,
-      "loss": 0.0012,
       "step": 3775
     },
     {
       "epoch": 17.12,
-      "grad_norm": 0.19971542060375214,
       "learning_rate": 2.677777777777778e-05,
-      "loss": 0.001,
       "step": 3800
     },
     {
       "epoch": 17.23,
-      "grad_norm": 0.022324278950691223,
       "learning_rate": 2.6222222222222226e-05,
-      "loss": 0.0005,
       "step": 3825
     },
     {
       "epoch": 17.34,
-      "grad_norm": 0.014598184265196323,
       "learning_rate": 2.5666666666666666e-05,
-      "loss": 0.0007,
       "step": 3850
     },
     {
       "epoch": 17.45,
-      "grad_norm": 0.01482320111244917,
       "learning_rate": 2.5111111111111113e-05,
-      "loss": 0.0008,
       "step": 3875
     },
     {
       "epoch": 17.57,
-      "grad_norm": 0.019341906532645226,
       "learning_rate": 2.4555555555555557e-05,
-      "loss": 0.0005,
       "step": 3900
     },
     {
       "epoch": 17.68,
-      "grad_norm": 0.044308606535196304,
       "learning_rate": 2.4e-05,
-      "loss": 0.0008,
       "step": 3925
     },
     {
       "epoch": 17.79,
-      "grad_norm": 0.01700867898762226,
       "learning_rate": 2.3444444444444448e-05,
-      "loss": 0.0009,
       "step": 3950
     },
     {
       "epoch": 17.91,
-      "grad_norm": 0.01428561843931675,
       "learning_rate": 2.288888888888889e-05,
-      "loss": 0.0004,
       "step": 3975
     },
     {
       "epoch": 18.02,
-      "grad_norm": 0.011909844353795052,
       "learning_rate": 2.2333333333333335e-05,
-      "loss": 0.0004,
       "step": 4000
     },
     {
       "epoch": 18.02,
-      "eval_loss": 0.36695417761802673,
-      "eval_runtime": 1296.9402,
-      "eval_samples_per_second": 2.408,
-      "eval_steps_per_second": 0.076,
-      "eval_wer": 0.2677122769064359,
       "step": 4000
     },
     {
       "epoch": 18.13,
-      "grad_norm": 0.011953528970479965,
       "learning_rate": 2.177777777777778e-05,
-      "loss": 0.0004,
       "step": 4025
     },
     {
       "epoch": 18.24,
-      "grad_norm": 0.013035556301474571,
       "learning_rate": 2.1222222222222223e-05,
-      "loss": 0.0005,
       "step": 4050
     },
     {
       "epoch": 18.36,
-      "grad_norm": 0.011018014512956142,
       "learning_rate": 2.0666666666666666e-05,
-      "loss": 0.0003,
       "step": 4075
     },
     {
       "epoch": 18.47,
-      "grad_norm": 0.011594709008932114,
       "learning_rate": 2.011111111111111e-05,
-      "loss": 0.0004,
       "step": 4100
     },
     {
       "epoch": 18.58,
-      "grad_norm": 0.01165748666971922,
       "learning_rate": 1.9555555555555557e-05,
-      "loss": 0.0003,
       "step": 4125
     },
     {
       "epoch": 18.69,
-      "grad_norm": 0.012751756235957146,
       "learning_rate": 1.9e-05,
       "loss": 0.0003,
       "step": 4150
     },
     {
       "epoch": 18.81,
-      "grad_norm": 0.01092427410185337,
       "learning_rate": 1.8444444444444445e-05,
-      "loss": 0.0003,
       "step": 4175
     },
     {
       "epoch": 18.92,
-      "grad_norm": 0.010369419120252132,
       "learning_rate": 1.788888888888889e-05,
-      "loss": 0.0007,
       "step": 4200
     },
     {
       "epoch": 19.03,
-      "grad_norm": 0.009451022371649742,
       "learning_rate": 1.7333333333333336e-05,
       "loss": 0.0004,
       "step": 4225
     },
     {
       "epoch": 19.14,
-      "grad_norm": 0.010264468379318714,
       "learning_rate": 1.677777777777778e-05,
       "loss": 0.0003,
       "step": 4250
     },
     {
       "epoch": 19.26,
-      "grad_norm": 0.009353878907859325,
       "learning_rate": 1.6222222222222223e-05,
       "loss": 0.0003,
       "step": 4275
     },
     {
       "epoch": 19.37,
-      "grad_norm": 0.007795905694365501,
       "learning_rate": 1.5666666666666667e-05,
       "loss": 0.0003,
       "step": 4300
     },
     {
       "epoch": 19.48,
-      "grad_norm": 0.009554468095302582,
       "learning_rate": 1.5111111111111112e-05,
-      "loss": 0.0004,
       "step": 4325
     },
     {
       "epoch": 19.59,
-      "grad_norm": 0.009386077523231506,
       "learning_rate": 1.4555555555555556e-05,
       "loss": 0.0003,
       "step": 4350
     },
     {
       "epoch": 19.71,
-      "grad_norm": 0.007565716747194529,
       "learning_rate": 1.4000000000000001e-05,
-      "loss": 0.0004,
       "step": 4375
     },
     {
       "epoch": 19.82,
-      "grad_norm": 0.011739292182028294,
       "learning_rate": 1.3444444444444445e-05,
       "loss": 0.0003,
       "step": 4400
     },
     {
       "epoch": 19.93,
-      "grad_norm": 0.011955379508435726,
       "learning_rate": 1.2888888888888889e-05,
-      "loss": 0.0004,
       "step": 4425
     },
     {
       "epoch": 20.05,
-      "grad_norm": 0.007369581609964371,
       "learning_rate": 1.2333333333333334e-05,
       "loss": 0.0004,
       "step": 4450
     },
     {
       "epoch": 20.16,
-      "grad_norm": 0.010209435597062111,
       "learning_rate": 1.1777777777777778e-05,
       "loss": 0.0003,
       "step": 4475
     },
     {
       "epoch": 20.27,
-      "grad_norm": 0.009368482045829296,
       "learning_rate": 1.1222222222222224e-05,
       "loss": 0.0003,
       "step": 4500
     },
     {
       "epoch": 20.38,
-      "grad_norm": 0.008915912359952927,
       "learning_rate": 1.0666666666666667e-05,
       "loss": 0.0003,
       "step": 4525
     },
     {
       "epoch": 20.5,
-      "grad_norm": 0.01048735436052084,
       "learning_rate": 1.0111111111111111e-05,
       "loss": 0.0003,
       "step": 4550
     },
     {
       "epoch": 20.61,
-      "grad_norm": 0.010569226928055286,
       "learning_rate": 9.555555555555556e-06,
       "loss": 0.0003,
       "step": 4575
     },
     {
       "epoch": 20.72,
-      "grad_norm": 0.008401792496442795,
       "learning_rate": 9e-06,
-      "loss": 0.0003,
       "step": 4600
     },
     {
       "epoch": 20.83,
-      "grad_norm": 0.01062182616442442,
       "learning_rate": 8.444444444444446e-06,
       "loss": 0.0003,
       "step": 4625
     },
     {
       "epoch": 20.95,
-      "grad_norm": 0.007442856673151255,
       "learning_rate": 7.88888888888889e-06,
-      "loss": 0.0004,
       "step": 4650
     },
     {
       "epoch": 21.06,
-      "grad_norm": 0.007747430354356766,
       "learning_rate": 7.333333333333334e-06,
       "loss": 0.0003,
       "step": 4675
     },
     {
       "epoch": 21.17,
-      "grad_norm": 0.008953329175710678,
       "learning_rate": 6.777777777777779e-06,
       "loss": 0.0003,
       "step": 4700
     },
     {
       "epoch": 21.28,
-      "grad_norm": 0.0087329912930727,
       "learning_rate": 6.222222222222222e-06,
       "loss": 0.0003,
       "step": 4725
     },
     {
       "epoch": 21.4,
-      "grad_norm": 0.007937785238027573,
       "learning_rate": 5.666666666666667e-06,
       "loss": 0.0003,
       "step": 4750
     },
     {
       "epoch": 21.51,
-      "grad_norm": 0.007708992809057236,
       "learning_rate": 5.1111111111111115e-06,
       "loss": 0.0003,
       "step": 4775
     },
     {
       "epoch": 21.62,
-      "grad_norm": 0.011778591200709343,
       "learning_rate": 4.555555555555556e-06,
       "loss": 0.0003,
       "step": 4800
     },
     {
       "epoch": 21.73,
-      "grad_norm": 0.00828944519162178,
       "learning_rate": 4.000000000000001e-06,
-      "loss": 0.0002,
       "step": 4825
     },
     {
       "epoch": 21.85,
-      "grad_norm": 0.007438404019922018,
       "learning_rate": 3.4444444444444444e-06,
       "loss": 0.0003,
       "step": 4850
     },
     {
       "epoch": 21.96,
-      "grad_norm": 0.007443991024047136,
       "learning_rate": 2.888888888888889e-06,
       "loss": 0.0003,
       "step": 4875
     },
     {
       "epoch": 22.07,
-      "grad_norm": 0.008769960142672062,
       "learning_rate": 2.3333333333333336e-06,
       "loss": 0.0003,
       "step": 4900
     },
     {
       "epoch": 22.18,
-      "grad_norm": 0.008519369177520275,
       "learning_rate": 1.777777777777778e-06,
       "loss": 0.0003,
       "step": 4925
     },
     {
       "epoch": 22.3,
-      "grad_norm": 0.007310151122510433,
       "learning_rate": 1.2222222222222223e-06,
       "loss": 0.0002,
       "step": 4950
     },
     {
       "epoch": 22.41,
-      "grad_norm": 0.0072664907202124596,
       "learning_rate": 6.666666666666667e-07,
       "loss": 0.0002,
       "step": 4975
     },
     {
       "epoch": 22.52,
-      "grad_norm": 0.00765978591516614,
       "learning_rate": 1.1111111111111112e-07,
       "loss": 0.0003,
       "step": 5000
     },
     {
       "epoch": 22.52,
-      "eval_loss": 0.3728739619255066,
-      "eval_runtime": 1237.2295,
-      "eval_samples_per_second": 2.524,
-      "eval_steps_per_second": 0.079,
-      "eval_wer": 0.2660897782585181,
       "step": 5000
     },
     {
       "epoch": 22.52,
       "step": 5000,
       "total_flos": 2.532745423355904e+20,
-      "train_loss": 0.17524469082718716,
-      "train_runtime": 15083.6622,
-      "train_samples_per_second": 10.608,
-      "train_steps_per_second": 0.331
     }
   ],
   "logging_steps": 25,

   "log_history": [
     {
       "epoch": 0.11,
+      "grad_norm": 29.43235206604004,
       "learning_rate": 4.000000000000001e-06,
       "loss": 11.9112,
       "step": 25
     },
     {
       "epoch": 0.23,
+      "grad_norm": 12.577169418334961,
       "learning_rate": 9e-06,
+      "loss": 5.9608,
       "step": 50
     },
     {
       "epoch": 0.34,
+      "grad_norm": 6.2483415603637695,
       "learning_rate": 1.4000000000000001e-05,
       "loss": 2.7899,
       "step": 75
     },
     {
       "epoch": 0.45,
+      "grad_norm": 5.498162746429443,
       "learning_rate": 1.9e-05,
+      "loss": 1.9344,
       "step": 100
     },
     {
       "epoch": 0.56,
+      "grad_norm": 10.876449584960938,
       "learning_rate": 2.4e-05,
+      "loss": 1.1848,
       "step": 125
     },
     {
       "epoch": 0.68,
+      "grad_norm": 6.835580825805664,
       "learning_rate": 2.9e-05,
+      "loss": 0.7884,
       "step": 150
     },
     {
       "epoch": 0.79,
+      "grad_norm": 8.131637573242188,
       "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.6148,
       "step": 175
     },
     {
       "epoch": 0.9,
+      "grad_norm": 3.9987149238586426,
       "learning_rate": 3.9000000000000006e-05,
+      "loss": 0.5234,
       "step": 200
     },
     {
       "epoch": 1.01,
+      "grad_norm": 3.6429152488708496,
       "learning_rate": 4.4000000000000006e-05,
       "loss": 0.453,
       "step": 225
     },
     {
       "epoch": 1.13,
+      "grad_norm": 4.594738483428955,
       "learning_rate": 4.9e-05,
       "loss": 0.3913,
       "step": 250
     },
     {
       "epoch": 1.24,
+      "grad_norm": 4.02340841293335,
       "learning_rate": 5.4000000000000005e-05,
       "loss": 0.3729,
       "step": 275
     },
     {
       "epoch": 1.35,
+      "grad_norm": 4.27403450012207,
       "learning_rate": 5.9e-05,
       "loss": 0.3544,
       "step": 300
     },
     {
       "epoch": 1.46,
+      "grad_norm": 3.860103130340576,
       "learning_rate": 6.400000000000001e-05,
+      "loss": 0.3225,
       "step": 325
     },
     {
       "epoch": 1.58,
+      "grad_norm": 2.948971748352051,
       "learning_rate": 6.9e-05,
+      "loss": 0.3358,
       "step": 350
     },
     {
       "epoch": 1.69,
+      "grad_norm": 2.851400852203369,
       "learning_rate": 7.4e-05,
+      "loss": 0.3143,
       "step": 375
     },
     {
       "epoch": 1.8,
+      "grad_norm": 3.7811708450317383,
       "learning_rate": 7.900000000000001e-05,
+      "loss": 0.2908,
       "step": 400
     },
     {
       "epoch": 1.91,
+      "grad_norm": 2.640065908432007,
       "learning_rate": 8.4e-05,
+      "loss": 0.3066,
       "step": 425
     },
     {
       "epoch": 2.03,
+      "grad_norm": 2.669788122177124,
       "learning_rate": 8.900000000000001e-05,
+      "loss": 0.2659,
       "step": 450
     },
     {
       "epoch": 2.14,
+      "grad_norm": 3.3060202598571777,
       "learning_rate": 9.4e-05,
+      "loss": 0.2312,
       "step": 475
     },
     {
       "epoch": 2.25,
+      "grad_norm": 2.385368824005127,
       "learning_rate": 9.900000000000001e-05,
+      "loss": 0.2275,
       "step": 500
     },
     {
       "epoch": 2.36,
+      "grad_norm": 2.557762861251831,
       "learning_rate": 9.955555555555556e-05,
+      "loss": 0.2424,
       "step": 525
     },
     {
       "epoch": 2.48,
+      "grad_norm": 2.552363872528076,
       "learning_rate": 9.900000000000001e-05,
+      "loss": 0.2194,
       "step": 550
     },
     {
       "epoch": 2.59,
+      "grad_norm": 2.041868209838867,
       "learning_rate": 9.844444444444444e-05,
+      "loss": 0.235,
       "step": 575
     },
     {
       "epoch": 2.7,
+      "grad_norm": 2.3052070140838623,
       "learning_rate": 9.78888888888889e-05,
+      "loss": 0.227,
       "step": 600
     },
     {
       "epoch": 2.82,
+      "grad_norm": 2.061685562133789,
       "learning_rate": 9.733333333333335e-05,
+      "loss": 0.2054,
       "step": 625
     },
     {
       "epoch": 2.93,
+      "grad_norm": 2.4055511951446533,
       "learning_rate": 9.677777777777778e-05,
+      "loss": 0.218,
       "step": 650
     },
     {
       "epoch": 3.04,
+      "grad_norm": 1.5952019691467285,
       "learning_rate": 9.622222222222222e-05,
+      "loss": 0.1879,
       "step": 675
     },
     {
       "epoch": 3.15,
+      "grad_norm": 1.5567409992218018,
       "learning_rate": 9.566666666666667e-05,
+      "loss": 0.1524,
       "step": 700
     },
     {
       "epoch": 3.27,
+      "grad_norm": 2.305778741836548,
       "learning_rate": 9.511111111111112e-05,
+      "loss": 0.1553,
       "step": 725
     },
     {
       "epoch": 3.38,
+      "grad_norm": 1.9750478267669678,
       "learning_rate": 9.455555555555556e-05,
+      "loss": 0.1538,
       "step": 750
     },
     {
       "epoch": 3.49,
+      "grad_norm": 2.064730405807495,
       "learning_rate": 9.4e-05,
+      "loss": 0.158,
       "step": 775
     },
     {
       "epoch": 3.6,
+      "grad_norm": 2.288181781768799,
       "learning_rate": 9.344444444444444e-05,
+      "loss": 0.1488,
       "step": 800
     },
     {
       "epoch": 3.72,
+      "grad_norm": 1.7010929584503174,
       "learning_rate": 9.28888888888889e-05,
+      "loss": 0.1491,
       "step": 825
     },
     {
       "epoch": 3.83,
+      "grad_norm": 1.653943657875061,
       "learning_rate": 9.233333333333333e-05,
+      "loss": 0.149,
       "step": 850
     },
     {
       "epoch": 3.94,
+      "grad_norm": 1.655405044555664,
       "learning_rate": 9.177777777777778e-05,
+      "loss": 0.1505,
       "step": 875
     },
     {
       "epoch": 4.05,
+      "grad_norm": 1.766348958015442,
       "learning_rate": 9.122222222222223e-05,
+      "loss": 0.122,
       "step": 900
     },
     {
       "epoch": 4.17,
+      "grad_norm": 1.1792188882827759,
       "learning_rate": 9.066666666666667e-05,
+      "loss": 0.0985,
       "step": 925
     },
     {
       "epoch": 4.28,
+      "grad_norm": 1.3778753280639648,
       "learning_rate": 9.011111111111111e-05,
+      "loss": 0.0994,
       "step": 950
     },
     {
       "epoch": 4.39,
+      "grad_norm": 1.5239909887313843,
       "learning_rate": 8.955555555555556e-05,
+      "loss": 0.1067,
       "step": 975
     },
     {
       "epoch": 4.5,
+      "grad_norm": 1.2479132413864136,
       "learning_rate": 8.900000000000001e-05,
+      "loss": 0.1035,
       "step": 1000
     },
     {
       "epoch": 4.5,
+      "eval_loss": 0.30151915550231934,
+      "eval_runtime": 1360.3971,
+      "eval_samples_per_second": 2.296,
+      "eval_steps_per_second": 0.072,
+      "eval_wer": 0.3249633006258209,
       "step": 1000
     },
     {
       "epoch": 4.62,
+      "grad_norm": 1.8175023794174194,
       "learning_rate": 8.844444444444445e-05,
+      "loss": 0.1114,
       "step": 1025
     },
     {
       "epoch": 4.73,
+      "grad_norm": 1.7170511484146118,
       "learning_rate": 8.78888888888889e-05,
+      "loss": 0.1059,
       "step": 1050
     },
     {
       "epoch": 4.84,
+      "grad_norm": 1.6984485387802124,
       "learning_rate": 8.733333333333333e-05,
+      "loss": 0.1057,
       "step": 1075
     },
     {
       "epoch": 4.95,
+      "grad_norm": 2.2961227893829346,
       "learning_rate": 8.677777777777778e-05,
+      "loss": 0.1008,
       "step": 1100
     },
     {
       "epoch": 5.07,
+      "grad_norm": 1.120532751083374,
       "learning_rate": 8.622222222222222e-05,
+      "loss": 0.0785,
       "step": 1125
     },
     {
       "epoch": 5.18,
+      "grad_norm": 2.4177873134613037,
       "learning_rate": 8.566666666666667e-05,
+      "loss": 0.0587,
       "step": 1150
     },
     {
       "epoch": 5.29,
+      "grad_norm": 1.216162085533142,
       "learning_rate": 8.511111111111112e-05,
+      "loss": 0.0645,
       "step": 1175
     },
     {
       "epoch": 5.41,
+      "grad_norm": 1.3237918615341187,
       "learning_rate": 8.455555555555556e-05,
+      "loss": 0.0667,
       "step": 1200
     },
     {
       "epoch": 5.52,
+      "grad_norm": 1.7567414045333862,
       "learning_rate": 8.4e-05,
+      "loss": 0.0681,
       "step": 1225
     },
     {
       "epoch": 5.63,
+      "grad_norm": 1.6587105989456177,
       "learning_rate": 8.344444444444445e-05,
+      "loss": 0.0687,
       "step": 1250
     },
     {
       "epoch": 5.74,
+      "grad_norm": 1.6716225147247314,
       "learning_rate": 8.28888888888889e-05,
+      "loss": 0.0697,
       "step": 1275
     },
     {
       "epoch": 5.86,
+      "grad_norm": 1.4438738822937012,
       "learning_rate": 8.233333333333333e-05,
+      "loss": 0.0714,
       "step": 1300
     },
     {
       "epoch": 5.97,
+      "grad_norm": 1.4982130527496338,
       "learning_rate": 8.177777777777778e-05,
+      "loss": 0.0713,
       "step": 1325
     },
     {
       "epoch": 6.08,
+      "grad_norm": 1.739702820777893,
       "learning_rate": 8.122222222222222e-05,
+      "loss": 0.0457,
       "step": 1350
     },
     {
       "epoch": 6.19,
+      "grad_norm": 0.8361156582832336,
       "learning_rate": 8.066666666666667e-05,
+      "loss": 0.0379,
       "step": 1375
     },
     {
       "epoch": 6.31,
+      "grad_norm": 0.9192413091659546,
       "learning_rate": 8.011111111111111e-05,
+      "loss": 0.0437,
       "step": 1400
     },
     {
       "epoch": 6.42,
+      "grad_norm": 0.7922126054763794,
       "learning_rate": 7.955555555555556e-05,
+      "loss": 0.0393,
       "step": 1425
     },
     {
       "epoch": 6.53,
+      "grad_norm": 0.7307619452476501,
       "learning_rate": 7.900000000000001e-05,
+      "loss": 0.0398,
       "step": 1450
     },
     {
       "epoch": 6.64,
+      "grad_norm": 0.776818037033081,
       "learning_rate": 7.844444444444446e-05,
+      "loss": 0.0417,
       "step": 1475
     },
     {
       "epoch": 6.76,
+      "grad_norm": 1.0701009035110474,
       "learning_rate": 7.788888888888888e-05,
+      "loss": 0.044,
       "step": 1500
     },
     {
       "epoch": 6.87,
+      "grad_norm": 0.9106244444847107,
       "learning_rate": 7.733333333333333e-05,
+      "loss": 0.0449,
       "step": 1525
     },
     {
       "epoch": 6.98,
+      "grad_norm": 0.8038358688354492,
       "learning_rate": 7.677777777777778e-05,
+      "loss": 0.0443,
       "step": 1550
     },
     {
       "epoch": 7.09,
+      "grad_norm": 0.8888857364654541,
       "learning_rate": 7.622222222222223e-05,
+      "loss": 0.0295,
       "step": 1575
     },
     {
       "epoch": 7.21,
+      "grad_norm": 0.7159153819084167,
       "learning_rate": 7.566666666666667e-05,
+      "loss": 0.0235,
       "step": 1600
     },
     {
       "epoch": 7.32,
+      "grad_norm": 0.9652548432350159,
       "learning_rate": 7.511111111111111e-05,
+      "loss": 0.0276,
       "step": 1625
     },
     {
       "epoch": 7.43,
+      "grad_norm": 0.9230145215988159,
       "learning_rate": 7.455555555555556e-05,
+      "loss": 0.0253,
       "step": 1650
     },
     {
       "epoch": 7.55,
+      "grad_norm": 1.6814730167388916,
       "learning_rate": 7.4e-05,
+      "loss": 0.029,
       "step": 1675
     },
     {
       "epoch": 7.66,
+      "grad_norm": 1.036941409111023,
       "learning_rate": 7.344444444444445e-05,
+      "loss": 0.0262,
       "step": 1700
     },
     {
       "epoch": 7.77,
+      "grad_norm": 0.6998699307441711,
       "learning_rate": 7.28888888888889e-05,
+      "loss": 0.027,
       "step": 1725
     },
     {
       "epoch": 7.88,
+      "grad_norm": 0.7357670068740845,
       "learning_rate": 7.233333333333335e-05,
+      "loss": 0.0254,
       "step": 1750
     },
     {
       "epoch": 8.0,
+      "grad_norm": 2.0645382404327393,
       "learning_rate": 7.177777777777777e-05,
+      "loss": 0.0299,
       "step": 1775
     },
     {
       "epoch": 8.11,
+      "grad_norm": 0.7657965421676636,
       "learning_rate": 7.122222222222222e-05,
+      "loss": 0.0169,
       "step": 1800
     },
     {
       "epoch": 8.22,
+      "grad_norm": 1.1174191236495972,
       "learning_rate": 7.066666666666667e-05,
+      "loss": 0.0142,
       "step": 1825
     },
     {
       "epoch": 8.33,
+      "grad_norm": 1.5552918910980225,
       "learning_rate": 7.011111111111112e-05,
+      "loss": 0.0169,
       "step": 1850
     },
     {
       "epoch": 8.45,
+      "grad_norm": 0.6741360425949097,
       "learning_rate": 6.955555555555556e-05,
+      "loss": 0.0171,
       "step": 1875
     },
     {
       "epoch": 8.56,
+      "grad_norm": 0.6866123080253601,
       "learning_rate": 6.9e-05,
+      "loss": 0.017,
       "step": 1900
     },
     {
       "epoch": 8.67,
+      "grad_norm": 0.5000187754631042,
       "learning_rate": 6.844444444444445e-05,
+      "loss": 0.0158,
       "step": 1925
     },
     {
       "epoch": 8.78,
+      "grad_norm": 0.7297791838645935,
       "learning_rate": 6.788888888888888e-05,
+      "loss": 0.0175,
       "step": 1950
     },
     {
       "epoch": 8.9,
+      "grad_norm": 0.7372362017631531,
       "learning_rate": 6.733333333333333e-05,
+      "loss": 0.0156,
       "step": 1975
     },
     {
       "epoch": 9.01,
+      "grad_norm": 0.7431686520576477,
       "learning_rate": 6.677777777777779e-05,
+      "loss": 0.0165,
       "step": 2000
     },
     {
       "epoch": 9.01,
+      "eval_loss": 0.3495735228061676,
+      "eval_runtime": 1282.4445,
+      "eval_samples_per_second": 2.435,
+      "eval_steps_per_second": 0.076,
+      "eval_wer": 0.30066445182724255,
       "step": 2000
     },
     {
       "epoch": 9.12,
+      "grad_norm": 0.7414669990539551,
       "learning_rate": 6.622222222222224e-05,
       "loss": 0.0124,
       "step": 2025
     },
     {
       "epoch": 9.23,
+      "grad_norm": 0.7123928666114807,
       "learning_rate": 6.566666666666666e-05,
+      "loss": 0.0111,
       "step": 2050
     },
     {
       "epoch": 9.35,
+      "grad_norm": 0.5316759943962097,
       "learning_rate": 6.511111111111111e-05,
+      "loss": 0.0111,
       "step": 2075
     },
     {
       "epoch": 9.46,
+      "grad_norm": 0.2563645839691162,
       "learning_rate": 6.455555555555556e-05,
+      "loss": 0.012,
       "step": 2100
     },
     {
       "epoch": 9.57,
+      "grad_norm": 1.2440812587738037,
       "learning_rate": 6.400000000000001e-05,
+      "loss": 0.0106,
       "step": 2125
     },
     {
       "epoch": 9.68,
+      "grad_norm": 1.0652848482131958,
       "learning_rate": 6.344444444444445e-05,
+      "loss": 0.0082,
       "step": 2150
     },
     {
       "epoch": 9.8,
+      "grad_norm": 1.4494587182998657,
       "learning_rate": 6.28888888888889e-05,
+      "loss": 0.0089,
       "step": 2175
     },
     {
       "epoch": 9.91,
+      "grad_norm": 1.4650260210037231,
       "learning_rate": 6.233333333333334e-05,
+      "loss": 0.0087,
       "step": 2200
     },
     {
       "epoch": 10.02,
+      "grad_norm": 0.8734236359596252,
       "learning_rate": 6.177777777777779e-05,
+      "loss": 0.0081,
       "step": 2225
     },
     {
       "epoch": 10.14,
+      "grad_norm": 0.3450920283794403,
       "learning_rate": 6.122222222222222e-05,
+      "loss": 0.0083,
       "step": 2250
     },
     {
       "epoch": 10.25,
+      "grad_norm": 0.21007080376148224,
       "learning_rate": 6.066666666666667e-05,
+      "loss": 0.0084,
       "step": 2275
     },
     {
       "epoch": 10.36,
+      "grad_norm": 0.646001935005188,
       "learning_rate": 6.011111111111112e-05,
+      "loss": 0.0093,
       "step": 2300
     },
     {
       "epoch": 10.47,
+      "grad_norm": 0.2686345875263214,
       "learning_rate": 5.9555555555555554e-05,
+      "loss": 0.009,
       "step": 2325
     },
     {
       "epoch": 10.59,
+      "grad_norm": 0.4910290241241455,
       "learning_rate": 5.9e-05,
+      "loss": 0.0076,
       "step": 2350
     },
     {
       "epoch": 10.7,
+      "grad_norm": 0.2715873718261719,
       "learning_rate": 5.844444444444445e-05,
+      "loss": 0.0068,
       "step": 2375
     },
     {
       "epoch": 10.81,
+      "grad_norm": 0.4012056589126587,
       "learning_rate": 5.788888888888889e-05,
+      "loss": 0.009,
       "step": 2400
     },
     {
       "epoch": 10.92,
+      "grad_norm": 0.7254907488822937,
       "learning_rate": 5.7333333333333336e-05,
+      "loss": 0.0083,
       "step": 2425
     },
     {
       "epoch": 11.04,
+      "grad_norm": 0.8048310279846191,
       "learning_rate": 5.6777777777777786e-05,
+      "loss": 0.007,
       "step": 2450
     },
     {
       "epoch": 11.15,
+      "grad_norm": 0.4055149555206299,
       "learning_rate": 5.622222222222222e-05,
+      "loss": 0.0063,
       "step": 2475
     },
     {
       "epoch": 11.26,
+      "grad_norm": 0.4453238248825073,
       "learning_rate": 5.566666666666667e-05,
+      "loss": 0.0061,
       "step": 2500
     },
     {
       "epoch": 11.37,
+      "grad_norm": 0.7719666361808777,
       "learning_rate": 5.511111111111111e-05,
+      "loss": 0.006,
       "step": 2525
     },
     {
       "epoch": 11.49,
+      "grad_norm": 0.6845406889915466,
       "learning_rate": 5.455555555555556e-05,
+      "loss": 0.006,
       "step": 2550
     },
     {
       "epoch": 11.6,
+      "grad_norm": 1.624002456665039,
       "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.006,
       "step": 2575
     },
     {
       "epoch": 11.71,
+      "grad_norm": 0.28533199429512024,
       "learning_rate": 5.3444444444444455e-05,
+      "loss": 0.006,
       "step": 2600
     },
     {
       "epoch": 11.82,
+      "grad_norm": 0.6341890096664429,
       "learning_rate": 5.2888888888888885e-05,
+      "loss": 0.0058,
       "step": 2625
     },
     {
       "epoch": 11.94,
+      "grad_norm": 0.8105676770210266,
       "learning_rate": 5.2333333333333336e-05,
+      "loss": 0.0048,
       "step": 2650
     },
     {
       "epoch": 12.05,
+      "grad_norm": 0.4783516824245453,
       "learning_rate": 5.177777777777778e-05,
+      "loss": 0.006,
       "step": 2675
     },
     {
       "epoch": 12.16,
+      "grad_norm": 0.8321937322616577,
       "learning_rate": 5.122222222222223e-05,
+      "loss": 0.0042,
       "step": 2700
     },
     {
       "epoch": 12.27,
+      "grad_norm": 0.1419239044189453,
       "learning_rate": 5.0666666666666674e-05,
+      "loss": 0.0045,
       "step": 2725
     },
     {
       "epoch": 12.39,
+      "grad_norm": 0.19317950308322906,
       "learning_rate": 5.011111111111111e-05,
+      "loss": 0.0036,
       "step": 2750
     },
     {
       "epoch": 12.5,
+      "grad_norm": 0.13593174517154694,
       "learning_rate": 4.955555555555556e-05,
+      "loss": 0.004,
       "step": 2775
     },
     {
       "epoch": 12.61,
+      "grad_norm": 0.3188939094543457,
       "learning_rate": 4.9e-05,
+      "loss": 0.0034,
       "step": 2800
     },
     {
       "epoch": 12.73,
+      "grad_norm": 0.31336820125579834,
       "learning_rate": 4.844444444444445e-05,
+      "loss": 0.0035,
       "step": 2825
     },
     {
       "epoch": 12.84,
+      "grad_norm": 0.25262606143951416,
       "learning_rate": 4.7888888888888886e-05,
+      "loss": 0.0034,
       "step": 2850
     },
     {
       "epoch": 12.95,
+      "grad_norm": 0.4232746362686157,
       "learning_rate": 4.7333333333333336e-05,
+      "loss": 0.0036,
       "step": 2875
     },
     {
       "epoch": 13.06,
+      "grad_norm": 0.37615010142326355,
       "learning_rate": 4.677777777777778e-05,
+      "loss": 0.0034,
       "step": 2900
     },
     {
       "epoch": 13.18,
+      "grad_norm": 0.13130201399326324,
       "learning_rate": 4.6222222222222224e-05,
+      "loss": 0.0024,
       "step": 2925
     },
     {
       "epoch": 13.29,
+      "grad_norm": 0.31216856837272644,
       "learning_rate": 4.566666666666667e-05,
+      "loss": 0.0034,
       "step": 2950
     },
     {
       "epoch": 13.4,
+      "grad_norm": 0.06603355705738068,
       "learning_rate": 4.511111111111112e-05,
+      "loss": 0.0019,
       "step": 2975
     },
     {
       "epoch": 13.51,
+      "grad_norm": 0.2945907711982727,
       "learning_rate": 4.4555555555555555e-05,
+      "loss": 0.0022,
       "step": 3000
     },
     {
       "epoch": 13.51,
+      "eval_loss": 0.36493760347366333,
+      "eval_runtime": 1357.8705,
+      "eval_samples_per_second": 2.3,
+      "eval_steps_per_second": 0.072,
+      "eval_wer": 0.2786061963995982,
       "step": 3000
     },
     {
       "epoch": 13.63,
+      "grad_norm": 0.2547701597213745,
       "learning_rate": 4.4000000000000006e-05,
+      "loss": 0.002,
       "step": 3025
     },
     {
       "epoch": 13.74,
+      "grad_norm": 0.7029439806938171,
       "learning_rate": 4.344444444444445e-05,
+      "loss": 0.002,
       "step": 3050
     },
     {
       "epoch": 13.85,
+      "grad_norm": 0.821772038936615,
       "learning_rate": 4.2888888888888886e-05,
+      "loss": 0.0025,
       "step": 3075
     },
     {
       "epoch": 13.96,
+      "grad_norm": 0.17252065241336823,
       "learning_rate": 4.233333333333334e-05,
+      "loss": 0.0027,
       "step": 3100
     },
     {
       "epoch": 14.08,
+      "grad_norm": 0.18165266513824463,
       "learning_rate": 4.177777777777778e-05,
+      "loss": 0.0022,
       "step": 3125
     },
     {
       "epoch": 14.19,
+      "grad_norm": 0.11963178962469101,
       "learning_rate": 4.1222222222222224e-05,
+      "loss": 0.0023,
       "step": 3150
     },
     {
       "epoch": 14.3,
+      "grad_norm": 1.3975796699523926,
       "learning_rate": 4.066666666666667e-05,
+      "loss": 0.0024,
       "step": 3175
     },
     {
       "epoch": 14.41,
+      "grad_norm": 0.3654703199863434,
       "learning_rate": 4.011111111111111e-05,
+      "loss": 0.0026,
       "step": 3200
     },
     {
       "epoch": 14.53,
+      "grad_norm": 0.22537653148174286,
       "learning_rate": 3.9555555555555556e-05,
+      "loss": 0.003,
       "step": 3225
     },
     {
       "epoch": 14.64,
+      "grad_norm": 0.531537652015686,
       "learning_rate": 3.9000000000000006e-05,
+      "loss": 0.0025,
       "step": 3250
     },
     {
       "epoch": 14.75,
+      "grad_norm": 0.09146568179130554,
       "learning_rate": 3.844444444444444e-05,
+      "loss": 0.0023,
       "step": 3275
     },
     {
       "epoch": 14.86,
+      "grad_norm": 0.3482789993286133,
       "learning_rate": 3.7888888888888894e-05,
+      "loss": 0.0028,
       "step": 3300
     },
     {
       "epoch": 14.98,
+      "grad_norm": 0.15211211144924164,
       "learning_rate": 3.733333333333334e-05,
+      "loss": 0.0032,
       "step": 3325
     },
     {
       "epoch": 15.09,
+      "grad_norm": 0.02796722762286663,
       "learning_rate": 3.677777777777778e-05,
+      "loss": 0.002,
       "step": 3350
     },
     {
       "epoch": 15.2,
+      "grad_norm": 0.022246429696679115,
       "learning_rate": 3.6222222222222225e-05,
+      "loss": 0.0019,
       "step": 3375
     },
     {
       "epoch": 15.32,
+      "grad_norm": 0.2861407995223999,
       "learning_rate": 3.566666666666667e-05,
+      "loss": 0.0029,
       "step": 3400
     },
     {
       "epoch": 15.43,
+      "grad_norm": 0.24615710973739624,
       "learning_rate": 3.511111111111111e-05,
+      "loss": 0.003,
       "step": 3425
     },
     {
       "epoch": 15.54,
+      "grad_norm": 0.20990662276744843,
       "learning_rate": 3.4555555555555556e-05,
+      "loss": 0.003,
       "step": 3450
     },
     {
       "epoch": 15.65,
+      "grad_norm": 0.21401448547840118,
       "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.0017,
       "step": 3475
     },
     {
       "epoch": 15.77,
+      "grad_norm": 0.04294591024518013,
       "learning_rate": 3.3444444444444443e-05,
+      "loss": 0.0019,
       "step": 3500
     },
     {
       "epoch": 15.88,
+      "grad_norm": 0.7296904921531677,
       "learning_rate": 3.2888888888888894e-05,
+      "loss": 0.002,
       "step": 3525
     },
     {
       "epoch": 15.99,
+      "grad_norm": 0.23458455502986908,
       "learning_rate": 3.233333333333333e-05,
+      "loss": 0.0016,
       "step": 3550
     },
     {
       "epoch": 16.1,
+      "grad_norm": 0.02373860962688923,
       "learning_rate": 3.177777777777778e-05,
+      "loss": 0.0007,
       "step": 3575
     },
     {
       "epoch": 16.22,
+      "grad_norm": 0.028949666768312454,
       "learning_rate": 3.1222222222222225e-05,
+      "loss": 0.0017,
       "step": 3600
     },
     {
       "epoch": 16.33,
+      "grad_norm": 0.01981484517455101,
       "learning_rate": 3.066666666666667e-05,
+      "loss": 0.0013,
       "step": 3625
     },
     {
       "epoch": 16.44,
+      "grad_norm": 0.017177563160657883,
       "learning_rate": 3.0111111111111113e-05,
+      "loss": 0.0014,
       "step": 3650
     },
     {
       "epoch": 16.55,
+      "grad_norm": 0.021722471341490746,
       "learning_rate": 2.955555555555556e-05,
+      "loss": 0.0008,
       "step": 3675
     },
     {
       "epoch": 16.67,
+      "grad_norm": 0.10685452073812485,
       "learning_rate": 2.9e-05,
+      "loss": 0.0013,
       "step": 3700
     },
     {
       "epoch": 16.78,
+      "grad_norm": 0.01891660876572132,
       "learning_rate": 2.8444444444444447e-05,
       "loss": 0.0007,
       "step": 3725
     },
     {
       "epoch": 16.89,
+      "grad_norm": 0.021835455670952797,
       "learning_rate": 2.788888888888889e-05,
+      "loss": 0.0011,
       "step": 3750
     },
     {
       "epoch": 17.0,
+      "grad_norm": 0.017706584185361862,
       "learning_rate": 2.733333333333333e-05,
+      "loss": 0.0006,
       "step": 3775
     },
     {
       "epoch": 17.12,
+      "grad_norm": 0.2573525011539459,
       "learning_rate": 2.677777777777778e-05,
+      "loss": 0.0013,
       "step": 3800
     },
     {
       "epoch": 17.23,
+      "grad_norm": 0.015161894261837006,
       "learning_rate": 2.6222222222222226e-05,
+      "loss": 0.0004,
       "step": 3825
     },
     {
       "epoch": 17.34,
+      "grad_norm": 0.017167283222079277,
       "learning_rate": 2.5666666666666666e-05,
+      "loss": 0.0005,
       "step": 3850
     },
     {
       "epoch": 17.45,
+      "grad_norm": 0.019201019778847694,
       "learning_rate": 2.5111111111111113e-05,
+      "loss": 0.0005,
       "step": 3875
     },
     {
       "epoch": 17.57,
+      "grad_norm": 0.024687746539711952,
       "learning_rate": 2.4555555555555557e-05,
+      "loss": 0.0006,
       "step": 3900
     },
     {
       "epoch": 17.68,
+      "grad_norm": 0.016668912023305893,
       "learning_rate": 2.4e-05,
+      "loss": 0.0006,
       "step": 3925
     },
     {
       "epoch": 17.79,
+      "grad_norm": 0.013516202569007874,
       "learning_rate": 2.3444444444444448e-05,
+      "loss": 0.0006,
       "step": 3950
     },
     {
       "epoch": 17.91,
+      "grad_norm": 0.012521643191576004,
       "learning_rate": 2.288888888888889e-05,
+      "loss": 0.0006,
       "step": 3975
     },
     {
       "epoch": 18.02,
+      "grad_norm": 0.013049867004156113,
       "learning_rate": 2.2333333333333335e-05,
+      "loss": 0.0011,
       "step": 4000
     },
     {
       "epoch": 18.02,
+      "eval_loss": 0.3699657618999481,
+      "eval_runtime": 1350.6575,
+      "eval_samples_per_second": 2.312,
+      "eval_steps_per_second": 0.073,
+      "eval_wer": 0.2680985861083211,
       "step": 4000
     },
     {
       "epoch": 18.13,
+      "grad_norm": 0.0116911381483078,
       "learning_rate": 2.177777777777778e-05,
+      "loss": 0.0005,
       "step": 4025
     },
     {
       "epoch": 18.24,
+      "grad_norm": 0.011522598564624786,
       "learning_rate": 2.1222222222222223e-05,
+      "loss": 0.0007,
       "step": 4050
     },
     {
       "epoch": 18.36,
+      "grad_norm": 0.013516987673938274,
       "learning_rate": 2.0666666666666666e-05,
+      "loss": 0.0004,
       "step": 4075
     },
     {
       "epoch": 18.47,
+      "grad_norm": 0.021075060591101646,
       "learning_rate": 2.011111111111111e-05,
+      "loss": 0.0006,
       "step": 4100
     },
     {
       "epoch": 18.58,
+      "grad_norm": 0.012502779252827168,
       "learning_rate": 1.9555555555555557e-05,
+      "loss": 0.0004,
       "step": 4125
     },
     {
       "epoch": 18.69,
+      "grad_norm": 0.011508314870297909,
       "learning_rate": 1.9e-05,
       "loss": 0.0003,
       "step": 4150
     },
     {
       "epoch": 18.81,
+      "grad_norm": 0.013346145860850811,
       "learning_rate": 1.8444444444444445e-05,
+      "loss": 0.0004,
       "step": 4175
     },
     {
       "epoch": 18.92,
+      "grad_norm": 0.011840825900435448,
       "learning_rate": 1.788888888888889e-05,
+      "loss": 0.0004,
       "step": 4200
     },
     {
       "epoch": 19.03,
+      "grad_norm": 0.009515935555100441,
       "learning_rate": 1.7333333333333336e-05,
       "loss": 0.0004,
       "step": 4225
     },
     {
       "epoch": 19.14,
+      "grad_norm": 0.01005704328417778,
       "learning_rate": 1.677777777777778e-05,
       "loss": 0.0003,
       "step": 4250
     },
     {
       "epoch": 19.26,
+      "grad_norm": 0.010899914428591728,
       "learning_rate": 1.6222222222222223e-05,
       "loss": 0.0003,
       "step": 4275
     },
     {
       "epoch": 19.37,
+      "grad_norm": 0.007188358344137669,
       "learning_rate": 1.5666666666666667e-05,
       "loss": 0.0003,
       "step": 4300
     },
     {
       "epoch": 19.48,
+      "grad_norm": 0.00955110415816307,
       "learning_rate": 1.5111111111111112e-05,
+      "loss": 0.0003,
       "step": 4325
     },
     {
       "epoch": 19.59,
+      "grad_norm": 0.007988874800503254,
       "learning_rate": 1.4555555555555556e-05,
       "loss": 0.0003,
       "step": 4350
     },
     {
       "epoch": 19.71,
+      "grad_norm": 0.009029334411025047,
       "learning_rate": 1.4000000000000001e-05,
+      "loss": 0.0005,
       "step": 4375
     },
     {
       "epoch": 19.82,
+      "grad_norm": 0.01156931184232235,
       "learning_rate": 1.3444444444444445e-05,
       "loss": 0.0003,
       "step": 4400
     },
     {
       "epoch": 19.93,
+      "grad_norm": 0.01129342895001173,
       "learning_rate": 1.2888888888888889e-05,
+      "loss": 0.0003,
       "step": 4425
     },
     {
       "epoch": 20.05,
+      "grad_norm": 0.00786085519939661,
       "learning_rate": 1.2333333333333334e-05,
       "loss": 0.0004,
       "step": 4450
     },
     {
       "epoch": 20.16,
+      "grad_norm": 0.009362996555864811,
       "learning_rate": 1.1777777777777778e-05,
       "loss": 0.0003,
       "step": 4475
     },
     {
       "epoch": 20.27,
+      "grad_norm": 0.008518415503203869,
       "learning_rate": 1.1222222222222224e-05,
       "loss": 0.0003,
       "step": 4500
     },
     {
       "epoch": 20.38,
+      "grad_norm": 0.007652095053344965,
       "learning_rate": 1.0666666666666667e-05,
       "loss": 0.0003,
       "step": 4525
     },
     {
       "epoch": 20.5,
+      "grad_norm": 0.008384721353650093,
       "learning_rate": 1.0111111111111111e-05,
       "loss": 0.0003,
       "step": 4550
     },
     {
       "epoch": 20.61,
+      "grad_norm": 0.010271112434566021,
       "learning_rate": 9.555555555555556e-06,
       "loss": 0.0003,
       "step": 4575
     },
     {
       "epoch": 20.72,
+      "grad_norm": 0.0075312405824661255,
       "learning_rate": 9e-06,
+      "loss": 0.0004,
       "step": 4600
     },
     {
       "epoch": 20.83,
+      "grad_norm": 0.009318512864410877,
       "learning_rate": 8.444444444444446e-06,
       "loss": 0.0003,
       "step": 4625
     },
     {
       "epoch": 20.95,
+      "grad_norm": 0.0078095910139381886,
       "learning_rate": 7.88888888888889e-06,
+      "loss": 0.0003,
       "step": 4650
     },
     {
       "epoch": 21.06,
+      "grad_norm": 0.00864331517368555,
       "learning_rate": 7.333333333333334e-06,
       "loss": 0.0003,
       "step": 4675
     },
     {
       "epoch": 21.17,
+      "grad_norm": 0.007982113398611546,
       "learning_rate": 6.777777777777779e-06,
       "loss": 0.0003,
       "step": 4700
     },
     {
       "epoch": 21.28,
+      "grad_norm": 0.009959988296031952,
       "learning_rate": 6.222222222222222e-06,
       "loss": 0.0003,
       "step": 4725
     },
     {
       "epoch": 21.4,
+      "grad_norm": 0.006970000918954611,
       "learning_rate": 5.666666666666667e-06,
       "loss": 0.0003,
       "step": 4750
     },
     {
       "epoch": 21.51,
+      "grad_norm": 0.0091372299939394,
       "learning_rate": 5.1111111111111115e-06,
       "loss": 0.0003,
       "step": 4775
     },
     {
       "epoch": 21.62,
+      "grad_norm": 0.010007310658693314,
       "learning_rate": 4.555555555555556e-06,
       "loss": 0.0003,
       "step": 4800
     },
     {
       "epoch": 21.73,
+      "grad_norm": 0.008494430221617222,
       "learning_rate": 4.000000000000001e-06,
+      "loss": 0.0003,
       "step": 4825
     },
     {
       "epoch": 21.85,
+      "grad_norm": 0.0071678003296256065,
       "learning_rate": 3.4444444444444444e-06,
       "loss": 0.0003,
       "step": 4850
     },
     {
       "epoch": 21.96,
+      "grad_norm": 0.00614485889673233,
       "learning_rate": 2.888888888888889e-06,
       "loss": 0.0003,
       "step": 4875
     },
     {
       "epoch": 22.07,
+      "grad_norm": 0.008994187228381634,
       "learning_rate": 2.3333333333333336e-06,
       "loss": 0.0003,
       "step": 4900
     },
     {
       "epoch": 22.18,
+      "grad_norm": 0.008750267326831818,
       "learning_rate": 1.777777777777778e-06,
       "loss": 0.0003,
       "step": 4925
     },
     {
       "epoch": 22.3,
+      "grad_norm": 0.007052999921143055,
       "learning_rate": 1.2222222222222223e-06,
       "loss": 0.0002,
       "step": 4950
     },
     {
       "epoch": 22.41,
+      "grad_norm": 0.007113702595233917,
       "learning_rate": 6.666666666666667e-07,
       "loss": 0.0002,
       "step": 4975
     },
     {
       "epoch": 22.52,
+      "grad_norm": 0.008453252725303173,
       "learning_rate": 1.1111111111111112e-07,
       "loss": 0.0003,
       "step": 5000
     },
     {
       "epoch": 22.52,
+      "eval_loss": 0.37487614154815674,
+      "eval_runtime": 1348.7243,
+      "eval_samples_per_second": 2.316,
+      "eval_steps_per_second": 0.073,
+      "eval_wer": 0.26639882562002626,
       "step": 5000
     },
     {
       "epoch": 22.52,
       "step": 5000,
       "total_flos": 2.532745423355904e+20,
+      "train_loss": 0.1750582966186106,
+      "train_runtime": 15499.9794,
+      "train_samples_per_second": 10.323,
+      "train_steps_per_second": 0.323
     }
   ],
   "logging_steps": 25,

wandb/debug-internal.log CHANGED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20240327_190513-7p2x8a0l/files/output.log CHANGED Viewed

@@ -4830,3 +4830,122 @@ Non-default generation parameters: {'max_length': 448, 'begin_suppress_tokens':
 [WARNING|configuration_utils.py:447] 2024-03-27 23:25:02,985 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
 [WARNING|configuration_utils.py:447] 2024-03-27 23:25:02,985 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
 [WARNING|configuration_utils.py:447] 2024-03-27 23:25:02,985 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.

 [WARNING|configuration_utils.py:447] 2024-03-27 23:25:02,985 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
 [WARNING|configuration_utils.py:447] 2024-03-27 23:25:02,985 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
 [WARNING|configuration_utils.py:447] 2024-03-27 23:25:02,985 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[WARNING|configuration_utils.py:447] 2024-03-27 23:25:02,985 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+***** train metrics *****
+  epoch                    =      22.52
+  train_loss               =     0.1751
+  train_runtime            = 4:18:19.97
+  train_samples            =       7099
+  train_samples_per_second =     10.323
+  train_steps_per_second   =      0.323
+03/27/2024 23:25:17 - INFO - __main__ - *** Evaluate ***
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+[INFO|trainer.py:3549] 2024-03-27 23:25:17,894 >>   Batch size = 32e non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+***** eval metrics *****
+  epoch                   =      22.52
+  eval_loss               =     0.3749
+  eval_runtime            = 0:22:25.02
+  eval_samples            =       3123
+  eval_samples_per_second =      2.322
+  eval_steps_per_second   =      0.073
+Non-default generation parameters: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}arameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
+Non-default generation parameters: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}arameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.

wandb/run-20240327_190513-7p2x8a0l/files/wandb-summary.json CHANGED Viewed

@@ -1 +1 @@

- {"train/loss": 0.0003, "train/grad_norm": 0.008453252725303173, "train/learning_rate": 1.1111111111111112e-07, "train/epoch": 22.52, "train/global_step": 5000, "_timestamp": ~~1711578212~~.~~4327524~~, "_runtime": ~~15498~~.~~784944295883~~, "_step": ~~205~~, "eval/loss": 0.37487614154815674, "eval/wer": 0.26639882562002626, "eval/runtime": ~~1348~~.~~7243~~, "eval/samples_per_second": 2.~~316~~, "eval/steps_per_second": 0.073, "train_runtime": 15499.9794, "train_samples_per_second": 10.323, "train_steps_per_second": 0.323, "total_flos": 2.532745423355904e+20, "train_loss": 0.1750582966186106}

+ {"train/loss": 0.0003, "train/grad_norm": 0.008453252725303173, "train/learning_rate": 1.1111111111111112e-07, "train/epoch": 22.52, "train/global_step": 5000, "_timestamp": 1711579662.9203484, "_runtime": 16949.272540330887, "_step": 206, "eval/loss": 0.37487614154815674, "eval/wer": 0.26639882562002626, "eval/runtime": 1345.0244, "eval/samples_per_second": 2.322, "eval/steps_per_second": 0.073, "train_runtime": 15499.9794, "train_samples_per_second": 10.323, "train_steps_per_second": 0.323, "total_flos": 2.532745423355904e+20, "train_loss": 0.1750582966186106}

wandb/run-20240327_190513-7p2x8a0l/logs/debug-internal.log CHANGED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20240327_190513-7p2x8a0l/run-7p2x8a0l.wandb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ed5aa6d2654f239d8aa803a04c3f3113aab39361e15504d2efba3ba12465d24
-size 4529113

 version https://git-lfs.github.com/spec/v1
+oid sha256:e57b83715ac212c8caaeb38d450489ce7eb544fbe4d6ac222d7a1a7447549dcc
+size 4818911