empty-michael's picture
End of training
dd10feb verified
{
"best_metric": 2.132894992828369,
"best_model_checkpoint": "/tmp/wandb/run-20240211_061007-slcnkgcr/files/train_output/checkpoint-10000",
"epoch": 2.042133333333333,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"MSE": 891.9713033040365,
"MSE/layer0": 891.9713033040365,
"dead_code_fraction": 0.1506,
"dead_code_fraction/layer0": 0.1506,
"epoch": 0.0,
"input_norm": 31.997233708699547,
"input_norm/layer0": 31.997233708699547,
"learning_rate": 0.0005,
"loss": 8.0845,
"max_norm": 34.580135345458984,
"max_norm/layer0": 34.580135345458984,
"mean_norm": 31.989344596862793,
"mean_norm/layer0": 31.989344596862793,
"multicode_k": 1,
"output_norm": 8.584638833999634,
"output_norm/layer0": 8.584638833999634,
"step": 1
},
{
"MSE": 883.0105907414232,
"MSE/layer0": 883.0105907414232,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.01,
"input_norm": 31.99778711876902,
"input_norm/layer0": 31.99778711876902,
"learning_rate": 0.0005,
"loss": 4.8444,
"max_norm": 34.610191345214844,
"max_norm/layer0": 34.610191345214844,
"mean_norm": 32.02294731140137,
"mean_norm/layer0": 32.02294731140137,
"multicode_k": 1,
"output_norm": 8.645599765842462,
"output_norm/layer0": 8.645599765842462,
"step": 50
},
{
"MSE": 872.9267329915364,
"MSE/layer0": 872.9267329915364,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.01,
"input_norm": 31.998572165171304,
"input_norm/layer0": 31.998572165171304,
"learning_rate": 0.0005,
"loss": 3.9294,
"max_norm": 34.62763595581055,
"max_norm/layer0": 34.62763595581055,
"mean_norm": 32.06278419494629,
"mean_norm/layer0": 32.06278419494629,
"multicode_k": 1,
"output_norm": 8.74148860613505,
"output_norm/layer0": 8.74148860613505,
"step": 100
},
{
"MSE": 866.7590488688152,
"MSE/layer0": 866.7590488688152,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.01,
"input_norm": 31.99865425427754,
"input_norm/layer0": 31.99865425427754,
"learning_rate": 0.0005,
"loss": 3.5413,
"max_norm": 34.65019607543945,
"max_norm/layer0": 34.65019607543945,
"mean_norm": 32.1027717590332,
"mean_norm/layer0": 32.1027717590332,
"multicode_k": 1,
"output_norm": 8.811674615542097,
"output_norm/layer0": 8.811674615542097,
"step": 150
},
{
"MSE": 858.8314244588221,
"MSE/layer0": 858.8314244588221,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.02,
"input_norm": 31.998634125391646,
"input_norm/layer0": 31.998634125391646,
"learning_rate": 0.0005,
"loss": 3.3381,
"max_norm": 34.73014831542969,
"max_norm/layer0": 34.73014831542969,
"mean_norm": 32.17362403869629,
"mean_norm/layer0": 32.17362403869629,
"multicode_k": 1,
"output_norm": 8.925555121103923,
"output_norm/layer0": 8.925555121103923,
"step": 200
},
{
"MSE": 849.6408699544276,
"MSE/layer0": 849.6408699544276,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.03,
"input_norm": 31.9986141427358,
"input_norm/layer0": 31.9986141427358,
"learning_rate": 0.0005,
"loss": 3.2486,
"max_norm": 34.8281364440918,
"max_norm/layer0": 34.8281364440918,
"mean_norm": 32.26718330383301,
"mean_norm/layer0": 32.26718330383301,
"multicode_k": 1,
"output_norm": 9.101092262268068,
"output_norm/layer0": 9.101092262268068,
"step": 250
},
{
"MSE": 841.0051658121741,
"MSE/layer0": 841.0051658121741,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.03,
"input_norm": 31.99862952232361,
"input_norm/layer0": 31.99862952232361,
"learning_rate": 0.0005,
"loss": 3.1503,
"max_norm": 34.946006774902344,
"max_norm/layer0": 34.946006774902344,
"mean_norm": 32.361915588378906,
"mean_norm/layer0": 32.361915588378906,
"multicode_k": 1,
"output_norm": 9.305952178637185,
"output_norm/layer0": 9.305952178637185,
"step": 300
},
{
"MSE": 833.1103855387371,
"MSE/layer0": 833.1103855387371,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.04,
"input_norm": 31.998617506027223,
"input_norm/layer0": 31.998617506027223,
"learning_rate": 0.0005,
"loss": 3.0966,
"max_norm": 35.09696578979492,
"max_norm/layer0": 35.09696578979492,
"mean_norm": 32.463951110839844,
"mean_norm/layer0": 32.463951110839844,
"multicode_k": 1,
"output_norm": 9.513547644615176,
"output_norm/layer0": 9.513547644615176,
"step": 350
},
{
"MSE": 824.8635622151694,
"MSE/layer0": 824.8635622151694,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.04,
"input_norm": 31.998617092768363,
"input_norm/layer0": 31.998617092768363,
"learning_rate": 0.0005,
"loss": 3.0998,
"max_norm": 35.28767013549805,
"max_norm/layer0": 35.28767013549805,
"mean_norm": 32.571420669555664,
"mean_norm/layer0": 32.571420669555664,
"multicode_k": 1,
"output_norm": 9.74717748324076,
"output_norm/layer0": 9.74717748324076,
"step": 400
},
{
"MSE": 817.218793334961,
"MSE/layer0": 817.218793334961,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.04,
"input_norm": 31.99862334251403,
"input_norm/layer0": 31.99862334251403,
"learning_rate": 0.0005,
"loss": 3.0603,
"max_norm": 35.4771842956543,
"max_norm/layer0": 35.4771842956543,
"mean_norm": 32.68177795410156,
"mean_norm/layer0": 32.68177795410156,
"multicode_k": 1,
"output_norm": 9.985308513641357,
"output_norm/layer0": 9.985308513641357,
"step": 450
},
{
"MSE": 809.1558084106446,
"MSE/layer0": 809.1558084106446,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.05,
"input_norm": 31.998615137736,
"input_norm/layer0": 31.998615137736,
"learning_rate": 0.0005,
"loss": 3.0494,
"max_norm": 35.6486701965332,
"max_norm/layer0": 35.6486701965332,
"mean_norm": 32.793779373168945,
"mean_norm/layer0": 32.793779373168945,
"multicode_k": 1,
"output_norm": 10.232081251144415,
"output_norm/layer0": 10.232081251144415,
"step": 500
},
{
"epoch": 0.05,
"eval_MSE/layer0": 805.1675846628777,
"eval_accuracy": 0.41770872781318447,
"eval_dead_code_fraction/layer0": 0.0,
"eval_input_norm/layer0": 31.998606410347342,
"eval_loss": 2.992654323577881,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 10.360000263063938,
"eval_runtime": 159.8847,
"eval_samples_per_second": 28.915,
"eval_steps_per_second": 1.808,
"step": 500
},
{
"MSE": 801.7215725708003,
"MSE/layer0": 801.7215725708003,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.06,
"input_norm": 31.998598546981817,
"input_norm/layer0": 31.998598546981817,
"learning_rate": 0.0005,
"loss": 2.9547,
"max_norm": 35.86976623535156,
"max_norm/layer0": 35.86976623535156,
"mean_norm": 32.91193962097168,
"mean_norm/layer0": 32.91193962097168,
"multicode_k": 1,
"output_norm": 10.47719025929769,
"output_norm/layer0": 10.47719025929769,
"step": 550
},
{
"MSE": 794.043483174642,
"MSE/layer0": 794.043483174642,
"dead_code_fraction": 0.0,
"dead_code_fraction/layer0": 0.0,
"epoch": 0.06,
"input_norm": 31.99859639167787,
"input_norm/layer0": 31.99859639167787,
"learning_rate": 0.0005,
"loss": 2.9506,
"max_norm": 36.08134078979492,
"max_norm/layer0": 36.08134078979492,
"mean_norm": 33.03110313415527,
"mean_norm/layer0": 33.03110313415527,
"multicode_k": 1,
"output_norm": 10.729146582285566,
"output_norm/layer0": 10.729146582285566,
"step": 600
},
{
"MSE": 786.3193520100913,
"MSE/layer0": 786.3193520100913,
"dead_code_fraction": 5e-05,
"dead_code_fraction/layer0": 5e-05,
"epoch": 0.07,
"input_norm": 31.99857716878254,
"input_norm/layer0": 31.99857716878254,
"learning_rate": 0.0005,
"loss": 2.8944,
"max_norm": 36.33954620361328,
"max_norm/layer0": 36.33954620361328,
"mean_norm": 33.15106773376465,
"mean_norm/layer0": 33.15106773376465,
"multicode_k": 1,
"output_norm": 10.987898168563845,
"output_norm/layer0": 10.987898168563845,
"step": 650
},
{
"MSE": 780.0598099772137,
"MSE/layer0": 780.0598099772137,
"dead_code_fraction": 0.0001,
"dead_code_fraction/layer0": 0.0001,
"epoch": 0.07,
"input_norm": 31.998565645217887,
"input_norm/layer0": 31.998565645217887,
"learning_rate": 0.0005,
"loss": 2.8643,
"max_norm": 36.55862808227539,
"max_norm/layer0": 36.55862808227539,
"mean_norm": 33.269744873046875,
"mean_norm/layer0": 33.269744873046875,
"multicode_k": 1,
"output_norm": 11.218051005999246,
"output_norm/layer0": 11.218051005999246,
"step": 700
},
{
"MSE": 772.4797055053714,
"MSE/layer0": 772.4797055053714,
"dead_code_fraction": 0.00045,
"dead_code_fraction/layer0": 0.00045,
"epoch": 0.07,
"input_norm": 31.998559678395594,
"input_norm/layer0": 31.998559678395594,
"learning_rate": 0.0005,
"loss": 2.8618,
"max_norm": 36.793521881103516,
"max_norm/layer0": 36.793521881103516,
"mean_norm": 33.39421844482422,
"mean_norm/layer0": 33.39421844482422,
"multicode_k": 1,
"output_norm": 11.470201053619387,
"output_norm/layer0": 11.470201053619387,
"step": 750
},
{
"MSE": 766.037492879232,
"MSE/layer0": 766.037492879232,
"dead_code_fraction": 0.00055,
"dead_code_fraction/layer0": 0.00055,
"epoch": 0.08,
"input_norm": 31.99854364713033,
"input_norm/layer0": 31.99854364713033,
"learning_rate": 0.0005,
"loss": 2.8403,
"max_norm": 37.0079231262207,
"max_norm/layer0": 37.0079231262207,
"mean_norm": 33.52132034301758,
"mean_norm/layer0": 33.52132034301758,
"multicode_k": 1,
"output_norm": 11.711471532185875,
"output_norm/layer0": 11.711471532185875,
"step": 800
},
{
"MSE": 759.9610600789387,
"MSE/layer0": 759.9610600789387,
"dead_code_fraction": 0.00135,
"dead_code_fraction/layer0": 0.00135,
"epoch": 0.09,
"input_norm": 31.998529828389472,
"input_norm/layer0": 31.998529828389472,
"learning_rate": 0.0005,
"loss": 2.7453,
"max_norm": 37.20747375488281,
"max_norm/layer0": 37.20747375488281,
"mean_norm": 33.64577674865723,
"mean_norm/layer0": 33.64577674865723,
"multicode_k": 1,
"output_norm": 11.93199801921844,
"output_norm/layer0": 11.93199801921844,
"step": 850
},
{
"MSE": 753.5576912434896,
"MSE/layer0": 753.5576912434896,
"dead_code_fraction": 0.00205,
"dead_code_fraction/layer0": 0.00205,
"epoch": 0.09,
"input_norm": 31.99852911949157,
"input_norm/layer0": 31.99852911949157,
"learning_rate": 0.0005,
"loss": 2.7975,
"max_norm": 37.432743072509766,
"max_norm/layer0": 37.432743072509766,
"mean_norm": 33.778066635131836,
"mean_norm/layer0": 33.778066635131836,
"multicode_k": 1,
"output_norm": 12.165767738024394,
"output_norm/layer0": 12.165767738024394,
"step": 900
},
{
"MSE": 747.6473927815753,
"MSE/layer0": 747.6473927815753,
"dead_code_fraction": 0.00335,
"dead_code_fraction/layer0": 0.00335,
"epoch": 0.1,
"input_norm": 31.998517106374106,
"input_norm/layer0": 31.998517106374106,
"learning_rate": 0.0005,
"loss": 2.7378,
"max_norm": 37.62055969238281,
"max_norm/layer0": 37.62055969238281,
"mean_norm": 33.90963554382324,
"mean_norm/layer0": 33.90963554382324,
"multicode_k": 1,
"output_norm": 12.390189347267153,
"output_norm/layer0": 12.390189347267153,
"step": 950
},
{
"MSE": 742.6674826049805,
"MSE/layer0": 742.6674826049805,
"dead_code_fraction": 0.0048,
"dead_code_fraction/layer0": 0.0048,
"epoch": 0.1,
"input_norm": 31.998499689102182,
"input_norm/layer0": 31.998499689102182,
"learning_rate": 0.0005,
"loss": 2.6986,
"max_norm": 37.880615234375,
"max_norm/layer0": 37.880615234375,
"mean_norm": 34.04428672790527,
"mean_norm/layer0": 34.04428672790527,
"multicode_k": 1,
"output_norm": 12.59642965157827,
"output_norm/layer0": 12.59642965157827,
"step": 1000
},
{
"epoch": 0.1,
"eval_MSE/layer0": 739.3243520424373,
"eval_accuracy": 0.44721058737930897,
"eval_dead_code_fraction/layer0": 0.00845,
"eval_input_norm/layer0": 31.998487053973697,
"eval_loss": 2.707960367202759,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 12.71647696584792,
"eval_runtime": 157.5908,
"eval_samples_per_second": 29.335,
"eval_steps_per_second": 1.834,
"step": 1000
},
{
"MSE": 736.2877898152667,
"MSE/layer0": 736.2877898152667,
"dead_code_fraction": 0.00735,
"dead_code_fraction/layer0": 0.00735,
"epoch": 0.1,
"input_norm": 31.998484554290766,
"input_norm/layer0": 31.998484554290766,
"learning_rate": 0.0005,
"loss": 2.7222,
"max_norm": 38.21133804321289,
"max_norm/layer0": 38.21133804321289,
"mean_norm": 34.17984199523926,
"mean_norm/layer0": 34.17984199523926,
"multicode_k": 1,
"output_norm": 12.82279133001963,
"output_norm/layer0": 12.82279133001963,
"step": 1050
},
{
"MSE": 731.6754523722336,
"MSE/layer0": 731.6754523722336,
"dead_code_fraction": 0.01015,
"dead_code_fraction/layer0": 0.01015,
"epoch": 0.11,
"input_norm": 31.998473711013787,
"input_norm/layer0": 31.998473711013787,
"learning_rate": 0.0005,
"loss": 2.652,
"max_norm": 38.533973693847656,
"max_norm/layer0": 38.533973693847656,
"mean_norm": 34.31424903869629,
"mean_norm/layer0": 34.31424903869629,
"multicode_k": 1,
"output_norm": 13.017293116251633,
"output_norm/layer0": 13.017293116251633,
"step": 1100
},
{
"MSE": 726.8081079101562,
"MSE/layer0": 726.8081079101562,
"dead_code_fraction": 0.013,
"dead_code_fraction/layer0": 0.013,
"epoch": 0.12,
"input_norm": 31.99846080144247,
"input_norm/layer0": 31.99846080144247,
"learning_rate": 0.0005,
"loss": 2.6519,
"max_norm": 38.87154769897461,
"max_norm/layer0": 38.87154769897461,
"mean_norm": 34.454498291015625,
"mean_norm/layer0": 34.454498291015625,
"multicode_k": 1,
"output_norm": 13.209378539721174,
"output_norm/layer0": 13.209378539721174,
"step": 1150
},
{
"MSE": 722.3268162027996,
"MSE/layer0": 722.3268162027996,
"dead_code_fraction": 0.01565,
"dead_code_fraction/layer0": 0.01565,
"epoch": 0.12,
"input_norm": 31.998446766535434,
"input_norm/layer0": 31.998446766535434,
"learning_rate": 0.0005,
"loss": 2.6464,
"max_norm": 39.23857879638672,
"max_norm/layer0": 39.23857879638672,
"mean_norm": 34.597312927246094,
"mean_norm/layer0": 34.597312927246094,
"multicode_k": 1,
"output_norm": 13.40400979042053,
"output_norm/layer0": 13.40400979042053,
"step": 1200
},
{
"MSE": 717.3231912231446,
"MSE/layer0": 717.3231912231446,
"dead_code_fraction": 0.0241,
"dead_code_fraction/layer0": 0.0241,
"epoch": 0.12,
"input_norm": 31.998441489537555,
"input_norm/layer0": 31.998441489537555,
"learning_rate": 0.0005,
"loss": 2.6563,
"max_norm": 39.60569381713867,
"max_norm/layer0": 39.60569381713867,
"mean_norm": 34.73863220214844,
"mean_norm/layer0": 34.73863220214844,
"multicode_k": 1,
"output_norm": 13.590513488451638,
"output_norm/layer0": 13.590513488451638,
"step": 1250
},
{
"MSE": 713.6523872884117,
"MSE/layer0": 713.6523872884117,
"dead_code_fraction": 0.02485,
"dead_code_fraction/layer0": 0.02485,
"epoch": 0.13,
"input_norm": 31.998419742584225,
"input_norm/layer0": 31.998419742584225,
"learning_rate": 0.0005,
"loss": 2.5806,
"max_norm": 39.939239501953125,
"max_norm/layer0": 39.939239501953125,
"mean_norm": 34.87986946105957,
"mean_norm/layer0": 34.87986946105957,
"multicode_k": 1,
"output_norm": 13.766959317525227,
"output_norm/layer0": 13.766959317525227,
"step": 1300
},
{
"MSE": 709.5852165730794,
"MSE/layer0": 709.5852165730794,
"dead_code_fraction": 0.02925,
"dead_code_fraction/layer0": 0.02925,
"epoch": 0.14,
"input_norm": 31.998412898381545,
"input_norm/layer0": 31.998412898381545,
"learning_rate": 0.0005,
"loss": 2.5789,
"max_norm": 40.28993225097656,
"max_norm/layer0": 40.28993225097656,
"mean_norm": 35.022348403930664,
"mean_norm/layer0": 35.022348403930664,
"multicode_k": 1,
"output_norm": 13.93345036347707,
"output_norm/layer0": 13.93345036347707,
"step": 1350
},
{
"MSE": 705.2143248494463,
"MSE/layer0": 705.2143248494463,
"dead_code_fraction": 0.03375,
"dead_code_fraction/layer0": 0.03375,
"epoch": 0.14,
"input_norm": 31.9984123802185,
"input_norm/layer0": 31.9984123802185,
"learning_rate": 0.0005,
"loss": 2.5943,
"max_norm": 40.63530349731445,
"max_norm/layer0": 40.63530349731445,
"mean_norm": 35.164276123046875,
"mean_norm/layer0": 35.164276123046875,
"multicode_k": 1,
"output_norm": 14.105911358197524,
"output_norm/layer0": 14.105911358197524,
"step": 1400
},
{
"MSE": 702.3593349202476,
"MSE/layer0": 702.3593349202476,
"dead_code_fraction": 0.0404,
"dead_code_fraction/layer0": 0.0404,
"epoch": 0.14,
"input_norm": 31.99839937845865,
"input_norm/layer0": 31.99839937845865,
"learning_rate": 0.0005,
"loss": 2.5407,
"max_norm": 40.98182678222656,
"max_norm/layer0": 40.98182678222656,
"mean_norm": 35.30343246459961,
"mean_norm/layer0": 35.30343246459961,
"multicode_k": 1,
"output_norm": 14.2450444761912,
"output_norm/layer0": 14.2450444761912,
"step": 1450
},
{
"MSE": 699.0307844034837,
"MSE/layer0": 699.0307844034837,
"dead_code_fraction": 0.04535,
"dead_code_fraction/layer0": 0.04535,
"epoch": 0.15,
"input_norm": 31.998390986124676,
"input_norm/layer0": 31.998390986124676,
"learning_rate": 0.0005,
"loss": 2.5145,
"max_norm": 41.328433990478516,
"max_norm/layer0": 41.328433990478516,
"mean_norm": 35.445411682128906,
"mean_norm/layer0": 35.445411682128906,
"multicode_k": 1,
"output_norm": 14.399013953208918,
"output_norm/layer0": 14.399013953208918,
"step": 1500
},
{
"epoch": 0.15,
"eval_MSE/layer0": 697.1178701616536,
"eval_accuracy": 0.4637486628652817,
"eval_dead_code_fraction/layer0": 0.05465,
"eval_input_norm/layer0": 31.99837304089923,
"eval_loss": 2.525156259536743,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 14.48893911880305,
"eval_runtime": 156.9005,
"eval_samples_per_second": 29.465,
"eval_steps_per_second": 1.842,
"step": 1500
},
{
"MSE": 696.0442759195965,
"MSE/layer0": 696.0442759195965,
"dead_code_fraction": 0.05145,
"dead_code_fraction/layer0": 0.05145,
"epoch": 0.15,
"input_norm": 31.99836520512899,
"input_norm/layer0": 31.99836520512899,
"learning_rate": 0.0005,
"loss": 2.4631,
"max_norm": 41.6606559753418,
"max_norm/layer0": 41.6606559753418,
"mean_norm": 35.58424758911133,
"mean_norm/layer0": 35.58424758911133,
"multicode_k": 1,
"output_norm": 14.54295777956645,
"output_norm/layer0": 14.54295777956645,
"step": 1550
},
{
"MSE": 691.8516132609051,
"MSE/layer0": 691.8516132609051,
"dead_code_fraction": 0.0558,
"dead_code_fraction/layer0": 0.0558,
"epoch": 0.16,
"input_norm": 31.998375968933097,
"input_norm/layer0": 31.998375968933097,
"learning_rate": 0.0005,
"loss": 2.5501,
"max_norm": 42.08574676513672,
"max_norm/layer0": 42.08574676513672,
"mean_norm": 35.72518730163574,
"mean_norm/layer0": 35.72518730163574,
"multicode_k": 1,
"output_norm": 14.692513732910157,
"output_norm/layer0": 14.692513732910157,
"step": 1600
},
{
"MSE": 688.7181396484375,
"MSE/layer0": 688.7181396484375,
"dead_code_fraction": 0.0595,
"dead_code_fraction/layer0": 0.0595,
"epoch": 0.17,
"input_norm": 31.99835859616598,
"input_norm/layer0": 31.99835859616598,
"learning_rate": 0.0005,
"loss": 2.4699,
"max_norm": 42.610233306884766,
"max_norm/layer0": 42.610233306884766,
"mean_norm": 35.86595916748047,
"mean_norm/layer0": 35.86595916748047,
"multicode_k": 1,
"output_norm": 14.833582207361854,
"output_norm/layer0": 14.833582207361854,
"step": 1650
},
{
"MSE": 685.5445822143549,
"MSE/layer0": 685.5445822143549,
"dead_code_fraction": 0.06595,
"dead_code_fraction/layer0": 0.06595,
"epoch": 0.17,
"input_norm": 31.99835782368978,
"input_norm/layer0": 31.99835782368978,
"learning_rate": 0.0005,
"loss": 2.5014,
"max_norm": 43.15216064453125,
"max_norm/layer0": 43.15216064453125,
"mean_norm": 36.00602149963379,
"mean_norm/layer0": 36.00602149963379,
"multicode_k": 1,
"output_norm": 14.96381513118744,
"output_norm/layer0": 14.96381513118744,
"step": 1700
},
{
"MSE": 683.2388099161783,
"MSE/layer0": 683.2388099161783,
"dead_code_fraction": 0.0708,
"dead_code_fraction/layer0": 0.0708,
"epoch": 0.17,
"input_norm": 31.998353064854925,
"input_norm/layer0": 31.998353064854925,
"learning_rate": 0.0005,
"loss": 2.4762,
"max_norm": 43.683807373046875,
"max_norm/layer0": 43.683807373046875,
"mean_norm": 36.14344596862793,
"mean_norm/layer0": 36.14344596862793,
"multicode_k": 1,
"output_norm": 15.08479848066965,
"output_norm/layer0": 15.08479848066965,
"step": 1750
},
{
"MSE": 680.5147140502929,
"MSE/layer0": 680.5147140502929,
"dead_code_fraction": 0.0711,
"dead_code_fraction/layer0": 0.0711,
"epoch": 0.18,
"input_norm": 31.998323942820228,
"input_norm/layer0": 31.998323942820228,
"learning_rate": 0.0005,
"loss": 2.4017,
"max_norm": 44.204158782958984,
"max_norm/layer0": 44.204158782958984,
"mean_norm": 36.281328201293945,
"mean_norm/layer0": 36.281328201293945,
"multicode_k": 1,
"output_norm": 15.21150853157043,
"output_norm/layer0": 15.21150853157043,
"step": 1800
},
{
"MSE": 677.8235699462891,
"MSE/layer0": 677.8235699462891,
"dead_code_fraction": 0.0789,
"dead_code_fraction/layer0": 0.0789,
"epoch": 0.18,
"input_norm": 31.99832211176553,
"input_norm/layer0": 31.99832211176553,
"learning_rate": 0.0005,
"loss": 2.4204,
"max_norm": 44.73421096801758,
"max_norm/layer0": 44.73421096801758,
"mean_norm": 36.41860580444336,
"mean_norm/layer0": 36.41860580444336,
"multicode_k": 1,
"output_norm": 15.32913914521535,
"output_norm/layer0": 15.32913914521535,
"step": 1850
},
{
"MSE": 674.8260657755535,
"MSE/layer0": 674.8260657755535,
"dead_code_fraction": 0.0859,
"dead_code_fraction/layer0": 0.0859,
"epoch": 0.19,
"input_norm": 31.998327109018952,
"input_norm/layer0": 31.998327109018952,
"learning_rate": 0.0005,
"loss": 2.4612,
"max_norm": 45.264217376708984,
"max_norm/layer0": 45.264217376708984,
"mean_norm": 36.55377197265625,
"mean_norm/layer0": 36.55377197265625,
"multicode_k": 1,
"output_norm": 15.449233846664427,
"output_norm/layer0": 15.449233846664427,
"step": 1900
},
{
"MSE": 672.4308366902667,
"MSE/layer0": 672.4308366902667,
"dead_code_fraction": 0.08975,
"dead_code_fraction/layer0": 0.08975,
"epoch": 0.2,
"input_norm": 31.998313461939492,
"input_norm/layer0": 31.998313461939492,
"learning_rate": 0.0005,
"loss": 2.413,
"max_norm": 45.7476692199707,
"max_norm/layer0": 45.7476692199707,
"mean_norm": 36.687320709228516,
"mean_norm/layer0": 36.687320709228516,
"multicode_k": 1,
"output_norm": 15.564360074996952,
"output_norm/layer0": 15.564360074996952,
"step": 1950
},
{
"MSE": 669.9350853474932,
"MSE/layer0": 669.9350853474932,
"dead_code_fraction": 0.09495,
"dead_code_fraction/layer0": 0.09495,
"epoch": 0.2,
"input_norm": 31.998307892481467,
"input_norm/layer0": 31.998307892481467,
"learning_rate": 0.0005,
"loss": 2.4197,
"max_norm": 46.2595100402832,
"max_norm/layer0": 46.2595100402832,
"mean_norm": 36.82127571105957,
"mean_norm/layer0": 36.82127571105957,
"multicode_k": 1,
"output_norm": 15.671763955752056,
"output_norm/layer0": 15.671763955752056,
"step": 2000
},
{
"epoch": 0.2,
"eval_MSE/layer0": 670.0254334077002,
"eval_accuracy": 0.47584128742153486,
"eval_dead_code_fraction/layer0": 0.0988,
"eval_input_norm/layer0": 31.99830309178647,
"eval_loss": 2.409283399581909,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 15.728763990528059,
"eval_runtime": 158.0617,
"eval_samples_per_second": 29.248,
"eval_steps_per_second": 1.828,
"step": 2000
},
{
"MSE": 667.9600658162435,
"MSE/layer0": 667.9600658162435,
"dead_code_fraction": 0.09825,
"dead_code_fraction/layer0": 0.09825,
"epoch": 0.2,
"input_norm": 31.99829890569051,
"input_norm/layer0": 31.99829890569051,
"learning_rate": 0.0005,
"loss": 2.3908,
"max_norm": 46.76186752319336,
"max_norm/layer0": 46.76186752319336,
"mean_norm": 36.954044342041016,
"mean_norm/layer0": 36.954044342041016,
"multicode_k": 1,
"output_norm": 15.786985732714339,
"output_norm/layer0": 15.786985732714339,
"step": 2050
},
{
"MSE": 665.8677533976238,
"MSE/layer0": 665.8677533976238,
"dead_code_fraction": 0.10105,
"dead_code_fraction/layer0": 0.10105,
"epoch": 0.21,
"input_norm": 31.998287776311233,
"input_norm/layer0": 31.998287776311233,
"learning_rate": 0.0005,
"loss": 2.3532,
"max_norm": 47.23879623413086,
"max_norm/layer0": 47.23879623413086,
"mean_norm": 37.08414268493652,
"mean_norm/layer0": 37.08414268493652,
"multicode_k": 1,
"output_norm": 15.887771523793544,
"output_norm/layer0": 15.887771523793544,
"step": 2100
},
{
"MSE": 664.0484969075521,
"MSE/layer0": 664.0484969075521,
"dead_code_fraction": 0.10515,
"dead_code_fraction/layer0": 0.10515,
"epoch": 0.21,
"input_norm": 31.998289143244435,
"input_norm/layer0": 31.998289143244435,
"learning_rate": 0.0005,
"loss": 2.3835,
"max_norm": 47.72446823120117,
"max_norm/layer0": 47.72446823120117,
"mean_norm": 37.21368408203125,
"mean_norm/layer0": 37.21368408203125,
"multicode_k": 1,
"output_norm": 15.987558364868171,
"output_norm/layer0": 15.987558364868171,
"step": 2150
},
{
"MSE": 662.043323059082,
"MSE/layer0": 662.043323059082,
"dead_code_fraction": 0.11065,
"dead_code_fraction/layer0": 0.11065,
"epoch": 0.22,
"input_norm": 31.998284489313747,
"input_norm/layer0": 31.998284489313747,
"learning_rate": 0.0005,
"loss": 2.3711,
"max_norm": 48.21998596191406,
"max_norm/layer0": 48.21998596191406,
"mean_norm": 37.34214973449707,
"mean_norm/layer0": 37.34214973449707,
"multicode_k": 1,
"output_norm": 16.084624527295432,
"output_norm/layer0": 16.084624527295432,
"step": 2200
},
{
"MSE": 660.071201883952,
"MSE/layer0": 660.071201883952,
"dead_code_fraction": 0.1138,
"dead_code_fraction/layer0": 0.1138,
"epoch": 0.23,
"input_norm": 31.998274552027382,
"input_norm/layer0": 31.998274552027382,
"learning_rate": 0.0005,
"loss": 2.3361,
"max_norm": 48.656124114990234,
"max_norm/layer0": 48.656124114990234,
"mean_norm": 37.46707344055176,
"mean_norm/layer0": 37.46707344055176,
"multicode_k": 1,
"output_norm": 16.1770029671987,
"output_norm/layer0": 16.1770029671987,
"step": 2250
},
{
"MSE": 658.2848066202794,
"MSE/layer0": 658.2848066202794,
"dead_code_fraction": 0.11715,
"dead_code_fraction/layer0": 0.11715,
"epoch": 0.23,
"input_norm": 31.998281342188513,
"input_norm/layer0": 31.998281342188513,
"learning_rate": 0.0005,
"loss": 2.3697,
"max_norm": 49.14850616455078,
"max_norm/layer0": 49.14850616455078,
"mean_norm": 37.592119216918945,
"mean_norm/layer0": 37.592119216918945,
"multicode_k": 1,
"output_norm": 16.273267321586616,
"output_norm/layer0": 16.273267321586616,
"step": 2300
},
{
"MSE": 656.6614913940434,
"MSE/layer0": 656.6614913940434,
"dead_code_fraction": 0.1208,
"dead_code_fraction/layer0": 0.1208,
"epoch": 0.23,
"input_norm": 31.99827545166017,
"input_norm/layer0": 31.99827545166017,
"learning_rate": 0.0005,
"loss": 2.3691,
"max_norm": 49.611228942871094,
"max_norm/layer0": 49.611228942871094,
"mean_norm": 37.71496772766113,
"mean_norm/layer0": 37.71496772766113,
"multicode_k": 1,
"output_norm": 16.361617434819536,
"output_norm/layer0": 16.361617434819536,
"step": 2350
},
{
"MSE": 654.7551118977863,
"MSE/layer0": 654.7551118977863,
"dead_code_fraction": 0.12205,
"dead_code_fraction/layer0": 0.12205,
"epoch": 0.24,
"input_norm": 31.998258228302007,
"input_norm/layer0": 31.998258228302007,
"learning_rate": 0.0005,
"loss": 2.3413,
"max_norm": 50.082008361816406,
"max_norm/layer0": 50.082008361816406,
"mean_norm": 37.836740493774414,
"mean_norm/layer0": 37.836740493774414,
"multicode_k": 1,
"output_norm": 16.442067163785307,
"output_norm/layer0": 16.442067163785307,
"step": 2400
},
{
"MSE": 653.2320398966472,
"MSE/layer0": 653.2320398966472,
"dead_code_fraction": 0.1261,
"dead_code_fraction/layer0": 0.1261,
"epoch": 0.24,
"input_norm": 31.99826599121093,
"input_norm/layer0": 31.99826599121093,
"learning_rate": 0.0005,
"loss": 2.3415,
"max_norm": 50.542850494384766,
"max_norm/layer0": 50.542850494384766,
"mean_norm": 37.956573486328125,
"mean_norm/layer0": 37.956573486328125,
"multicode_k": 1,
"output_norm": 16.545647277832018,
"output_norm/layer0": 16.545647277832018,
"step": 2450
},
{
"MSE": 652.0689453124999,
"MSE/layer0": 652.0689453124999,
"dead_code_fraction": 0.1305,
"dead_code_fraction/layer0": 0.1305,
"epoch": 0.25,
"input_norm": 31.998266054789227,
"input_norm/layer0": 31.998266054789227,
"learning_rate": 0.0005,
"loss": 2.3541,
"max_norm": 50.972904205322266,
"max_norm/layer0": 50.972904205322266,
"mean_norm": 38.07469177246094,
"mean_norm/layer0": 38.07469177246094,
"multicode_k": 1,
"output_norm": 16.614015088081356,
"output_norm/layer0": 16.614015088081356,
"step": 2500
},
{
"epoch": 0.25,
"eval_MSE/layer0": 651.1296869864225,
"eval_accuracy": 0.48371217143066175,
"eval_dead_code_fraction/layer0": 0.1337,
"eval_input_norm/layer0": 31.998264631048162,
"eval_loss": 2.340399742126465,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 16.66022368217996,
"eval_runtime": 157.8946,
"eval_samples_per_second": 29.279,
"eval_steps_per_second": 1.83,
"step": 2500
},
{
"MSE": 650.5154676310221,
"MSE/layer0": 650.5154676310221,
"dead_code_fraction": 0.1312,
"dead_code_fraction/layer0": 0.1312,
"epoch": 0.26,
"input_norm": 31.99826429367065,
"input_norm/layer0": 31.99826429367065,
"learning_rate": 0.0005,
"loss": 2.3374,
"max_norm": 51.42794418334961,
"max_norm/layer0": 51.42794418334961,
"mean_norm": 38.19082260131836,
"mean_norm/layer0": 38.19082260131836,
"multicode_k": 1,
"output_norm": 16.705677251815793,
"output_norm/layer0": 16.705677251815793,
"step": 2550
},
{
"MSE": 649.4798397827149,
"MSE/layer0": 649.4798397827149,
"dead_code_fraction": 0.13625,
"dead_code_fraction/layer0": 0.13625,
"epoch": 0.26,
"input_norm": 31.99826188405354,
"input_norm/layer0": 31.99826188405354,
"learning_rate": 0.0005,
"loss": 2.3364,
"max_norm": 51.84079360961914,
"max_norm/layer0": 51.84079360961914,
"mean_norm": 38.306650161743164,
"mean_norm/layer0": 38.306650161743164,
"multicode_k": 1,
"output_norm": 16.774758176803587,
"output_norm/layer0": 16.774758176803587,
"step": 2600
},
{
"MSE": 648.4373052978513,
"MSE/layer0": 648.4373052978513,
"dead_code_fraction": 0.13795,
"dead_code_fraction/layer0": 0.13795,
"epoch": 0.27,
"input_norm": 31.998252007166542,
"input_norm/layer0": 31.998252007166542,
"learning_rate": 0.0005,
"loss": 2.3162,
"max_norm": 52.24661636352539,
"max_norm/layer0": 52.24661636352539,
"mean_norm": 38.41937828063965,
"mean_norm/layer0": 38.41937828063965,
"multicode_k": 1,
"output_norm": 16.851604979832963,
"output_norm/layer0": 16.851604979832963,
"step": 2650
},
{
"MSE": 647.0678014119467,
"MSE/layer0": 647.0678014119467,
"dead_code_fraction": 0.1397,
"dead_code_fraction/layer0": 0.1397,
"epoch": 0.27,
"input_norm": 31.998265930811563,
"input_norm/layer0": 31.998265930811563,
"learning_rate": 0.0005,
"loss": 2.3497,
"max_norm": 52.66170120239258,
"max_norm/layer0": 52.66170120239258,
"mean_norm": 38.53024482727051,
"mean_norm/layer0": 38.53024482727051,
"multicode_k": 1,
"output_norm": 16.925416787465398,
"output_norm/layer0": 16.925416787465398,
"step": 2700
},
{
"MSE": 646.4085242716471,
"MSE/layer0": 646.4085242716471,
"dead_code_fraction": 0.14125,
"dead_code_fraction/layer0": 0.14125,
"epoch": 0.28,
"input_norm": 31.99825245221455,
"input_norm/layer0": 31.99825245221455,
"learning_rate": 0.0005,
"loss": 2.301,
"max_norm": 53.03037643432617,
"max_norm/layer0": 53.03037643432617,
"mean_norm": 38.63713836669922,
"mean_norm/layer0": 38.63713836669922,
"multicode_k": 1,
"output_norm": 16.985576423009235,
"output_norm/layer0": 16.985576423009235,
"step": 2750
},
{
"MSE": 644.7344170125325,
"MSE/layer0": 644.7344170125325,
"dead_code_fraction": 0.14415,
"dead_code_fraction/layer0": 0.14415,
"epoch": 0.28,
"input_norm": 31.998260081609082,
"input_norm/layer0": 31.998260081609082,
"learning_rate": 0.0005,
"loss": 2.3395,
"max_norm": 53.41487503051758,
"max_norm/layer0": 53.41487503051758,
"mean_norm": 38.74285697937012,
"mean_norm/layer0": 38.74285697937012,
"multicode_k": 1,
"output_norm": 17.068980147043867,
"output_norm/layer0": 17.068980147043867,
"step": 2800
},
{
"MSE": 644.636144104004,
"MSE/layer0": 644.636144104004,
"dead_code_fraction": 0.14565,
"dead_code_fraction/layer0": 0.14565,
"epoch": 0.28,
"input_norm": 31.998243366877247,
"input_norm/layer0": 31.998243366877247,
"learning_rate": 0.0005,
"loss": 2.2757,
"max_norm": 53.792579650878906,
"max_norm/layer0": 53.792579650878906,
"mean_norm": 38.84635543823242,
"mean_norm/layer0": 38.84635543823242,
"multicode_k": 1,
"output_norm": 17.124992834726967,
"output_norm/layer0": 17.124992834726967,
"step": 2850
},
{
"MSE": 643.8843309529623,
"MSE/layer0": 643.8843309529623,
"dead_code_fraction": 0.14495,
"dead_code_fraction/layer0": 0.14495,
"epoch": 0.29,
"input_norm": 31.998242295583093,
"input_norm/layer0": 31.998242295583093,
"learning_rate": 0.0005,
"loss": 2.3057,
"max_norm": 54.146453857421875,
"max_norm/layer0": 54.146453857421875,
"mean_norm": 38.947309494018555,
"mean_norm/layer0": 38.947309494018555,
"multicode_k": 1,
"output_norm": 17.17694611549377,
"output_norm/layer0": 17.17694611549377,
"step": 2900
},
{
"MSE": 642.6776557413741,
"MSE/layer0": 642.6776557413741,
"dead_code_fraction": 0.1504,
"dead_code_fraction/layer0": 0.1504,
"epoch": 0.29,
"input_norm": 31.998272593816125,
"input_norm/layer0": 31.998272593816125,
"learning_rate": 0.0005,
"loss": 2.3545,
"max_norm": 54.51527404785156,
"max_norm/layer0": 54.51527404785156,
"mean_norm": 39.047607421875,
"mean_norm/layer0": 39.047607421875,
"multicode_k": 1,
"output_norm": 17.240235595703133,
"output_norm/layer0": 17.240235595703133,
"step": 2950
},
{
"MSE": 643.1047460937498,
"MSE/layer0": 643.1047460937498,
"dead_code_fraction": 0.1483,
"dead_code_fraction/layer0": 0.1483,
"epoch": 0.3,
"input_norm": 31.998249003092454,
"input_norm/layer0": 31.998249003092454,
"learning_rate": 0.0005,
"loss": 2.2742,
"max_norm": 54.86568832397461,
"max_norm/layer0": 54.86568832397461,
"mean_norm": 39.14469337463379,
"mean_norm/layer0": 39.14469337463379,
"multicode_k": 1,
"output_norm": 17.28876600265503,
"output_norm/layer0": 17.28876600265503,
"step": 3000
},
{
"epoch": 0.3,
"eval_MSE/layer0": 642.6360311704152,
"eval_accuracy": 0.49030507287608877,
"eval_dead_code_fraction/layer0": 0.14995,
"eval_input_norm/layer0": 31.998255163205542,
"eval_loss": 2.2907073497772217,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 17.324301861386118,
"eval_runtime": 157.9262,
"eval_samples_per_second": 29.273,
"eval_steps_per_second": 1.83,
"step": 3000
},
{
"MSE": 641.9817254638668,
"MSE/layer0": 641.9817254638668,
"dead_code_fraction": 0.1511,
"dead_code_fraction/layer0": 0.1511,
"epoch": 0.3,
"input_norm": 31.99826343536376,
"input_norm/layer0": 31.99826343536376,
"learning_rate": 0.0005,
"loss": 2.3422,
"max_norm": 55.2226676940918,
"max_norm/layer0": 55.2226676940918,
"mean_norm": 39.23999786376953,
"mean_norm/layer0": 39.23999786376953,
"multicode_k": 1,
"output_norm": 17.350644410451252,
"output_norm/layer0": 17.350644410451252,
"step": 3050
},
{
"MSE": 641.9993333943689,
"MSE/layer0": 641.9993333943689,
"dead_code_fraction": 0.1504,
"dead_code_fraction/layer0": 0.1504,
"epoch": 0.31,
"input_norm": 31.998250141143807,
"input_norm/layer0": 31.998250141143807,
"learning_rate": 0.0005,
"loss": 2.2814,
"max_norm": 55.56163787841797,
"max_norm/layer0": 55.56163787841797,
"mean_norm": 39.33370780944824,
"mean_norm/layer0": 39.33370780944824,
"multicode_k": 1,
"output_norm": 17.39312816301982,
"output_norm/layer0": 17.39312816301982,
"step": 3100
},
{
"MSE": 641.5148900349936,
"MSE/layer0": 641.5148900349936,
"dead_code_fraction": 0.15185,
"dead_code_fraction/layer0": 0.15185,
"epoch": 0.32,
"input_norm": 31.998260364532467,
"input_norm/layer0": 31.998260364532467,
"learning_rate": 0.0005,
"loss": 2.3152,
"max_norm": 55.8856315612793,
"max_norm/layer0": 55.8856315612793,
"mean_norm": 39.42481803894043,
"mean_norm/layer0": 39.42481803894043,
"multicode_k": 1,
"output_norm": 17.44178107897441,
"output_norm/layer0": 17.44178107897441,
"step": 3150
},
{
"MSE": 640.499552408854,
"MSE/layer0": 640.499552408854,
"dead_code_fraction": 0.1516,
"dead_code_fraction/layer0": 0.1516,
"epoch": 0.32,
"input_norm": 31.99825292587281,
"input_norm/layer0": 31.99825292587281,
"learning_rate": 0.0005,
"loss": 2.2462,
"max_norm": 56.21445846557617,
"max_norm/layer0": 56.21445846557617,
"mean_norm": 39.51395606994629,
"mean_norm/layer0": 39.51395606994629,
"multicode_k": 1,
"output_norm": 17.50789775530497,
"output_norm/layer0": 17.50789775530497,
"step": 3200
},
{
"MSE": 640.565166829427,
"MSE/layer0": 640.565166829427,
"dead_code_fraction": 0.15285,
"dead_code_fraction/layer0": 0.15285,
"epoch": 0.33,
"input_norm": 31.998250306447353,
"input_norm/layer0": 31.998250306447353,
"learning_rate": 0.0005,
"loss": 2.2595,
"max_norm": 56.526973724365234,
"max_norm/layer0": 56.526973724365234,
"mean_norm": 39.601173400878906,
"mean_norm/layer0": 39.601173400878906,
"multicode_k": 1,
"output_norm": 17.54366443951924,
"output_norm/layer0": 17.54366443951924,
"step": 3250
},
{
"MSE": 640.8991118367509,
"MSE/layer0": 640.8991118367509,
"dead_code_fraction": 0.1531,
"dead_code_fraction/layer0": 0.1531,
"epoch": 0.33,
"input_norm": 31.998245798746755,
"input_norm/layer0": 31.998245798746755,
"learning_rate": 0.0005,
"loss": 2.2326,
"max_norm": 56.82651138305664,
"max_norm/layer0": 56.82651138305664,
"mean_norm": 39.684635162353516,
"mean_norm/layer0": 39.684635162353516,
"multicode_k": 1,
"output_norm": 17.578553660710664,
"output_norm/layer0": 17.578553660710664,
"step": 3300
},
{
"MSE": 640.486218770345,
"MSE/layer0": 640.486218770345,
"dead_code_fraction": 0.15345,
"dead_code_fraction/layer0": 0.15345,
"epoch": 0.34,
"input_norm": 31.998255780537924,
"input_norm/layer0": 31.998255780537924,
"learning_rate": 0.0005,
"loss": 2.2733,
"max_norm": 57.12877655029297,
"max_norm/layer0": 57.12877655029297,
"mean_norm": 39.76711463928223,
"mean_norm/layer0": 39.76711463928223,
"multicode_k": 1,
"output_norm": 17.619242086410516,
"output_norm/layer0": 17.619242086410516,
"step": 3350
},
{
"MSE": 639.5240251668292,
"MSE/layer0": 639.5240251668292,
"dead_code_fraction": 0.15565,
"dead_code_fraction/layer0": 0.15565,
"epoch": 0.34,
"input_norm": 31.998264500300095,
"input_norm/layer0": 31.998264500300095,
"learning_rate": 0.0005,
"loss": 2.2633,
"max_norm": 57.42041778564453,
"max_norm/layer0": 57.42041778564453,
"mean_norm": 39.84800338745117,
"mean_norm/layer0": 39.84800338745117,
"multicode_k": 1,
"output_norm": 17.667484652201342,
"output_norm/layer0": 17.667484652201342,
"step": 3400
},
{
"MSE": 639.2691174316408,
"MSE/layer0": 639.2691174316408,
"dead_code_fraction": 0.15605,
"dead_code_fraction/layer0": 0.15605,
"epoch": 0.34,
"input_norm": 31.99825723965962,
"input_norm/layer0": 31.99825723965962,
"learning_rate": 0.0005,
"loss": 2.2495,
"max_norm": 57.706260681152344,
"max_norm/layer0": 57.706260681152344,
"mean_norm": 39.92698097229004,
"mean_norm/layer0": 39.92698097229004,
"multicode_k": 1,
"output_norm": 17.705148900349947,
"output_norm/layer0": 17.705148900349947,
"step": 3450
},
{
"MSE": 639.3908192952478,
"MSE/layer0": 639.3908192952478,
"dead_code_fraction": 0.15655,
"dead_code_fraction/layer0": 0.15655,
"epoch": 0.35,
"input_norm": 31.9982618745168,
"input_norm/layer0": 31.9982618745168,
"learning_rate": 0.0005,
"loss": 2.2488,
"max_norm": 57.98209762573242,
"max_norm/layer0": 57.98209762573242,
"mean_norm": 40.005022048950195,
"mean_norm/layer0": 40.005022048950195,
"multicode_k": 1,
"output_norm": 17.73683495521545,
"output_norm/layer0": 17.73683495521545,
"step": 3500
},
{
"epoch": 0.35,
"eval_MSE/layer0": 640.3158307464355,
"eval_accuracy": 0.49451074349024987,
"eval_dead_code_fraction/layer0": 0.1575,
"eval_input_norm/layer0": 31.99825158244007,
"eval_loss": 2.2564537525177,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 17.756634140179678,
"eval_runtime": 157.599,
"eval_samples_per_second": 29.334,
"eval_steps_per_second": 1.834,
"step": 3500
},
{
"MSE": 639.6838141886391,
"MSE/layer0": 639.6838141886391,
"dead_code_fraction": 0.157,
"dead_code_fraction/layer0": 0.157,
"epoch": 0.35,
"input_norm": 31.99826737085978,
"input_norm/layer0": 31.99826737085978,
"learning_rate": 0.0005,
"loss": 2.2738,
"max_norm": 58.24713897705078,
"max_norm/layer0": 58.24713897705078,
"mean_norm": 40.08023262023926,
"mean_norm/layer0": 40.08023262023926,
"multicode_k": 1,
"output_norm": 17.755876312255864,
"output_norm/layer0": 17.755876312255864,
"step": 3550
},
{
"MSE": 639.2954257202149,
"MSE/layer0": 639.2954257202149,
"dead_code_fraction": 0.1559,
"dead_code_fraction/layer0": 0.1559,
"epoch": 0.36,
"input_norm": 31.998245531717938,
"input_norm/layer0": 31.998245531717938,
"learning_rate": 0.0005,
"loss": 2.2036,
"max_norm": 58.50635528564453,
"max_norm/layer0": 58.50635528564453,
"mean_norm": 40.15370178222656,
"mean_norm/layer0": 40.15370178222656,
"multicode_k": 1,
"output_norm": 17.812968953450515,
"output_norm/layer0": 17.812968953450515,
"step": 3600
},
{
"MSE": 639.3338773600263,
"MSE/layer0": 639.3338773600263,
"dead_code_fraction": 0.15905,
"dead_code_fraction/layer0": 0.15905,
"epoch": 0.36,
"input_norm": 31.99827084223429,
"input_norm/layer0": 31.99827084223429,
"learning_rate": 0.0005,
"loss": 2.2672,
"max_norm": 58.76622009277344,
"max_norm/layer0": 58.76622009277344,
"mean_norm": 40.22719192504883,
"mean_norm/layer0": 40.22719192504883,
"multicode_k": 1,
"output_norm": 17.821751413345332,
"output_norm/layer0": 17.821751413345332,
"step": 3650
},
{
"MSE": 639.0531684366863,
"MSE/layer0": 639.0531684366863,
"dead_code_fraction": 0.15975,
"dead_code_fraction/layer0": 0.15975,
"epoch": 0.37,
"input_norm": 31.99827636400858,
"input_norm/layer0": 31.99827636400858,
"learning_rate": 0.0005,
"loss": 2.2444,
"max_norm": 59.02393341064453,
"max_norm/layer0": 59.02393341064453,
"mean_norm": 40.298166275024414,
"mean_norm/layer0": 40.298166275024414,
"multicode_k": 1,
"output_norm": 17.85403926849365,
"output_norm/layer0": 17.85403926849365,
"step": 3700
},
{
"MSE": 638.9355230712894,
"MSE/layer0": 638.9355230712894,
"dead_code_fraction": 0.1605,
"dead_code_fraction/layer0": 0.1605,
"epoch": 0.38,
"input_norm": 31.99827863057454,
"input_norm/layer0": 31.99827863057454,
"learning_rate": 0.0005,
"loss": 2.2454,
"max_norm": 59.28853225708008,
"max_norm/layer0": 59.28853225708008,
"mean_norm": 40.36880111694336,
"mean_norm/layer0": 40.36880111694336,
"multicode_k": 1,
"output_norm": 17.88599282582601,
"output_norm/layer0": 17.88599282582601,
"step": 3750
},
{
"MSE": 639.0086972045899,
"MSE/layer0": 639.0086972045899,
"dead_code_fraction": 0.16125,
"dead_code_fraction/layer0": 0.16125,
"epoch": 0.38,
"input_norm": 31.9982850710551,
"input_norm/layer0": 31.9982850710551,
"learning_rate": 0.0005,
"loss": 2.27,
"max_norm": 59.546451568603516,
"max_norm/layer0": 59.546451568603516,
"mean_norm": 40.43776512145996,
"mean_norm/layer0": 40.43776512145996,
"multicode_k": 1,
"output_norm": 17.90943570454915,
"output_norm/layer0": 17.90943570454915,
"step": 3800
},
{
"MSE": 638.9462019856769,
"MSE/layer0": 638.9462019856769,
"dead_code_fraction": 0.1583,
"dead_code_fraction/layer0": 0.1583,
"epoch": 0.39,
"input_norm": 31.998278980255122,
"input_norm/layer0": 31.998278980255122,
"learning_rate": 0.0005,
"loss": 2.2438,
"max_norm": 59.80894470214844,
"max_norm/layer0": 59.80894470214844,
"mean_norm": 40.50556945800781,
"mean_norm/layer0": 40.50556945800781,
"multicode_k": 1,
"output_norm": 17.947645209630338,
"output_norm/layer0": 17.947645209630338,
"step": 3850
},
{
"MSE": 639.4130173746743,
"MSE/layer0": 639.4130173746743,
"dead_code_fraction": 0.16135,
"dead_code_fraction/layer0": 0.16135,
"epoch": 0.39,
"input_norm": 31.998284943898526,
"input_norm/layer0": 31.998284943898526,
"learning_rate": 0.0005,
"loss": 2.2526,
"max_norm": 60.04655075073242,
"max_norm/layer0": 60.04655075073242,
"mean_norm": 40.57136535644531,
"mean_norm/layer0": 40.57136535644531,
"multicode_k": 1,
"output_norm": 17.960218969980872,
"output_norm/layer0": 17.960218969980872,
"step": 3900
},
{
"MSE": 639.8756245930986,
"MSE/layer0": 639.8756245930986,
"dead_code_fraction": 0.15755,
"dead_code_fraction/layer0": 0.15755,
"epoch": 0.4,
"input_norm": 31.998285398483272,
"input_norm/layer0": 31.998285398483272,
"learning_rate": 0.0005,
"loss": 2.2266,
"max_norm": 60.29011154174805,
"max_norm/layer0": 60.29011154174805,
"mean_norm": 40.63625144958496,
"mean_norm/layer0": 40.63625144958496,
"multicode_k": 1,
"output_norm": 17.97526204744974,
"output_norm/layer0": 17.97526204744974,
"step": 3950
},
{
"MSE": 640.046054585775,
"MSE/layer0": 640.046054585775,
"dead_code_fraction": 0.1605,
"dead_code_fraction/layer0": 0.1605,
"epoch": 0.4,
"input_norm": 31.998285433451336,
"input_norm/layer0": 31.998285433451336,
"learning_rate": 0.0005,
"loss": 2.2287,
"max_norm": 60.52168655395508,
"max_norm/layer0": 60.52168655395508,
"mean_norm": 40.698753356933594,
"mean_norm/layer0": 40.698753356933594,
"multicode_k": 1,
"output_norm": 17.997498016357426,
"output_norm/layer0": 17.997498016357426,
"step": 4000
},
{
"epoch": 0.4,
"eval_MSE/layer0": 638.8422855589264,
"eval_accuracy": 0.49670513512593434,
"eval_dead_code_fraction/layer0": 0.16135,
"eval_input_norm/layer0": 31.99827300782795,
"eval_loss": 2.2332887649536133,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.022313365115252,
"eval_runtime": 158.1975,
"eval_samples_per_second": 29.223,
"eval_steps_per_second": 1.827,
"step": 4000
},
{
"MSE": 639.952128804525,
"MSE/layer0": 639.952128804525,
"dead_code_fraction": 0.16035,
"dead_code_fraction/layer0": 0.16035,
"epoch": 0.41,
"input_norm": 31.998286927541105,
"input_norm/layer0": 31.998286927541105,
"learning_rate": 0.0005,
"loss": 2.2193,
"max_norm": 60.76009750366211,
"max_norm/layer0": 60.76009750366211,
"mean_norm": 40.75992393493652,
"mean_norm/layer0": 40.75992393493652,
"multicode_k": 1,
"output_norm": 18.024092137018826,
"output_norm/layer0": 18.024092137018826,
"step": 4050
},
{
"MSE": 640.5730131022133,
"MSE/layer0": 640.5730131022133,
"dead_code_fraction": 0.1634,
"dead_code_fraction/layer0": 0.1634,
"epoch": 0.41,
"input_norm": 31.99828769365946,
"input_norm/layer0": 31.99828769365946,
"learning_rate": 0.0005,
"loss": 2.2301,
"max_norm": 60.98118591308594,
"max_norm/layer0": 60.98118591308594,
"mean_norm": 40.8208122253418,
"mean_norm/layer0": 40.8208122253418,
"multicode_k": 1,
"output_norm": 18.02807092984518,
"output_norm/layer0": 18.02807092984518,
"step": 4100
},
{
"MSE": 640.4258350626628,
"MSE/layer0": 640.4258350626628,
"dead_code_fraction": 0.1612,
"dead_code_fraction/layer0": 0.1612,
"epoch": 0.41,
"input_norm": 31.998297268549607,
"input_norm/layer0": 31.998297268549607,
"learning_rate": 0.0005,
"loss": 2.2307,
"max_norm": 61.19542694091797,
"max_norm/layer0": 61.19542694091797,
"mean_norm": 40.88128852844238,
"mean_norm/layer0": 40.88128852844238,
"multicode_k": 1,
"output_norm": 18.04158842404684,
"output_norm/layer0": 18.04158842404684,
"step": 4150
},
{
"MSE": 639.5022987874349,
"MSE/layer0": 639.5022987874349,
"dead_code_fraction": 0.16015,
"dead_code_fraction/layer0": 0.16015,
"epoch": 0.42,
"input_norm": 31.99830362319948,
"input_norm/layer0": 31.99830362319948,
"learning_rate": 0.0005,
"loss": 2.247,
"max_norm": 61.4282341003418,
"max_norm/layer0": 61.4282341003418,
"mean_norm": 40.941017150878906,
"mean_norm/layer0": 40.941017150878906,
"multicode_k": 1,
"output_norm": 18.079462760289516,
"output_norm/layer0": 18.079462760289516,
"step": 4200
},
{
"MSE": 640.0252755737306,
"MSE/layer0": 640.0252755737306,
"dead_code_fraction": 0.1604,
"dead_code_fraction/layer0": 0.1604,
"epoch": 0.42,
"input_norm": 31.99830138524374,
"input_norm/layer0": 31.99830138524374,
"learning_rate": 0.0005,
"loss": 2.2314,
"max_norm": 61.648414611816406,
"max_norm/layer0": 61.648414611816406,
"mean_norm": 40.99977684020996,
"mean_norm/layer0": 40.99977684020996,
"multicode_k": 1,
"output_norm": 18.09024664878845,
"output_norm/layer0": 18.09024664878845,
"step": 4250
},
{
"MSE": 639.7621870930992,
"MSE/layer0": 639.7621870930992,
"dead_code_fraction": 0.16365,
"dead_code_fraction/layer0": 0.16365,
"epoch": 0.43,
"input_norm": 31.99830169359842,
"input_norm/layer0": 31.99830169359842,
"learning_rate": 0.0005,
"loss": 2.2144,
"max_norm": 61.86562728881836,
"max_norm/layer0": 61.86562728881836,
"mean_norm": 41.05688667297363,
"mean_norm/layer0": 41.05688667297363,
"multicode_k": 1,
"output_norm": 18.11899041493734,
"output_norm/layer0": 18.11899041493734,
"step": 4300
},
{
"MSE": 640.3955947875975,
"MSE/layer0": 640.3955947875975,
"dead_code_fraction": 0.1592,
"dead_code_fraction/layer0": 0.1592,
"epoch": 0.43,
"input_norm": 31.998302787144976,
"input_norm/layer0": 31.998302787144976,
"learning_rate": 0.0005,
"loss": 2.2077,
"max_norm": 62.060550689697266,
"max_norm/layer0": 62.060550689697266,
"mean_norm": 41.11246681213379,
"mean_norm/layer0": 41.11246681213379,
"multicode_k": 1,
"output_norm": 18.121066271464024,
"output_norm/layer0": 18.121066271464024,
"step": 4350
},
{
"MSE": 639.8066222127281,
"MSE/layer0": 639.8066222127281,
"dead_code_fraction": 0.1635,
"dead_code_fraction/layer0": 0.1635,
"epoch": 0.44,
"input_norm": 31.998314228057872,
"input_norm/layer0": 31.998314228057872,
"learning_rate": 0.0005,
"loss": 2.2287,
"max_norm": 62.275943756103516,
"max_norm/layer0": 62.275943756103516,
"mean_norm": 41.167396545410156,
"mean_norm/layer0": 41.167396545410156,
"multicode_k": 1,
"output_norm": 18.142933632532753,
"output_norm/layer0": 18.142933632532753,
"step": 4400
},
{
"MSE": 639.8160334269206,
"MSE/layer0": 639.8160334269206,
"dead_code_fraction": 0.16385,
"dead_code_fraction/layer0": 0.16385,
"epoch": 0.45,
"input_norm": 31.99831516901653,
"input_norm/layer0": 31.99831516901653,
"learning_rate": 0.0005,
"loss": 2.215,
"max_norm": 62.486793518066406,
"max_norm/layer0": 62.486793518066406,
"mean_norm": 41.221702575683594,
"mean_norm/layer0": 41.221702575683594,
"multicode_k": 1,
"output_norm": 18.167670075098677,
"output_norm/layer0": 18.167670075098677,
"step": 4450
},
{
"MSE": 640.1416244506836,
"MSE/layer0": 640.1416244506836,
"dead_code_fraction": 0.16675,
"dead_code_fraction/layer0": 0.16675,
"epoch": 0.45,
"input_norm": 31.998327512741074,
"input_norm/layer0": 31.998327512741074,
"learning_rate": 0.0005,
"loss": 2.2576,
"max_norm": 62.67790222167969,
"max_norm/layer0": 62.67790222167969,
"mean_norm": 41.275705337524414,
"mean_norm/layer0": 41.275705337524414,
"multicode_k": 1,
"output_norm": 18.162402251561495,
"output_norm/layer0": 18.162402251561495,
"step": 4500
},
{
"epoch": 0.45,
"eval_MSE/layer0": 639.7464034476376,
"eval_accuracy": 0.49916912103175737,
"eval_dead_code_fraction/layer0": 0.16755,
"eval_input_norm/layer0": 31.998309449821527,
"eval_loss": 2.215489387512207,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.191884751910905,
"eval_runtime": 157.9108,
"eval_samples_per_second": 29.276,
"eval_steps_per_second": 1.83,
"step": 4500
},
{
"MSE": 640.4858755493162,
"MSE/layer0": 640.4858755493162,
"dead_code_fraction": 0.1633,
"dead_code_fraction/layer0": 0.1633,
"epoch": 0.46,
"input_norm": 31.99831475257874,
"input_norm/layer0": 31.99831475257874,
"learning_rate": 0.0005,
"loss": 2.1869,
"max_norm": 62.88029861450195,
"max_norm/layer0": 62.88029861450195,
"mean_norm": 41.32845115661621,
"mean_norm/layer0": 41.32845115661621,
"multicode_k": 1,
"output_norm": 18.18791744550069,
"output_norm/layer0": 18.18791744550069,
"step": 4550
},
{
"MSE": 640.7411174519859,
"MSE/layer0": 640.7411174519859,
"dead_code_fraction": 0.16375,
"dead_code_fraction/layer0": 0.16375,
"epoch": 0.46,
"input_norm": 31.998337395985924,
"input_norm/layer0": 31.998337395985924,
"learning_rate": 0.0005,
"loss": 2.2426,
"max_norm": 63.06687545776367,
"max_norm/layer0": 63.06687545776367,
"mean_norm": 41.38063049316406,
"mean_norm/layer0": 41.38063049316406,
"multicode_k": 1,
"output_norm": 18.185693721771244,
"output_norm/layer0": 18.185693721771244,
"step": 4600
},
{
"MSE": 640.3254055786131,
"MSE/layer0": 640.3254055786131,
"dead_code_fraction": 0.1637,
"dead_code_fraction/layer0": 0.1637,
"epoch": 0.47,
"input_norm": 31.998331034978236,
"input_norm/layer0": 31.998331034978236,
"learning_rate": 0.0005,
"loss": 2.2103,
"max_norm": 63.24494171142578,
"max_norm/layer0": 63.24494171142578,
"mean_norm": 41.4316463470459,
"mean_norm/layer0": 41.4316463470459,
"multicode_k": 1,
"output_norm": 18.215761318206788,
"output_norm/layer0": 18.215761318206788,
"step": 4650
},
{
"MSE": 640.0117889404299,
"MSE/layer0": 640.0117889404299,
"dead_code_fraction": 0.1653,
"dead_code_fraction/layer0": 0.1653,
"epoch": 0.47,
"input_norm": 31.998331683476753,
"input_norm/layer0": 31.998331683476753,
"learning_rate": 0.0005,
"loss": 2.189,
"max_norm": 63.429969787597656,
"max_norm/layer0": 63.429969787597656,
"mean_norm": 41.481590270996094,
"mean_norm/layer0": 41.481590270996094,
"multicode_k": 1,
"output_norm": 18.22781534512837,
"output_norm/layer0": 18.22781534512837,
"step": 4700
},
{
"MSE": 640.034366455078,
"MSE/layer0": 640.034366455078,
"dead_code_fraction": 0.16355,
"dead_code_fraction/layer0": 0.16355,
"epoch": 0.47,
"input_norm": 31.998335037231442,
"input_norm/layer0": 31.998335037231442,
"learning_rate": 0.0005,
"loss": 2.1746,
"max_norm": 63.604644775390625,
"max_norm/layer0": 63.604644775390625,
"mean_norm": 41.530447006225586,
"mean_norm/layer0": 41.530447006225586,
"multicode_k": 1,
"output_norm": 18.247568238576257,
"output_norm/layer0": 18.247568238576257,
"step": 4750
},
{
"MSE": 641.3402144411094,
"MSE/layer0": 641.3402144411094,
"dead_code_fraction": 0.16465,
"dead_code_fraction/layer0": 0.16465,
"epoch": 1.0,
"input_norm": 31.998328861016873,
"input_norm/layer0": 31.998328861016873,
"learning_rate": 0.0005,
"loss": 2.1589,
"max_norm": 63.7794303894043,
"max_norm/layer0": 63.7794303894043,
"mean_norm": 41.577613830566406,
"mean_norm/layer0": 41.577613830566406,
"multicode_k": 1,
"output_norm": 18.227145007068557,
"output_norm/layer0": 18.227145007068557,
"step": 4800
},
{
"MSE": 640.0454110717772,
"MSE/layer0": 640.0454110717772,
"dead_code_fraction": 0.16635,
"dead_code_fraction/layer0": 0.16635,
"epoch": 1.01,
"input_norm": 31.998361120224008,
"input_norm/layer0": 31.998361120224008,
"learning_rate": 0.0005,
"loss": 2.2585,
"max_norm": 63.96126937866211,
"max_norm/layer0": 63.96126937866211,
"mean_norm": 41.62501525878906,
"mean_norm/layer0": 41.62501525878906,
"multicode_k": 1,
"output_norm": 18.258941303888953,
"output_norm/layer0": 18.258941303888953,
"step": 4850
},
{
"MSE": 640.0055624389651,
"MSE/layer0": 640.0055624389651,
"dead_code_fraction": 0.16515,
"dead_code_fraction/layer0": 0.16515,
"epoch": 1.01,
"input_norm": 31.998340495427446,
"input_norm/layer0": 31.998340495427446,
"learning_rate": 0.0005,
"loss": 2.1578,
"max_norm": 64.13137817382812,
"max_norm/layer0": 64.13137817382812,
"mean_norm": 41.672542572021484,
"mean_norm/layer0": 41.672542572021484,
"multicode_k": 1,
"output_norm": 18.272732003529867,
"output_norm/layer0": 18.272732003529867,
"step": 4900
},
{
"MSE": 640.108183898926,
"MSE/layer0": 640.108183898926,
"dead_code_fraction": 0.1668,
"dead_code_fraction/layer0": 0.1668,
"epoch": 1.02,
"input_norm": 31.998351519902535,
"input_norm/layer0": 31.998351519902535,
"learning_rate": 0.0005,
"loss": 2.1809,
"max_norm": 64.30120086669922,
"max_norm/layer0": 64.30120086669922,
"mean_norm": 41.71914291381836,
"mean_norm/layer0": 41.71914291381836,
"multicode_k": 1,
"output_norm": 18.278290322621658,
"output_norm/layer0": 18.278290322621658,
"step": 4950
},
{
"MSE": 639.8438139851887,
"MSE/layer0": 639.8438139851887,
"dead_code_fraction": 0.1671,
"dead_code_fraction/layer0": 0.1671,
"epoch": 1.02,
"input_norm": 31.998358796437586,
"input_norm/layer0": 31.998358796437586,
"learning_rate": 0.0005,
"loss": 2.1901,
"max_norm": 64.4720230102539,
"max_norm/layer0": 64.4720230102539,
"mean_norm": 41.76571464538574,
"mean_norm/layer0": 41.76571464538574,
"multicode_k": 1,
"output_norm": 18.29636260350546,
"output_norm/layer0": 18.29636260350546,
"step": 5000
},
{
"epoch": 1.02,
"eval_MSE/layer0": 638.1766108092672,
"eval_accuracy": 0.5013711247409516,
"eval_dead_code_fraction/layer0": 0.16955,
"eval_input_norm/layer0": 31.99836045128427,
"eval_loss": 2.202561616897583,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.311866774487846,
"eval_runtime": 158.3836,
"eval_samples_per_second": 29.189,
"eval_steps_per_second": 1.825,
"step": 5000
},
{
"MSE": 639.5863418579103,
"MSE/layer0": 639.5863418579103,
"dead_code_fraction": 0.1675,
"dead_code_fraction/layer0": 0.1675,
"epoch": 1.03,
"input_norm": 31.99836014429728,
"input_norm/layer0": 31.99836014429728,
"learning_rate": 0.0005,
"loss": 2.1914,
"max_norm": 64.65907287597656,
"max_norm/layer0": 64.65907287597656,
"mean_norm": 41.8120174407959,
"mean_norm/layer0": 41.8120174407959,
"multicode_k": 1,
"output_norm": 18.301887426376346,
"output_norm/layer0": 18.301887426376346,
"step": 5050
},
{
"MSE": 639.5830181884764,
"MSE/layer0": 639.5830181884764,
"dead_code_fraction": 0.16545,
"dead_code_fraction/layer0": 0.16545,
"epoch": 1.03,
"input_norm": 31.998363596598292,
"input_norm/layer0": 31.998363596598292,
"learning_rate": 0.0005,
"loss": 2.1503,
"max_norm": 64.83207702636719,
"max_norm/layer0": 64.83207702636719,
"mean_norm": 41.85700988769531,
"mean_norm/layer0": 41.85700988769531,
"multicode_k": 1,
"output_norm": 18.3204355875651,
"output_norm/layer0": 18.3204355875651,
"step": 5100
},
{
"MSE": 640.3749603271485,
"MSE/layer0": 640.3749603271485,
"dead_code_fraction": 0.16725,
"dead_code_fraction/layer0": 0.16725,
"epoch": 1.04,
"input_norm": 31.9983703358968,
"input_norm/layer0": 31.9983703358968,
"learning_rate": 0.0005,
"loss": 2.1634,
"max_norm": 65.003662109375,
"max_norm/layer0": 65.003662109375,
"mean_norm": 41.90180778503418,
"mean_norm/layer0": 41.90180778503418,
"multicode_k": 1,
"output_norm": 18.316434319814057,
"output_norm/layer0": 18.316434319814057,
"step": 5150
},
{
"MSE": 639.0211893717446,
"MSE/layer0": 639.0211893717446,
"dead_code_fraction": 0.16875,
"dead_code_fraction/layer0": 0.16875,
"epoch": 1.04,
"input_norm": 31.998389561971024,
"input_norm/layer0": 31.998389561971024,
"learning_rate": 0.0005,
"loss": 2.224,
"max_norm": 65.19213104248047,
"max_norm/layer0": 65.19213104248047,
"mean_norm": 41.94645309448242,
"mean_norm/layer0": 41.94645309448242,
"multicode_k": 1,
"output_norm": 18.33804360071819,
"output_norm/layer0": 18.33804360071819,
"step": 5200
},
{
"MSE": 638.6207899983721,
"MSE/layer0": 638.6207899983721,
"dead_code_fraction": 0.17055,
"dead_code_fraction/layer0": 0.17055,
"epoch": 1.05,
"input_norm": 31.998394203186038,
"input_norm/layer0": 31.998394203186038,
"learning_rate": 0.0005,
"loss": 2.2235,
"max_norm": 65.36846160888672,
"max_norm/layer0": 65.36846160888672,
"mean_norm": 41.991315841674805,
"mean_norm/layer0": 41.991315841674805,
"multicode_k": 1,
"output_norm": 18.346421286265045,
"output_norm/layer0": 18.346421286265045,
"step": 5250
},
{
"MSE": 638.3484961954751,
"MSE/layer0": 638.3484961954751,
"dead_code_fraction": 0.1704,
"dead_code_fraction/layer0": 0.1704,
"epoch": 1.05,
"input_norm": 31.998402004241942,
"input_norm/layer0": 31.998402004241942,
"learning_rate": 0.0005,
"loss": 2.209,
"max_norm": 65.53041076660156,
"max_norm/layer0": 65.53041076660156,
"mean_norm": 42.0357780456543,
"mean_norm/layer0": 42.0357780456543,
"multicode_k": 1,
"output_norm": 18.351918992996215,
"output_norm/layer0": 18.351918992996215,
"step": 5300
},
{
"MSE": 638.9349023437496,
"MSE/layer0": 638.9349023437496,
"dead_code_fraction": 0.1671,
"dead_code_fraction/layer0": 0.1671,
"epoch": 1.06,
"input_norm": 31.998392171859756,
"input_norm/layer0": 31.998392171859756,
"learning_rate": 0.0005,
"loss": 2.1737,
"max_norm": 65.69444274902344,
"max_norm/layer0": 65.69444274902344,
"mean_norm": 42.078935623168945,
"mean_norm/layer0": 42.078935623168945,
"multicode_k": 1,
"output_norm": 18.365610707600908,
"output_norm/layer0": 18.365610707600908,
"step": 5350
},
{
"MSE": 638.1850768025716,
"MSE/layer0": 638.1850768025716,
"dead_code_fraction": 0.17125,
"dead_code_fraction/layer0": 0.17125,
"epoch": 1.06,
"input_norm": 31.99840373039246,
"input_norm/layer0": 31.99840373039246,
"learning_rate": 0.0005,
"loss": 2.1904,
"max_norm": 65.84613037109375,
"max_norm/layer0": 65.84613037109375,
"mean_norm": 42.122589111328125,
"mean_norm/layer0": 42.122589111328125,
"multicode_k": 1,
"output_norm": 18.371175734202062,
"output_norm/layer0": 18.371175734202062,
"step": 5400
},
{
"MSE": 637.5771400960282,
"MSE/layer0": 637.5771400960282,
"dead_code_fraction": 0.17005,
"dead_code_fraction/layer0": 0.17005,
"epoch": 1.07,
"input_norm": 31.998408838907892,
"input_norm/layer0": 31.998408838907892,
"learning_rate": 0.0005,
"loss": 2.2013,
"max_norm": 66.00259399414062,
"max_norm/layer0": 66.00259399414062,
"mean_norm": 42.16551399230957,
"mean_norm/layer0": 42.16551399230957,
"multicode_k": 1,
"output_norm": 18.396056934992465,
"output_norm/layer0": 18.396056934992465,
"step": 5450
},
{
"MSE": 637.4973764038084,
"MSE/layer0": 637.4973764038084,
"dead_code_fraction": 0.17135,
"dead_code_fraction/layer0": 0.17135,
"epoch": 1.07,
"input_norm": 31.998402996063238,
"input_norm/layer0": 31.998402996063238,
"learning_rate": 0.0005,
"loss": 2.1686,
"max_norm": 66.15951538085938,
"max_norm/layer0": 66.15951538085938,
"mean_norm": 42.207963943481445,
"mean_norm/layer0": 42.207963943481445,
"multicode_k": 1,
"output_norm": 18.402882191340133,
"output_norm/layer0": 18.402882191340133,
"step": 5500
},
{
"epoch": 1.07,
"eval_MSE/layer0": 638.6084431543663,
"eval_accuracy": 0.5026125270625071,
"eval_dead_code_fraction/layer0": 0.17165,
"eval_input_norm/layer0": 31.99841410479916,
"eval_loss": 2.1934523582458496,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.401259186926417,
"eval_runtime": 158.4926,
"eval_samples_per_second": 29.169,
"eval_steps_per_second": 1.823,
"step": 5500
},
{
"MSE": 637.3816906738282,
"MSE/layer0": 637.3816906738282,
"dead_code_fraction": 0.17125,
"dead_code_fraction/layer0": 0.17125,
"epoch": 1.08,
"input_norm": 31.998415158589676,
"input_norm/layer0": 31.998415158589676,
"learning_rate": 0.0005,
"loss": 2.2097,
"max_norm": 66.32366180419922,
"max_norm/layer0": 66.32366180419922,
"mean_norm": 42.25027084350586,
"mean_norm/layer0": 42.25027084350586,
"multicode_k": 1,
"output_norm": 18.40568763732911,
"output_norm/layer0": 18.40568763732911,
"step": 5550
},
{
"MSE": 636.5928268432615,
"MSE/layer0": 636.5928268432615,
"dead_code_fraction": 0.1711,
"dead_code_fraction/layer0": 0.1711,
"epoch": 1.08,
"input_norm": 31.99841807047526,
"input_norm/layer0": 31.99841807047526,
"learning_rate": 0.0005,
"loss": 2.1987,
"max_norm": 66.49840545654297,
"max_norm/layer0": 66.49840545654297,
"mean_norm": 42.29284858703613,
"mean_norm/layer0": 42.29284858703613,
"multicode_k": 1,
"output_norm": 18.424939454396565,
"output_norm/layer0": 18.424939454396565,
"step": 5600
},
{
"MSE": 637.195534973145,
"MSE/layer0": 637.195534973145,
"dead_code_fraction": 0.17175,
"dead_code_fraction/layer0": 0.17175,
"epoch": 1.09,
"input_norm": 31.99841377894082,
"input_norm/layer0": 31.99841377894082,
"learning_rate": 0.0005,
"loss": 2.1571,
"max_norm": 66.6655502319336,
"max_norm/layer0": 66.6655502319336,
"mean_norm": 42.33401679992676,
"mean_norm/layer0": 42.33401679992676,
"multicode_k": 1,
"output_norm": 18.427337226867675,
"output_norm/layer0": 18.427337226867675,
"step": 5650
},
{
"MSE": 635.8865025838217,
"MSE/layer0": 635.8865025838217,
"dead_code_fraction": 0.1736,
"dead_code_fraction/layer0": 0.1736,
"epoch": 1.09,
"input_norm": 31.998435058593753,
"input_norm/layer0": 31.998435058593753,
"learning_rate": 0.0005,
"loss": 2.2146,
"max_norm": 66.82868957519531,
"max_norm/layer0": 66.82868957519531,
"mean_norm": 42.37582206726074,
"mean_norm/layer0": 42.37582206726074,
"multicode_k": 1,
"output_norm": 18.443573204676298,
"output_norm/layer0": 18.443573204676298,
"step": 5700
},
{
"MSE": 636.1581252034503,
"MSE/layer0": 636.1581252034503,
"dead_code_fraction": 0.17225,
"dead_code_fraction/layer0": 0.17225,
"epoch": 1.1,
"input_norm": 31.998433354695635,
"input_norm/layer0": 31.998433354695635,
"learning_rate": 0.0005,
"loss": 2.171,
"max_norm": 66.9796371459961,
"max_norm/layer0": 66.9796371459961,
"mean_norm": 42.41728591918945,
"mean_norm/layer0": 42.41728591918945,
"multicode_k": 1,
"output_norm": 18.440257479349775,
"output_norm/layer0": 18.440257479349775,
"step": 5750
},
{
"MSE": 636.7286339314779,
"MSE/layer0": 636.7286339314779,
"dead_code_fraction": 0.1738,
"dead_code_fraction/layer0": 0.1738,
"epoch": 1.1,
"input_norm": 31.998429416020713,
"input_norm/layer0": 31.998429416020713,
"learning_rate": 0.0005,
"loss": 2.1502,
"max_norm": 67.13478088378906,
"max_norm/layer0": 67.13478088378906,
"mean_norm": 42.45817756652832,
"mean_norm/layer0": 42.45817756652832,
"multicode_k": 1,
"output_norm": 18.442232058842986,
"output_norm/layer0": 18.442232058842986,
"step": 5800
},
{
"MSE": 635.2576449584958,
"MSE/layer0": 635.2576449584958,
"dead_code_fraction": 0.17405,
"dead_code_fraction/layer0": 0.17405,
"epoch": 1.11,
"input_norm": 31.99844219843547,
"input_norm/layer0": 31.99844219843547,
"learning_rate": 0.0005,
"loss": 2.2067,
"max_norm": 67.28919982910156,
"max_norm/layer0": 67.28919982910156,
"mean_norm": 42.49948501586914,
"mean_norm/layer0": 42.49948501586914,
"multicode_k": 1,
"output_norm": 18.46717386881511,
"output_norm/layer0": 18.46717386881511,
"step": 5850
},
{
"MSE": 636.0759664916989,
"MSE/layer0": 636.0759664916989,
"dead_code_fraction": 0.17355,
"dead_code_fraction/layer0": 0.17355,
"epoch": 1.11,
"input_norm": 31.998439470926915,
"input_norm/layer0": 31.998439470926915,
"learning_rate": 0.0005,
"loss": 2.1543,
"max_norm": 67.44383239746094,
"max_norm/layer0": 67.44383239746094,
"mean_norm": 42.53946495056152,
"mean_norm/layer0": 42.53946495056152,
"multicode_k": 1,
"output_norm": 18.469777971903483,
"output_norm/layer0": 18.469777971903483,
"step": 5900
},
{
"MSE": 635.3813305664057,
"MSE/layer0": 635.3813305664057,
"dead_code_fraction": 0.17405,
"dead_code_fraction/layer0": 0.17405,
"epoch": 1.12,
"input_norm": 31.99844372113545,
"input_norm/layer0": 31.99844372113545,
"learning_rate": 0.0005,
"loss": 2.1846,
"max_norm": 67.59025573730469,
"max_norm/layer0": 67.59025573730469,
"mean_norm": 42.58071327209473,
"mean_norm/layer0": 42.58071327209473,
"multicode_k": 1,
"output_norm": 18.477715517679847,
"output_norm/layer0": 18.477715517679847,
"step": 5950
},
{
"MSE": 634.5524212646484,
"MSE/layer0": 634.5524212646484,
"dead_code_fraction": 0.17535,
"dead_code_fraction/layer0": 0.17535,
"epoch": 1.12,
"input_norm": 31.998457225163776,
"input_norm/layer0": 31.998457225163776,
"learning_rate": 0.0005,
"loss": 2.2158,
"max_norm": 67.7379379272461,
"max_norm/layer0": 67.7379379272461,
"mean_norm": 42.62178421020508,
"mean_norm/layer0": 42.62178421020508,
"multicode_k": 1,
"output_norm": 18.489366165796913,
"output_norm/layer0": 18.489366165796913,
"step": 6000
},
{
"epoch": 1.12,
"eval_MSE/layer0": 632.9325560995336,
"eval_accuracy": 0.5036799089257694,
"eval_dead_code_fraction/layer0": 0.17795,
"eval_input_norm/layer0": 31.998461353451354,
"eval_loss": 2.1832942962646484,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.51493810096293,
"eval_runtime": 158.8489,
"eval_samples_per_second": 29.103,
"eval_steps_per_second": 1.819,
"step": 6000
},
{
"MSE": 634.7784757486979,
"MSE/layer0": 634.7784757486979,
"dead_code_fraction": 0.1755,
"dead_code_fraction/layer0": 0.1755,
"epoch": 1.13,
"input_norm": 31.99845712025961,
"input_norm/layer0": 31.99845712025961,
"learning_rate": 0.0005,
"loss": 2.1789,
"max_norm": 67.8902816772461,
"max_norm/layer0": 67.8902816772461,
"mean_norm": 42.66269874572754,
"mean_norm/layer0": 42.66269874572754,
"multicode_k": 1,
"output_norm": 18.49625307718913,
"output_norm/layer0": 18.49625307718913,
"step": 6050
},
{
"MSE": 634.5078458658851,
"MSE/layer0": 634.5078458658851,
"dead_code_fraction": 0.17445,
"dead_code_fraction/layer0": 0.17445,
"epoch": 1.13,
"input_norm": 31.99845917383831,
"input_norm/layer0": 31.99845917383831,
"learning_rate": 0.0005,
"loss": 2.2009,
"max_norm": 68.04124450683594,
"max_norm/layer0": 68.04124450683594,
"mean_norm": 42.70250701904297,
"mean_norm/layer0": 42.70250701904297,
"multicode_k": 1,
"output_norm": 18.514623686472582,
"output_norm/layer0": 18.514623686472582,
"step": 6100
},
{
"MSE": 634.443066914876,
"MSE/layer0": 634.443066914876,
"dead_code_fraction": 0.17575,
"dead_code_fraction/layer0": 0.17575,
"epoch": 1.14,
"input_norm": 31.99845913887024,
"input_norm/layer0": 31.99845913887024,
"learning_rate": 0.0005,
"loss": 2.1623,
"max_norm": 68.17865753173828,
"max_norm/layer0": 68.17865753173828,
"mean_norm": 42.742488861083984,
"mean_norm/layer0": 42.742488861083984,
"multicode_k": 1,
"output_norm": 18.513023862838743,
"output_norm/layer0": 18.513023862838743,
"step": 6150
},
{
"MSE": 633.6522382609048,
"MSE/layer0": 633.6522382609048,
"dead_code_fraction": 0.17475,
"dead_code_fraction/layer0": 0.17475,
"epoch": 1.14,
"input_norm": 31.998471844991045,
"input_norm/layer0": 31.998471844991045,
"learning_rate": 0.0005,
"loss": 2.1824,
"max_norm": 68.31253051757812,
"max_norm/layer0": 68.31253051757812,
"mean_norm": 42.782148361206055,
"mean_norm/layer0": 42.782148361206055,
"multicode_k": 1,
"output_norm": 18.529316590627033,
"output_norm/layer0": 18.529316590627033,
"step": 6200
},
{
"MSE": 634.0474910481774,
"MSE/layer0": 634.0474910481774,
"dead_code_fraction": 0.1771,
"dead_code_fraction/layer0": 0.1771,
"epoch": 1.15,
"input_norm": 31.998480736414585,
"input_norm/layer0": 31.998480736414585,
"learning_rate": 0.0005,
"loss": 2.1948,
"max_norm": 68.44271850585938,
"max_norm/layer0": 68.44271850585938,
"mean_norm": 42.82079887390137,
"mean_norm/layer0": 42.82079887390137,
"multicode_k": 1,
"output_norm": 18.524528849919633,
"output_norm/layer0": 18.524528849919633,
"step": 6250
},
{
"MSE": 633.648407084147,
"MSE/layer0": 633.648407084147,
"dead_code_fraction": 0.1745,
"dead_code_fraction/layer0": 0.1745,
"epoch": 1.15,
"input_norm": 31.998468182881673,
"input_norm/layer0": 31.998468182881673,
"learning_rate": 0.0005,
"loss": 2.1145,
"max_norm": 68.57721710205078,
"max_norm/layer0": 68.57721710205078,
"mean_norm": 42.859825134277344,
"mean_norm/layer0": 42.859825134277344,
"multicode_k": 1,
"output_norm": 18.540853935877482,
"output_norm/layer0": 18.540853935877482,
"step": 6300
},
{
"MSE": 633.5945191446937,
"MSE/layer0": 633.5945191446937,
"dead_code_fraction": 0.17705,
"dead_code_fraction/layer0": 0.17705,
"epoch": 1.16,
"input_norm": 31.99847273508707,
"input_norm/layer0": 31.99847273508707,
"learning_rate": 0.0005,
"loss": 2.1507,
"max_norm": 68.7186050415039,
"max_norm/layer0": 68.7186050415039,
"mean_norm": 42.897830963134766,
"mean_norm/layer0": 42.897830963134766,
"multicode_k": 1,
"output_norm": 18.55124579429626,
"output_norm/layer0": 18.55124579429626,
"step": 6350
},
{
"MSE": 632.1478841145836,
"MSE/layer0": 632.1478841145836,
"dead_code_fraction": 0.1775,
"dead_code_fraction/layer0": 0.1775,
"epoch": 1.16,
"input_norm": 31.9984964243571,
"input_norm/layer0": 31.9984964243571,
"learning_rate": 0.0005,
"loss": 2.1962,
"max_norm": 68.85418701171875,
"max_norm/layer0": 68.85418701171875,
"mean_norm": 42.937448501586914,
"mean_norm/layer0": 42.937448501586914,
"multicode_k": 1,
"output_norm": 18.5615934785207,
"output_norm/layer0": 18.5615934785207,
"step": 6400
},
{
"MSE": 632.57952931722,
"MSE/layer0": 632.57952931722,
"dead_code_fraction": 0.1777,
"dead_code_fraction/layer0": 0.1777,
"epoch": 1.17,
"input_norm": 31.998487294514977,
"input_norm/layer0": 31.998487294514977,
"learning_rate": 0.0005,
"loss": 2.1627,
"max_norm": 69.0008316040039,
"max_norm/layer0": 69.0008316040039,
"mean_norm": 42.97622108459473,
"mean_norm/layer0": 42.97622108459473,
"multicode_k": 1,
"output_norm": 18.57248200734457,
"output_norm/layer0": 18.57248200734457,
"step": 6450
},
{
"MSE": 631.0360174560547,
"MSE/layer0": 631.0360174560547,
"dead_code_fraction": 0.1784,
"dead_code_fraction/layer0": 0.1784,
"epoch": 1.17,
"input_norm": 31.998495709101356,
"input_norm/layer0": 31.998495709101356,
"learning_rate": 0.0005,
"loss": 2.1843,
"max_norm": 69.13652038574219,
"max_norm/layer0": 69.13652038574219,
"mean_norm": 43.01558876037598,
"mean_norm/layer0": 43.01558876037598,
"multicode_k": 1,
"output_norm": 18.591586551666268,
"output_norm/layer0": 18.591586551666268,
"step": 6500
},
{
"epoch": 1.17,
"eval_MSE/layer0": 631.2925020152297,
"eval_accuracy": 0.5039093283634951,
"eval_dead_code_fraction/layer0": 0.1797,
"eval_input_norm/layer0": 31.99848882414009,
"eval_loss": 2.175981044769287,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.598594732777567,
"eval_runtime": 158.1453,
"eval_samples_per_second": 29.233,
"eval_steps_per_second": 1.827,
"step": 6500
},
{
"MSE": 631.294188741048,
"MSE/layer0": 631.294188741048,
"dead_code_fraction": 0.1796,
"dead_code_fraction/layer0": 0.1796,
"epoch": 1.18,
"input_norm": 31.998505541483564,
"input_norm/layer0": 31.998505541483564,
"learning_rate": 0.0005,
"loss": 2.1855,
"max_norm": 69.26646423339844,
"max_norm/layer0": 69.26646423339844,
"mean_norm": 43.0548152923584,
"mean_norm/layer0": 43.0548152923584,
"multicode_k": 1,
"output_norm": 18.585241152445477,
"output_norm/layer0": 18.585241152445477,
"step": 6550
},
{
"MSE": 631.297376505534,
"MSE/layer0": 631.297376505534,
"dead_code_fraction": 0.1779,
"dead_code_fraction/layer0": 0.1779,
"epoch": 1.18,
"input_norm": 31.998487745920816,
"input_norm/layer0": 31.998487745920816,
"learning_rate": 0.0005,
"loss": 2.1197,
"max_norm": 69.3987808227539,
"max_norm/layer0": 69.3987808227539,
"mean_norm": 43.093589782714844,
"mean_norm/layer0": 43.093589782714844,
"multicode_k": 1,
"output_norm": 18.605287278493257,
"output_norm/layer0": 18.605287278493257,
"step": 6600
},
{
"MSE": 630.8991915893555,
"MSE/layer0": 630.8991915893555,
"dead_code_fraction": 0.17815,
"dead_code_fraction/layer0": 0.17815,
"epoch": 1.19,
"input_norm": 31.99848988215129,
"input_norm/layer0": 31.99848988215129,
"learning_rate": 0.0005,
"loss": 2.1248,
"max_norm": 69.52507019042969,
"max_norm/layer0": 69.52507019042969,
"mean_norm": 43.132524490356445,
"mean_norm/layer0": 43.132524490356445,
"multicode_k": 1,
"output_norm": 18.61235850652059,
"output_norm/layer0": 18.61235850652059,
"step": 6650
},
{
"MSE": 629.604686584473,
"MSE/layer0": 629.604686584473,
"dead_code_fraction": 0.17965,
"dead_code_fraction/layer0": 0.17965,
"epoch": 1.19,
"input_norm": 31.99852681477865,
"input_norm/layer0": 31.99852681477865,
"learning_rate": 0.0005,
"loss": 2.2265,
"max_norm": 69.66030883789062,
"max_norm/layer0": 69.66030883789062,
"mean_norm": 43.17206573486328,
"mean_norm/layer0": 43.17206573486328,
"multicode_k": 1,
"output_norm": 18.626948499679564,
"output_norm/layer0": 18.626948499679564,
"step": 6700
},
{
"MSE": 629.7875715128578,
"MSE/layer0": 629.7875715128578,
"dead_code_fraction": 0.1802,
"dead_code_fraction/layer0": 0.1802,
"epoch": 1.2,
"input_norm": 31.998509550094596,
"input_norm/layer0": 31.998509550094596,
"learning_rate": 0.0005,
"loss": 2.1432,
"max_norm": 69.78119659423828,
"max_norm/layer0": 69.78119659423828,
"mean_norm": 43.21029472351074,
"mean_norm/layer0": 43.21029472351074,
"multicode_k": 1,
"output_norm": 18.639319947560622,
"output_norm/layer0": 18.639319947560622,
"step": 6750
},
{
"MSE": 629.3708419799802,
"MSE/layer0": 629.3708419799802,
"dead_code_fraction": 0.18015,
"dead_code_fraction/layer0": 0.18015,
"epoch": 1.2,
"input_norm": 31.99851152102152,
"input_norm/layer0": 31.99851152102152,
"learning_rate": 0.0005,
"loss": 2.1606,
"max_norm": 69.91252899169922,
"max_norm/layer0": 69.91252899169922,
"mean_norm": 43.24948692321777,
"mean_norm/layer0": 43.24948692321777,
"multicode_k": 1,
"output_norm": 18.64606482187906,
"output_norm/layer0": 18.64606482187906,
"step": 6800
},
{
"MSE": 628.4038922119142,
"MSE/layer0": 628.4038922119142,
"dead_code_fraction": 0.1806,
"dead_code_fraction/layer0": 0.1806,
"epoch": 1.21,
"input_norm": 31.998516721725462,
"input_norm/layer0": 31.998516721725462,
"learning_rate": 0.0005,
"loss": 2.1582,
"max_norm": 70.04332733154297,
"max_norm/layer0": 70.04332733154297,
"mean_norm": 43.28862762451172,
"mean_norm/layer0": 43.28862762451172,
"multicode_k": 1,
"output_norm": 18.669758415222162,
"output_norm/layer0": 18.669758415222162,
"step": 6850
},
{
"MSE": 628.1812467447919,
"MSE/layer0": 628.1812467447919,
"dead_code_fraction": 0.18055,
"dead_code_fraction/layer0": 0.18055,
"epoch": 1.21,
"input_norm": 31.998515844345086,
"input_norm/layer0": 31.998515844345086,
"learning_rate": 0.0005,
"loss": 2.1433,
"max_norm": 70.16979217529297,
"max_norm/layer0": 70.16979217529297,
"mean_norm": 43.327192306518555,
"mean_norm/layer0": 43.327192306518555,
"multicode_k": 1,
"output_norm": 18.674684073130294,
"output_norm/layer0": 18.674684073130294,
"step": 6900
},
{
"MSE": 628.1862957763672,
"MSE/layer0": 628.1862957763672,
"dead_code_fraction": 0.18045,
"dead_code_fraction/layer0": 0.18045,
"epoch": 1.22,
"input_norm": 31.99852259953816,
"input_norm/layer0": 31.99852259953816,
"learning_rate": 0.0005,
"loss": 2.1458,
"max_norm": 70.29747772216797,
"max_norm/layer0": 70.29747772216797,
"mean_norm": 43.36609077453613,
"mean_norm/layer0": 43.36609077453613,
"multicode_k": 1,
"output_norm": 18.682749029795335,
"output_norm/layer0": 18.682749029795335,
"step": 6950
},
{
"MSE": 627.7981392415361,
"MSE/layer0": 627.7981392415361,
"dead_code_fraction": 0.18045,
"dead_code_fraction/layer0": 0.18045,
"epoch": 1.22,
"input_norm": 31.998523871103927,
"input_norm/layer0": 31.998523871103927,
"learning_rate": 0.0005,
"loss": 2.1339,
"max_norm": 70.425537109375,
"max_norm/layer0": 70.425537109375,
"mean_norm": 43.40445899963379,
"mean_norm/layer0": 43.40445899963379,
"multicode_k": 1,
"output_norm": 18.696380834579458,
"output_norm/layer0": 18.696380834579458,
"step": 7000
},
{
"epoch": 1.22,
"eval_MSE/layer0": 627.9790743019787,
"eval_accuracy": 0.5048263717749389,
"eval_dead_code_fraction/layer0": 0.1819,
"eval_input_norm/layer0": 31.998524618592334,
"eval_loss": 2.1696202754974365,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.705300997223095,
"eval_runtime": 159.1692,
"eval_samples_per_second": 29.045,
"eval_steps_per_second": 1.816,
"step": 7000
},
{
"MSE": 627.3165437825519,
"MSE/layer0": 627.3165437825519,
"dead_code_fraction": 0.1822,
"dead_code_fraction/layer0": 0.1822,
"epoch": 1.23,
"input_norm": 31.99852600097656,
"input_norm/layer0": 31.99852600097656,
"learning_rate": 0.0005,
"loss": 2.1483,
"max_norm": 70.54450988769531,
"max_norm/layer0": 70.54450988769531,
"mean_norm": 43.442848205566406,
"mean_norm/layer0": 43.442848205566406,
"multicode_k": 1,
"output_norm": 18.700957148869843,
"output_norm/layer0": 18.700957148869843,
"step": 7050
},
{
"MSE": 626.7479965209961,
"MSE/layer0": 626.7479965209961,
"dead_code_fraction": 0.1804,
"dead_code_fraction/layer0": 0.1804,
"epoch": 1.23,
"input_norm": 31.998541386922206,
"input_norm/layer0": 31.998541386922206,
"learning_rate": 0.0005,
"loss": 2.1512,
"max_norm": 70.66608428955078,
"max_norm/layer0": 70.66608428955078,
"mean_norm": 43.48159599304199,
"mean_norm/layer0": 43.48159599304199,
"multicode_k": 1,
"output_norm": 18.714396947224948,
"output_norm/layer0": 18.714396947224948,
"step": 7100
},
{
"MSE": 626.4450497436519,
"MSE/layer0": 626.4450497436519,
"dead_code_fraction": 0.1823,
"dead_code_fraction/layer0": 0.1823,
"epoch": 1.24,
"input_norm": 31.998545411427806,
"input_norm/layer0": 31.998545411427806,
"learning_rate": 0.0005,
"loss": 2.1586,
"max_norm": 70.7937240600586,
"max_norm/layer0": 70.7937240600586,
"mean_norm": 43.5198860168457,
"mean_norm/layer0": 43.5198860168457,
"multicode_k": 1,
"output_norm": 18.726943721771242,
"output_norm/layer0": 18.726943721771242,
"step": 7150
},
{
"MSE": 626.1652618408202,
"MSE/layer0": 626.1652618408202,
"dead_code_fraction": 0.1814,
"dead_code_fraction/layer0": 0.1814,
"epoch": 1.24,
"input_norm": 31.998541978200272,
"input_norm/layer0": 31.998541978200272,
"learning_rate": 0.0005,
"loss": 2.1552,
"max_norm": 70.90862274169922,
"max_norm/layer0": 70.90862274169922,
"mean_norm": 43.55833053588867,
"mean_norm/layer0": 43.55833053588867,
"multicode_k": 1,
"output_norm": 18.731371542612706,
"output_norm/layer0": 18.731371542612706,
"step": 7200
},
{
"MSE": 625.2572497558597,
"MSE/layer0": 625.2572497558597,
"dead_code_fraction": 0.1839,
"dead_code_fraction/layer0": 0.1839,
"epoch": 1.25,
"input_norm": 31.998552770614626,
"input_norm/layer0": 31.998552770614626,
"learning_rate": 0.0005,
"loss": 2.1673,
"max_norm": 71.0332260131836,
"max_norm/layer0": 71.0332260131836,
"mean_norm": 43.5967960357666,
"mean_norm/layer0": 43.5967960357666,
"multicode_k": 1,
"output_norm": 18.756609748204536,
"output_norm/layer0": 18.756609748204536,
"step": 7250
},
{
"MSE": 624.7860372924804,
"MSE/layer0": 624.7860372924804,
"dead_code_fraction": 0.1831,
"dead_code_fraction/layer0": 0.1831,
"epoch": 1.25,
"input_norm": 31.998555002212534,
"input_norm/layer0": 31.998555002212534,
"learning_rate": 0.0005,
"loss": 2.1575,
"max_norm": 71.15364837646484,
"max_norm/layer0": 71.15364837646484,
"mean_norm": 43.63525199890137,
"mean_norm/layer0": 43.63525199890137,
"multicode_k": 1,
"output_norm": 18.767410192489628,
"output_norm/layer0": 18.767410192489628,
"step": 7300
},
{
"MSE": 624.7060753377278,
"MSE/layer0": 624.7060753377278,
"dead_code_fraction": 0.18335,
"dead_code_fraction/layer0": 0.18335,
"epoch": 1.26,
"input_norm": 31.99856230099995,
"input_norm/layer0": 31.99856230099995,
"learning_rate": 0.0005,
"loss": 2.1622,
"max_norm": 71.2812271118164,
"max_norm/layer0": 71.2812271118164,
"mean_norm": 43.67383575439453,
"mean_norm/layer0": 43.67383575439453,
"multicode_k": 1,
"output_norm": 18.77556623776755,
"output_norm/layer0": 18.77556623776755,
"step": 7350
},
{
"MSE": 623.9612900797528,
"MSE/layer0": 623.9612900797528,
"dead_code_fraction": 0.1834,
"dead_code_fraction/layer0": 0.1834,
"epoch": 1.26,
"input_norm": 31.998564265569062,
"input_norm/layer0": 31.998564265569062,
"learning_rate": 0.0005,
"loss": 2.1721,
"max_norm": 71.4082260131836,
"max_norm/layer0": 71.4082260131836,
"mean_norm": 43.71280097961426,
"mean_norm/layer0": 43.71280097961426,
"multicode_k": 1,
"output_norm": 18.78839166323344,
"output_norm/layer0": 18.78839166323344,
"step": 7400
},
{
"MSE": 623.9870674641929,
"MSE/layer0": 623.9870674641929,
"dead_code_fraction": 0.18355,
"dead_code_fraction/layer0": 0.18355,
"epoch": 1.27,
"input_norm": 31.998560991287228,
"input_norm/layer0": 31.998560991287228,
"learning_rate": 0.0005,
"loss": 2.1424,
"max_norm": 71.52973937988281,
"max_norm/layer0": 71.52973937988281,
"mean_norm": 43.75117111206055,
"mean_norm/layer0": 43.75117111206055,
"multicode_k": 1,
"output_norm": 18.79942525227863,
"output_norm/layer0": 18.79942525227863,
"step": 7450
},
{
"MSE": 622.7629538981118,
"MSE/layer0": 622.7629538981118,
"dead_code_fraction": 0.1844,
"dead_code_fraction/layer0": 0.1844,
"epoch": 1.27,
"input_norm": 31.998580735524506,
"input_norm/layer0": 31.998580735524506,
"learning_rate": 0.0005,
"loss": 2.187,
"max_norm": 71.64968872070312,
"max_norm/layer0": 71.64968872070312,
"mean_norm": 43.790061950683594,
"mean_norm/layer0": 43.790061950683594,
"multicode_k": 1,
"output_norm": 18.81509483655294,
"output_norm/layer0": 18.81509483655294,
"step": 7500
},
{
"epoch": 1.27,
"eval_MSE/layer0": 622.122652727573,
"eval_accuracy": 0.5062701283839631,
"eval_dead_code_fraction/layer0": 0.18665,
"eval_input_norm/layer0": 31.998566619663464,
"eval_loss": 2.1583967208862305,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.83381110374323,
"eval_runtime": 158.6442,
"eval_samples_per_second": 29.141,
"eval_steps_per_second": 1.822,
"step": 7500
},
{
"MSE": 622.9042826334635,
"MSE/layer0": 622.9042826334635,
"dead_code_fraction": 0.1841,
"dead_code_fraction/layer0": 0.1841,
"epoch": 1.28,
"input_norm": 31.998572101593023,
"input_norm/layer0": 31.998572101593023,
"learning_rate": 0.0005,
"loss": 2.1434,
"max_norm": 71.76019287109375,
"max_norm/layer0": 71.76019287109375,
"mean_norm": 43.828460693359375,
"mean_norm/layer0": 43.828460693359375,
"multicode_k": 1,
"output_norm": 18.82229045232136,
"output_norm/layer0": 18.82229045232136,
"step": 7550
},
{
"MSE": 621.695281575521,
"MSE/layer0": 621.695281575521,
"dead_code_fraction": 0.1854,
"dead_code_fraction/layer0": 0.1854,
"epoch": 1.28,
"input_norm": 31.998584995269773,
"input_norm/layer0": 31.998584995269773,
"learning_rate": 0.0005,
"loss": 2.1712,
"max_norm": 71.87606048583984,
"max_norm/layer0": 71.87606048583984,
"mean_norm": 43.867136001586914,
"mean_norm/layer0": 43.867136001586914,
"multicode_k": 1,
"output_norm": 18.84749958992006,
"output_norm/layer0": 18.84749958992006,
"step": 7600
},
{
"MSE": 622.6274766031902,
"MSE/layer0": 622.6274766031902,
"dead_code_fraction": 0.18355,
"dead_code_fraction/layer0": 0.18355,
"epoch": 1.29,
"input_norm": 31.998571812311802,
"input_norm/layer0": 31.998571812311802,
"learning_rate": 0.0005,
"loss": 2.1412,
"max_norm": 71.98139953613281,
"max_norm/layer0": 71.98139953613281,
"mean_norm": 43.90544891357422,
"mean_norm/layer0": 43.90544891357422,
"multicode_k": 1,
"output_norm": 18.83851943016053,
"output_norm/layer0": 18.83851943016053,
"step": 7650
},
{
"MSE": 621.3046355183919,
"MSE/layer0": 621.3046355183919,
"dead_code_fraction": 0.18495,
"dead_code_fraction/layer0": 0.18495,
"epoch": 1.29,
"input_norm": 31.998585087458295,
"input_norm/layer0": 31.998585087458295,
"learning_rate": 0.0005,
"loss": 2.1711,
"max_norm": 72.08447265625,
"max_norm/layer0": 72.08447265625,
"mean_norm": 43.94407653808594,
"mean_norm/layer0": 43.94407653808594,
"multicode_k": 1,
"output_norm": 18.86037411053976,
"output_norm/layer0": 18.86037411053976,
"step": 7700
},
{
"MSE": 620.5873645019533,
"MSE/layer0": 620.5873645019533,
"dead_code_fraction": 0.18485,
"dead_code_fraction/layer0": 0.18485,
"epoch": 1.3,
"input_norm": 31.998606751759848,
"input_norm/layer0": 31.998606751759848,
"learning_rate": 0.0005,
"loss": 2.2069,
"max_norm": 72.18034362792969,
"max_norm/layer0": 72.18034362792969,
"mean_norm": 43.9833927154541,
"mean_norm/layer0": 43.9833927154541,
"multicode_k": 1,
"output_norm": 18.87507179578146,
"output_norm/layer0": 18.87507179578146,
"step": 7750
},
{
"MSE": 621.2272378540041,
"MSE/layer0": 621.2272378540041,
"dead_code_fraction": 0.18385,
"dead_code_fraction/layer0": 0.18385,
"epoch": 1.3,
"input_norm": 31.998583949406935,
"input_norm/layer0": 31.998583949406935,
"learning_rate": 0.0005,
"loss": 2.1217,
"max_norm": 72.27928924560547,
"max_norm/layer0": 72.27928924560547,
"mean_norm": 44.021806716918945,
"mean_norm/layer0": 44.021806716918945,
"multicode_k": 1,
"output_norm": 18.877027104695642,
"output_norm/layer0": 18.877027104695642,
"step": 7800
},
{
"MSE": 620.067134602865,
"MSE/layer0": 620.067134602865,
"dead_code_fraction": 0.18535,
"dead_code_fraction/layer0": 0.18535,
"epoch": 1.31,
"input_norm": 31.998594888051343,
"input_norm/layer0": 31.998594888051343,
"learning_rate": 0.0005,
"loss": 2.1753,
"max_norm": 72.39033508300781,
"max_norm/layer0": 72.39033508300781,
"mean_norm": 44.060611724853516,
"mean_norm/layer0": 44.060611724853516,
"multicode_k": 1,
"output_norm": 18.89820697466533,
"output_norm/layer0": 18.89820697466533,
"step": 7850
},
{
"MSE": 620.6704218546549,
"MSE/layer0": 620.6704218546549,
"dead_code_fraction": 0.18735,
"dead_code_fraction/layer0": 0.18735,
"epoch": 1.31,
"input_norm": 31.998597246805822,
"input_norm/layer0": 31.998597246805822,
"learning_rate": 0.0005,
"loss": 2.1778,
"max_norm": 72.4916000366211,
"max_norm/layer0": 72.4916000366211,
"mean_norm": 44.09913635253906,
"mean_norm/layer0": 44.09913635253906,
"multicode_k": 1,
"output_norm": 18.890051161448145,
"output_norm/layer0": 18.890051161448145,
"step": 7900
},
{
"MSE": 619.2155123901367,
"MSE/layer0": 619.2155123901367,
"dead_code_fraction": 0.1863,
"dead_code_fraction/layer0": 0.1863,
"epoch": 1.32,
"input_norm": 31.99860541343688,
"input_norm/layer0": 31.99860541343688,
"learning_rate": 0.0005,
"loss": 2.1684,
"max_norm": 72.59037017822266,
"max_norm/layer0": 72.59037017822266,
"mean_norm": 44.13744926452637,
"mean_norm/layer0": 44.13744926452637,
"multicode_k": 1,
"output_norm": 18.920912733078,
"output_norm/layer0": 18.920912733078,
"step": 7950
},
{
"MSE": 618.8985408528646,
"MSE/layer0": 618.8985408528646,
"dead_code_fraction": 0.1867,
"dead_code_fraction/layer0": 0.1867,
"epoch": 1.32,
"input_norm": 31.998596220016488,
"input_norm/layer0": 31.998596220016488,
"learning_rate": 0.0005,
"loss": 2.1302,
"max_norm": 72.69281768798828,
"max_norm/layer0": 72.69281768798828,
"mean_norm": 44.176042556762695,
"mean_norm/layer0": 44.176042556762695,
"multicode_k": 1,
"output_norm": 18.93559975624085,
"output_norm/layer0": 18.93559975624085,
"step": 8000
},
{
"epoch": 1.32,
"eval_MSE/layer0": 617.7161538934592,
"eval_accuracy": 0.5071360017457022,
"eval_dead_code_fraction/layer0": 0.18755,
"eval_input_norm/layer0": 31.99860155017712,
"eval_loss": 2.150786876678467,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 18.949325338731256,
"eval_runtime": 158.4669,
"eval_samples_per_second": 29.173,
"eval_steps_per_second": 1.824,
"step": 8000
},
{
"MSE": 619.1937561035155,
"MSE/layer0": 619.1937561035155,
"dead_code_fraction": 0.18685,
"dead_code_fraction/layer0": 0.18685,
"epoch": 1.33,
"input_norm": 31.998596970240285,
"input_norm/layer0": 31.998596970240285,
"learning_rate": 0.0005,
"loss": 2.1279,
"max_norm": 72.79032135009766,
"max_norm/layer0": 72.79032135009766,
"mean_norm": 44.21445846557617,
"mean_norm/layer0": 44.21445846557617,
"multicode_k": 1,
"output_norm": 18.93686810175578,
"output_norm/layer0": 18.93686810175578,
"step": 8050
},
{
"MSE": 619.539402567546,
"MSE/layer0": 619.539402567546,
"dead_code_fraction": 0.18665,
"dead_code_fraction/layer0": 0.18665,
"epoch": 1.33,
"input_norm": 31.998598492940268,
"input_norm/layer0": 31.998598492940268,
"learning_rate": 0.0005,
"loss": 2.1113,
"max_norm": 72.88322448730469,
"max_norm/layer0": 72.88322448730469,
"mean_norm": 44.251609802246094,
"mean_norm/layer0": 44.251609802246094,
"multicode_k": 1,
"output_norm": 18.939144274393726,
"output_norm/layer0": 18.939144274393726,
"step": 8100
},
{
"MSE": 617.7248203531905,
"MSE/layer0": 617.7248203531905,
"dead_code_fraction": 0.18555,
"dead_code_fraction/layer0": 0.18555,
"epoch": 1.34,
"input_norm": 31.99861437161764,
"input_norm/layer0": 31.99861437161764,
"learning_rate": 0.0005,
"loss": 2.1592,
"max_norm": 72.97504425048828,
"max_norm/layer0": 72.97504425048828,
"mean_norm": 44.289913177490234,
"mean_norm/layer0": 44.289913177490234,
"multicode_k": 1,
"output_norm": 18.963457323710102,
"output_norm/layer0": 18.963457323710102,
"step": 8150
},
{
"MSE": 617.1626446533202,
"MSE/layer0": 617.1626446533202,
"dead_code_fraction": 0.1856,
"dead_code_fraction/layer0": 0.1856,
"epoch": 1.34,
"input_norm": 31.998610553741443,
"input_norm/layer0": 31.998610553741443,
"learning_rate": 0.0005,
"loss": 2.1339,
"max_norm": 73.06546020507812,
"max_norm/layer0": 73.06546020507812,
"mean_norm": 44.32819747924805,
"mean_norm/layer0": 44.32819747924805,
"multicode_k": 1,
"output_norm": 18.980771627426144,
"output_norm/layer0": 18.980771627426144,
"step": 8200
},
{
"MSE": 616.5359758504233,
"MSE/layer0": 616.5359758504233,
"dead_code_fraction": 0.18785,
"dead_code_fraction/layer0": 0.18785,
"epoch": 1.35,
"input_norm": 31.99861484845479,
"input_norm/layer0": 31.99861484845479,
"learning_rate": 0.0005,
"loss": 2.1287,
"max_norm": 73.1684341430664,
"max_norm/layer0": 73.1684341430664,
"mean_norm": 44.36627197265625,
"mean_norm/layer0": 44.36627197265625,
"multicode_k": 1,
"output_norm": 19.002285525004055,
"output_norm/layer0": 19.002285525004055,
"step": 8250
},
{
"MSE": 616.9324924723311,
"MSE/layer0": 616.9324924723311,
"dead_code_fraction": 0.18715,
"dead_code_fraction/layer0": 0.18715,
"epoch": 1.35,
"input_norm": 31.998625895182286,
"input_norm/layer0": 31.998625895182286,
"learning_rate": 0.0005,
"loss": 2.1575,
"max_norm": 73.259521484375,
"max_norm/layer0": 73.259521484375,
"mean_norm": 44.40446090698242,
"mean_norm/layer0": 44.40446090698242,
"multicode_k": 1,
"output_norm": 18.992992315292362,
"output_norm/layer0": 18.992992315292362,
"step": 8300
},
{
"MSE": 616.2650039672851,
"MSE/layer0": 616.2650039672851,
"dead_code_fraction": 0.18655,
"dead_code_fraction/layer0": 0.18655,
"epoch": 1.36,
"input_norm": 31.99862662315369,
"input_norm/layer0": 31.99862662315369,
"learning_rate": 0.0005,
"loss": 2.139,
"max_norm": 73.36270141601562,
"max_norm/layer0": 73.36270141601562,
"mean_norm": 44.44254493713379,
"mean_norm/layer0": 44.44254493713379,
"multicode_k": 1,
"output_norm": 19.00672375679015,
"output_norm/layer0": 19.00672375679015,
"step": 8350
},
{
"MSE": 615.5159185791019,
"MSE/layer0": 615.5159185791019,
"dead_code_fraction": 0.18685,
"dead_code_fraction/layer0": 0.18685,
"epoch": 1.36,
"input_norm": 31.998618663152055,
"input_norm/layer0": 31.998618663152055,
"learning_rate": 0.0005,
"loss": 2.1207,
"max_norm": 73.45561981201172,
"max_norm/layer0": 73.45561981201172,
"mean_norm": 44.48077201843262,
"mean_norm/layer0": 44.48077201843262,
"multicode_k": 1,
"output_norm": 19.030768597920748,
"output_norm/layer0": 19.030768597920748,
"step": 8400
},
{
"MSE": 615.7112675984704,
"MSE/layer0": 615.7112675984704,
"dead_code_fraction": 0.18675,
"dead_code_fraction/layer0": 0.18675,
"epoch": 1.37,
"input_norm": 31.99863114674885,
"input_norm/layer0": 31.99863114674885,
"learning_rate": 0.0005,
"loss": 2.1394,
"max_norm": 73.54468536376953,
"max_norm/layer0": 73.54468536376953,
"mean_norm": 44.5194206237793,
"mean_norm/layer0": 44.5194206237793,
"multicode_k": 1,
"output_norm": 19.03362373669942,
"output_norm/layer0": 19.03362373669942,
"step": 8450
},
{
"MSE": 615.0864140828453,
"MSE/layer0": 615.0864140828453,
"dead_code_fraction": 0.1866,
"dead_code_fraction/layer0": 0.1866,
"epoch": 1.37,
"input_norm": 31.9986399269104,
"input_norm/layer0": 31.9986399269104,
"learning_rate": 0.0005,
"loss": 2.1471,
"max_norm": 73.64068603515625,
"max_norm/layer0": 73.64068603515625,
"mean_norm": 44.55780220031738,
"mean_norm/layer0": 44.55780220031738,
"multicode_k": 1,
"output_norm": 19.04360143979391,
"output_norm/layer0": 19.04360143979391,
"step": 8500
},
{
"epoch": 1.37,
"eval_MSE/layer0": 613.7248421548741,
"eval_accuracy": 0.5081896395873495,
"eval_dead_code_fraction/layer0": 0.1885,
"eval_input_norm/layer0": 31.998632826486393,
"eval_loss": 2.1443779468536377,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 19.066619998676906,
"eval_runtime": 158.5923,
"eval_samples_per_second": 29.15,
"eval_steps_per_second": 1.822,
"step": 8500
},
{
"MSE": 614.1585445149744,
"MSE/layer0": 614.1585445149744,
"dead_code_fraction": 0.18715,
"dead_code_fraction/layer0": 0.18715,
"epoch": 1.38,
"input_norm": 31.99863867441813,
"input_norm/layer0": 31.99863867441813,
"learning_rate": 0.0005,
"loss": 2.1506,
"max_norm": 73.73002624511719,
"max_norm/layer0": 73.73002624511719,
"mean_norm": 44.597002029418945,
"mean_norm/layer0": 44.597002029418945,
"multicode_k": 1,
"output_norm": 19.06499721844991,
"output_norm/layer0": 19.06499721844991,
"step": 8550
},
{
"MSE": 614.256539204915,
"MSE/layer0": 614.256539204915,
"dead_code_fraction": 0.1879,
"dead_code_fraction/layer0": 0.1879,
"epoch": 1.38,
"input_norm": 31.998648173014317,
"input_norm/layer0": 31.998648173014317,
"learning_rate": 0.0005,
"loss": 2.1643,
"max_norm": 73.80333709716797,
"max_norm/layer0": 73.80333709716797,
"mean_norm": 44.63543891906738,
"mean_norm/layer0": 44.63543891906738,
"multicode_k": 1,
"output_norm": 19.078293412526467,
"output_norm/layer0": 19.078293412526467,
"step": 8600
},
{
"MSE": 613.3546946207681,
"MSE/layer0": 613.3546946207681,
"dead_code_fraction": 0.1879,
"dead_code_fraction/layer0": 0.1879,
"epoch": 1.39,
"input_norm": 31.99864864667257,
"input_norm/layer0": 31.99864864667257,
"learning_rate": 0.0005,
"loss": 2.1535,
"max_norm": 73.89517974853516,
"max_norm/layer0": 73.89517974853516,
"mean_norm": 44.674211502075195,
"mean_norm/layer0": 44.674211502075195,
"multicode_k": 1,
"output_norm": 19.09559381167095,
"output_norm/layer0": 19.09559381167095,
"step": 8650
},
{
"MSE": 613.6053087361654,
"MSE/layer0": 613.6053087361654,
"dead_code_fraction": 0.18645,
"dead_code_fraction/layer0": 0.18645,
"epoch": 1.39,
"input_norm": 31.998652140299477,
"input_norm/layer0": 31.998652140299477,
"learning_rate": 0.0005,
"loss": 2.137,
"max_norm": 73.9770736694336,
"max_norm/layer0": 73.9770736694336,
"mean_norm": 44.71265983581543,
"mean_norm/layer0": 44.71265983581543,
"multicode_k": 1,
"output_norm": 19.098618446985878,
"output_norm/layer0": 19.098618446985878,
"step": 8700
},
{
"MSE": 613.292506408691,
"MSE/layer0": 613.292506408691,
"dead_code_fraction": 0.1876,
"dead_code_fraction/layer0": 0.1876,
"epoch": 1.4,
"input_norm": 31.998654588063562,
"input_norm/layer0": 31.998654588063562,
"learning_rate": 0.0005,
"loss": 2.1482,
"max_norm": 74.05269622802734,
"max_norm/layer0": 74.05269622802734,
"mean_norm": 44.750946044921875,
"mean_norm/layer0": 44.750946044921875,
"multicode_k": 1,
"output_norm": 19.104494848251342,
"output_norm/layer0": 19.104494848251342,
"step": 8750
},
{
"MSE": 613.8824895222986,
"MSE/layer0": 613.8824895222986,
"dead_code_fraction": 0.1868,
"dead_code_fraction/layer0": 0.1868,
"epoch": 1.4,
"input_norm": 31.998655049006146,
"input_norm/layer0": 31.998655049006146,
"learning_rate": 0.0005,
"loss": 2.1331,
"max_norm": 74.12651824951172,
"max_norm/layer0": 74.12651824951172,
"mean_norm": 44.7886848449707,
"mean_norm/layer0": 44.7886848449707,
"multicode_k": 1,
"output_norm": 19.110120385487882,
"output_norm/layer0": 19.110120385487882,
"step": 8800
},
{
"MSE": 613.8568901570636,
"MSE/layer0": 613.8568901570636,
"dead_code_fraction": 0.18675,
"dead_code_fraction/layer0": 0.18675,
"epoch": 1.41,
"input_norm": 31.99864878336588,
"input_norm/layer0": 31.99864878336588,
"learning_rate": 0.0005,
"loss": 2.1038,
"max_norm": 74.20288848876953,
"max_norm/layer0": 74.20288848876953,
"mean_norm": 44.82563400268555,
"mean_norm/layer0": 44.82563400268555,
"multicode_k": 1,
"output_norm": 19.120709832509363,
"output_norm/layer0": 19.120709832509363,
"step": 8850
},
{
"MSE": 612.8203454589843,
"MSE/layer0": 612.8203454589843,
"dead_code_fraction": 0.18635,
"dead_code_fraction/layer0": 0.18635,
"epoch": 1.41,
"input_norm": 31.99866209030152,
"input_norm/layer0": 31.99866209030152,
"learning_rate": 0.0005,
"loss": 2.1619,
"max_norm": 74.27029418945312,
"max_norm/layer0": 74.27029418945312,
"mean_norm": 44.863847732543945,
"mean_norm/layer0": 44.863847732543945,
"multicode_k": 1,
"output_norm": 19.13362557093303,
"output_norm/layer0": 19.13362557093303,
"step": 8900
},
{
"MSE": 612.7508836873369,
"MSE/layer0": 612.7508836873369,
"dead_code_fraction": 0.1865,
"dead_code_fraction/layer0": 0.1865,
"epoch": 1.42,
"input_norm": 31.998662964502977,
"input_norm/layer0": 31.998662964502977,
"learning_rate": 0.0005,
"loss": 2.1274,
"max_norm": 74.35165405273438,
"max_norm/layer0": 74.35165405273438,
"mean_norm": 44.90276908874512,
"mean_norm/layer0": 44.90276908874512,
"multicode_k": 1,
"output_norm": 19.13368027687074,
"output_norm/layer0": 19.13368027687074,
"step": 8950
},
{
"MSE": 611.3088948567707,
"MSE/layer0": 611.3088948567707,
"dead_code_fraction": 0.18625,
"dead_code_fraction/layer0": 0.18625,
"epoch": 1.42,
"input_norm": 31.998670199712116,
"input_norm/layer0": 31.998670199712116,
"learning_rate": 0.0005,
"loss": 2.1556,
"max_norm": 74.43575286865234,
"max_norm/layer0": 74.43575286865234,
"mean_norm": 44.94179916381836,
"mean_norm/layer0": 44.94179916381836,
"multicode_k": 1,
"output_norm": 19.165478760401413,
"output_norm/layer0": 19.165478760401413,
"step": 9000
},
{
"epoch": 1.42,
"eval_MSE/layer0": 610.3757424029645,
"eval_accuracy": 0.5087341142897861,
"eval_dead_code_fraction/layer0": 0.18805,
"eval_input_norm/layer0": 31.998659288421646,
"eval_loss": 2.139230489730835,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 19.181722183648382,
"eval_runtime": 158.0526,
"eval_samples_per_second": 29.25,
"eval_steps_per_second": 1.829,
"step": 9000
},
{
"MSE": 611.2356985473632,
"MSE/layer0": 611.2356985473632,
"dead_code_fraction": 0.1879,
"dead_code_fraction/layer0": 0.1879,
"epoch": 1.43,
"input_norm": 31.998666836420703,
"input_norm/layer0": 31.998666836420703,
"learning_rate": 0.0005,
"loss": 2.1388,
"max_norm": 74.51050567626953,
"max_norm/layer0": 74.51050567626953,
"mean_norm": 44.98063850402832,
"mean_norm/layer0": 44.98063850402832,
"multicode_k": 1,
"output_norm": 19.177389281590777,
"output_norm/layer0": 19.177389281590777,
"step": 9050
},
{
"MSE": 610.8344569905598,
"MSE/layer0": 610.8344569905598,
"dead_code_fraction": 0.18865,
"dead_code_fraction/layer0": 0.18865,
"epoch": 1.43,
"input_norm": 31.99867141723631,
"input_norm/layer0": 31.99867141723631,
"learning_rate": 0.0005,
"loss": 2.1328,
"max_norm": 74.59440612792969,
"max_norm/layer0": 74.59440612792969,
"mean_norm": 45.01910400390625,
"mean_norm/layer0": 45.01910400390625,
"multicode_k": 1,
"output_norm": 19.185275354385375,
"output_norm/layer0": 19.185275354385375,
"step": 9100
},
{
"MSE": 610.7402758789062,
"MSE/layer0": 610.7402758789062,
"dead_code_fraction": 0.1871,
"dead_code_fraction/layer0": 0.1871,
"epoch": 1.44,
"input_norm": 31.99866997400921,
"input_norm/layer0": 31.99866997400921,
"learning_rate": 0.0005,
"loss": 2.117,
"max_norm": 74.67122650146484,
"max_norm/layer0": 74.67122650146484,
"mean_norm": 45.05727577209473,
"mean_norm/layer0": 45.05727577209473,
"multicode_k": 1,
"output_norm": 19.190109596252437,
"output_norm/layer0": 19.190109596252437,
"step": 9150
},
{
"MSE": 610.1339531453451,
"MSE/layer0": 610.1339531453451,
"dead_code_fraction": 0.18745,
"dead_code_fraction/layer0": 0.18745,
"epoch": 1.44,
"input_norm": 31.998679358164473,
"input_norm/layer0": 31.998679358164473,
"learning_rate": 0.0005,
"loss": 2.1459,
"max_norm": 74.7430419921875,
"max_norm/layer0": 74.7430419921875,
"mean_norm": 45.095571517944336,
"mean_norm/layer0": 45.095571517944336,
"multicode_k": 1,
"output_norm": 19.203376553853335,
"output_norm/layer0": 19.203376553853335,
"step": 9200
},
{
"MSE": 609.6957601928709,
"MSE/layer0": 609.6957601928709,
"dead_code_fraction": 0.1878,
"dead_code_fraction/layer0": 0.1878,
"epoch": 1.45,
"input_norm": 31.99868172009785,
"input_norm/layer0": 31.99868172009785,
"learning_rate": 0.0005,
"loss": 2.142,
"max_norm": 74.8177490234375,
"max_norm/layer0": 74.8177490234375,
"mean_norm": 45.133853912353516,
"mean_norm/layer0": 45.133853912353516,
"multicode_k": 1,
"output_norm": 19.22210531552632,
"output_norm/layer0": 19.22210531552632,
"step": 9250
},
{
"MSE": 609.5997785441082,
"MSE/layer0": 609.5997785441082,
"dead_code_fraction": 0.18805,
"dead_code_fraction/layer0": 0.18805,
"epoch": 1.45,
"input_norm": 31.998693205515544,
"input_norm/layer0": 31.998693205515544,
"learning_rate": 0.0005,
"loss": 2.18,
"max_norm": 74.87744140625,
"max_norm/layer0": 74.87744140625,
"mean_norm": 45.172555923461914,
"mean_norm/layer0": 45.172555923461914,
"multicode_k": 1,
"output_norm": 19.226630802154542,
"output_norm/layer0": 19.226630802154542,
"step": 9300
},
{
"MSE": 609.8342389933271,
"MSE/layer0": 609.8342389933271,
"dead_code_fraction": 0.18735,
"dead_code_fraction/layer0": 0.18735,
"epoch": 1.46,
"input_norm": 31.998687505722053,
"input_norm/layer0": 31.998687505722053,
"learning_rate": 0.0005,
"loss": 2.1164,
"max_norm": 74.94609069824219,
"max_norm/layer0": 74.94609069824219,
"mean_norm": 45.21059799194336,
"mean_norm/layer0": 45.21059799194336,
"multicode_k": 1,
"output_norm": 19.234882882436114,
"output_norm/layer0": 19.234882882436114,
"step": 9350
},
{
"MSE": 609.2034523518882,
"MSE/layer0": 609.2034523518882,
"dead_code_fraction": 0.1869,
"dead_code_fraction/layer0": 0.1869,
"epoch": 1.46,
"input_norm": 31.99869050979616,
"input_norm/layer0": 31.99869050979616,
"learning_rate": 0.0005,
"loss": 2.1316,
"max_norm": 75.01142883300781,
"max_norm/layer0": 75.01142883300781,
"mean_norm": 45.248979568481445,
"mean_norm/layer0": 45.248979568481445,
"multicode_k": 1,
"output_norm": 19.247848326365144,
"output_norm/layer0": 19.247848326365144,
"step": 9400
},
{
"MSE": 609.0324313354497,
"MSE/layer0": 609.0324313354497,
"dead_code_fraction": 0.18745,
"dead_code_fraction/layer0": 0.18745,
"epoch": 1.47,
"input_norm": 31.99869132041931,
"input_norm/layer0": 31.99869132041931,
"learning_rate": 0.0005,
"loss": 2.1214,
"max_norm": 75.07112121582031,
"max_norm/layer0": 75.07112121582031,
"mean_norm": 45.287214279174805,
"mean_norm/layer0": 45.287214279174805,
"multicode_k": 1,
"output_norm": 19.25519768079122,
"output_norm/layer0": 19.25519768079122,
"step": 9450
},
{
"MSE": 607.8594933064783,
"MSE/layer0": 607.8594933064783,
"dead_code_fraction": 0.18835,
"dead_code_fraction/layer0": 0.18835,
"epoch": 1.47,
"input_norm": 31.998687744140625,
"input_norm/layer0": 31.998687744140625,
"learning_rate": 0.0005,
"loss": 2.1067,
"max_norm": 75.15766143798828,
"max_norm/layer0": 75.15766143798828,
"mean_norm": 45.32560920715332,
"mean_norm/layer0": 45.32560920715332,
"multicode_k": 1,
"output_norm": 19.27704188664754,
"output_norm/layer0": 19.27704188664754,
"step": 9500
},
{
"epoch": 1.47,
"eval_MSE/layer0": 608.6866096036146,
"eval_accuracy": 0.5090880757079915,
"eval_dead_code_fraction/layer0": 0.18755,
"eval_input_norm/layer0": 31.998685899710146,
"eval_loss": 2.1350600719451904,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 19.283631281241068,
"eval_runtime": 158.1797,
"eval_samples_per_second": 29.226,
"eval_steps_per_second": 1.827,
"step": 9500
},
{
"MSE": 607.5302533983886,
"MSE/layer0": 607.5302533983886,
"dead_code_fraction": 0.1872,
"dead_code_fraction/layer0": 0.1872,
"epoch": 1.48,
"input_norm": 31.99869025141972,
"input_norm/layer0": 31.99869025141972,
"learning_rate": 0.0005,
"loss": 2.1075,
"max_norm": 75.2263412475586,
"max_norm/layer0": 75.2263412475586,
"mean_norm": 45.363752365112305,
"mean_norm/layer0": 45.363752365112305,
"multicode_k": 1,
"output_norm": 19.2927733112995,
"output_norm/layer0": 19.2927733112995,
"step": 9550
},
{
"MSE": 608.902215973978,
"MSE/layer0": 608.902215973978,
"dead_code_fraction": 0.187,
"dead_code_fraction/layer0": 0.187,
"epoch": 2.0,
"input_norm": 31.998686492629858,
"input_norm/layer0": 31.998686492629858,
"learning_rate": 0.0005,
"loss": 2.1013,
"max_norm": 75.294677734375,
"max_norm/layer0": 75.294677734375,
"mean_norm": 45.40024948120117,
"mean_norm/layer0": 45.40024948120117,
"multicode_k": 1,
"output_norm": 19.268582361188244,
"output_norm/layer0": 19.268582361188244,
"step": 9600
},
{
"MSE": 606.3796120198567,
"MSE/layer0": 606.3796120198567,
"dead_code_fraction": 0.18715,
"dead_code_fraction/layer0": 0.18715,
"epoch": 2.01,
"input_norm": 31.998710851669312,
"input_norm/layer0": 31.998710851669312,
"learning_rate": 0.0005,
"loss": 2.17,
"max_norm": 75.35186004638672,
"max_norm/layer0": 75.35186004638672,
"mean_norm": 45.4382266998291,
"mean_norm/layer0": 45.4382266998291,
"multicode_k": 1,
"output_norm": 19.314183537165327,
"output_norm/layer0": 19.314183537165327,
"step": 9650
},
{
"MSE": 606.9239878336591,
"MSE/layer0": 606.9239878336591,
"dead_code_fraction": 0.1877,
"dead_code_fraction/layer0": 0.1877,
"epoch": 2.01,
"input_norm": 31.99869126637776,
"input_norm/layer0": 31.99869126637776,
"learning_rate": 0.0005,
"loss": 2.0661,
"max_norm": 75.44601440429688,
"max_norm/layer0": 75.44601440429688,
"mean_norm": 45.47653579711914,
"mean_norm/layer0": 45.47653579711914,
"multicode_k": 1,
"output_norm": 19.313949975967407,
"output_norm/layer0": 19.313949975967407,
"step": 9700
},
{
"MSE": 606.1468785603844,
"MSE/layer0": 606.1468785603844,
"dead_code_fraction": 0.18755,
"dead_code_fraction/layer0": 0.18755,
"epoch": 2.02,
"input_norm": 31.998706903457652,
"input_norm/layer0": 31.998706903457652,
"learning_rate": 0.0005,
"loss": 2.1325,
"max_norm": 75.6237564086914,
"max_norm/layer0": 75.6237564086914,
"mean_norm": 45.51473808288574,
"mean_norm/layer0": 45.51473808288574,
"multicode_k": 1,
"output_norm": 19.331538470586143,
"output_norm/layer0": 19.331538470586143,
"step": 9750
},
{
"MSE": 606.2908910115561,
"MSE/layer0": 606.2908910115561,
"dead_code_fraction": 0.18715,
"dead_code_fraction/layer0": 0.18715,
"epoch": 2.02,
"input_norm": 31.998702777226768,
"input_norm/layer0": 31.998702777226768,
"learning_rate": 0.0005,
"loss": 2.0999,
"max_norm": 75.77623748779297,
"max_norm/layer0": 75.77623748779297,
"mean_norm": 45.55307388305664,
"mean_norm/layer0": 45.55307388305664,
"multicode_k": 1,
"output_norm": 19.340178826649982,
"output_norm/layer0": 19.340178826649982,
"step": 9800
},
{
"MSE": 605.7215723673501,
"MSE/layer0": 605.7215723673501,
"dead_code_fraction": 0.18635,
"dead_code_fraction/layer0": 0.18635,
"epoch": 2.03,
"input_norm": 31.998708073298122,
"input_norm/layer0": 31.998708073298122,
"learning_rate": 0.0005,
"loss": 2.1015,
"max_norm": 75.92095184326172,
"max_norm/layer0": 75.92095184326172,
"mean_norm": 45.591548919677734,
"mean_norm/layer0": 45.591548919677734,
"multicode_k": 1,
"output_norm": 19.351260058085124,
"output_norm/layer0": 19.351260058085124,
"step": 9850
},
{
"MSE": 605.7307819620769,
"MSE/layer0": 605.7307819620769,
"dead_code_fraction": 0.1879,
"dead_code_fraction/layer0": 0.1879,
"epoch": 2.03,
"input_norm": 31.99871432304383,
"input_norm/layer0": 31.99871432304383,
"learning_rate": 0.0005,
"loss": 2.1079,
"max_norm": 76.06104278564453,
"max_norm/layer0": 76.06104278564453,
"mean_norm": 45.62945747375488,
"mean_norm/layer0": 45.62945747375488,
"multicode_k": 1,
"output_norm": 19.36078415234882,
"output_norm/layer0": 19.36078415234882,
"step": 9900
},
{
"MSE": 605.7736006673174,
"MSE/layer0": 605.7736006673174,
"dead_code_fraction": 0.1873,
"dead_code_fraction/layer0": 0.1873,
"epoch": 2.04,
"input_norm": 31.99871180534363,
"input_norm/layer0": 31.99871180534363,
"learning_rate": 0.0005,
"loss": 2.102,
"max_norm": 76.22486877441406,
"max_norm/layer0": 76.22486877441406,
"mean_norm": 45.66733360290527,
"mean_norm/layer0": 45.66733360290527,
"multicode_k": 1,
"output_norm": 19.36815209388733,
"output_norm/layer0": 19.36815209388733,
"step": 9950
},
{
"MSE": 604.9809751383466,
"MSE/layer0": 604.9809751383466,
"dead_code_fraction": 0.1872,
"dead_code_fraction/layer0": 0.1872,
"epoch": 2.04,
"input_norm": 31.998728539148978,
"input_norm/layer0": 31.998728539148978,
"learning_rate": 0.0005,
"loss": 2.1536,
"max_norm": 76.40007019042969,
"max_norm/layer0": 76.40007019042969,
"mean_norm": 45.70543670654297,
"mean_norm/layer0": 45.70543670654297,
"multicode_k": 1,
"output_norm": 19.38911464373271,
"output_norm/layer0": 19.38911464373271,
"step": 10000
},
{
"epoch": 2.04,
"eval_MSE/layer0": 604.5096733395267,
"eval_accuracy": 0.5091345939349958,
"eval_dead_code_fraction/layer0": 0.18795,
"eval_input_norm/layer0": 31.99872850438308,
"eval_loss": 2.132894992828369,
"eval_multicode_k": 1,
"eval_output_norm/layer0": 19.389702240368152,
"eval_runtime": 158.9177,
"eval_samples_per_second": 29.091,
"eval_steps_per_second": 1.819,
"step": 10000
},
{
"MSE": 0.0,
"MSE/layer0": 0.0,
"dead_code_fraction": 1.0,
"dead_code_fraction/layer0": 1.0,
"epoch": 2.04,
"input_norm": 0.0,
"input_norm/layer0": 0.0,
"max_norm": 76.40007019042969,
"max_norm/layer0": 76.40007019042969,
"mean_norm": 45.70543670654297,
"mean_norm/layer0": 45.70543670654297,
"multicode_k": 1,
"output_norm": 0.0,
"output_norm/layer0": 0.0,
"step": 10000,
"total_flos": 7.43098011353088e+16,
"train_loss": 2.325971780395508,
"train_runtime": 15639.0026,
"train_samples_per_second": 61.385,
"train_steps_per_second": 0.639
}
],
"logging_steps": 50,
"max_steps": 10000,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"total_flos": 7.43098011353088e+16,
"trial_name": null,
"trial_params": null
}