{ "best_metric": 2.132894992828369, "best_model_checkpoint": "/tmp/wandb/run-20240211_061007-slcnkgcr/files/train_output/checkpoint-10000", "epoch": 2.042133333333333, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "MSE": 891.9713033040365, "MSE/layer0": 891.9713033040365, "dead_code_fraction": 0.1506, "dead_code_fraction/layer0": 0.1506, "epoch": 0.0, "input_norm": 31.997233708699547, "input_norm/layer0": 31.997233708699547, "learning_rate": 0.0005, "loss": 8.0845, "max_norm": 34.580135345458984, "max_norm/layer0": 34.580135345458984, "mean_norm": 31.989344596862793, "mean_norm/layer0": 31.989344596862793, "multicode_k": 1, "output_norm": 8.584638833999634, "output_norm/layer0": 8.584638833999634, "step": 1 }, { "MSE": 883.0105907414232, "MSE/layer0": 883.0105907414232, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.01, "input_norm": 31.99778711876902, "input_norm/layer0": 31.99778711876902, "learning_rate": 0.0005, "loss": 4.8444, "max_norm": 34.610191345214844, "max_norm/layer0": 34.610191345214844, "mean_norm": 32.02294731140137, "mean_norm/layer0": 32.02294731140137, "multicode_k": 1, "output_norm": 8.645599765842462, "output_norm/layer0": 8.645599765842462, "step": 50 }, { "MSE": 872.9267329915364, "MSE/layer0": 872.9267329915364, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.01, "input_norm": 31.998572165171304, "input_norm/layer0": 31.998572165171304, "learning_rate": 0.0005, "loss": 3.9294, "max_norm": 34.62763595581055, "max_norm/layer0": 34.62763595581055, "mean_norm": 32.06278419494629, "mean_norm/layer0": 32.06278419494629, "multicode_k": 1, "output_norm": 8.74148860613505, "output_norm/layer0": 8.74148860613505, "step": 100 }, { "MSE": 866.7590488688152, "MSE/layer0": 866.7590488688152, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.01, "input_norm": 31.99865425427754, "input_norm/layer0": 31.99865425427754, "learning_rate": 0.0005, "loss": 3.5413, "max_norm": 34.65019607543945, "max_norm/layer0": 34.65019607543945, "mean_norm": 32.1027717590332, "mean_norm/layer0": 32.1027717590332, "multicode_k": 1, "output_norm": 8.811674615542097, "output_norm/layer0": 8.811674615542097, "step": 150 }, { "MSE": 858.8314244588221, "MSE/layer0": 858.8314244588221, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.02, "input_norm": 31.998634125391646, "input_norm/layer0": 31.998634125391646, "learning_rate": 0.0005, "loss": 3.3381, "max_norm": 34.73014831542969, "max_norm/layer0": 34.73014831542969, "mean_norm": 32.17362403869629, "mean_norm/layer0": 32.17362403869629, "multicode_k": 1, "output_norm": 8.925555121103923, "output_norm/layer0": 8.925555121103923, "step": 200 }, { "MSE": 849.6408699544276, "MSE/layer0": 849.6408699544276, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.03, "input_norm": 31.9986141427358, "input_norm/layer0": 31.9986141427358, "learning_rate": 0.0005, "loss": 3.2486, "max_norm": 34.8281364440918, "max_norm/layer0": 34.8281364440918, "mean_norm": 32.26718330383301, "mean_norm/layer0": 32.26718330383301, "multicode_k": 1, "output_norm": 9.101092262268068, "output_norm/layer0": 9.101092262268068, "step": 250 }, { "MSE": 841.0051658121741, "MSE/layer0": 841.0051658121741, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.03, "input_norm": 31.99862952232361, "input_norm/layer0": 31.99862952232361, "learning_rate": 0.0005, "loss": 3.1503, "max_norm": 34.946006774902344, "max_norm/layer0": 34.946006774902344, "mean_norm": 32.361915588378906, "mean_norm/layer0": 32.361915588378906, "multicode_k": 1, "output_norm": 9.305952178637185, "output_norm/layer0": 9.305952178637185, "step": 300 }, { "MSE": 833.1103855387371, "MSE/layer0": 833.1103855387371, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.04, "input_norm": 31.998617506027223, "input_norm/layer0": 31.998617506027223, "learning_rate": 0.0005, "loss": 3.0966, "max_norm": 35.09696578979492, "max_norm/layer0": 35.09696578979492, "mean_norm": 32.463951110839844, "mean_norm/layer0": 32.463951110839844, "multicode_k": 1, "output_norm": 9.513547644615176, "output_norm/layer0": 9.513547644615176, "step": 350 }, { "MSE": 824.8635622151694, "MSE/layer0": 824.8635622151694, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.04, "input_norm": 31.998617092768363, "input_norm/layer0": 31.998617092768363, "learning_rate": 0.0005, "loss": 3.0998, "max_norm": 35.28767013549805, "max_norm/layer0": 35.28767013549805, "mean_norm": 32.571420669555664, "mean_norm/layer0": 32.571420669555664, "multicode_k": 1, "output_norm": 9.74717748324076, "output_norm/layer0": 9.74717748324076, "step": 400 }, { "MSE": 817.218793334961, "MSE/layer0": 817.218793334961, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.04, "input_norm": 31.99862334251403, "input_norm/layer0": 31.99862334251403, "learning_rate": 0.0005, "loss": 3.0603, "max_norm": 35.4771842956543, "max_norm/layer0": 35.4771842956543, "mean_norm": 32.68177795410156, "mean_norm/layer0": 32.68177795410156, "multicode_k": 1, "output_norm": 9.985308513641357, "output_norm/layer0": 9.985308513641357, "step": 450 }, { "MSE": 809.1558084106446, "MSE/layer0": 809.1558084106446, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.05, "input_norm": 31.998615137736, "input_norm/layer0": 31.998615137736, "learning_rate": 0.0005, "loss": 3.0494, "max_norm": 35.6486701965332, "max_norm/layer0": 35.6486701965332, "mean_norm": 32.793779373168945, "mean_norm/layer0": 32.793779373168945, "multicode_k": 1, "output_norm": 10.232081251144415, "output_norm/layer0": 10.232081251144415, "step": 500 }, { "epoch": 0.05, "eval_MSE/layer0": 805.1675846628777, "eval_accuracy": 0.41770872781318447, "eval_dead_code_fraction/layer0": 0.0, "eval_input_norm/layer0": 31.998606410347342, "eval_loss": 2.992654323577881, "eval_multicode_k": 1, "eval_output_norm/layer0": 10.360000263063938, "eval_runtime": 159.8847, "eval_samples_per_second": 28.915, "eval_steps_per_second": 1.808, "step": 500 }, { "MSE": 801.7215725708003, "MSE/layer0": 801.7215725708003, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.06, "input_norm": 31.998598546981817, "input_norm/layer0": 31.998598546981817, "learning_rate": 0.0005, "loss": 2.9547, "max_norm": 35.86976623535156, "max_norm/layer0": 35.86976623535156, "mean_norm": 32.91193962097168, "mean_norm/layer0": 32.91193962097168, "multicode_k": 1, "output_norm": 10.47719025929769, "output_norm/layer0": 10.47719025929769, "step": 550 }, { "MSE": 794.043483174642, "MSE/layer0": 794.043483174642, "dead_code_fraction": 0.0, "dead_code_fraction/layer0": 0.0, "epoch": 0.06, "input_norm": 31.99859639167787, "input_norm/layer0": 31.99859639167787, "learning_rate": 0.0005, "loss": 2.9506, "max_norm": 36.08134078979492, "max_norm/layer0": 36.08134078979492, "mean_norm": 33.03110313415527, "mean_norm/layer0": 33.03110313415527, "multicode_k": 1, "output_norm": 10.729146582285566, "output_norm/layer0": 10.729146582285566, "step": 600 }, { "MSE": 786.3193520100913, "MSE/layer0": 786.3193520100913, "dead_code_fraction": 5e-05, "dead_code_fraction/layer0": 5e-05, "epoch": 0.07, "input_norm": 31.99857716878254, "input_norm/layer0": 31.99857716878254, "learning_rate": 0.0005, "loss": 2.8944, "max_norm": 36.33954620361328, "max_norm/layer0": 36.33954620361328, "mean_norm": 33.15106773376465, "mean_norm/layer0": 33.15106773376465, "multicode_k": 1, "output_norm": 10.987898168563845, "output_norm/layer0": 10.987898168563845, "step": 650 }, { "MSE": 780.0598099772137, "MSE/layer0": 780.0598099772137, "dead_code_fraction": 0.0001, "dead_code_fraction/layer0": 0.0001, "epoch": 0.07, "input_norm": 31.998565645217887, "input_norm/layer0": 31.998565645217887, "learning_rate": 0.0005, "loss": 2.8643, "max_norm": 36.55862808227539, "max_norm/layer0": 36.55862808227539, "mean_norm": 33.269744873046875, "mean_norm/layer0": 33.269744873046875, "multicode_k": 1, "output_norm": 11.218051005999246, "output_norm/layer0": 11.218051005999246, "step": 700 }, { "MSE": 772.4797055053714, "MSE/layer0": 772.4797055053714, "dead_code_fraction": 0.00045, "dead_code_fraction/layer0": 0.00045, "epoch": 0.07, "input_norm": 31.998559678395594, "input_norm/layer0": 31.998559678395594, "learning_rate": 0.0005, "loss": 2.8618, "max_norm": 36.793521881103516, "max_norm/layer0": 36.793521881103516, "mean_norm": 33.39421844482422, "mean_norm/layer0": 33.39421844482422, "multicode_k": 1, "output_norm": 11.470201053619387, "output_norm/layer0": 11.470201053619387, "step": 750 }, { "MSE": 766.037492879232, "MSE/layer0": 766.037492879232, "dead_code_fraction": 0.00055, "dead_code_fraction/layer0": 0.00055, "epoch": 0.08, "input_norm": 31.99854364713033, "input_norm/layer0": 31.99854364713033, "learning_rate": 0.0005, "loss": 2.8403, "max_norm": 37.0079231262207, "max_norm/layer0": 37.0079231262207, "mean_norm": 33.52132034301758, "mean_norm/layer0": 33.52132034301758, "multicode_k": 1, "output_norm": 11.711471532185875, "output_norm/layer0": 11.711471532185875, "step": 800 }, { "MSE": 759.9610600789387, "MSE/layer0": 759.9610600789387, "dead_code_fraction": 0.00135, "dead_code_fraction/layer0": 0.00135, "epoch": 0.09, "input_norm": 31.998529828389472, "input_norm/layer0": 31.998529828389472, "learning_rate": 0.0005, "loss": 2.7453, "max_norm": 37.20747375488281, "max_norm/layer0": 37.20747375488281, "mean_norm": 33.64577674865723, "mean_norm/layer0": 33.64577674865723, "multicode_k": 1, "output_norm": 11.93199801921844, "output_norm/layer0": 11.93199801921844, "step": 850 }, { "MSE": 753.5576912434896, "MSE/layer0": 753.5576912434896, "dead_code_fraction": 0.00205, "dead_code_fraction/layer0": 0.00205, "epoch": 0.09, "input_norm": 31.99852911949157, "input_norm/layer0": 31.99852911949157, "learning_rate": 0.0005, "loss": 2.7975, "max_norm": 37.432743072509766, "max_norm/layer0": 37.432743072509766, "mean_norm": 33.778066635131836, "mean_norm/layer0": 33.778066635131836, "multicode_k": 1, "output_norm": 12.165767738024394, "output_norm/layer0": 12.165767738024394, "step": 900 }, { "MSE": 747.6473927815753, "MSE/layer0": 747.6473927815753, "dead_code_fraction": 0.00335, "dead_code_fraction/layer0": 0.00335, "epoch": 0.1, "input_norm": 31.998517106374106, "input_norm/layer0": 31.998517106374106, "learning_rate": 0.0005, "loss": 2.7378, "max_norm": 37.62055969238281, "max_norm/layer0": 37.62055969238281, "mean_norm": 33.90963554382324, "mean_norm/layer0": 33.90963554382324, "multicode_k": 1, "output_norm": 12.390189347267153, "output_norm/layer0": 12.390189347267153, "step": 950 }, { "MSE": 742.6674826049805, "MSE/layer0": 742.6674826049805, "dead_code_fraction": 0.0048, "dead_code_fraction/layer0": 0.0048, "epoch": 0.1, "input_norm": 31.998499689102182, "input_norm/layer0": 31.998499689102182, "learning_rate": 0.0005, "loss": 2.6986, "max_norm": 37.880615234375, "max_norm/layer0": 37.880615234375, "mean_norm": 34.04428672790527, "mean_norm/layer0": 34.04428672790527, "multicode_k": 1, "output_norm": 12.59642965157827, "output_norm/layer0": 12.59642965157827, "step": 1000 }, { "epoch": 0.1, "eval_MSE/layer0": 739.3243520424373, "eval_accuracy": 0.44721058737930897, "eval_dead_code_fraction/layer0": 0.00845, "eval_input_norm/layer0": 31.998487053973697, "eval_loss": 2.707960367202759, "eval_multicode_k": 1, "eval_output_norm/layer0": 12.71647696584792, "eval_runtime": 157.5908, "eval_samples_per_second": 29.335, "eval_steps_per_second": 1.834, "step": 1000 }, { "MSE": 736.2877898152667, "MSE/layer0": 736.2877898152667, "dead_code_fraction": 0.00735, "dead_code_fraction/layer0": 0.00735, "epoch": 0.1, "input_norm": 31.998484554290766, "input_norm/layer0": 31.998484554290766, "learning_rate": 0.0005, "loss": 2.7222, "max_norm": 38.21133804321289, "max_norm/layer0": 38.21133804321289, "mean_norm": 34.17984199523926, "mean_norm/layer0": 34.17984199523926, "multicode_k": 1, "output_norm": 12.82279133001963, "output_norm/layer0": 12.82279133001963, "step": 1050 }, { "MSE": 731.6754523722336, "MSE/layer0": 731.6754523722336, "dead_code_fraction": 0.01015, "dead_code_fraction/layer0": 0.01015, "epoch": 0.11, "input_norm": 31.998473711013787, "input_norm/layer0": 31.998473711013787, "learning_rate": 0.0005, "loss": 2.652, "max_norm": 38.533973693847656, "max_norm/layer0": 38.533973693847656, "mean_norm": 34.31424903869629, "mean_norm/layer0": 34.31424903869629, "multicode_k": 1, "output_norm": 13.017293116251633, "output_norm/layer0": 13.017293116251633, "step": 1100 }, { "MSE": 726.8081079101562, "MSE/layer0": 726.8081079101562, "dead_code_fraction": 0.013, "dead_code_fraction/layer0": 0.013, "epoch": 0.12, "input_norm": 31.99846080144247, "input_norm/layer0": 31.99846080144247, "learning_rate": 0.0005, "loss": 2.6519, "max_norm": 38.87154769897461, "max_norm/layer0": 38.87154769897461, "mean_norm": 34.454498291015625, "mean_norm/layer0": 34.454498291015625, "multicode_k": 1, "output_norm": 13.209378539721174, "output_norm/layer0": 13.209378539721174, "step": 1150 }, { "MSE": 722.3268162027996, "MSE/layer0": 722.3268162027996, "dead_code_fraction": 0.01565, "dead_code_fraction/layer0": 0.01565, "epoch": 0.12, "input_norm": 31.998446766535434, "input_norm/layer0": 31.998446766535434, "learning_rate": 0.0005, "loss": 2.6464, "max_norm": 39.23857879638672, "max_norm/layer0": 39.23857879638672, "mean_norm": 34.597312927246094, "mean_norm/layer0": 34.597312927246094, "multicode_k": 1, "output_norm": 13.40400979042053, "output_norm/layer0": 13.40400979042053, "step": 1200 }, { "MSE": 717.3231912231446, "MSE/layer0": 717.3231912231446, "dead_code_fraction": 0.0241, "dead_code_fraction/layer0": 0.0241, "epoch": 0.12, "input_norm": 31.998441489537555, "input_norm/layer0": 31.998441489537555, "learning_rate": 0.0005, "loss": 2.6563, "max_norm": 39.60569381713867, "max_norm/layer0": 39.60569381713867, "mean_norm": 34.73863220214844, "mean_norm/layer0": 34.73863220214844, "multicode_k": 1, "output_norm": 13.590513488451638, "output_norm/layer0": 13.590513488451638, "step": 1250 }, { "MSE": 713.6523872884117, "MSE/layer0": 713.6523872884117, "dead_code_fraction": 0.02485, "dead_code_fraction/layer0": 0.02485, "epoch": 0.13, "input_norm": 31.998419742584225, "input_norm/layer0": 31.998419742584225, "learning_rate": 0.0005, "loss": 2.5806, "max_norm": 39.939239501953125, "max_norm/layer0": 39.939239501953125, "mean_norm": 34.87986946105957, "mean_norm/layer0": 34.87986946105957, "multicode_k": 1, "output_norm": 13.766959317525227, "output_norm/layer0": 13.766959317525227, "step": 1300 }, { "MSE": 709.5852165730794, "MSE/layer0": 709.5852165730794, "dead_code_fraction": 0.02925, "dead_code_fraction/layer0": 0.02925, "epoch": 0.14, "input_norm": 31.998412898381545, "input_norm/layer0": 31.998412898381545, "learning_rate": 0.0005, "loss": 2.5789, "max_norm": 40.28993225097656, "max_norm/layer0": 40.28993225097656, "mean_norm": 35.022348403930664, "mean_norm/layer0": 35.022348403930664, "multicode_k": 1, "output_norm": 13.93345036347707, "output_norm/layer0": 13.93345036347707, "step": 1350 }, { "MSE": 705.2143248494463, "MSE/layer0": 705.2143248494463, "dead_code_fraction": 0.03375, "dead_code_fraction/layer0": 0.03375, "epoch": 0.14, "input_norm": 31.9984123802185, "input_norm/layer0": 31.9984123802185, "learning_rate": 0.0005, "loss": 2.5943, "max_norm": 40.63530349731445, "max_norm/layer0": 40.63530349731445, "mean_norm": 35.164276123046875, "mean_norm/layer0": 35.164276123046875, "multicode_k": 1, "output_norm": 14.105911358197524, "output_norm/layer0": 14.105911358197524, "step": 1400 }, { "MSE": 702.3593349202476, "MSE/layer0": 702.3593349202476, "dead_code_fraction": 0.0404, "dead_code_fraction/layer0": 0.0404, "epoch": 0.14, "input_norm": 31.99839937845865, "input_norm/layer0": 31.99839937845865, "learning_rate": 0.0005, "loss": 2.5407, "max_norm": 40.98182678222656, "max_norm/layer0": 40.98182678222656, "mean_norm": 35.30343246459961, "mean_norm/layer0": 35.30343246459961, "multicode_k": 1, "output_norm": 14.2450444761912, "output_norm/layer0": 14.2450444761912, "step": 1450 }, { "MSE": 699.0307844034837, "MSE/layer0": 699.0307844034837, "dead_code_fraction": 0.04535, "dead_code_fraction/layer0": 0.04535, "epoch": 0.15, "input_norm": 31.998390986124676, "input_norm/layer0": 31.998390986124676, "learning_rate": 0.0005, "loss": 2.5145, "max_norm": 41.328433990478516, "max_norm/layer0": 41.328433990478516, "mean_norm": 35.445411682128906, "mean_norm/layer0": 35.445411682128906, "multicode_k": 1, "output_norm": 14.399013953208918, "output_norm/layer0": 14.399013953208918, "step": 1500 }, { "epoch": 0.15, "eval_MSE/layer0": 697.1178701616536, "eval_accuracy": 0.4637486628652817, "eval_dead_code_fraction/layer0": 0.05465, "eval_input_norm/layer0": 31.99837304089923, "eval_loss": 2.525156259536743, "eval_multicode_k": 1, "eval_output_norm/layer0": 14.48893911880305, "eval_runtime": 156.9005, "eval_samples_per_second": 29.465, "eval_steps_per_second": 1.842, "step": 1500 }, { "MSE": 696.0442759195965, "MSE/layer0": 696.0442759195965, "dead_code_fraction": 0.05145, "dead_code_fraction/layer0": 0.05145, "epoch": 0.15, "input_norm": 31.99836520512899, "input_norm/layer0": 31.99836520512899, "learning_rate": 0.0005, "loss": 2.4631, "max_norm": 41.6606559753418, "max_norm/layer0": 41.6606559753418, "mean_norm": 35.58424758911133, "mean_norm/layer0": 35.58424758911133, "multicode_k": 1, "output_norm": 14.54295777956645, "output_norm/layer0": 14.54295777956645, "step": 1550 }, { "MSE": 691.8516132609051, "MSE/layer0": 691.8516132609051, "dead_code_fraction": 0.0558, "dead_code_fraction/layer0": 0.0558, "epoch": 0.16, "input_norm": 31.998375968933097, "input_norm/layer0": 31.998375968933097, "learning_rate": 0.0005, "loss": 2.5501, "max_norm": 42.08574676513672, "max_norm/layer0": 42.08574676513672, "mean_norm": 35.72518730163574, "mean_norm/layer0": 35.72518730163574, "multicode_k": 1, "output_norm": 14.692513732910157, "output_norm/layer0": 14.692513732910157, "step": 1600 }, { "MSE": 688.7181396484375, "MSE/layer0": 688.7181396484375, "dead_code_fraction": 0.0595, "dead_code_fraction/layer0": 0.0595, "epoch": 0.17, "input_norm": 31.99835859616598, "input_norm/layer0": 31.99835859616598, "learning_rate": 0.0005, "loss": 2.4699, "max_norm": 42.610233306884766, "max_norm/layer0": 42.610233306884766, "mean_norm": 35.86595916748047, "mean_norm/layer0": 35.86595916748047, "multicode_k": 1, "output_norm": 14.833582207361854, "output_norm/layer0": 14.833582207361854, "step": 1650 }, { "MSE": 685.5445822143549, "MSE/layer0": 685.5445822143549, "dead_code_fraction": 0.06595, "dead_code_fraction/layer0": 0.06595, "epoch": 0.17, "input_norm": 31.99835782368978, "input_norm/layer0": 31.99835782368978, "learning_rate": 0.0005, "loss": 2.5014, "max_norm": 43.15216064453125, "max_norm/layer0": 43.15216064453125, "mean_norm": 36.00602149963379, "mean_norm/layer0": 36.00602149963379, "multicode_k": 1, "output_norm": 14.96381513118744, "output_norm/layer0": 14.96381513118744, "step": 1700 }, { "MSE": 683.2388099161783, "MSE/layer0": 683.2388099161783, "dead_code_fraction": 0.0708, "dead_code_fraction/layer0": 0.0708, "epoch": 0.17, "input_norm": 31.998353064854925, "input_norm/layer0": 31.998353064854925, "learning_rate": 0.0005, "loss": 2.4762, "max_norm": 43.683807373046875, "max_norm/layer0": 43.683807373046875, "mean_norm": 36.14344596862793, "mean_norm/layer0": 36.14344596862793, "multicode_k": 1, "output_norm": 15.08479848066965, "output_norm/layer0": 15.08479848066965, "step": 1750 }, { "MSE": 680.5147140502929, "MSE/layer0": 680.5147140502929, "dead_code_fraction": 0.0711, "dead_code_fraction/layer0": 0.0711, "epoch": 0.18, "input_norm": 31.998323942820228, "input_norm/layer0": 31.998323942820228, "learning_rate": 0.0005, "loss": 2.4017, "max_norm": 44.204158782958984, "max_norm/layer0": 44.204158782958984, "mean_norm": 36.281328201293945, "mean_norm/layer0": 36.281328201293945, "multicode_k": 1, "output_norm": 15.21150853157043, "output_norm/layer0": 15.21150853157043, "step": 1800 }, { "MSE": 677.8235699462891, "MSE/layer0": 677.8235699462891, "dead_code_fraction": 0.0789, "dead_code_fraction/layer0": 0.0789, "epoch": 0.18, "input_norm": 31.99832211176553, "input_norm/layer0": 31.99832211176553, "learning_rate": 0.0005, "loss": 2.4204, "max_norm": 44.73421096801758, "max_norm/layer0": 44.73421096801758, "mean_norm": 36.41860580444336, "mean_norm/layer0": 36.41860580444336, "multicode_k": 1, "output_norm": 15.32913914521535, "output_norm/layer0": 15.32913914521535, "step": 1850 }, { "MSE": 674.8260657755535, "MSE/layer0": 674.8260657755535, "dead_code_fraction": 0.0859, "dead_code_fraction/layer0": 0.0859, "epoch": 0.19, "input_norm": 31.998327109018952, "input_norm/layer0": 31.998327109018952, "learning_rate": 0.0005, "loss": 2.4612, "max_norm": 45.264217376708984, "max_norm/layer0": 45.264217376708984, "mean_norm": 36.55377197265625, "mean_norm/layer0": 36.55377197265625, "multicode_k": 1, "output_norm": 15.449233846664427, "output_norm/layer0": 15.449233846664427, "step": 1900 }, { "MSE": 672.4308366902667, "MSE/layer0": 672.4308366902667, "dead_code_fraction": 0.08975, "dead_code_fraction/layer0": 0.08975, "epoch": 0.2, "input_norm": 31.998313461939492, "input_norm/layer0": 31.998313461939492, "learning_rate": 0.0005, "loss": 2.413, "max_norm": 45.7476692199707, "max_norm/layer0": 45.7476692199707, "mean_norm": 36.687320709228516, "mean_norm/layer0": 36.687320709228516, "multicode_k": 1, "output_norm": 15.564360074996952, "output_norm/layer0": 15.564360074996952, "step": 1950 }, { "MSE": 669.9350853474932, "MSE/layer0": 669.9350853474932, "dead_code_fraction": 0.09495, "dead_code_fraction/layer0": 0.09495, "epoch": 0.2, "input_norm": 31.998307892481467, "input_norm/layer0": 31.998307892481467, "learning_rate": 0.0005, "loss": 2.4197, "max_norm": 46.2595100402832, "max_norm/layer0": 46.2595100402832, "mean_norm": 36.82127571105957, "mean_norm/layer0": 36.82127571105957, "multicode_k": 1, "output_norm": 15.671763955752056, "output_norm/layer0": 15.671763955752056, "step": 2000 }, { "epoch": 0.2, "eval_MSE/layer0": 670.0254334077002, "eval_accuracy": 0.47584128742153486, "eval_dead_code_fraction/layer0": 0.0988, "eval_input_norm/layer0": 31.99830309178647, "eval_loss": 2.409283399581909, "eval_multicode_k": 1, "eval_output_norm/layer0": 15.728763990528059, "eval_runtime": 158.0617, "eval_samples_per_second": 29.248, "eval_steps_per_second": 1.828, "step": 2000 }, { "MSE": 667.9600658162435, "MSE/layer0": 667.9600658162435, "dead_code_fraction": 0.09825, "dead_code_fraction/layer0": 0.09825, "epoch": 0.2, "input_norm": 31.99829890569051, "input_norm/layer0": 31.99829890569051, "learning_rate": 0.0005, "loss": 2.3908, "max_norm": 46.76186752319336, "max_norm/layer0": 46.76186752319336, "mean_norm": 36.954044342041016, "mean_norm/layer0": 36.954044342041016, "multicode_k": 1, "output_norm": 15.786985732714339, "output_norm/layer0": 15.786985732714339, "step": 2050 }, { "MSE": 665.8677533976238, "MSE/layer0": 665.8677533976238, "dead_code_fraction": 0.10105, "dead_code_fraction/layer0": 0.10105, "epoch": 0.21, "input_norm": 31.998287776311233, "input_norm/layer0": 31.998287776311233, "learning_rate": 0.0005, "loss": 2.3532, "max_norm": 47.23879623413086, "max_norm/layer0": 47.23879623413086, "mean_norm": 37.08414268493652, "mean_norm/layer0": 37.08414268493652, "multicode_k": 1, "output_norm": 15.887771523793544, "output_norm/layer0": 15.887771523793544, "step": 2100 }, { "MSE": 664.0484969075521, "MSE/layer0": 664.0484969075521, "dead_code_fraction": 0.10515, "dead_code_fraction/layer0": 0.10515, "epoch": 0.21, "input_norm": 31.998289143244435, "input_norm/layer0": 31.998289143244435, "learning_rate": 0.0005, "loss": 2.3835, "max_norm": 47.72446823120117, "max_norm/layer0": 47.72446823120117, "mean_norm": 37.21368408203125, "mean_norm/layer0": 37.21368408203125, "multicode_k": 1, "output_norm": 15.987558364868171, "output_norm/layer0": 15.987558364868171, "step": 2150 }, { "MSE": 662.043323059082, "MSE/layer0": 662.043323059082, "dead_code_fraction": 0.11065, "dead_code_fraction/layer0": 0.11065, "epoch": 0.22, "input_norm": 31.998284489313747, "input_norm/layer0": 31.998284489313747, "learning_rate": 0.0005, "loss": 2.3711, "max_norm": 48.21998596191406, "max_norm/layer0": 48.21998596191406, "mean_norm": 37.34214973449707, "mean_norm/layer0": 37.34214973449707, "multicode_k": 1, "output_norm": 16.084624527295432, "output_norm/layer0": 16.084624527295432, "step": 2200 }, { "MSE": 660.071201883952, "MSE/layer0": 660.071201883952, "dead_code_fraction": 0.1138, "dead_code_fraction/layer0": 0.1138, "epoch": 0.23, "input_norm": 31.998274552027382, "input_norm/layer0": 31.998274552027382, "learning_rate": 0.0005, "loss": 2.3361, "max_norm": 48.656124114990234, "max_norm/layer0": 48.656124114990234, "mean_norm": 37.46707344055176, "mean_norm/layer0": 37.46707344055176, "multicode_k": 1, "output_norm": 16.1770029671987, "output_norm/layer0": 16.1770029671987, "step": 2250 }, { "MSE": 658.2848066202794, "MSE/layer0": 658.2848066202794, "dead_code_fraction": 0.11715, "dead_code_fraction/layer0": 0.11715, "epoch": 0.23, "input_norm": 31.998281342188513, "input_norm/layer0": 31.998281342188513, "learning_rate": 0.0005, "loss": 2.3697, "max_norm": 49.14850616455078, "max_norm/layer0": 49.14850616455078, "mean_norm": 37.592119216918945, "mean_norm/layer0": 37.592119216918945, "multicode_k": 1, "output_norm": 16.273267321586616, "output_norm/layer0": 16.273267321586616, "step": 2300 }, { "MSE": 656.6614913940434, "MSE/layer0": 656.6614913940434, "dead_code_fraction": 0.1208, "dead_code_fraction/layer0": 0.1208, "epoch": 0.23, "input_norm": 31.99827545166017, "input_norm/layer0": 31.99827545166017, "learning_rate": 0.0005, "loss": 2.3691, "max_norm": 49.611228942871094, "max_norm/layer0": 49.611228942871094, "mean_norm": 37.71496772766113, "mean_norm/layer0": 37.71496772766113, "multicode_k": 1, "output_norm": 16.361617434819536, "output_norm/layer0": 16.361617434819536, "step": 2350 }, { "MSE": 654.7551118977863, "MSE/layer0": 654.7551118977863, "dead_code_fraction": 0.12205, "dead_code_fraction/layer0": 0.12205, "epoch": 0.24, "input_norm": 31.998258228302007, "input_norm/layer0": 31.998258228302007, "learning_rate": 0.0005, "loss": 2.3413, "max_norm": 50.082008361816406, "max_norm/layer0": 50.082008361816406, "mean_norm": 37.836740493774414, "mean_norm/layer0": 37.836740493774414, "multicode_k": 1, "output_norm": 16.442067163785307, "output_norm/layer0": 16.442067163785307, "step": 2400 }, { "MSE": 653.2320398966472, "MSE/layer0": 653.2320398966472, "dead_code_fraction": 0.1261, "dead_code_fraction/layer0": 0.1261, "epoch": 0.24, "input_norm": 31.99826599121093, "input_norm/layer0": 31.99826599121093, "learning_rate": 0.0005, "loss": 2.3415, "max_norm": 50.542850494384766, "max_norm/layer0": 50.542850494384766, "mean_norm": 37.956573486328125, "mean_norm/layer0": 37.956573486328125, "multicode_k": 1, "output_norm": 16.545647277832018, "output_norm/layer0": 16.545647277832018, "step": 2450 }, { "MSE": 652.0689453124999, "MSE/layer0": 652.0689453124999, "dead_code_fraction": 0.1305, "dead_code_fraction/layer0": 0.1305, "epoch": 0.25, "input_norm": 31.998266054789227, "input_norm/layer0": 31.998266054789227, "learning_rate": 0.0005, "loss": 2.3541, "max_norm": 50.972904205322266, "max_norm/layer0": 50.972904205322266, "mean_norm": 38.07469177246094, "mean_norm/layer0": 38.07469177246094, "multicode_k": 1, "output_norm": 16.614015088081356, "output_norm/layer0": 16.614015088081356, "step": 2500 }, { "epoch": 0.25, "eval_MSE/layer0": 651.1296869864225, "eval_accuracy": 0.48371217143066175, "eval_dead_code_fraction/layer0": 0.1337, "eval_input_norm/layer0": 31.998264631048162, "eval_loss": 2.340399742126465, "eval_multicode_k": 1, "eval_output_norm/layer0": 16.66022368217996, "eval_runtime": 157.8946, "eval_samples_per_second": 29.279, "eval_steps_per_second": 1.83, "step": 2500 }, { "MSE": 650.5154676310221, "MSE/layer0": 650.5154676310221, "dead_code_fraction": 0.1312, "dead_code_fraction/layer0": 0.1312, "epoch": 0.26, "input_norm": 31.99826429367065, "input_norm/layer0": 31.99826429367065, "learning_rate": 0.0005, "loss": 2.3374, "max_norm": 51.42794418334961, "max_norm/layer0": 51.42794418334961, "mean_norm": 38.19082260131836, "mean_norm/layer0": 38.19082260131836, "multicode_k": 1, "output_norm": 16.705677251815793, "output_norm/layer0": 16.705677251815793, "step": 2550 }, { "MSE": 649.4798397827149, "MSE/layer0": 649.4798397827149, "dead_code_fraction": 0.13625, "dead_code_fraction/layer0": 0.13625, "epoch": 0.26, "input_norm": 31.99826188405354, "input_norm/layer0": 31.99826188405354, "learning_rate": 0.0005, "loss": 2.3364, "max_norm": 51.84079360961914, "max_norm/layer0": 51.84079360961914, "mean_norm": 38.306650161743164, "mean_norm/layer0": 38.306650161743164, "multicode_k": 1, "output_norm": 16.774758176803587, "output_norm/layer0": 16.774758176803587, "step": 2600 }, { "MSE": 648.4373052978513, "MSE/layer0": 648.4373052978513, "dead_code_fraction": 0.13795, "dead_code_fraction/layer0": 0.13795, "epoch": 0.27, "input_norm": 31.998252007166542, "input_norm/layer0": 31.998252007166542, "learning_rate": 0.0005, "loss": 2.3162, "max_norm": 52.24661636352539, "max_norm/layer0": 52.24661636352539, "mean_norm": 38.41937828063965, "mean_norm/layer0": 38.41937828063965, "multicode_k": 1, "output_norm": 16.851604979832963, "output_norm/layer0": 16.851604979832963, "step": 2650 }, { "MSE": 647.0678014119467, "MSE/layer0": 647.0678014119467, "dead_code_fraction": 0.1397, "dead_code_fraction/layer0": 0.1397, "epoch": 0.27, "input_norm": 31.998265930811563, "input_norm/layer0": 31.998265930811563, "learning_rate": 0.0005, "loss": 2.3497, "max_norm": 52.66170120239258, "max_norm/layer0": 52.66170120239258, "mean_norm": 38.53024482727051, "mean_norm/layer0": 38.53024482727051, "multicode_k": 1, "output_norm": 16.925416787465398, "output_norm/layer0": 16.925416787465398, "step": 2700 }, { "MSE": 646.4085242716471, "MSE/layer0": 646.4085242716471, "dead_code_fraction": 0.14125, "dead_code_fraction/layer0": 0.14125, "epoch": 0.28, "input_norm": 31.99825245221455, "input_norm/layer0": 31.99825245221455, "learning_rate": 0.0005, "loss": 2.301, "max_norm": 53.03037643432617, "max_norm/layer0": 53.03037643432617, "mean_norm": 38.63713836669922, "mean_norm/layer0": 38.63713836669922, "multicode_k": 1, "output_norm": 16.985576423009235, "output_norm/layer0": 16.985576423009235, "step": 2750 }, { "MSE": 644.7344170125325, "MSE/layer0": 644.7344170125325, "dead_code_fraction": 0.14415, "dead_code_fraction/layer0": 0.14415, "epoch": 0.28, "input_norm": 31.998260081609082, "input_norm/layer0": 31.998260081609082, "learning_rate": 0.0005, "loss": 2.3395, "max_norm": 53.41487503051758, "max_norm/layer0": 53.41487503051758, "mean_norm": 38.74285697937012, "mean_norm/layer0": 38.74285697937012, "multicode_k": 1, "output_norm": 17.068980147043867, "output_norm/layer0": 17.068980147043867, "step": 2800 }, { "MSE": 644.636144104004, "MSE/layer0": 644.636144104004, "dead_code_fraction": 0.14565, "dead_code_fraction/layer0": 0.14565, "epoch": 0.28, "input_norm": 31.998243366877247, "input_norm/layer0": 31.998243366877247, "learning_rate": 0.0005, "loss": 2.2757, "max_norm": 53.792579650878906, "max_norm/layer0": 53.792579650878906, "mean_norm": 38.84635543823242, "mean_norm/layer0": 38.84635543823242, "multicode_k": 1, "output_norm": 17.124992834726967, "output_norm/layer0": 17.124992834726967, "step": 2850 }, { "MSE": 643.8843309529623, "MSE/layer0": 643.8843309529623, "dead_code_fraction": 0.14495, "dead_code_fraction/layer0": 0.14495, "epoch": 0.29, "input_norm": 31.998242295583093, "input_norm/layer0": 31.998242295583093, "learning_rate": 0.0005, "loss": 2.3057, "max_norm": 54.146453857421875, "max_norm/layer0": 54.146453857421875, "mean_norm": 38.947309494018555, "mean_norm/layer0": 38.947309494018555, "multicode_k": 1, "output_norm": 17.17694611549377, "output_norm/layer0": 17.17694611549377, "step": 2900 }, { "MSE": 642.6776557413741, "MSE/layer0": 642.6776557413741, "dead_code_fraction": 0.1504, "dead_code_fraction/layer0": 0.1504, "epoch": 0.29, "input_norm": 31.998272593816125, "input_norm/layer0": 31.998272593816125, "learning_rate": 0.0005, "loss": 2.3545, "max_norm": 54.51527404785156, "max_norm/layer0": 54.51527404785156, "mean_norm": 39.047607421875, "mean_norm/layer0": 39.047607421875, "multicode_k": 1, "output_norm": 17.240235595703133, "output_norm/layer0": 17.240235595703133, "step": 2950 }, { "MSE": 643.1047460937498, "MSE/layer0": 643.1047460937498, "dead_code_fraction": 0.1483, "dead_code_fraction/layer0": 0.1483, "epoch": 0.3, "input_norm": 31.998249003092454, "input_norm/layer0": 31.998249003092454, "learning_rate": 0.0005, "loss": 2.2742, "max_norm": 54.86568832397461, "max_norm/layer0": 54.86568832397461, "mean_norm": 39.14469337463379, "mean_norm/layer0": 39.14469337463379, "multicode_k": 1, "output_norm": 17.28876600265503, "output_norm/layer0": 17.28876600265503, "step": 3000 }, { "epoch": 0.3, "eval_MSE/layer0": 642.6360311704152, "eval_accuracy": 0.49030507287608877, "eval_dead_code_fraction/layer0": 0.14995, "eval_input_norm/layer0": 31.998255163205542, "eval_loss": 2.2907073497772217, "eval_multicode_k": 1, "eval_output_norm/layer0": 17.324301861386118, "eval_runtime": 157.9262, "eval_samples_per_second": 29.273, "eval_steps_per_second": 1.83, "step": 3000 }, { "MSE": 641.9817254638668, "MSE/layer0": 641.9817254638668, "dead_code_fraction": 0.1511, "dead_code_fraction/layer0": 0.1511, "epoch": 0.3, "input_norm": 31.99826343536376, "input_norm/layer0": 31.99826343536376, "learning_rate": 0.0005, "loss": 2.3422, "max_norm": 55.2226676940918, "max_norm/layer0": 55.2226676940918, "mean_norm": 39.23999786376953, "mean_norm/layer0": 39.23999786376953, "multicode_k": 1, "output_norm": 17.350644410451252, "output_norm/layer0": 17.350644410451252, "step": 3050 }, { "MSE": 641.9993333943689, "MSE/layer0": 641.9993333943689, "dead_code_fraction": 0.1504, "dead_code_fraction/layer0": 0.1504, "epoch": 0.31, "input_norm": 31.998250141143807, "input_norm/layer0": 31.998250141143807, "learning_rate": 0.0005, "loss": 2.2814, "max_norm": 55.56163787841797, "max_norm/layer0": 55.56163787841797, "mean_norm": 39.33370780944824, "mean_norm/layer0": 39.33370780944824, "multicode_k": 1, "output_norm": 17.39312816301982, "output_norm/layer0": 17.39312816301982, "step": 3100 }, { "MSE": 641.5148900349936, "MSE/layer0": 641.5148900349936, "dead_code_fraction": 0.15185, "dead_code_fraction/layer0": 0.15185, "epoch": 0.32, "input_norm": 31.998260364532467, "input_norm/layer0": 31.998260364532467, "learning_rate": 0.0005, "loss": 2.3152, "max_norm": 55.8856315612793, "max_norm/layer0": 55.8856315612793, "mean_norm": 39.42481803894043, "mean_norm/layer0": 39.42481803894043, "multicode_k": 1, "output_norm": 17.44178107897441, "output_norm/layer0": 17.44178107897441, "step": 3150 }, { "MSE": 640.499552408854, "MSE/layer0": 640.499552408854, "dead_code_fraction": 0.1516, "dead_code_fraction/layer0": 0.1516, "epoch": 0.32, "input_norm": 31.99825292587281, "input_norm/layer0": 31.99825292587281, "learning_rate": 0.0005, "loss": 2.2462, "max_norm": 56.21445846557617, "max_norm/layer0": 56.21445846557617, "mean_norm": 39.51395606994629, "mean_norm/layer0": 39.51395606994629, "multicode_k": 1, "output_norm": 17.50789775530497, "output_norm/layer0": 17.50789775530497, "step": 3200 }, { "MSE": 640.565166829427, "MSE/layer0": 640.565166829427, "dead_code_fraction": 0.15285, "dead_code_fraction/layer0": 0.15285, "epoch": 0.33, "input_norm": 31.998250306447353, "input_norm/layer0": 31.998250306447353, "learning_rate": 0.0005, "loss": 2.2595, "max_norm": 56.526973724365234, "max_norm/layer0": 56.526973724365234, "mean_norm": 39.601173400878906, "mean_norm/layer0": 39.601173400878906, "multicode_k": 1, "output_norm": 17.54366443951924, "output_norm/layer0": 17.54366443951924, "step": 3250 }, { "MSE": 640.8991118367509, "MSE/layer0": 640.8991118367509, "dead_code_fraction": 0.1531, "dead_code_fraction/layer0": 0.1531, "epoch": 0.33, "input_norm": 31.998245798746755, "input_norm/layer0": 31.998245798746755, "learning_rate": 0.0005, "loss": 2.2326, "max_norm": 56.82651138305664, "max_norm/layer0": 56.82651138305664, "mean_norm": 39.684635162353516, "mean_norm/layer0": 39.684635162353516, "multicode_k": 1, "output_norm": 17.578553660710664, "output_norm/layer0": 17.578553660710664, "step": 3300 }, { "MSE": 640.486218770345, "MSE/layer0": 640.486218770345, "dead_code_fraction": 0.15345, "dead_code_fraction/layer0": 0.15345, "epoch": 0.34, "input_norm": 31.998255780537924, "input_norm/layer0": 31.998255780537924, "learning_rate": 0.0005, "loss": 2.2733, "max_norm": 57.12877655029297, "max_norm/layer0": 57.12877655029297, "mean_norm": 39.76711463928223, "mean_norm/layer0": 39.76711463928223, "multicode_k": 1, "output_norm": 17.619242086410516, "output_norm/layer0": 17.619242086410516, "step": 3350 }, { "MSE": 639.5240251668292, "MSE/layer0": 639.5240251668292, "dead_code_fraction": 0.15565, "dead_code_fraction/layer0": 0.15565, "epoch": 0.34, "input_norm": 31.998264500300095, "input_norm/layer0": 31.998264500300095, "learning_rate": 0.0005, "loss": 2.2633, "max_norm": 57.42041778564453, "max_norm/layer0": 57.42041778564453, "mean_norm": 39.84800338745117, "mean_norm/layer0": 39.84800338745117, "multicode_k": 1, "output_norm": 17.667484652201342, "output_norm/layer0": 17.667484652201342, "step": 3400 }, { "MSE": 639.2691174316408, "MSE/layer0": 639.2691174316408, "dead_code_fraction": 0.15605, "dead_code_fraction/layer0": 0.15605, "epoch": 0.34, "input_norm": 31.99825723965962, "input_norm/layer0": 31.99825723965962, "learning_rate": 0.0005, "loss": 2.2495, "max_norm": 57.706260681152344, "max_norm/layer0": 57.706260681152344, "mean_norm": 39.92698097229004, "mean_norm/layer0": 39.92698097229004, "multicode_k": 1, "output_norm": 17.705148900349947, "output_norm/layer0": 17.705148900349947, "step": 3450 }, { "MSE": 639.3908192952478, "MSE/layer0": 639.3908192952478, "dead_code_fraction": 0.15655, "dead_code_fraction/layer0": 0.15655, "epoch": 0.35, "input_norm": 31.9982618745168, "input_norm/layer0": 31.9982618745168, "learning_rate": 0.0005, "loss": 2.2488, "max_norm": 57.98209762573242, "max_norm/layer0": 57.98209762573242, "mean_norm": 40.005022048950195, "mean_norm/layer0": 40.005022048950195, "multicode_k": 1, "output_norm": 17.73683495521545, "output_norm/layer0": 17.73683495521545, "step": 3500 }, { "epoch": 0.35, "eval_MSE/layer0": 640.3158307464355, "eval_accuracy": 0.49451074349024987, "eval_dead_code_fraction/layer0": 0.1575, "eval_input_norm/layer0": 31.99825158244007, "eval_loss": 2.2564537525177, "eval_multicode_k": 1, "eval_output_norm/layer0": 17.756634140179678, "eval_runtime": 157.599, "eval_samples_per_second": 29.334, "eval_steps_per_second": 1.834, "step": 3500 }, { "MSE": 639.6838141886391, "MSE/layer0": 639.6838141886391, "dead_code_fraction": 0.157, "dead_code_fraction/layer0": 0.157, "epoch": 0.35, "input_norm": 31.99826737085978, "input_norm/layer0": 31.99826737085978, "learning_rate": 0.0005, "loss": 2.2738, "max_norm": 58.24713897705078, "max_norm/layer0": 58.24713897705078, "mean_norm": 40.08023262023926, "mean_norm/layer0": 40.08023262023926, "multicode_k": 1, "output_norm": 17.755876312255864, "output_norm/layer0": 17.755876312255864, "step": 3550 }, { "MSE": 639.2954257202149, "MSE/layer0": 639.2954257202149, "dead_code_fraction": 0.1559, "dead_code_fraction/layer0": 0.1559, "epoch": 0.36, "input_norm": 31.998245531717938, "input_norm/layer0": 31.998245531717938, "learning_rate": 0.0005, "loss": 2.2036, "max_norm": 58.50635528564453, "max_norm/layer0": 58.50635528564453, "mean_norm": 40.15370178222656, "mean_norm/layer0": 40.15370178222656, "multicode_k": 1, "output_norm": 17.812968953450515, "output_norm/layer0": 17.812968953450515, "step": 3600 }, { "MSE": 639.3338773600263, "MSE/layer0": 639.3338773600263, "dead_code_fraction": 0.15905, "dead_code_fraction/layer0": 0.15905, "epoch": 0.36, "input_norm": 31.99827084223429, "input_norm/layer0": 31.99827084223429, "learning_rate": 0.0005, "loss": 2.2672, "max_norm": 58.76622009277344, "max_norm/layer0": 58.76622009277344, "mean_norm": 40.22719192504883, "mean_norm/layer0": 40.22719192504883, "multicode_k": 1, "output_norm": 17.821751413345332, "output_norm/layer0": 17.821751413345332, "step": 3650 }, { "MSE": 639.0531684366863, "MSE/layer0": 639.0531684366863, "dead_code_fraction": 0.15975, "dead_code_fraction/layer0": 0.15975, "epoch": 0.37, "input_norm": 31.99827636400858, "input_norm/layer0": 31.99827636400858, "learning_rate": 0.0005, "loss": 2.2444, "max_norm": 59.02393341064453, "max_norm/layer0": 59.02393341064453, "mean_norm": 40.298166275024414, "mean_norm/layer0": 40.298166275024414, "multicode_k": 1, "output_norm": 17.85403926849365, "output_norm/layer0": 17.85403926849365, "step": 3700 }, { "MSE": 638.9355230712894, "MSE/layer0": 638.9355230712894, "dead_code_fraction": 0.1605, "dead_code_fraction/layer0": 0.1605, "epoch": 0.38, "input_norm": 31.99827863057454, "input_norm/layer0": 31.99827863057454, "learning_rate": 0.0005, "loss": 2.2454, "max_norm": 59.28853225708008, "max_norm/layer0": 59.28853225708008, "mean_norm": 40.36880111694336, "mean_norm/layer0": 40.36880111694336, "multicode_k": 1, "output_norm": 17.88599282582601, "output_norm/layer0": 17.88599282582601, "step": 3750 }, { "MSE": 639.0086972045899, "MSE/layer0": 639.0086972045899, "dead_code_fraction": 0.16125, "dead_code_fraction/layer0": 0.16125, "epoch": 0.38, "input_norm": 31.9982850710551, "input_norm/layer0": 31.9982850710551, "learning_rate": 0.0005, "loss": 2.27, "max_norm": 59.546451568603516, "max_norm/layer0": 59.546451568603516, "mean_norm": 40.43776512145996, "mean_norm/layer0": 40.43776512145996, "multicode_k": 1, "output_norm": 17.90943570454915, "output_norm/layer0": 17.90943570454915, "step": 3800 }, { "MSE": 638.9462019856769, "MSE/layer0": 638.9462019856769, "dead_code_fraction": 0.1583, "dead_code_fraction/layer0": 0.1583, "epoch": 0.39, "input_norm": 31.998278980255122, "input_norm/layer0": 31.998278980255122, "learning_rate": 0.0005, "loss": 2.2438, "max_norm": 59.80894470214844, "max_norm/layer0": 59.80894470214844, "mean_norm": 40.50556945800781, "mean_norm/layer0": 40.50556945800781, "multicode_k": 1, "output_norm": 17.947645209630338, "output_norm/layer0": 17.947645209630338, "step": 3850 }, { "MSE": 639.4130173746743, "MSE/layer0": 639.4130173746743, "dead_code_fraction": 0.16135, "dead_code_fraction/layer0": 0.16135, "epoch": 0.39, "input_norm": 31.998284943898526, "input_norm/layer0": 31.998284943898526, "learning_rate": 0.0005, "loss": 2.2526, "max_norm": 60.04655075073242, "max_norm/layer0": 60.04655075073242, "mean_norm": 40.57136535644531, "mean_norm/layer0": 40.57136535644531, "multicode_k": 1, "output_norm": 17.960218969980872, "output_norm/layer0": 17.960218969980872, "step": 3900 }, { "MSE": 639.8756245930986, "MSE/layer0": 639.8756245930986, "dead_code_fraction": 0.15755, "dead_code_fraction/layer0": 0.15755, "epoch": 0.4, "input_norm": 31.998285398483272, "input_norm/layer0": 31.998285398483272, "learning_rate": 0.0005, "loss": 2.2266, "max_norm": 60.29011154174805, "max_norm/layer0": 60.29011154174805, "mean_norm": 40.63625144958496, "mean_norm/layer0": 40.63625144958496, "multicode_k": 1, "output_norm": 17.97526204744974, "output_norm/layer0": 17.97526204744974, "step": 3950 }, { "MSE": 640.046054585775, "MSE/layer0": 640.046054585775, "dead_code_fraction": 0.1605, "dead_code_fraction/layer0": 0.1605, "epoch": 0.4, "input_norm": 31.998285433451336, "input_norm/layer0": 31.998285433451336, "learning_rate": 0.0005, "loss": 2.2287, "max_norm": 60.52168655395508, "max_norm/layer0": 60.52168655395508, "mean_norm": 40.698753356933594, "mean_norm/layer0": 40.698753356933594, "multicode_k": 1, "output_norm": 17.997498016357426, "output_norm/layer0": 17.997498016357426, "step": 4000 }, { "epoch": 0.4, "eval_MSE/layer0": 638.8422855589264, "eval_accuracy": 0.49670513512593434, "eval_dead_code_fraction/layer0": 0.16135, "eval_input_norm/layer0": 31.99827300782795, "eval_loss": 2.2332887649536133, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.022313365115252, "eval_runtime": 158.1975, "eval_samples_per_second": 29.223, "eval_steps_per_second": 1.827, "step": 4000 }, { "MSE": 639.952128804525, "MSE/layer0": 639.952128804525, "dead_code_fraction": 0.16035, "dead_code_fraction/layer0": 0.16035, "epoch": 0.41, "input_norm": 31.998286927541105, "input_norm/layer0": 31.998286927541105, "learning_rate": 0.0005, "loss": 2.2193, "max_norm": 60.76009750366211, "max_norm/layer0": 60.76009750366211, "mean_norm": 40.75992393493652, "mean_norm/layer0": 40.75992393493652, "multicode_k": 1, "output_norm": 18.024092137018826, "output_norm/layer0": 18.024092137018826, "step": 4050 }, { "MSE": 640.5730131022133, "MSE/layer0": 640.5730131022133, "dead_code_fraction": 0.1634, "dead_code_fraction/layer0": 0.1634, "epoch": 0.41, "input_norm": 31.99828769365946, "input_norm/layer0": 31.99828769365946, "learning_rate": 0.0005, "loss": 2.2301, "max_norm": 60.98118591308594, "max_norm/layer0": 60.98118591308594, "mean_norm": 40.8208122253418, "mean_norm/layer0": 40.8208122253418, "multicode_k": 1, "output_norm": 18.02807092984518, "output_norm/layer0": 18.02807092984518, "step": 4100 }, { "MSE": 640.4258350626628, "MSE/layer0": 640.4258350626628, "dead_code_fraction": 0.1612, "dead_code_fraction/layer0": 0.1612, "epoch": 0.41, "input_norm": 31.998297268549607, "input_norm/layer0": 31.998297268549607, "learning_rate": 0.0005, "loss": 2.2307, "max_norm": 61.19542694091797, "max_norm/layer0": 61.19542694091797, "mean_norm": 40.88128852844238, "mean_norm/layer0": 40.88128852844238, "multicode_k": 1, "output_norm": 18.04158842404684, "output_norm/layer0": 18.04158842404684, "step": 4150 }, { "MSE": 639.5022987874349, "MSE/layer0": 639.5022987874349, "dead_code_fraction": 0.16015, "dead_code_fraction/layer0": 0.16015, "epoch": 0.42, "input_norm": 31.99830362319948, "input_norm/layer0": 31.99830362319948, "learning_rate": 0.0005, "loss": 2.247, "max_norm": 61.4282341003418, "max_norm/layer0": 61.4282341003418, "mean_norm": 40.941017150878906, "mean_norm/layer0": 40.941017150878906, "multicode_k": 1, "output_norm": 18.079462760289516, "output_norm/layer0": 18.079462760289516, "step": 4200 }, { "MSE": 640.0252755737306, "MSE/layer0": 640.0252755737306, "dead_code_fraction": 0.1604, "dead_code_fraction/layer0": 0.1604, "epoch": 0.42, "input_norm": 31.99830138524374, "input_norm/layer0": 31.99830138524374, "learning_rate": 0.0005, "loss": 2.2314, "max_norm": 61.648414611816406, "max_norm/layer0": 61.648414611816406, "mean_norm": 40.99977684020996, "mean_norm/layer0": 40.99977684020996, "multicode_k": 1, "output_norm": 18.09024664878845, "output_norm/layer0": 18.09024664878845, "step": 4250 }, { "MSE": 639.7621870930992, "MSE/layer0": 639.7621870930992, "dead_code_fraction": 0.16365, "dead_code_fraction/layer0": 0.16365, "epoch": 0.43, "input_norm": 31.99830169359842, "input_norm/layer0": 31.99830169359842, "learning_rate": 0.0005, "loss": 2.2144, "max_norm": 61.86562728881836, "max_norm/layer0": 61.86562728881836, "mean_norm": 41.05688667297363, "mean_norm/layer0": 41.05688667297363, "multicode_k": 1, "output_norm": 18.11899041493734, "output_norm/layer0": 18.11899041493734, "step": 4300 }, { "MSE": 640.3955947875975, "MSE/layer0": 640.3955947875975, "dead_code_fraction": 0.1592, "dead_code_fraction/layer0": 0.1592, "epoch": 0.43, "input_norm": 31.998302787144976, "input_norm/layer0": 31.998302787144976, "learning_rate": 0.0005, "loss": 2.2077, "max_norm": 62.060550689697266, "max_norm/layer0": 62.060550689697266, "mean_norm": 41.11246681213379, "mean_norm/layer0": 41.11246681213379, "multicode_k": 1, "output_norm": 18.121066271464024, "output_norm/layer0": 18.121066271464024, "step": 4350 }, { "MSE": 639.8066222127281, "MSE/layer0": 639.8066222127281, "dead_code_fraction": 0.1635, "dead_code_fraction/layer0": 0.1635, "epoch": 0.44, "input_norm": 31.998314228057872, "input_norm/layer0": 31.998314228057872, "learning_rate": 0.0005, "loss": 2.2287, "max_norm": 62.275943756103516, "max_norm/layer0": 62.275943756103516, "mean_norm": 41.167396545410156, "mean_norm/layer0": 41.167396545410156, "multicode_k": 1, "output_norm": 18.142933632532753, "output_norm/layer0": 18.142933632532753, "step": 4400 }, { "MSE": 639.8160334269206, "MSE/layer0": 639.8160334269206, "dead_code_fraction": 0.16385, "dead_code_fraction/layer0": 0.16385, "epoch": 0.45, "input_norm": 31.99831516901653, "input_norm/layer0": 31.99831516901653, "learning_rate": 0.0005, "loss": 2.215, "max_norm": 62.486793518066406, "max_norm/layer0": 62.486793518066406, "mean_norm": 41.221702575683594, "mean_norm/layer0": 41.221702575683594, "multicode_k": 1, "output_norm": 18.167670075098677, "output_norm/layer0": 18.167670075098677, "step": 4450 }, { "MSE": 640.1416244506836, "MSE/layer0": 640.1416244506836, "dead_code_fraction": 0.16675, "dead_code_fraction/layer0": 0.16675, "epoch": 0.45, "input_norm": 31.998327512741074, "input_norm/layer0": 31.998327512741074, "learning_rate": 0.0005, "loss": 2.2576, "max_norm": 62.67790222167969, "max_norm/layer0": 62.67790222167969, "mean_norm": 41.275705337524414, "mean_norm/layer0": 41.275705337524414, "multicode_k": 1, "output_norm": 18.162402251561495, "output_norm/layer0": 18.162402251561495, "step": 4500 }, { "epoch": 0.45, "eval_MSE/layer0": 639.7464034476376, "eval_accuracy": 0.49916912103175737, "eval_dead_code_fraction/layer0": 0.16755, "eval_input_norm/layer0": 31.998309449821527, "eval_loss": 2.215489387512207, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.191884751910905, "eval_runtime": 157.9108, "eval_samples_per_second": 29.276, "eval_steps_per_second": 1.83, "step": 4500 }, { "MSE": 640.4858755493162, "MSE/layer0": 640.4858755493162, "dead_code_fraction": 0.1633, "dead_code_fraction/layer0": 0.1633, "epoch": 0.46, "input_norm": 31.99831475257874, "input_norm/layer0": 31.99831475257874, "learning_rate": 0.0005, "loss": 2.1869, "max_norm": 62.88029861450195, "max_norm/layer0": 62.88029861450195, "mean_norm": 41.32845115661621, "mean_norm/layer0": 41.32845115661621, "multicode_k": 1, "output_norm": 18.18791744550069, "output_norm/layer0": 18.18791744550069, "step": 4550 }, { "MSE": 640.7411174519859, "MSE/layer0": 640.7411174519859, "dead_code_fraction": 0.16375, "dead_code_fraction/layer0": 0.16375, "epoch": 0.46, "input_norm": 31.998337395985924, "input_norm/layer0": 31.998337395985924, "learning_rate": 0.0005, "loss": 2.2426, "max_norm": 63.06687545776367, "max_norm/layer0": 63.06687545776367, "mean_norm": 41.38063049316406, "mean_norm/layer0": 41.38063049316406, "multicode_k": 1, "output_norm": 18.185693721771244, "output_norm/layer0": 18.185693721771244, "step": 4600 }, { "MSE": 640.3254055786131, "MSE/layer0": 640.3254055786131, "dead_code_fraction": 0.1637, "dead_code_fraction/layer0": 0.1637, "epoch": 0.47, "input_norm": 31.998331034978236, "input_norm/layer0": 31.998331034978236, "learning_rate": 0.0005, "loss": 2.2103, "max_norm": 63.24494171142578, "max_norm/layer0": 63.24494171142578, "mean_norm": 41.4316463470459, "mean_norm/layer0": 41.4316463470459, "multicode_k": 1, "output_norm": 18.215761318206788, "output_norm/layer0": 18.215761318206788, "step": 4650 }, { "MSE": 640.0117889404299, "MSE/layer0": 640.0117889404299, "dead_code_fraction": 0.1653, "dead_code_fraction/layer0": 0.1653, "epoch": 0.47, "input_norm": 31.998331683476753, "input_norm/layer0": 31.998331683476753, "learning_rate": 0.0005, "loss": 2.189, "max_norm": 63.429969787597656, "max_norm/layer0": 63.429969787597656, "mean_norm": 41.481590270996094, "mean_norm/layer0": 41.481590270996094, "multicode_k": 1, "output_norm": 18.22781534512837, "output_norm/layer0": 18.22781534512837, "step": 4700 }, { "MSE": 640.034366455078, "MSE/layer0": 640.034366455078, "dead_code_fraction": 0.16355, "dead_code_fraction/layer0": 0.16355, "epoch": 0.47, "input_norm": 31.998335037231442, "input_norm/layer0": 31.998335037231442, "learning_rate": 0.0005, "loss": 2.1746, "max_norm": 63.604644775390625, "max_norm/layer0": 63.604644775390625, "mean_norm": 41.530447006225586, "mean_norm/layer0": 41.530447006225586, "multicode_k": 1, "output_norm": 18.247568238576257, "output_norm/layer0": 18.247568238576257, "step": 4750 }, { "MSE": 641.3402144411094, "MSE/layer0": 641.3402144411094, "dead_code_fraction": 0.16465, "dead_code_fraction/layer0": 0.16465, "epoch": 1.0, "input_norm": 31.998328861016873, "input_norm/layer0": 31.998328861016873, "learning_rate": 0.0005, "loss": 2.1589, "max_norm": 63.7794303894043, "max_norm/layer0": 63.7794303894043, "mean_norm": 41.577613830566406, "mean_norm/layer0": 41.577613830566406, "multicode_k": 1, "output_norm": 18.227145007068557, "output_norm/layer0": 18.227145007068557, "step": 4800 }, { "MSE": 640.0454110717772, "MSE/layer0": 640.0454110717772, "dead_code_fraction": 0.16635, "dead_code_fraction/layer0": 0.16635, "epoch": 1.01, "input_norm": 31.998361120224008, "input_norm/layer0": 31.998361120224008, "learning_rate": 0.0005, "loss": 2.2585, "max_norm": 63.96126937866211, "max_norm/layer0": 63.96126937866211, "mean_norm": 41.62501525878906, "mean_norm/layer0": 41.62501525878906, "multicode_k": 1, "output_norm": 18.258941303888953, "output_norm/layer0": 18.258941303888953, "step": 4850 }, { "MSE": 640.0055624389651, "MSE/layer0": 640.0055624389651, "dead_code_fraction": 0.16515, "dead_code_fraction/layer0": 0.16515, "epoch": 1.01, "input_norm": 31.998340495427446, "input_norm/layer0": 31.998340495427446, "learning_rate": 0.0005, "loss": 2.1578, "max_norm": 64.13137817382812, "max_norm/layer0": 64.13137817382812, "mean_norm": 41.672542572021484, "mean_norm/layer0": 41.672542572021484, "multicode_k": 1, "output_norm": 18.272732003529867, "output_norm/layer0": 18.272732003529867, "step": 4900 }, { "MSE": 640.108183898926, "MSE/layer0": 640.108183898926, "dead_code_fraction": 0.1668, "dead_code_fraction/layer0": 0.1668, "epoch": 1.02, "input_norm": 31.998351519902535, "input_norm/layer0": 31.998351519902535, "learning_rate": 0.0005, "loss": 2.1809, "max_norm": 64.30120086669922, "max_norm/layer0": 64.30120086669922, "mean_norm": 41.71914291381836, "mean_norm/layer0": 41.71914291381836, "multicode_k": 1, "output_norm": 18.278290322621658, "output_norm/layer0": 18.278290322621658, "step": 4950 }, { "MSE": 639.8438139851887, "MSE/layer0": 639.8438139851887, "dead_code_fraction": 0.1671, "dead_code_fraction/layer0": 0.1671, "epoch": 1.02, "input_norm": 31.998358796437586, "input_norm/layer0": 31.998358796437586, "learning_rate": 0.0005, "loss": 2.1901, "max_norm": 64.4720230102539, "max_norm/layer0": 64.4720230102539, "mean_norm": 41.76571464538574, "mean_norm/layer0": 41.76571464538574, "multicode_k": 1, "output_norm": 18.29636260350546, "output_norm/layer0": 18.29636260350546, "step": 5000 }, { "epoch": 1.02, "eval_MSE/layer0": 638.1766108092672, "eval_accuracy": 0.5013711247409516, "eval_dead_code_fraction/layer0": 0.16955, "eval_input_norm/layer0": 31.99836045128427, "eval_loss": 2.202561616897583, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.311866774487846, "eval_runtime": 158.3836, "eval_samples_per_second": 29.189, "eval_steps_per_second": 1.825, "step": 5000 }, { "MSE": 639.5863418579103, "MSE/layer0": 639.5863418579103, "dead_code_fraction": 0.1675, "dead_code_fraction/layer0": 0.1675, "epoch": 1.03, "input_norm": 31.99836014429728, "input_norm/layer0": 31.99836014429728, "learning_rate": 0.0005, "loss": 2.1914, "max_norm": 64.65907287597656, "max_norm/layer0": 64.65907287597656, "mean_norm": 41.8120174407959, "mean_norm/layer0": 41.8120174407959, "multicode_k": 1, "output_norm": 18.301887426376346, "output_norm/layer0": 18.301887426376346, "step": 5050 }, { "MSE": 639.5830181884764, "MSE/layer0": 639.5830181884764, "dead_code_fraction": 0.16545, "dead_code_fraction/layer0": 0.16545, "epoch": 1.03, "input_norm": 31.998363596598292, "input_norm/layer0": 31.998363596598292, "learning_rate": 0.0005, "loss": 2.1503, "max_norm": 64.83207702636719, "max_norm/layer0": 64.83207702636719, "mean_norm": 41.85700988769531, "mean_norm/layer0": 41.85700988769531, "multicode_k": 1, "output_norm": 18.3204355875651, "output_norm/layer0": 18.3204355875651, "step": 5100 }, { "MSE": 640.3749603271485, "MSE/layer0": 640.3749603271485, "dead_code_fraction": 0.16725, "dead_code_fraction/layer0": 0.16725, "epoch": 1.04, "input_norm": 31.9983703358968, "input_norm/layer0": 31.9983703358968, "learning_rate": 0.0005, "loss": 2.1634, "max_norm": 65.003662109375, "max_norm/layer0": 65.003662109375, "mean_norm": 41.90180778503418, "mean_norm/layer0": 41.90180778503418, "multicode_k": 1, "output_norm": 18.316434319814057, "output_norm/layer0": 18.316434319814057, "step": 5150 }, { "MSE": 639.0211893717446, "MSE/layer0": 639.0211893717446, "dead_code_fraction": 0.16875, "dead_code_fraction/layer0": 0.16875, "epoch": 1.04, "input_norm": 31.998389561971024, "input_norm/layer0": 31.998389561971024, "learning_rate": 0.0005, "loss": 2.224, "max_norm": 65.19213104248047, "max_norm/layer0": 65.19213104248047, "mean_norm": 41.94645309448242, "mean_norm/layer0": 41.94645309448242, "multicode_k": 1, "output_norm": 18.33804360071819, "output_norm/layer0": 18.33804360071819, "step": 5200 }, { "MSE": 638.6207899983721, "MSE/layer0": 638.6207899983721, "dead_code_fraction": 0.17055, "dead_code_fraction/layer0": 0.17055, "epoch": 1.05, "input_norm": 31.998394203186038, "input_norm/layer0": 31.998394203186038, "learning_rate": 0.0005, "loss": 2.2235, "max_norm": 65.36846160888672, "max_norm/layer0": 65.36846160888672, "mean_norm": 41.991315841674805, "mean_norm/layer0": 41.991315841674805, "multicode_k": 1, "output_norm": 18.346421286265045, "output_norm/layer0": 18.346421286265045, "step": 5250 }, { "MSE": 638.3484961954751, "MSE/layer0": 638.3484961954751, "dead_code_fraction": 0.1704, "dead_code_fraction/layer0": 0.1704, "epoch": 1.05, "input_norm": 31.998402004241942, "input_norm/layer0": 31.998402004241942, "learning_rate": 0.0005, "loss": 2.209, "max_norm": 65.53041076660156, "max_norm/layer0": 65.53041076660156, "mean_norm": 42.0357780456543, "mean_norm/layer0": 42.0357780456543, "multicode_k": 1, "output_norm": 18.351918992996215, "output_norm/layer0": 18.351918992996215, "step": 5300 }, { "MSE": 638.9349023437496, "MSE/layer0": 638.9349023437496, "dead_code_fraction": 0.1671, "dead_code_fraction/layer0": 0.1671, "epoch": 1.06, "input_norm": 31.998392171859756, "input_norm/layer0": 31.998392171859756, "learning_rate": 0.0005, "loss": 2.1737, "max_norm": 65.69444274902344, "max_norm/layer0": 65.69444274902344, "mean_norm": 42.078935623168945, "mean_norm/layer0": 42.078935623168945, "multicode_k": 1, "output_norm": 18.365610707600908, "output_norm/layer0": 18.365610707600908, "step": 5350 }, { "MSE": 638.1850768025716, "MSE/layer0": 638.1850768025716, "dead_code_fraction": 0.17125, "dead_code_fraction/layer0": 0.17125, "epoch": 1.06, "input_norm": 31.99840373039246, "input_norm/layer0": 31.99840373039246, "learning_rate": 0.0005, "loss": 2.1904, "max_norm": 65.84613037109375, "max_norm/layer0": 65.84613037109375, "mean_norm": 42.122589111328125, "mean_norm/layer0": 42.122589111328125, "multicode_k": 1, "output_norm": 18.371175734202062, "output_norm/layer0": 18.371175734202062, "step": 5400 }, { "MSE": 637.5771400960282, "MSE/layer0": 637.5771400960282, "dead_code_fraction": 0.17005, "dead_code_fraction/layer0": 0.17005, "epoch": 1.07, "input_norm": 31.998408838907892, "input_norm/layer0": 31.998408838907892, "learning_rate": 0.0005, "loss": 2.2013, "max_norm": 66.00259399414062, "max_norm/layer0": 66.00259399414062, "mean_norm": 42.16551399230957, "mean_norm/layer0": 42.16551399230957, "multicode_k": 1, "output_norm": 18.396056934992465, "output_norm/layer0": 18.396056934992465, "step": 5450 }, { "MSE": 637.4973764038084, "MSE/layer0": 637.4973764038084, "dead_code_fraction": 0.17135, "dead_code_fraction/layer0": 0.17135, "epoch": 1.07, "input_norm": 31.998402996063238, "input_norm/layer0": 31.998402996063238, "learning_rate": 0.0005, "loss": 2.1686, "max_norm": 66.15951538085938, "max_norm/layer0": 66.15951538085938, "mean_norm": 42.207963943481445, "mean_norm/layer0": 42.207963943481445, "multicode_k": 1, "output_norm": 18.402882191340133, "output_norm/layer0": 18.402882191340133, "step": 5500 }, { "epoch": 1.07, "eval_MSE/layer0": 638.6084431543663, "eval_accuracy": 0.5026125270625071, "eval_dead_code_fraction/layer0": 0.17165, "eval_input_norm/layer0": 31.99841410479916, "eval_loss": 2.1934523582458496, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.401259186926417, "eval_runtime": 158.4926, "eval_samples_per_second": 29.169, "eval_steps_per_second": 1.823, "step": 5500 }, { "MSE": 637.3816906738282, "MSE/layer0": 637.3816906738282, "dead_code_fraction": 0.17125, "dead_code_fraction/layer0": 0.17125, "epoch": 1.08, "input_norm": 31.998415158589676, "input_norm/layer0": 31.998415158589676, "learning_rate": 0.0005, "loss": 2.2097, "max_norm": 66.32366180419922, "max_norm/layer0": 66.32366180419922, "mean_norm": 42.25027084350586, "mean_norm/layer0": 42.25027084350586, "multicode_k": 1, "output_norm": 18.40568763732911, "output_norm/layer0": 18.40568763732911, "step": 5550 }, { "MSE": 636.5928268432615, "MSE/layer0": 636.5928268432615, "dead_code_fraction": 0.1711, "dead_code_fraction/layer0": 0.1711, "epoch": 1.08, "input_norm": 31.99841807047526, "input_norm/layer0": 31.99841807047526, "learning_rate": 0.0005, "loss": 2.1987, "max_norm": 66.49840545654297, "max_norm/layer0": 66.49840545654297, "mean_norm": 42.29284858703613, "mean_norm/layer0": 42.29284858703613, "multicode_k": 1, "output_norm": 18.424939454396565, "output_norm/layer0": 18.424939454396565, "step": 5600 }, { "MSE": 637.195534973145, "MSE/layer0": 637.195534973145, "dead_code_fraction": 0.17175, "dead_code_fraction/layer0": 0.17175, "epoch": 1.09, "input_norm": 31.99841377894082, "input_norm/layer0": 31.99841377894082, "learning_rate": 0.0005, "loss": 2.1571, "max_norm": 66.6655502319336, "max_norm/layer0": 66.6655502319336, "mean_norm": 42.33401679992676, "mean_norm/layer0": 42.33401679992676, "multicode_k": 1, "output_norm": 18.427337226867675, "output_norm/layer0": 18.427337226867675, "step": 5650 }, { "MSE": 635.8865025838217, "MSE/layer0": 635.8865025838217, "dead_code_fraction": 0.1736, "dead_code_fraction/layer0": 0.1736, "epoch": 1.09, "input_norm": 31.998435058593753, "input_norm/layer0": 31.998435058593753, "learning_rate": 0.0005, "loss": 2.2146, "max_norm": 66.82868957519531, "max_norm/layer0": 66.82868957519531, "mean_norm": 42.37582206726074, "mean_norm/layer0": 42.37582206726074, "multicode_k": 1, "output_norm": 18.443573204676298, "output_norm/layer0": 18.443573204676298, "step": 5700 }, { "MSE": 636.1581252034503, "MSE/layer0": 636.1581252034503, "dead_code_fraction": 0.17225, "dead_code_fraction/layer0": 0.17225, "epoch": 1.1, "input_norm": 31.998433354695635, "input_norm/layer0": 31.998433354695635, "learning_rate": 0.0005, "loss": 2.171, "max_norm": 66.9796371459961, "max_norm/layer0": 66.9796371459961, "mean_norm": 42.41728591918945, "mean_norm/layer0": 42.41728591918945, "multicode_k": 1, "output_norm": 18.440257479349775, "output_norm/layer0": 18.440257479349775, "step": 5750 }, { "MSE": 636.7286339314779, "MSE/layer0": 636.7286339314779, "dead_code_fraction": 0.1738, "dead_code_fraction/layer0": 0.1738, "epoch": 1.1, "input_norm": 31.998429416020713, "input_norm/layer0": 31.998429416020713, "learning_rate": 0.0005, "loss": 2.1502, "max_norm": 67.13478088378906, "max_norm/layer0": 67.13478088378906, "mean_norm": 42.45817756652832, "mean_norm/layer0": 42.45817756652832, "multicode_k": 1, "output_norm": 18.442232058842986, "output_norm/layer0": 18.442232058842986, "step": 5800 }, { "MSE": 635.2576449584958, "MSE/layer0": 635.2576449584958, "dead_code_fraction": 0.17405, "dead_code_fraction/layer0": 0.17405, "epoch": 1.11, "input_norm": 31.99844219843547, "input_norm/layer0": 31.99844219843547, "learning_rate": 0.0005, "loss": 2.2067, "max_norm": 67.28919982910156, "max_norm/layer0": 67.28919982910156, "mean_norm": 42.49948501586914, "mean_norm/layer0": 42.49948501586914, "multicode_k": 1, "output_norm": 18.46717386881511, "output_norm/layer0": 18.46717386881511, "step": 5850 }, { "MSE": 636.0759664916989, "MSE/layer0": 636.0759664916989, "dead_code_fraction": 0.17355, "dead_code_fraction/layer0": 0.17355, "epoch": 1.11, "input_norm": 31.998439470926915, "input_norm/layer0": 31.998439470926915, "learning_rate": 0.0005, "loss": 2.1543, "max_norm": 67.44383239746094, "max_norm/layer0": 67.44383239746094, "mean_norm": 42.53946495056152, "mean_norm/layer0": 42.53946495056152, "multicode_k": 1, "output_norm": 18.469777971903483, "output_norm/layer0": 18.469777971903483, "step": 5900 }, { "MSE": 635.3813305664057, "MSE/layer0": 635.3813305664057, "dead_code_fraction": 0.17405, "dead_code_fraction/layer0": 0.17405, "epoch": 1.12, "input_norm": 31.99844372113545, "input_norm/layer0": 31.99844372113545, "learning_rate": 0.0005, "loss": 2.1846, "max_norm": 67.59025573730469, "max_norm/layer0": 67.59025573730469, "mean_norm": 42.58071327209473, "mean_norm/layer0": 42.58071327209473, "multicode_k": 1, "output_norm": 18.477715517679847, "output_norm/layer0": 18.477715517679847, "step": 5950 }, { "MSE": 634.5524212646484, "MSE/layer0": 634.5524212646484, "dead_code_fraction": 0.17535, "dead_code_fraction/layer0": 0.17535, "epoch": 1.12, "input_norm": 31.998457225163776, "input_norm/layer0": 31.998457225163776, "learning_rate": 0.0005, "loss": 2.2158, "max_norm": 67.7379379272461, "max_norm/layer0": 67.7379379272461, "mean_norm": 42.62178421020508, "mean_norm/layer0": 42.62178421020508, "multicode_k": 1, "output_norm": 18.489366165796913, "output_norm/layer0": 18.489366165796913, "step": 6000 }, { "epoch": 1.12, "eval_MSE/layer0": 632.9325560995336, "eval_accuracy": 0.5036799089257694, "eval_dead_code_fraction/layer0": 0.17795, "eval_input_norm/layer0": 31.998461353451354, "eval_loss": 2.1832942962646484, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.51493810096293, "eval_runtime": 158.8489, "eval_samples_per_second": 29.103, "eval_steps_per_second": 1.819, "step": 6000 }, { "MSE": 634.7784757486979, "MSE/layer0": 634.7784757486979, "dead_code_fraction": 0.1755, "dead_code_fraction/layer0": 0.1755, "epoch": 1.13, "input_norm": 31.99845712025961, "input_norm/layer0": 31.99845712025961, "learning_rate": 0.0005, "loss": 2.1789, "max_norm": 67.8902816772461, "max_norm/layer0": 67.8902816772461, "mean_norm": 42.66269874572754, "mean_norm/layer0": 42.66269874572754, "multicode_k": 1, "output_norm": 18.49625307718913, "output_norm/layer0": 18.49625307718913, "step": 6050 }, { "MSE": 634.5078458658851, "MSE/layer0": 634.5078458658851, "dead_code_fraction": 0.17445, "dead_code_fraction/layer0": 0.17445, "epoch": 1.13, "input_norm": 31.99845917383831, "input_norm/layer0": 31.99845917383831, "learning_rate": 0.0005, "loss": 2.2009, "max_norm": 68.04124450683594, "max_norm/layer0": 68.04124450683594, "mean_norm": 42.70250701904297, "mean_norm/layer0": 42.70250701904297, "multicode_k": 1, "output_norm": 18.514623686472582, "output_norm/layer0": 18.514623686472582, "step": 6100 }, { "MSE": 634.443066914876, "MSE/layer0": 634.443066914876, "dead_code_fraction": 0.17575, "dead_code_fraction/layer0": 0.17575, "epoch": 1.14, "input_norm": 31.99845913887024, "input_norm/layer0": 31.99845913887024, "learning_rate": 0.0005, "loss": 2.1623, "max_norm": 68.17865753173828, "max_norm/layer0": 68.17865753173828, "mean_norm": 42.742488861083984, "mean_norm/layer0": 42.742488861083984, "multicode_k": 1, "output_norm": 18.513023862838743, "output_norm/layer0": 18.513023862838743, "step": 6150 }, { "MSE": 633.6522382609048, "MSE/layer0": 633.6522382609048, "dead_code_fraction": 0.17475, "dead_code_fraction/layer0": 0.17475, "epoch": 1.14, "input_norm": 31.998471844991045, "input_norm/layer0": 31.998471844991045, "learning_rate": 0.0005, "loss": 2.1824, "max_norm": 68.31253051757812, "max_norm/layer0": 68.31253051757812, "mean_norm": 42.782148361206055, "mean_norm/layer0": 42.782148361206055, "multicode_k": 1, "output_norm": 18.529316590627033, "output_norm/layer0": 18.529316590627033, "step": 6200 }, { "MSE": 634.0474910481774, "MSE/layer0": 634.0474910481774, "dead_code_fraction": 0.1771, "dead_code_fraction/layer0": 0.1771, "epoch": 1.15, "input_norm": 31.998480736414585, "input_norm/layer0": 31.998480736414585, "learning_rate": 0.0005, "loss": 2.1948, "max_norm": 68.44271850585938, "max_norm/layer0": 68.44271850585938, "mean_norm": 42.82079887390137, "mean_norm/layer0": 42.82079887390137, "multicode_k": 1, "output_norm": 18.524528849919633, "output_norm/layer0": 18.524528849919633, "step": 6250 }, { "MSE": 633.648407084147, "MSE/layer0": 633.648407084147, "dead_code_fraction": 0.1745, "dead_code_fraction/layer0": 0.1745, "epoch": 1.15, "input_norm": 31.998468182881673, "input_norm/layer0": 31.998468182881673, "learning_rate": 0.0005, "loss": 2.1145, "max_norm": 68.57721710205078, "max_norm/layer0": 68.57721710205078, "mean_norm": 42.859825134277344, "mean_norm/layer0": 42.859825134277344, "multicode_k": 1, "output_norm": 18.540853935877482, "output_norm/layer0": 18.540853935877482, "step": 6300 }, { "MSE": 633.5945191446937, "MSE/layer0": 633.5945191446937, "dead_code_fraction": 0.17705, "dead_code_fraction/layer0": 0.17705, "epoch": 1.16, "input_norm": 31.99847273508707, "input_norm/layer0": 31.99847273508707, "learning_rate": 0.0005, "loss": 2.1507, "max_norm": 68.7186050415039, "max_norm/layer0": 68.7186050415039, "mean_norm": 42.897830963134766, "mean_norm/layer0": 42.897830963134766, "multicode_k": 1, "output_norm": 18.55124579429626, "output_norm/layer0": 18.55124579429626, "step": 6350 }, { "MSE": 632.1478841145836, "MSE/layer0": 632.1478841145836, "dead_code_fraction": 0.1775, "dead_code_fraction/layer0": 0.1775, "epoch": 1.16, "input_norm": 31.9984964243571, "input_norm/layer0": 31.9984964243571, "learning_rate": 0.0005, "loss": 2.1962, "max_norm": 68.85418701171875, "max_norm/layer0": 68.85418701171875, "mean_norm": 42.937448501586914, "mean_norm/layer0": 42.937448501586914, "multicode_k": 1, "output_norm": 18.5615934785207, "output_norm/layer0": 18.5615934785207, "step": 6400 }, { "MSE": 632.57952931722, "MSE/layer0": 632.57952931722, "dead_code_fraction": 0.1777, "dead_code_fraction/layer0": 0.1777, "epoch": 1.17, "input_norm": 31.998487294514977, "input_norm/layer0": 31.998487294514977, "learning_rate": 0.0005, "loss": 2.1627, "max_norm": 69.0008316040039, "max_norm/layer0": 69.0008316040039, "mean_norm": 42.97622108459473, "mean_norm/layer0": 42.97622108459473, "multicode_k": 1, "output_norm": 18.57248200734457, "output_norm/layer0": 18.57248200734457, "step": 6450 }, { "MSE": 631.0360174560547, "MSE/layer0": 631.0360174560547, "dead_code_fraction": 0.1784, "dead_code_fraction/layer0": 0.1784, "epoch": 1.17, "input_norm": 31.998495709101356, "input_norm/layer0": 31.998495709101356, "learning_rate": 0.0005, "loss": 2.1843, "max_norm": 69.13652038574219, "max_norm/layer0": 69.13652038574219, "mean_norm": 43.01558876037598, "mean_norm/layer0": 43.01558876037598, "multicode_k": 1, "output_norm": 18.591586551666268, "output_norm/layer0": 18.591586551666268, "step": 6500 }, { "epoch": 1.17, "eval_MSE/layer0": 631.2925020152297, "eval_accuracy": 0.5039093283634951, "eval_dead_code_fraction/layer0": 0.1797, "eval_input_norm/layer0": 31.99848882414009, "eval_loss": 2.175981044769287, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.598594732777567, "eval_runtime": 158.1453, "eval_samples_per_second": 29.233, "eval_steps_per_second": 1.827, "step": 6500 }, { "MSE": 631.294188741048, "MSE/layer0": 631.294188741048, "dead_code_fraction": 0.1796, "dead_code_fraction/layer0": 0.1796, "epoch": 1.18, "input_norm": 31.998505541483564, "input_norm/layer0": 31.998505541483564, "learning_rate": 0.0005, "loss": 2.1855, "max_norm": 69.26646423339844, "max_norm/layer0": 69.26646423339844, "mean_norm": 43.0548152923584, "mean_norm/layer0": 43.0548152923584, "multicode_k": 1, "output_norm": 18.585241152445477, "output_norm/layer0": 18.585241152445477, "step": 6550 }, { "MSE": 631.297376505534, "MSE/layer0": 631.297376505534, "dead_code_fraction": 0.1779, "dead_code_fraction/layer0": 0.1779, "epoch": 1.18, "input_norm": 31.998487745920816, "input_norm/layer0": 31.998487745920816, "learning_rate": 0.0005, "loss": 2.1197, "max_norm": 69.3987808227539, "max_norm/layer0": 69.3987808227539, "mean_norm": 43.093589782714844, "mean_norm/layer0": 43.093589782714844, "multicode_k": 1, "output_norm": 18.605287278493257, "output_norm/layer0": 18.605287278493257, "step": 6600 }, { "MSE": 630.8991915893555, "MSE/layer0": 630.8991915893555, "dead_code_fraction": 0.17815, "dead_code_fraction/layer0": 0.17815, "epoch": 1.19, "input_norm": 31.99848988215129, "input_norm/layer0": 31.99848988215129, "learning_rate": 0.0005, "loss": 2.1248, "max_norm": 69.52507019042969, "max_norm/layer0": 69.52507019042969, "mean_norm": 43.132524490356445, "mean_norm/layer0": 43.132524490356445, "multicode_k": 1, "output_norm": 18.61235850652059, "output_norm/layer0": 18.61235850652059, "step": 6650 }, { "MSE": 629.604686584473, "MSE/layer0": 629.604686584473, "dead_code_fraction": 0.17965, "dead_code_fraction/layer0": 0.17965, "epoch": 1.19, "input_norm": 31.99852681477865, "input_norm/layer0": 31.99852681477865, "learning_rate": 0.0005, "loss": 2.2265, "max_norm": 69.66030883789062, "max_norm/layer0": 69.66030883789062, "mean_norm": 43.17206573486328, "mean_norm/layer0": 43.17206573486328, "multicode_k": 1, "output_norm": 18.626948499679564, "output_norm/layer0": 18.626948499679564, "step": 6700 }, { "MSE": 629.7875715128578, "MSE/layer0": 629.7875715128578, "dead_code_fraction": 0.1802, "dead_code_fraction/layer0": 0.1802, "epoch": 1.2, "input_norm": 31.998509550094596, "input_norm/layer0": 31.998509550094596, "learning_rate": 0.0005, "loss": 2.1432, "max_norm": 69.78119659423828, "max_norm/layer0": 69.78119659423828, "mean_norm": 43.21029472351074, "mean_norm/layer0": 43.21029472351074, "multicode_k": 1, "output_norm": 18.639319947560622, "output_norm/layer0": 18.639319947560622, "step": 6750 }, { "MSE": 629.3708419799802, "MSE/layer0": 629.3708419799802, "dead_code_fraction": 0.18015, "dead_code_fraction/layer0": 0.18015, "epoch": 1.2, "input_norm": 31.99851152102152, "input_norm/layer0": 31.99851152102152, "learning_rate": 0.0005, "loss": 2.1606, "max_norm": 69.91252899169922, "max_norm/layer0": 69.91252899169922, "mean_norm": 43.24948692321777, "mean_norm/layer0": 43.24948692321777, "multicode_k": 1, "output_norm": 18.64606482187906, "output_norm/layer0": 18.64606482187906, "step": 6800 }, { "MSE": 628.4038922119142, "MSE/layer0": 628.4038922119142, "dead_code_fraction": 0.1806, "dead_code_fraction/layer0": 0.1806, "epoch": 1.21, "input_norm": 31.998516721725462, "input_norm/layer0": 31.998516721725462, "learning_rate": 0.0005, "loss": 2.1582, "max_norm": 70.04332733154297, "max_norm/layer0": 70.04332733154297, "mean_norm": 43.28862762451172, "mean_norm/layer0": 43.28862762451172, "multicode_k": 1, "output_norm": 18.669758415222162, "output_norm/layer0": 18.669758415222162, "step": 6850 }, { "MSE": 628.1812467447919, "MSE/layer0": 628.1812467447919, "dead_code_fraction": 0.18055, "dead_code_fraction/layer0": 0.18055, "epoch": 1.21, "input_norm": 31.998515844345086, "input_norm/layer0": 31.998515844345086, "learning_rate": 0.0005, "loss": 2.1433, "max_norm": 70.16979217529297, "max_norm/layer0": 70.16979217529297, "mean_norm": 43.327192306518555, "mean_norm/layer0": 43.327192306518555, "multicode_k": 1, "output_norm": 18.674684073130294, "output_norm/layer0": 18.674684073130294, "step": 6900 }, { "MSE": 628.1862957763672, "MSE/layer0": 628.1862957763672, "dead_code_fraction": 0.18045, "dead_code_fraction/layer0": 0.18045, "epoch": 1.22, "input_norm": 31.99852259953816, "input_norm/layer0": 31.99852259953816, "learning_rate": 0.0005, "loss": 2.1458, "max_norm": 70.29747772216797, "max_norm/layer0": 70.29747772216797, "mean_norm": 43.36609077453613, "mean_norm/layer0": 43.36609077453613, "multicode_k": 1, "output_norm": 18.682749029795335, "output_norm/layer0": 18.682749029795335, "step": 6950 }, { "MSE": 627.7981392415361, "MSE/layer0": 627.7981392415361, "dead_code_fraction": 0.18045, "dead_code_fraction/layer0": 0.18045, "epoch": 1.22, "input_norm": 31.998523871103927, "input_norm/layer0": 31.998523871103927, "learning_rate": 0.0005, "loss": 2.1339, "max_norm": 70.425537109375, "max_norm/layer0": 70.425537109375, "mean_norm": 43.40445899963379, "mean_norm/layer0": 43.40445899963379, "multicode_k": 1, "output_norm": 18.696380834579458, "output_norm/layer0": 18.696380834579458, "step": 7000 }, { "epoch": 1.22, "eval_MSE/layer0": 627.9790743019787, "eval_accuracy": 0.5048263717749389, "eval_dead_code_fraction/layer0": 0.1819, "eval_input_norm/layer0": 31.998524618592334, "eval_loss": 2.1696202754974365, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.705300997223095, "eval_runtime": 159.1692, "eval_samples_per_second": 29.045, "eval_steps_per_second": 1.816, "step": 7000 }, { "MSE": 627.3165437825519, "MSE/layer0": 627.3165437825519, "dead_code_fraction": 0.1822, "dead_code_fraction/layer0": 0.1822, "epoch": 1.23, "input_norm": 31.99852600097656, "input_norm/layer0": 31.99852600097656, "learning_rate": 0.0005, "loss": 2.1483, "max_norm": 70.54450988769531, "max_norm/layer0": 70.54450988769531, "mean_norm": 43.442848205566406, "mean_norm/layer0": 43.442848205566406, "multicode_k": 1, "output_norm": 18.700957148869843, "output_norm/layer0": 18.700957148869843, "step": 7050 }, { "MSE": 626.7479965209961, "MSE/layer0": 626.7479965209961, "dead_code_fraction": 0.1804, "dead_code_fraction/layer0": 0.1804, "epoch": 1.23, "input_norm": 31.998541386922206, "input_norm/layer0": 31.998541386922206, "learning_rate": 0.0005, "loss": 2.1512, "max_norm": 70.66608428955078, "max_norm/layer0": 70.66608428955078, "mean_norm": 43.48159599304199, "mean_norm/layer0": 43.48159599304199, "multicode_k": 1, "output_norm": 18.714396947224948, "output_norm/layer0": 18.714396947224948, "step": 7100 }, { "MSE": 626.4450497436519, "MSE/layer0": 626.4450497436519, "dead_code_fraction": 0.1823, "dead_code_fraction/layer0": 0.1823, "epoch": 1.24, "input_norm": 31.998545411427806, "input_norm/layer0": 31.998545411427806, "learning_rate": 0.0005, "loss": 2.1586, "max_norm": 70.7937240600586, "max_norm/layer0": 70.7937240600586, "mean_norm": 43.5198860168457, "mean_norm/layer0": 43.5198860168457, "multicode_k": 1, "output_norm": 18.726943721771242, "output_norm/layer0": 18.726943721771242, "step": 7150 }, { "MSE": 626.1652618408202, "MSE/layer0": 626.1652618408202, "dead_code_fraction": 0.1814, "dead_code_fraction/layer0": 0.1814, "epoch": 1.24, "input_norm": 31.998541978200272, "input_norm/layer0": 31.998541978200272, "learning_rate": 0.0005, "loss": 2.1552, "max_norm": 70.90862274169922, "max_norm/layer0": 70.90862274169922, "mean_norm": 43.55833053588867, "mean_norm/layer0": 43.55833053588867, "multicode_k": 1, "output_norm": 18.731371542612706, "output_norm/layer0": 18.731371542612706, "step": 7200 }, { "MSE": 625.2572497558597, "MSE/layer0": 625.2572497558597, "dead_code_fraction": 0.1839, "dead_code_fraction/layer0": 0.1839, "epoch": 1.25, "input_norm": 31.998552770614626, "input_norm/layer0": 31.998552770614626, "learning_rate": 0.0005, "loss": 2.1673, "max_norm": 71.0332260131836, "max_norm/layer0": 71.0332260131836, "mean_norm": 43.5967960357666, "mean_norm/layer0": 43.5967960357666, "multicode_k": 1, "output_norm": 18.756609748204536, "output_norm/layer0": 18.756609748204536, "step": 7250 }, { "MSE": 624.7860372924804, "MSE/layer0": 624.7860372924804, "dead_code_fraction": 0.1831, "dead_code_fraction/layer0": 0.1831, "epoch": 1.25, "input_norm": 31.998555002212534, "input_norm/layer0": 31.998555002212534, "learning_rate": 0.0005, "loss": 2.1575, "max_norm": 71.15364837646484, "max_norm/layer0": 71.15364837646484, "mean_norm": 43.63525199890137, "mean_norm/layer0": 43.63525199890137, "multicode_k": 1, "output_norm": 18.767410192489628, "output_norm/layer0": 18.767410192489628, "step": 7300 }, { "MSE": 624.7060753377278, "MSE/layer0": 624.7060753377278, "dead_code_fraction": 0.18335, "dead_code_fraction/layer0": 0.18335, "epoch": 1.26, "input_norm": 31.99856230099995, "input_norm/layer0": 31.99856230099995, "learning_rate": 0.0005, "loss": 2.1622, "max_norm": 71.2812271118164, "max_norm/layer0": 71.2812271118164, "mean_norm": 43.67383575439453, "mean_norm/layer0": 43.67383575439453, "multicode_k": 1, "output_norm": 18.77556623776755, "output_norm/layer0": 18.77556623776755, "step": 7350 }, { "MSE": 623.9612900797528, "MSE/layer0": 623.9612900797528, "dead_code_fraction": 0.1834, "dead_code_fraction/layer0": 0.1834, "epoch": 1.26, "input_norm": 31.998564265569062, "input_norm/layer0": 31.998564265569062, "learning_rate": 0.0005, "loss": 2.1721, "max_norm": 71.4082260131836, "max_norm/layer0": 71.4082260131836, "mean_norm": 43.71280097961426, "mean_norm/layer0": 43.71280097961426, "multicode_k": 1, "output_norm": 18.78839166323344, "output_norm/layer0": 18.78839166323344, "step": 7400 }, { "MSE": 623.9870674641929, "MSE/layer0": 623.9870674641929, "dead_code_fraction": 0.18355, "dead_code_fraction/layer0": 0.18355, "epoch": 1.27, "input_norm": 31.998560991287228, "input_norm/layer0": 31.998560991287228, "learning_rate": 0.0005, "loss": 2.1424, "max_norm": 71.52973937988281, "max_norm/layer0": 71.52973937988281, "mean_norm": 43.75117111206055, "mean_norm/layer0": 43.75117111206055, "multicode_k": 1, "output_norm": 18.79942525227863, "output_norm/layer0": 18.79942525227863, "step": 7450 }, { "MSE": 622.7629538981118, "MSE/layer0": 622.7629538981118, "dead_code_fraction": 0.1844, "dead_code_fraction/layer0": 0.1844, "epoch": 1.27, "input_norm": 31.998580735524506, "input_norm/layer0": 31.998580735524506, "learning_rate": 0.0005, "loss": 2.187, "max_norm": 71.64968872070312, "max_norm/layer0": 71.64968872070312, "mean_norm": 43.790061950683594, "mean_norm/layer0": 43.790061950683594, "multicode_k": 1, "output_norm": 18.81509483655294, "output_norm/layer0": 18.81509483655294, "step": 7500 }, { "epoch": 1.27, "eval_MSE/layer0": 622.122652727573, "eval_accuracy": 0.5062701283839631, "eval_dead_code_fraction/layer0": 0.18665, "eval_input_norm/layer0": 31.998566619663464, "eval_loss": 2.1583967208862305, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.83381110374323, "eval_runtime": 158.6442, "eval_samples_per_second": 29.141, "eval_steps_per_second": 1.822, "step": 7500 }, { "MSE": 622.9042826334635, "MSE/layer0": 622.9042826334635, "dead_code_fraction": 0.1841, "dead_code_fraction/layer0": 0.1841, "epoch": 1.28, "input_norm": 31.998572101593023, "input_norm/layer0": 31.998572101593023, "learning_rate": 0.0005, "loss": 2.1434, "max_norm": 71.76019287109375, "max_norm/layer0": 71.76019287109375, "mean_norm": 43.828460693359375, "mean_norm/layer0": 43.828460693359375, "multicode_k": 1, "output_norm": 18.82229045232136, "output_norm/layer0": 18.82229045232136, "step": 7550 }, { "MSE": 621.695281575521, "MSE/layer0": 621.695281575521, "dead_code_fraction": 0.1854, "dead_code_fraction/layer0": 0.1854, "epoch": 1.28, "input_norm": 31.998584995269773, "input_norm/layer0": 31.998584995269773, "learning_rate": 0.0005, "loss": 2.1712, "max_norm": 71.87606048583984, "max_norm/layer0": 71.87606048583984, "mean_norm": 43.867136001586914, "mean_norm/layer0": 43.867136001586914, "multicode_k": 1, "output_norm": 18.84749958992006, "output_norm/layer0": 18.84749958992006, "step": 7600 }, { "MSE": 622.6274766031902, "MSE/layer0": 622.6274766031902, "dead_code_fraction": 0.18355, "dead_code_fraction/layer0": 0.18355, "epoch": 1.29, "input_norm": 31.998571812311802, "input_norm/layer0": 31.998571812311802, "learning_rate": 0.0005, "loss": 2.1412, "max_norm": 71.98139953613281, "max_norm/layer0": 71.98139953613281, "mean_norm": 43.90544891357422, "mean_norm/layer0": 43.90544891357422, "multicode_k": 1, "output_norm": 18.83851943016053, "output_norm/layer0": 18.83851943016053, "step": 7650 }, { "MSE": 621.3046355183919, "MSE/layer0": 621.3046355183919, "dead_code_fraction": 0.18495, "dead_code_fraction/layer0": 0.18495, "epoch": 1.29, "input_norm": 31.998585087458295, "input_norm/layer0": 31.998585087458295, "learning_rate": 0.0005, "loss": 2.1711, "max_norm": 72.08447265625, "max_norm/layer0": 72.08447265625, "mean_norm": 43.94407653808594, "mean_norm/layer0": 43.94407653808594, "multicode_k": 1, "output_norm": 18.86037411053976, "output_norm/layer0": 18.86037411053976, "step": 7700 }, { "MSE": 620.5873645019533, "MSE/layer0": 620.5873645019533, "dead_code_fraction": 0.18485, "dead_code_fraction/layer0": 0.18485, "epoch": 1.3, "input_norm": 31.998606751759848, "input_norm/layer0": 31.998606751759848, "learning_rate": 0.0005, "loss": 2.2069, "max_norm": 72.18034362792969, "max_norm/layer0": 72.18034362792969, "mean_norm": 43.9833927154541, "mean_norm/layer0": 43.9833927154541, "multicode_k": 1, "output_norm": 18.87507179578146, "output_norm/layer0": 18.87507179578146, "step": 7750 }, { "MSE": 621.2272378540041, "MSE/layer0": 621.2272378540041, "dead_code_fraction": 0.18385, "dead_code_fraction/layer0": 0.18385, "epoch": 1.3, "input_norm": 31.998583949406935, "input_norm/layer0": 31.998583949406935, "learning_rate": 0.0005, "loss": 2.1217, "max_norm": 72.27928924560547, "max_norm/layer0": 72.27928924560547, "mean_norm": 44.021806716918945, "mean_norm/layer0": 44.021806716918945, "multicode_k": 1, "output_norm": 18.877027104695642, "output_norm/layer0": 18.877027104695642, "step": 7800 }, { "MSE": 620.067134602865, "MSE/layer0": 620.067134602865, "dead_code_fraction": 0.18535, "dead_code_fraction/layer0": 0.18535, "epoch": 1.31, "input_norm": 31.998594888051343, "input_norm/layer0": 31.998594888051343, "learning_rate": 0.0005, "loss": 2.1753, "max_norm": 72.39033508300781, "max_norm/layer0": 72.39033508300781, "mean_norm": 44.060611724853516, "mean_norm/layer0": 44.060611724853516, "multicode_k": 1, "output_norm": 18.89820697466533, "output_norm/layer0": 18.89820697466533, "step": 7850 }, { "MSE": 620.6704218546549, "MSE/layer0": 620.6704218546549, "dead_code_fraction": 0.18735, "dead_code_fraction/layer0": 0.18735, "epoch": 1.31, "input_norm": 31.998597246805822, "input_norm/layer0": 31.998597246805822, "learning_rate": 0.0005, "loss": 2.1778, "max_norm": 72.4916000366211, "max_norm/layer0": 72.4916000366211, "mean_norm": 44.09913635253906, "mean_norm/layer0": 44.09913635253906, "multicode_k": 1, "output_norm": 18.890051161448145, "output_norm/layer0": 18.890051161448145, "step": 7900 }, { "MSE": 619.2155123901367, "MSE/layer0": 619.2155123901367, "dead_code_fraction": 0.1863, "dead_code_fraction/layer0": 0.1863, "epoch": 1.32, "input_norm": 31.99860541343688, "input_norm/layer0": 31.99860541343688, "learning_rate": 0.0005, "loss": 2.1684, "max_norm": 72.59037017822266, "max_norm/layer0": 72.59037017822266, "mean_norm": 44.13744926452637, "mean_norm/layer0": 44.13744926452637, "multicode_k": 1, "output_norm": 18.920912733078, "output_norm/layer0": 18.920912733078, "step": 7950 }, { "MSE": 618.8985408528646, "MSE/layer0": 618.8985408528646, "dead_code_fraction": 0.1867, "dead_code_fraction/layer0": 0.1867, "epoch": 1.32, "input_norm": 31.998596220016488, "input_norm/layer0": 31.998596220016488, "learning_rate": 0.0005, "loss": 2.1302, "max_norm": 72.69281768798828, "max_norm/layer0": 72.69281768798828, "mean_norm": 44.176042556762695, "mean_norm/layer0": 44.176042556762695, "multicode_k": 1, "output_norm": 18.93559975624085, "output_norm/layer0": 18.93559975624085, "step": 8000 }, { "epoch": 1.32, "eval_MSE/layer0": 617.7161538934592, "eval_accuracy": 0.5071360017457022, "eval_dead_code_fraction/layer0": 0.18755, "eval_input_norm/layer0": 31.99860155017712, "eval_loss": 2.150786876678467, "eval_multicode_k": 1, "eval_output_norm/layer0": 18.949325338731256, "eval_runtime": 158.4669, "eval_samples_per_second": 29.173, "eval_steps_per_second": 1.824, "step": 8000 }, { "MSE": 619.1937561035155, "MSE/layer0": 619.1937561035155, "dead_code_fraction": 0.18685, "dead_code_fraction/layer0": 0.18685, "epoch": 1.33, "input_norm": 31.998596970240285, "input_norm/layer0": 31.998596970240285, "learning_rate": 0.0005, "loss": 2.1279, "max_norm": 72.79032135009766, "max_norm/layer0": 72.79032135009766, "mean_norm": 44.21445846557617, "mean_norm/layer0": 44.21445846557617, "multicode_k": 1, "output_norm": 18.93686810175578, "output_norm/layer0": 18.93686810175578, "step": 8050 }, { "MSE": 619.539402567546, "MSE/layer0": 619.539402567546, "dead_code_fraction": 0.18665, "dead_code_fraction/layer0": 0.18665, "epoch": 1.33, "input_norm": 31.998598492940268, "input_norm/layer0": 31.998598492940268, "learning_rate": 0.0005, "loss": 2.1113, "max_norm": 72.88322448730469, "max_norm/layer0": 72.88322448730469, "mean_norm": 44.251609802246094, "mean_norm/layer0": 44.251609802246094, "multicode_k": 1, "output_norm": 18.939144274393726, "output_norm/layer0": 18.939144274393726, "step": 8100 }, { "MSE": 617.7248203531905, "MSE/layer0": 617.7248203531905, "dead_code_fraction": 0.18555, "dead_code_fraction/layer0": 0.18555, "epoch": 1.34, "input_norm": 31.99861437161764, "input_norm/layer0": 31.99861437161764, "learning_rate": 0.0005, "loss": 2.1592, "max_norm": 72.97504425048828, "max_norm/layer0": 72.97504425048828, "mean_norm": 44.289913177490234, "mean_norm/layer0": 44.289913177490234, "multicode_k": 1, "output_norm": 18.963457323710102, "output_norm/layer0": 18.963457323710102, "step": 8150 }, { "MSE": 617.1626446533202, "MSE/layer0": 617.1626446533202, "dead_code_fraction": 0.1856, "dead_code_fraction/layer0": 0.1856, "epoch": 1.34, "input_norm": 31.998610553741443, "input_norm/layer0": 31.998610553741443, "learning_rate": 0.0005, "loss": 2.1339, "max_norm": 73.06546020507812, "max_norm/layer0": 73.06546020507812, "mean_norm": 44.32819747924805, "mean_norm/layer0": 44.32819747924805, "multicode_k": 1, "output_norm": 18.980771627426144, "output_norm/layer0": 18.980771627426144, "step": 8200 }, { "MSE": 616.5359758504233, "MSE/layer0": 616.5359758504233, "dead_code_fraction": 0.18785, "dead_code_fraction/layer0": 0.18785, "epoch": 1.35, "input_norm": 31.99861484845479, "input_norm/layer0": 31.99861484845479, "learning_rate": 0.0005, "loss": 2.1287, "max_norm": 73.1684341430664, "max_norm/layer0": 73.1684341430664, "mean_norm": 44.36627197265625, "mean_norm/layer0": 44.36627197265625, "multicode_k": 1, "output_norm": 19.002285525004055, "output_norm/layer0": 19.002285525004055, "step": 8250 }, { "MSE": 616.9324924723311, "MSE/layer0": 616.9324924723311, "dead_code_fraction": 0.18715, "dead_code_fraction/layer0": 0.18715, "epoch": 1.35, "input_norm": 31.998625895182286, "input_norm/layer0": 31.998625895182286, "learning_rate": 0.0005, "loss": 2.1575, "max_norm": 73.259521484375, "max_norm/layer0": 73.259521484375, "mean_norm": 44.40446090698242, "mean_norm/layer0": 44.40446090698242, "multicode_k": 1, "output_norm": 18.992992315292362, "output_norm/layer0": 18.992992315292362, "step": 8300 }, { "MSE": 616.2650039672851, "MSE/layer0": 616.2650039672851, "dead_code_fraction": 0.18655, "dead_code_fraction/layer0": 0.18655, "epoch": 1.36, "input_norm": 31.99862662315369, "input_norm/layer0": 31.99862662315369, "learning_rate": 0.0005, "loss": 2.139, "max_norm": 73.36270141601562, "max_norm/layer0": 73.36270141601562, "mean_norm": 44.44254493713379, "mean_norm/layer0": 44.44254493713379, "multicode_k": 1, "output_norm": 19.00672375679015, "output_norm/layer0": 19.00672375679015, "step": 8350 }, { "MSE": 615.5159185791019, "MSE/layer0": 615.5159185791019, "dead_code_fraction": 0.18685, "dead_code_fraction/layer0": 0.18685, "epoch": 1.36, "input_norm": 31.998618663152055, "input_norm/layer0": 31.998618663152055, "learning_rate": 0.0005, "loss": 2.1207, "max_norm": 73.45561981201172, "max_norm/layer0": 73.45561981201172, "mean_norm": 44.48077201843262, "mean_norm/layer0": 44.48077201843262, "multicode_k": 1, "output_norm": 19.030768597920748, "output_norm/layer0": 19.030768597920748, "step": 8400 }, { "MSE": 615.7112675984704, "MSE/layer0": 615.7112675984704, "dead_code_fraction": 0.18675, "dead_code_fraction/layer0": 0.18675, "epoch": 1.37, "input_norm": 31.99863114674885, "input_norm/layer0": 31.99863114674885, "learning_rate": 0.0005, "loss": 2.1394, "max_norm": 73.54468536376953, "max_norm/layer0": 73.54468536376953, "mean_norm": 44.5194206237793, "mean_norm/layer0": 44.5194206237793, "multicode_k": 1, "output_norm": 19.03362373669942, "output_norm/layer0": 19.03362373669942, "step": 8450 }, { "MSE": 615.0864140828453, "MSE/layer0": 615.0864140828453, "dead_code_fraction": 0.1866, "dead_code_fraction/layer0": 0.1866, "epoch": 1.37, "input_norm": 31.9986399269104, "input_norm/layer0": 31.9986399269104, "learning_rate": 0.0005, "loss": 2.1471, "max_norm": 73.64068603515625, "max_norm/layer0": 73.64068603515625, "mean_norm": 44.55780220031738, "mean_norm/layer0": 44.55780220031738, "multicode_k": 1, "output_norm": 19.04360143979391, "output_norm/layer0": 19.04360143979391, "step": 8500 }, { "epoch": 1.37, "eval_MSE/layer0": 613.7248421548741, "eval_accuracy": 0.5081896395873495, "eval_dead_code_fraction/layer0": 0.1885, "eval_input_norm/layer0": 31.998632826486393, "eval_loss": 2.1443779468536377, "eval_multicode_k": 1, "eval_output_norm/layer0": 19.066619998676906, "eval_runtime": 158.5923, "eval_samples_per_second": 29.15, "eval_steps_per_second": 1.822, "step": 8500 }, { "MSE": 614.1585445149744, "MSE/layer0": 614.1585445149744, "dead_code_fraction": 0.18715, "dead_code_fraction/layer0": 0.18715, "epoch": 1.38, "input_norm": 31.99863867441813, "input_norm/layer0": 31.99863867441813, "learning_rate": 0.0005, "loss": 2.1506, "max_norm": 73.73002624511719, "max_norm/layer0": 73.73002624511719, "mean_norm": 44.597002029418945, "mean_norm/layer0": 44.597002029418945, "multicode_k": 1, "output_norm": 19.06499721844991, "output_norm/layer0": 19.06499721844991, "step": 8550 }, { "MSE": 614.256539204915, "MSE/layer0": 614.256539204915, "dead_code_fraction": 0.1879, "dead_code_fraction/layer0": 0.1879, "epoch": 1.38, "input_norm": 31.998648173014317, "input_norm/layer0": 31.998648173014317, "learning_rate": 0.0005, "loss": 2.1643, "max_norm": 73.80333709716797, "max_norm/layer0": 73.80333709716797, "mean_norm": 44.63543891906738, "mean_norm/layer0": 44.63543891906738, "multicode_k": 1, "output_norm": 19.078293412526467, "output_norm/layer0": 19.078293412526467, "step": 8600 }, { "MSE": 613.3546946207681, "MSE/layer0": 613.3546946207681, "dead_code_fraction": 0.1879, "dead_code_fraction/layer0": 0.1879, "epoch": 1.39, "input_norm": 31.99864864667257, "input_norm/layer0": 31.99864864667257, "learning_rate": 0.0005, "loss": 2.1535, "max_norm": 73.89517974853516, "max_norm/layer0": 73.89517974853516, "mean_norm": 44.674211502075195, "mean_norm/layer0": 44.674211502075195, "multicode_k": 1, "output_norm": 19.09559381167095, "output_norm/layer0": 19.09559381167095, "step": 8650 }, { "MSE": 613.6053087361654, "MSE/layer0": 613.6053087361654, "dead_code_fraction": 0.18645, "dead_code_fraction/layer0": 0.18645, "epoch": 1.39, "input_norm": 31.998652140299477, "input_norm/layer0": 31.998652140299477, "learning_rate": 0.0005, "loss": 2.137, "max_norm": 73.9770736694336, "max_norm/layer0": 73.9770736694336, "mean_norm": 44.71265983581543, "mean_norm/layer0": 44.71265983581543, "multicode_k": 1, "output_norm": 19.098618446985878, "output_norm/layer0": 19.098618446985878, "step": 8700 }, { "MSE": 613.292506408691, "MSE/layer0": 613.292506408691, "dead_code_fraction": 0.1876, "dead_code_fraction/layer0": 0.1876, "epoch": 1.4, "input_norm": 31.998654588063562, "input_norm/layer0": 31.998654588063562, "learning_rate": 0.0005, "loss": 2.1482, "max_norm": 74.05269622802734, "max_norm/layer0": 74.05269622802734, "mean_norm": 44.750946044921875, "mean_norm/layer0": 44.750946044921875, "multicode_k": 1, "output_norm": 19.104494848251342, "output_norm/layer0": 19.104494848251342, "step": 8750 }, { "MSE": 613.8824895222986, "MSE/layer0": 613.8824895222986, "dead_code_fraction": 0.1868, "dead_code_fraction/layer0": 0.1868, "epoch": 1.4, "input_norm": 31.998655049006146, "input_norm/layer0": 31.998655049006146, "learning_rate": 0.0005, "loss": 2.1331, "max_norm": 74.12651824951172, "max_norm/layer0": 74.12651824951172, "mean_norm": 44.7886848449707, "mean_norm/layer0": 44.7886848449707, "multicode_k": 1, "output_norm": 19.110120385487882, "output_norm/layer0": 19.110120385487882, "step": 8800 }, { "MSE": 613.8568901570636, "MSE/layer0": 613.8568901570636, "dead_code_fraction": 0.18675, "dead_code_fraction/layer0": 0.18675, "epoch": 1.41, "input_norm": 31.99864878336588, "input_norm/layer0": 31.99864878336588, "learning_rate": 0.0005, "loss": 2.1038, "max_norm": 74.20288848876953, "max_norm/layer0": 74.20288848876953, "mean_norm": 44.82563400268555, "mean_norm/layer0": 44.82563400268555, "multicode_k": 1, "output_norm": 19.120709832509363, "output_norm/layer0": 19.120709832509363, "step": 8850 }, { "MSE": 612.8203454589843, "MSE/layer0": 612.8203454589843, "dead_code_fraction": 0.18635, "dead_code_fraction/layer0": 0.18635, "epoch": 1.41, "input_norm": 31.99866209030152, "input_norm/layer0": 31.99866209030152, "learning_rate": 0.0005, "loss": 2.1619, "max_norm": 74.27029418945312, "max_norm/layer0": 74.27029418945312, "mean_norm": 44.863847732543945, "mean_norm/layer0": 44.863847732543945, "multicode_k": 1, "output_norm": 19.13362557093303, "output_norm/layer0": 19.13362557093303, "step": 8900 }, { "MSE": 612.7508836873369, "MSE/layer0": 612.7508836873369, "dead_code_fraction": 0.1865, "dead_code_fraction/layer0": 0.1865, "epoch": 1.42, "input_norm": 31.998662964502977, "input_norm/layer0": 31.998662964502977, "learning_rate": 0.0005, "loss": 2.1274, "max_norm": 74.35165405273438, "max_norm/layer0": 74.35165405273438, "mean_norm": 44.90276908874512, "mean_norm/layer0": 44.90276908874512, "multicode_k": 1, "output_norm": 19.13368027687074, "output_norm/layer0": 19.13368027687074, "step": 8950 }, { "MSE": 611.3088948567707, "MSE/layer0": 611.3088948567707, "dead_code_fraction": 0.18625, "dead_code_fraction/layer0": 0.18625, "epoch": 1.42, "input_norm": 31.998670199712116, "input_norm/layer0": 31.998670199712116, "learning_rate": 0.0005, "loss": 2.1556, "max_norm": 74.43575286865234, "max_norm/layer0": 74.43575286865234, "mean_norm": 44.94179916381836, "mean_norm/layer0": 44.94179916381836, "multicode_k": 1, "output_norm": 19.165478760401413, "output_norm/layer0": 19.165478760401413, "step": 9000 }, { "epoch": 1.42, "eval_MSE/layer0": 610.3757424029645, "eval_accuracy": 0.5087341142897861, "eval_dead_code_fraction/layer0": 0.18805, "eval_input_norm/layer0": 31.998659288421646, "eval_loss": 2.139230489730835, "eval_multicode_k": 1, "eval_output_norm/layer0": 19.181722183648382, "eval_runtime": 158.0526, "eval_samples_per_second": 29.25, "eval_steps_per_second": 1.829, "step": 9000 }, { "MSE": 611.2356985473632, "MSE/layer0": 611.2356985473632, "dead_code_fraction": 0.1879, "dead_code_fraction/layer0": 0.1879, "epoch": 1.43, "input_norm": 31.998666836420703, "input_norm/layer0": 31.998666836420703, "learning_rate": 0.0005, "loss": 2.1388, "max_norm": 74.51050567626953, "max_norm/layer0": 74.51050567626953, "mean_norm": 44.98063850402832, "mean_norm/layer0": 44.98063850402832, "multicode_k": 1, "output_norm": 19.177389281590777, "output_norm/layer0": 19.177389281590777, "step": 9050 }, { "MSE": 610.8344569905598, "MSE/layer0": 610.8344569905598, "dead_code_fraction": 0.18865, "dead_code_fraction/layer0": 0.18865, "epoch": 1.43, "input_norm": 31.99867141723631, "input_norm/layer0": 31.99867141723631, "learning_rate": 0.0005, "loss": 2.1328, "max_norm": 74.59440612792969, "max_norm/layer0": 74.59440612792969, "mean_norm": 45.01910400390625, "mean_norm/layer0": 45.01910400390625, "multicode_k": 1, "output_norm": 19.185275354385375, "output_norm/layer0": 19.185275354385375, "step": 9100 }, { "MSE": 610.7402758789062, "MSE/layer0": 610.7402758789062, "dead_code_fraction": 0.1871, "dead_code_fraction/layer0": 0.1871, "epoch": 1.44, "input_norm": 31.99866997400921, "input_norm/layer0": 31.99866997400921, "learning_rate": 0.0005, "loss": 2.117, "max_norm": 74.67122650146484, "max_norm/layer0": 74.67122650146484, "mean_norm": 45.05727577209473, "mean_norm/layer0": 45.05727577209473, "multicode_k": 1, "output_norm": 19.190109596252437, "output_norm/layer0": 19.190109596252437, "step": 9150 }, { "MSE": 610.1339531453451, "MSE/layer0": 610.1339531453451, "dead_code_fraction": 0.18745, "dead_code_fraction/layer0": 0.18745, "epoch": 1.44, "input_norm": 31.998679358164473, "input_norm/layer0": 31.998679358164473, "learning_rate": 0.0005, "loss": 2.1459, "max_norm": 74.7430419921875, "max_norm/layer0": 74.7430419921875, "mean_norm": 45.095571517944336, "mean_norm/layer0": 45.095571517944336, "multicode_k": 1, "output_norm": 19.203376553853335, "output_norm/layer0": 19.203376553853335, "step": 9200 }, { "MSE": 609.6957601928709, "MSE/layer0": 609.6957601928709, "dead_code_fraction": 0.1878, "dead_code_fraction/layer0": 0.1878, "epoch": 1.45, "input_norm": 31.99868172009785, "input_norm/layer0": 31.99868172009785, "learning_rate": 0.0005, "loss": 2.142, "max_norm": 74.8177490234375, "max_norm/layer0": 74.8177490234375, "mean_norm": 45.133853912353516, "mean_norm/layer0": 45.133853912353516, "multicode_k": 1, "output_norm": 19.22210531552632, "output_norm/layer0": 19.22210531552632, "step": 9250 }, { "MSE": 609.5997785441082, "MSE/layer0": 609.5997785441082, "dead_code_fraction": 0.18805, "dead_code_fraction/layer0": 0.18805, "epoch": 1.45, "input_norm": 31.998693205515544, "input_norm/layer0": 31.998693205515544, "learning_rate": 0.0005, "loss": 2.18, "max_norm": 74.87744140625, "max_norm/layer0": 74.87744140625, "mean_norm": 45.172555923461914, "mean_norm/layer0": 45.172555923461914, "multicode_k": 1, "output_norm": 19.226630802154542, "output_norm/layer0": 19.226630802154542, "step": 9300 }, { "MSE": 609.8342389933271, "MSE/layer0": 609.8342389933271, "dead_code_fraction": 0.18735, "dead_code_fraction/layer0": 0.18735, "epoch": 1.46, "input_norm": 31.998687505722053, "input_norm/layer0": 31.998687505722053, "learning_rate": 0.0005, "loss": 2.1164, "max_norm": 74.94609069824219, "max_norm/layer0": 74.94609069824219, "mean_norm": 45.21059799194336, "mean_norm/layer0": 45.21059799194336, "multicode_k": 1, "output_norm": 19.234882882436114, "output_norm/layer0": 19.234882882436114, "step": 9350 }, { "MSE": 609.2034523518882, "MSE/layer0": 609.2034523518882, "dead_code_fraction": 0.1869, "dead_code_fraction/layer0": 0.1869, "epoch": 1.46, "input_norm": 31.99869050979616, "input_norm/layer0": 31.99869050979616, "learning_rate": 0.0005, "loss": 2.1316, "max_norm": 75.01142883300781, "max_norm/layer0": 75.01142883300781, "mean_norm": 45.248979568481445, "mean_norm/layer0": 45.248979568481445, "multicode_k": 1, "output_norm": 19.247848326365144, "output_norm/layer0": 19.247848326365144, "step": 9400 }, { "MSE": 609.0324313354497, "MSE/layer0": 609.0324313354497, "dead_code_fraction": 0.18745, "dead_code_fraction/layer0": 0.18745, "epoch": 1.47, "input_norm": 31.99869132041931, "input_norm/layer0": 31.99869132041931, "learning_rate": 0.0005, "loss": 2.1214, "max_norm": 75.07112121582031, "max_norm/layer0": 75.07112121582031, "mean_norm": 45.287214279174805, "mean_norm/layer0": 45.287214279174805, "multicode_k": 1, "output_norm": 19.25519768079122, "output_norm/layer0": 19.25519768079122, "step": 9450 }, { "MSE": 607.8594933064783, "MSE/layer0": 607.8594933064783, "dead_code_fraction": 0.18835, "dead_code_fraction/layer0": 0.18835, "epoch": 1.47, "input_norm": 31.998687744140625, "input_norm/layer0": 31.998687744140625, "learning_rate": 0.0005, "loss": 2.1067, "max_norm": 75.15766143798828, "max_norm/layer0": 75.15766143798828, "mean_norm": 45.32560920715332, "mean_norm/layer0": 45.32560920715332, "multicode_k": 1, "output_norm": 19.27704188664754, "output_norm/layer0": 19.27704188664754, "step": 9500 }, { "epoch": 1.47, "eval_MSE/layer0": 608.6866096036146, "eval_accuracy": 0.5090880757079915, "eval_dead_code_fraction/layer0": 0.18755, "eval_input_norm/layer0": 31.998685899710146, "eval_loss": 2.1350600719451904, "eval_multicode_k": 1, "eval_output_norm/layer0": 19.283631281241068, "eval_runtime": 158.1797, "eval_samples_per_second": 29.226, "eval_steps_per_second": 1.827, "step": 9500 }, { "MSE": 607.5302533983886, "MSE/layer0": 607.5302533983886, "dead_code_fraction": 0.1872, "dead_code_fraction/layer0": 0.1872, "epoch": 1.48, "input_norm": 31.99869025141972, "input_norm/layer0": 31.99869025141972, "learning_rate": 0.0005, "loss": 2.1075, "max_norm": 75.2263412475586, "max_norm/layer0": 75.2263412475586, "mean_norm": 45.363752365112305, "mean_norm/layer0": 45.363752365112305, "multicode_k": 1, "output_norm": 19.2927733112995, "output_norm/layer0": 19.2927733112995, "step": 9550 }, { "MSE": 608.902215973978, "MSE/layer0": 608.902215973978, "dead_code_fraction": 0.187, "dead_code_fraction/layer0": 0.187, "epoch": 2.0, "input_norm": 31.998686492629858, "input_norm/layer0": 31.998686492629858, "learning_rate": 0.0005, "loss": 2.1013, "max_norm": 75.294677734375, "max_norm/layer0": 75.294677734375, "mean_norm": 45.40024948120117, "mean_norm/layer0": 45.40024948120117, "multicode_k": 1, "output_norm": 19.268582361188244, "output_norm/layer0": 19.268582361188244, "step": 9600 }, { "MSE": 606.3796120198567, "MSE/layer0": 606.3796120198567, "dead_code_fraction": 0.18715, "dead_code_fraction/layer0": 0.18715, "epoch": 2.01, "input_norm": 31.998710851669312, "input_norm/layer0": 31.998710851669312, "learning_rate": 0.0005, "loss": 2.17, "max_norm": 75.35186004638672, "max_norm/layer0": 75.35186004638672, "mean_norm": 45.4382266998291, "mean_norm/layer0": 45.4382266998291, "multicode_k": 1, "output_norm": 19.314183537165327, "output_norm/layer0": 19.314183537165327, "step": 9650 }, { "MSE": 606.9239878336591, "MSE/layer0": 606.9239878336591, "dead_code_fraction": 0.1877, "dead_code_fraction/layer0": 0.1877, "epoch": 2.01, "input_norm": 31.99869126637776, "input_norm/layer0": 31.99869126637776, "learning_rate": 0.0005, "loss": 2.0661, "max_norm": 75.44601440429688, "max_norm/layer0": 75.44601440429688, "mean_norm": 45.47653579711914, "mean_norm/layer0": 45.47653579711914, "multicode_k": 1, "output_norm": 19.313949975967407, "output_norm/layer0": 19.313949975967407, "step": 9700 }, { "MSE": 606.1468785603844, "MSE/layer0": 606.1468785603844, "dead_code_fraction": 0.18755, "dead_code_fraction/layer0": 0.18755, "epoch": 2.02, "input_norm": 31.998706903457652, "input_norm/layer0": 31.998706903457652, "learning_rate": 0.0005, "loss": 2.1325, "max_norm": 75.6237564086914, "max_norm/layer0": 75.6237564086914, "mean_norm": 45.51473808288574, "mean_norm/layer0": 45.51473808288574, "multicode_k": 1, "output_norm": 19.331538470586143, "output_norm/layer0": 19.331538470586143, "step": 9750 }, { "MSE": 606.2908910115561, "MSE/layer0": 606.2908910115561, "dead_code_fraction": 0.18715, "dead_code_fraction/layer0": 0.18715, "epoch": 2.02, "input_norm": 31.998702777226768, "input_norm/layer0": 31.998702777226768, "learning_rate": 0.0005, "loss": 2.0999, "max_norm": 75.77623748779297, "max_norm/layer0": 75.77623748779297, "mean_norm": 45.55307388305664, "mean_norm/layer0": 45.55307388305664, "multicode_k": 1, "output_norm": 19.340178826649982, "output_norm/layer0": 19.340178826649982, "step": 9800 }, { "MSE": 605.7215723673501, "MSE/layer0": 605.7215723673501, "dead_code_fraction": 0.18635, "dead_code_fraction/layer0": 0.18635, "epoch": 2.03, "input_norm": 31.998708073298122, "input_norm/layer0": 31.998708073298122, "learning_rate": 0.0005, "loss": 2.1015, "max_norm": 75.92095184326172, "max_norm/layer0": 75.92095184326172, "mean_norm": 45.591548919677734, "mean_norm/layer0": 45.591548919677734, "multicode_k": 1, "output_norm": 19.351260058085124, "output_norm/layer0": 19.351260058085124, "step": 9850 }, { "MSE": 605.7307819620769, "MSE/layer0": 605.7307819620769, "dead_code_fraction": 0.1879, "dead_code_fraction/layer0": 0.1879, "epoch": 2.03, "input_norm": 31.99871432304383, "input_norm/layer0": 31.99871432304383, "learning_rate": 0.0005, "loss": 2.1079, "max_norm": 76.06104278564453, "max_norm/layer0": 76.06104278564453, "mean_norm": 45.62945747375488, "mean_norm/layer0": 45.62945747375488, "multicode_k": 1, "output_norm": 19.36078415234882, "output_norm/layer0": 19.36078415234882, "step": 9900 }, { "MSE": 605.7736006673174, "MSE/layer0": 605.7736006673174, "dead_code_fraction": 0.1873, "dead_code_fraction/layer0": 0.1873, "epoch": 2.04, "input_norm": 31.99871180534363, "input_norm/layer0": 31.99871180534363, "learning_rate": 0.0005, "loss": 2.102, "max_norm": 76.22486877441406, "max_norm/layer0": 76.22486877441406, "mean_norm": 45.66733360290527, "mean_norm/layer0": 45.66733360290527, "multicode_k": 1, "output_norm": 19.36815209388733, "output_norm/layer0": 19.36815209388733, "step": 9950 }, { "MSE": 604.9809751383466, "MSE/layer0": 604.9809751383466, "dead_code_fraction": 0.1872, "dead_code_fraction/layer0": 0.1872, "epoch": 2.04, "input_norm": 31.998728539148978, "input_norm/layer0": 31.998728539148978, "learning_rate": 0.0005, "loss": 2.1536, "max_norm": 76.40007019042969, "max_norm/layer0": 76.40007019042969, "mean_norm": 45.70543670654297, "mean_norm/layer0": 45.70543670654297, "multicode_k": 1, "output_norm": 19.38911464373271, "output_norm/layer0": 19.38911464373271, "step": 10000 }, { "epoch": 2.04, "eval_MSE/layer0": 604.5096733395267, "eval_accuracy": 0.5091345939349958, "eval_dead_code_fraction/layer0": 0.18795, "eval_input_norm/layer0": 31.99872850438308, "eval_loss": 2.132894992828369, "eval_multicode_k": 1, "eval_output_norm/layer0": 19.389702240368152, "eval_runtime": 158.9177, "eval_samples_per_second": 29.091, "eval_steps_per_second": 1.819, "step": 10000 }, { "MSE": 0.0, "MSE/layer0": 0.0, "dead_code_fraction": 1.0, "dead_code_fraction/layer0": 1.0, "epoch": 2.04, "input_norm": 0.0, "input_norm/layer0": 0.0, "max_norm": 76.40007019042969, "max_norm/layer0": 76.40007019042969, "mean_norm": 45.70543670654297, "mean_norm/layer0": 45.70543670654297, "multicode_k": 1, "output_norm": 0.0, "output_norm/layer0": 0.0, "step": 10000, "total_flos": 7.43098011353088e+16, "train_loss": 2.325971780395508, "train_runtime": 15639.0026, "train_samples_per_second": 61.385, "train_steps_per_second": 0.639 } ], "logging_steps": 50, "max_steps": 10000, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 7.43098011353088e+16, "trial_name": null, "trial_params": null }