| { | |
| "best_metric": 2.132894992828369, | |
| "best_model_checkpoint": "/tmp/wandb/run-20240211_061007-slcnkgcr/files/train_output/checkpoint-10000", | |
| "epoch": 2.042133333333333, | |
| "eval_steps": 500, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "MSE": 891.9713033040365, | |
| "MSE/layer0": 891.9713033040365, | |
| "dead_code_fraction": 0.1506, | |
| "dead_code_fraction/layer0": 0.1506, | |
| "epoch": 0.0, | |
| "input_norm": 31.997233708699547, | |
| "input_norm/layer0": 31.997233708699547, | |
| "learning_rate": 0.0005, | |
| "loss": 8.0845, | |
| "max_norm": 34.580135345458984, | |
| "max_norm/layer0": 34.580135345458984, | |
| "mean_norm": 31.989344596862793, | |
| "mean_norm/layer0": 31.989344596862793, | |
| "multicode_k": 1, | |
| "output_norm": 8.584638833999634, | |
| "output_norm/layer0": 8.584638833999634, | |
| "step": 1 | |
| }, | |
| { | |
| "MSE": 883.0105907414232, | |
| "MSE/layer0": 883.0105907414232, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.01, | |
| "input_norm": 31.99778711876902, | |
| "input_norm/layer0": 31.99778711876902, | |
| "learning_rate": 0.0005, | |
| "loss": 4.8444, | |
| "max_norm": 34.610191345214844, | |
| "max_norm/layer0": 34.610191345214844, | |
| "mean_norm": 32.02294731140137, | |
| "mean_norm/layer0": 32.02294731140137, | |
| "multicode_k": 1, | |
| "output_norm": 8.645599765842462, | |
| "output_norm/layer0": 8.645599765842462, | |
| "step": 50 | |
| }, | |
| { | |
| "MSE": 872.9267329915364, | |
| "MSE/layer0": 872.9267329915364, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.01, | |
| "input_norm": 31.998572165171304, | |
| "input_norm/layer0": 31.998572165171304, | |
| "learning_rate": 0.0005, | |
| "loss": 3.9294, | |
| "max_norm": 34.62763595581055, | |
| "max_norm/layer0": 34.62763595581055, | |
| "mean_norm": 32.06278419494629, | |
| "mean_norm/layer0": 32.06278419494629, | |
| "multicode_k": 1, | |
| "output_norm": 8.74148860613505, | |
| "output_norm/layer0": 8.74148860613505, | |
| "step": 100 | |
| }, | |
| { | |
| "MSE": 866.7590488688152, | |
| "MSE/layer0": 866.7590488688152, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.01, | |
| "input_norm": 31.99865425427754, | |
| "input_norm/layer0": 31.99865425427754, | |
| "learning_rate": 0.0005, | |
| "loss": 3.5413, | |
| "max_norm": 34.65019607543945, | |
| "max_norm/layer0": 34.65019607543945, | |
| "mean_norm": 32.1027717590332, | |
| "mean_norm/layer0": 32.1027717590332, | |
| "multicode_k": 1, | |
| "output_norm": 8.811674615542097, | |
| "output_norm/layer0": 8.811674615542097, | |
| "step": 150 | |
| }, | |
| { | |
| "MSE": 858.8314244588221, | |
| "MSE/layer0": 858.8314244588221, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.02, | |
| "input_norm": 31.998634125391646, | |
| "input_norm/layer0": 31.998634125391646, | |
| "learning_rate": 0.0005, | |
| "loss": 3.3381, | |
| "max_norm": 34.73014831542969, | |
| "max_norm/layer0": 34.73014831542969, | |
| "mean_norm": 32.17362403869629, | |
| "mean_norm/layer0": 32.17362403869629, | |
| "multicode_k": 1, | |
| "output_norm": 8.925555121103923, | |
| "output_norm/layer0": 8.925555121103923, | |
| "step": 200 | |
| }, | |
| { | |
| "MSE": 849.6408699544276, | |
| "MSE/layer0": 849.6408699544276, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.03, | |
| "input_norm": 31.9986141427358, | |
| "input_norm/layer0": 31.9986141427358, | |
| "learning_rate": 0.0005, | |
| "loss": 3.2486, | |
| "max_norm": 34.8281364440918, | |
| "max_norm/layer0": 34.8281364440918, | |
| "mean_norm": 32.26718330383301, | |
| "mean_norm/layer0": 32.26718330383301, | |
| "multicode_k": 1, | |
| "output_norm": 9.101092262268068, | |
| "output_norm/layer0": 9.101092262268068, | |
| "step": 250 | |
| }, | |
| { | |
| "MSE": 841.0051658121741, | |
| "MSE/layer0": 841.0051658121741, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.03, | |
| "input_norm": 31.99862952232361, | |
| "input_norm/layer0": 31.99862952232361, | |
| "learning_rate": 0.0005, | |
| "loss": 3.1503, | |
| "max_norm": 34.946006774902344, | |
| "max_norm/layer0": 34.946006774902344, | |
| "mean_norm": 32.361915588378906, | |
| "mean_norm/layer0": 32.361915588378906, | |
| "multicode_k": 1, | |
| "output_norm": 9.305952178637185, | |
| "output_norm/layer0": 9.305952178637185, | |
| "step": 300 | |
| }, | |
| { | |
| "MSE": 833.1103855387371, | |
| "MSE/layer0": 833.1103855387371, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.04, | |
| "input_norm": 31.998617506027223, | |
| "input_norm/layer0": 31.998617506027223, | |
| "learning_rate": 0.0005, | |
| "loss": 3.0966, | |
| "max_norm": 35.09696578979492, | |
| "max_norm/layer0": 35.09696578979492, | |
| "mean_norm": 32.463951110839844, | |
| "mean_norm/layer0": 32.463951110839844, | |
| "multicode_k": 1, | |
| "output_norm": 9.513547644615176, | |
| "output_norm/layer0": 9.513547644615176, | |
| "step": 350 | |
| }, | |
| { | |
| "MSE": 824.8635622151694, | |
| "MSE/layer0": 824.8635622151694, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.04, | |
| "input_norm": 31.998617092768363, | |
| "input_norm/layer0": 31.998617092768363, | |
| "learning_rate": 0.0005, | |
| "loss": 3.0998, | |
| "max_norm": 35.28767013549805, | |
| "max_norm/layer0": 35.28767013549805, | |
| "mean_norm": 32.571420669555664, | |
| "mean_norm/layer0": 32.571420669555664, | |
| "multicode_k": 1, | |
| "output_norm": 9.74717748324076, | |
| "output_norm/layer0": 9.74717748324076, | |
| "step": 400 | |
| }, | |
| { | |
| "MSE": 817.218793334961, | |
| "MSE/layer0": 817.218793334961, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.04, | |
| "input_norm": 31.99862334251403, | |
| "input_norm/layer0": 31.99862334251403, | |
| "learning_rate": 0.0005, | |
| "loss": 3.0603, | |
| "max_norm": 35.4771842956543, | |
| "max_norm/layer0": 35.4771842956543, | |
| "mean_norm": 32.68177795410156, | |
| "mean_norm/layer0": 32.68177795410156, | |
| "multicode_k": 1, | |
| "output_norm": 9.985308513641357, | |
| "output_norm/layer0": 9.985308513641357, | |
| "step": 450 | |
| }, | |
| { | |
| "MSE": 809.1558084106446, | |
| "MSE/layer0": 809.1558084106446, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.05, | |
| "input_norm": 31.998615137736, | |
| "input_norm/layer0": 31.998615137736, | |
| "learning_rate": 0.0005, | |
| "loss": 3.0494, | |
| "max_norm": 35.6486701965332, | |
| "max_norm/layer0": 35.6486701965332, | |
| "mean_norm": 32.793779373168945, | |
| "mean_norm/layer0": 32.793779373168945, | |
| "multicode_k": 1, | |
| "output_norm": 10.232081251144415, | |
| "output_norm/layer0": 10.232081251144415, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_MSE/layer0": 805.1675846628777, | |
| "eval_accuracy": 0.41770872781318447, | |
| "eval_dead_code_fraction/layer0": 0.0, | |
| "eval_input_norm/layer0": 31.998606410347342, | |
| "eval_loss": 2.992654323577881, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 10.360000263063938, | |
| "eval_runtime": 159.8847, | |
| "eval_samples_per_second": 28.915, | |
| "eval_steps_per_second": 1.808, | |
| "step": 500 | |
| }, | |
| { | |
| "MSE": 801.7215725708003, | |
| "MSE/layer0": 801.7215725708003, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.06, | |
| "input_norm": 31.998598546981817, | |
| "input_norm/layer0": 31.998598546981817, | |
| "learning_rate": 0.0005, | |
| "loss": 2.9547, | |
| "max_norm": 35.86976623535156, | |
| "max_norm/layer0": 35.86976623535156, | |
| "mean_norm": 32.91193962097168, | |
| "mean_norm/layer0": 32.91193962097168, | |
| "multicode_k": 1, | |
| "output_norm": 10.47719025929769, | |
| "output_norm/layer0": 10.47719025929769, | |
| "step": 550 | |
| }, | |
| { | |
| "MSE": 794.043483174642, | |
| "MSE/layer0": 794.043483174642, | |
| "dead_code_fraction": 0.0, | |
| "dead_code_fraction/layer0": 0.0, | |
| "epoch": 0.06, | |
| "input_norm": 31.99859639167787, | |
| "input_norm/layer0": 31.99859639167787, | |
| "learning_rate": 0.0005, | |
| "loss": 2.9506, | |
| "max_norm": 36.08134078979492, | |
| "max_norm/layer0": 36.08134078979492, | |
| "mean_norm": 33.03110313415527, | |
| "mean_norm/layer0": 33.03110313415527, | |
| "multicode_k": 1, | |
| "output_norm": 10.729146582285566, | |
| "output_norm/layer0": 10.729146582285566, | |
| "step": 600 | |
| }, | |
| { | |
| "MSE": 786.3193520100913, | |
| "MSE/layer0": 786.3193520100913, | |
| "dead_code_fraction": 5e-05, | |
| "dead_code_fraction/layer0": 5e-05, | |
| "epoch": 0.07, | |
| "input_norm": 31.99857716878254, | |
| "input_norm/layer0": 31.99857716878254, | |
| "learning_rate": 0.0005, | |
| "loss": 2.8944, | |
| "max_norm": 36.33954620361328, | |
| "max_norm/layer0": 36.33954620361328, | |
| "mean_norm": 33.15106773376465, | |
| "mean_norm/layer0": 33.15106773376465, | |
| "multicode_k": 1, | |
| "output_norm": 10.987898168563845, | |
| "output_norm/layer0": 10.987898168563845, | |
| "step": 650 | |
| }, | |
| { | |
| "MSE": 780.0598099772137, | |
| "MSE/layer0": 780.0598099772137, | |
| "dead_code_fraction": 0.0001, | |
| "dead_code_fraction/layer0": 0.0001, | |
| "epoch": 0.07, | |
| "input_norm": 31.998565645217887, | |
| "input_norm/layer0": 31.998565645217887, | |
| "learning_rate": 0.0005, | |
| "loss": 2.8643, | |
| "max_norm": 36.55862808227539, | |
| "max_norm/layer0": 36.55862808227539, | |
| "mean_norm": 33.269744873046875, | |
| "mean_norm/layer0": 33.269744873046875, | |
| "multicode_k": 1, | |
| "output_norm": 11.218051005999246, | |
| "output_norm/layer0": 11.218051005999246, | |
| "step": 700 | |
| }, | |
| { | |
| "MSE": 772.4797055053714, | |
| "MSE/layer0": 772.4797055053714, | |
| "dead_code_fraction": 0.00045, | |
| "dead_code_fraction/layer0": 0.00045, | |
| "epoch": 0.07, | |
| "input_norm": 31.998559678395594, | |
| "input_norm/layer0": 31.998559678395594, | |
| "learning_rate": 0.0005, | |
| "loss": 2.8618, | |
| "max_norm": 36.793521881103516, | |
| "max_norm/layer0": 36.793521881103516, | |
| "mean_norm": 33.39421844482422, | |
| "mean_norm/layer0": 33.39421844482422, | |
| "multicode_k": 1, | |
| "output_norm": 11.470201053619387, | |
| "output_norm/layer0": 11.470201053619387, | |
| "step": 750 | |
| }, | |
| { | |
| "MSE": 766.037492879232, | |
| "MSE/layer0": 766.037492879232, | |
| "dead_code_fraction": 0.00055, | |
| "dead_code_fraction/layer0": 0.00055, | |
| "epoch": 0.08, | |
| "input_norm": 31.99854364713033, | |
| "input_norm/layer0": 31.99854364713033, | |
| "learning_rate": 0.0005, | |
| "loss": 2.8403, | |
| "max_norm": 37.0079231262207, | |
| "max_norm/layer0": 37.0079231262207, | |
| "mean_norm": 33.52132034301758, | |
| "mean_norm/layer0": 33.52132034301758, | |
| "multicode_k": 1, | |
| "output_norm": 11.711471532185875, | |
| "output_norm/layer0": 11.711471532185875, | |
| "step": 800 | |
| }, | |
| { | |
| "MSE": 759.9610600789387, | |
| "MSE/layer0": 759.9610600789387, | |
| "dead_code_fraction": 0.00135, | |
| "dead_code_fraction/layer0": 0.00135, | |
| "epoch": 0.09, | |
| "input_norm": 31.998529828389472, | |
| "input_norm/layer0": 31.998529828389472, | |
| "learning_rate": 0.0005, | |
| "loss": 2.7453, | |
| "max_norm": 37.20747375488281, | |
| "max_norm/layer0": 37.20747375488281, | |
| "mean_norm": 33.64577674865723, | |
| "mean_norm/layer0": 33.64577674865723, | |
| "multicode_k": 1, | |
| "output_norm": 11.93199801921844, | |
| "output_norm/layer0": 11.93199801921844, | |
| "step": 850 | |
| }, | |
| { | |
| "MSE": 753.5576912434896, | |
| "MSE/layer0": 753.5576912434896, | |
| "dead_code_fraction": 0.00205, | |
| "dead_code_fraction/layer0": 0.00205, | |
| "epoch": 0.09, | |
| "input_norm": 31.99852911949157, | |
| "input_norm/layer0": 31.99852911949157, | |
| "learning_rate": 0.0005, | |
| "loss": 2.7975, | |
| "max_norm": 37.432743072509766, | |
| "max_norm/layer0": 37.432743072509766, | |
| "mean_norm": 33.778066635131836, | |
| "mean_norm/layer0": 33.778066635131836, | |
| "multicode_k": 1, | |
| "output_norm": 12.165767738024394, | |
| "output_norm/layer0": 12.165767738024394, | |
| "step": 900 | |
| }, | |
| { | |
| "MSE": 747.6473927815753, | |
| "MSE/layer0": 747.6473927815753, | |
| "dead_code_fraction": 0.00335, | |
| "dead_code_fraction/layer0": 0.00335, | |
| "epoch": 0.1, | |
| "input_norm": 31.998517106374106, | |
| "input_norm/layer0": 31.998517106374106, | |
| "learning_rate": 0.0005, | |
| "loss": 2.7378, | |
| "max_norm": 37.62055969238281, | |
| "max_norm/layer0": 37.62055969238281, | |
| "mean_norm": 33.90963554382324, | |
| "mean_norm/layer0": 33.90963554382324, | |
| "multicode_k": 1, | |
| "output_norm": 12.390189347267153, | |
| "output_norm/layer0": 12.390189347267153, | |
| "step": 950 | |
| }, | |
| { | |
| "MSE": 742.6674826049805, | |
| "MSE/layer0": 742.6674826049805, | |
| "dead_code_fraction": 0.0048, | |
| "dead_code_fraction/layer0": 0.0048, | |
| "epoch": 0.1, | |
| "input_norm": 31.998499689102182, | |
| "input_norm/layer0": 31.998499689102182, | |
| "learning_rate": 0.0005, | |
| "loss": 2.6986, | |
| "max_norm": 37.880615234375, | |
| "max_norm/layer0": 37.880615234375, | |
| "mean_norm": 34.04428672790527, | |
| "mean_norm/layer0": 34.04428672790527, | |
| "multicode_k": 1, | |
| "output_norm": 12.59642965157827, | |
| "output_norm/layer0": 12.59642965157827, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_MSE/layer0": 739.3243520424373, | |
| "eval_accuracy": 0.44721058737930897, | |
| "eval_dead_code_fraction/layer0": 0.00845, | |
| "eval_input_norm/layer0": 31.998487053973697, | |
| "eval_loss": 2.707960367202759, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 12.71647696584792, | |
| "eval_runtime": 157.5908, | |
| "eval_samples_per_second": 29.335, | |
| "eval_steps_per_second": 1.834, | |
| "step": 1000 | |
| }, | |
| { | |
| "MSE": 736.2877898152667, | |
| "MSE/layer0": 736.2877898152667, | |
| "dead_code_fraction": 0.00735, | |
| "dead_code_fraction/layer0": 0.00735, | |
| "epoch": 0.1, | |
| "input_norm": 31.998484554290766, | |
| "input_norm/layer0": 31.998484554290766, | |
| "learning_rate": 0.0005, | |
| "loss": 2.7222, | |
| "max_norm": 38.21133804321289, | |
| "max_norm/layer0": 38.21133804321289, | |
| "mean_norm": 34.17984199523926, | |
| "mean_norm/layer0": 34.17984199523926, | |
| "multicode_k": 1, | |
| "output_norm": 12.82279133001963, | |
| "output_norm/layer0": 12.82279133001963, | |
| "step": 1050 | |
| }, | |
| { | |
| "MSE": 731.6754523722336, | |
| "MSE/layer0": 731.6754523722336, | |
| "dead_code_fraction": 0.01015, | |
| "dead_code_fraction/layer0": 0.01015, | |
| "epoch": 0.11, | |
| "input_norm": 31.998473711013787, | |
| "input_norm/layer0": 31.998473711013787, | |
| "learning_rate": 0.0005, | |
| "loss": 2.652, | |
| "max_norm": 38.533973693847656, | |
| "max_norm/layer0": 38.533973693847656, | |
| "mean_norm": 34.31424903869629, | |
| "mean_norm/layer0": 34.31424903869629, | |
| "multicode_k": 1, | |
| "output_norm": 13.017293116251633, | |
| "output_norm/layer0": 13.017293116251633, | |
| "step": 1100 | |
| }, | |
| { | |
| "MSE": 726.8081079101562, | |
| "MSE/layer0": 726.8081079101562, | |
| "dead_code_fraction": 0.013, | |
| "dead_code_fraction/layer0": 0.013, | |
| "epoch": 0.12, | |
| "input_norm": 31.99846080144247, | |
| "input_norm/layer0": 31.99846080144247, | |
| "learning_rate": 0.0005, | |
| "loss": 2.6519, | |
| "max_norm": 38.87154769897461, | |
| "max_norm/layer0": 38.87154769897461, | |
| "mean_norm": 34.454498291015625, | |
| "mean_norm/layer0": 34.454498291015625, | |
| "multicode_k": 1, | |
| "output_norm": 13.209378539721174, | |
| "output_norm/layer0": 13.209378539721174, | |
| "step": 1150 | |
| }, | |
| { | |
| "MSE": 722.3268162027996, | |
| "MSE/layer0": 722.3268162027996, | |
| "dead_code_fraction": 0.01565, | |
| "dead_code_fraction/layer0": 0.01565, | |
| "epoch": 0.12, | |
| "input_norm": 31.998446766535434, | |
| "input_norm/layer0": 31.998446766535434, | |
| "learning_rate": 0.0005, | |
| "loss": 2.6464, | |
| "max_norm": 39.23857879638672, | |
| "max_norm/layer0": 39.23857879638672, | |
| "mean_norm": 34.597312927246094, | |
| "mean_norm/layer0": 34.597312927246094, | |
| "multicode_k": 1, | |
| "output_norm": 13.40400979042053, | |
| "output_norm/layer0": 13.40400979042053, | |
| "step": 1200 | |
| }, | |
| { | |
| "MSE": 717.3231912231446, | |
| "MSE/layer0": 717.3231912231446, | |
| "dead_code_fraction": 0.0241, | |
| "dead_code_fraction/layer0": 0.0241, | |
| "epoch": 0.12, | |
| "input_norm": 31.998441489537555, | |
| "input_norm/layer0": 31.998441489537555, | |
| "learning_rate": 0.0005, | |
| "loss": 2.6563, | |
| "max_norm": 39.60569381713867, | |
| "max_norm/layer0": 39.60569381713867, | |
| "mean_norm": 34.73863220214844, | |
| "mean_norm/layer0": 34.73863220214844, | |
| "multicode_k": 1, | |
| "output_norm": 13.590513488451638, | |
| "output_norm/layer0": 13.590513488451638, | |
| "step": 1250 | |
| }, | |
| { | |
| "MSE": 713.6523872884117, | |
| "MSE/layer0": 713.6523872884117, | |
| "dead_code_fraction": 0.02485, | |
| "dead_code_fraction/layer0": 0.02485, | |
| "epoch": 0.13, | |
| "input_norm": 31.998419742584225, | |
| "input_norm/layer0": 31.998419742584225, | |
| "learning_rate": 0.0005, | |
| "loss": 2.5806, | |
| "max_norm": 39.939239501953125, | |
| "max_norm/layer0": 39.939239501953125, | |
| "mean_norm": 34.87986946105957, | |
| "mean_norm/layer0": 34.87986946105957, | |
| "multicode_k": 1, | |
| "output_norm": 13.766959317525227, | |
| "output_norm/layer0": 13.766959317525227, | |
| "step": 1300 | |
| }, | |
| { | |
| "MSE": 709.5852165730794, | |
| "MSE/layer0": 709.5852165730794, | |
| "dead_code_fraction": 0.02925, | |
| "dead_code_fraction/layer0": 0.02925, | |
| "epoch": 0.14, | |
| "input_norm": 31.998412898381545, | |
| "input_norm/layer0": 31.998412898381545, | |
| "learning_rate": 0.0005, | |
| "loss": 2.5789, | |
| "max_norm": 40.28993225097656, | |
| "max_norm/layer0": 40.28993225097656, | |
| "mean_norm": 35.022348403930664, | |
| "mean_norm/layer0": 35.022348403930664, | |
| "multicode_k": 1, | |
| "output_norm": 13.93345036347707, | |
| "output_norm/layer0": 13.93345036347707, | |
| "step": 1350 | |
| }, | |
| { | |
| "MSE": 705.2143248494463, | |
| "MSE/layer0": 705.2143248494463, | |
| "dead_code_fraction": 0.03375, | |
| "dead_code_fraction/layer0": 0.03375, | |
| "epoch": 0.14, | |
| "input_norm": 31.9984123802185, | |
| "input_norm/layer0": 31.9984123802185, | |
| "learning_rate": 0.0005, | |
| "loss": 2.5943, | |
| "max_norm": 40.63530349731445, | |
| "max_norm/layer0": 40.63530349731445, | |
| "mean_norm": 35.164276123046875, | |
| "mean_norm/layer0": 35.164276123046875, | |
| "multicode_k": 1, | |
| "output_norm": 14.105911358197524, | |
| "output_norm/layer0": 14.105911358197524, | |
| "step": 1400 | |
| }, | |
| { | |
| "MSE": 702.3593349202476, | |
| "MSE/layer0": 702.3593349202476, | |
| "dead_code_fraction": 0.0404, | |
| "dead_code_fraction/layer0": 0.0404, | |
| "epoch": 0.14, | |
| "input_norm": 31.99839937845865, | |
| "input_norm/layer0": 31.99839937845865, | |
| "learning_rate": 0.0005, | |
| "loss": 2.5407, | |
| "max_norm": 40.98182678222656, | |
| "max_norm/layer0": 40.98182678222656, | |
| "mean_norm": 35.30343246459961, | |
| "mean_norm/layer0": 35.30343246459961, | |
| "multicode_k": 1, | |
| "output_norm": 14.2450444761912, | |
| "output_norm/layer0": 14.2450444761912, | |
| "step": 1450 | |
| }, | |
| { | |
| "MSE": 699.0307844034837, | |
| "MSE/layer0": 699.0307844034837, | |
| "dead_code_fraction": 0.04535, | |
| "dead_code_fraction/layer0": 0.04535, | |
| "epoch": 0.15, | |
| "input_norm": 31.998390986124676, | |
| "input_norm/layer0": 31.998390986124676, | |
| "learning_rate": 0.0005, | |
| "loss": 2.5145, | |
| "max_norm": 41.328433990478516, | |
| "max_norm/layer0": 41.328433990478516, | |
| "mean_norm": 35.445411682128906, | |
| "mean_norm/layer0": 35.445411682128906, | |
| "multicode_k": 1, | |
| "output_norm": 14.399013953208918, | |
| "output_norm/layer0": 14.399013953208918, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_MSE/layer0": 697.1178701616536, | |
| "eval_accuracy": 0.4637486628652817, | |
| "eval_dead_code_fraction/layer0": 0.05465, | |
| "eval_input_norm/layer0": 31.99837304089923, | |
| "eval_loss": 2.525156259536743, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 14.48893911880305, | |
| "eval_runtime": 156.9005, | |
| "eval_samples_per_second": 29.465, | |
| "eval_steps_per_second": 1.842, | |
| "step": 1500 | |
| }, | |
| { | |
| "MSE": 696.0442759195965, | |
| "MSE/layer0": 696.0442759195965, | |
| "dead_code_fraction": 0.05145, | |
| "dead_code_fraction/layer0": 0.05145, | |
| "epoch": 0.15, | |
| "input_norm": 31.99836520512899, | |
| "input_norm/layer0": 31.99836520512899, | |
| "learning_rate": 0.0005, | |
| "loss": 2.4631, | |
| "max_norm": 41.6606559753418, | |
| "max_norm/layer0": 41.6606559753418, | |
| "mean_norm": 35.58424758911133, | |
| "mean_norm/layer0": 35.58424758911133, | |
| "multicode_k": 1, | |
| "output_norm": 14.54295777956645, | |
| "output_norm/layer0": 14.54295777956645, | |
| "step": 1550 | |
| }, | |
| { | |
| "MSE": 691.8516132609051, | |
| "MSE/layer0": 691.8516132609051, | |
| "dead_code_fraction": 0.0558, | |
| "dead_code_fraction/layer0": 0.0558, | |
| "epoch": 0.16, | |
| "input_norm": 31.998375968933097, | |
| "input_norm/layer0": 31.998375968933097, | |
| "learning_rate": 0.0005, | |
| "loss": 2.5501, | |
| "max_norm": 42.08574676513672, | |
| "max_norm/layer0": 42.08574676513672, | |
| "mean_norm": 35.72518730163574, | |
| "mean_norm/layer0": 35.72518730163574, | |
| "multicode_k": 1, | |
| "output_norm": 14.692513732910157, | |
| "output_norm/layer0": 14.692513732910157, | |
| "step": 1600 | |
| }, | |
| { | |
| "MSE": 688.7181396484375, | |
| "MSE/layer0": 688.7181396484375, | |
| "dead_code_fraction": 0.0595, | |
| "dead_code_fraction/layer0": 0.0595, | |
| "epoch": 0.17, | |
| "input_norm": 31.99835859616598, | |
| "input_norm/layer0": 31.99835859616598, | |
| "learning_rate": 0.0005, | |
| "loss": 2.4699, | |
| "max_norm": 42.610233306884766, | |
| "max_norm/layer0": 42.610233306884766, | |
| "mean_norm": 35.86595916748047, | |
| "mean_norm/layer0": 35.86595916748047, | |
| "multicode_k": 1, | |
| "output_norm": 14.833582207361854, | |
| "output_norm/layer0": 14.833582207361854, | |
| "step": 1650 | |
| }, | |
| { | |
| "MSE": 685.5445822143549, | |
| "MSE/layer0": 685.5445822143549, | |
| "dead_code_fraction": 0.06595, | |
| "dead_code_fraction/layer0": 0.06595, | |
| "epoch": 0.17, | |
| "input_norm": 31.99835782368978, | |
| "input_norm/layer0": 31.99835782368978, | |
| "learning_rate": 0.0005, | |
| "loss": 2.5014, | |
| "max_norm": 43.15216064453125, | |
| "max_norm/layer0": 43.15216064453125, | |
| "mean_norm": 36.00602149963379, | |
| "mean_norm/layer0": 36.00602149963379, | |
| "multicode_k": 1, | |
| "output_norm": 14.96381513118744, | |
| "output_norm/layer0": 14.96381513118744, | |
| "step": 1700 | |
| }, | |
| { | |
| "MSE": 683.2388099161783, | |
| "MSE/layer0": 683.2388099161783, | |
| "dead_code_fraction": 0.0708, | |
| "dead_code_fraction/layer0": 0.0708, | |
| "epoch": 0.17, | |
| "input_norm": 31.998353064854925, | |
| "input_norm/layer0": 31.998353064854925, | |
| "learning_rate": 0.0005, | |
| "loss": 2.4762, | |
| "max_norm": 43.683807373046875, | |
| "max_norm/layer0": 43.683807373046875, | |
| "mean_norm": 36.14344596862793, | |
| "mean_norm/layer0": 36.14344596862793, | |
| "multicode_k": 1, | |
| "output_norm": 15.08479848066965, | |
| "output_norm/layer0": 15.08479848066965, | |
| "step": 1750 | |
| }, | |
| { | |
| "MSE": 680.5147140502929, | |
| "MSE/layer0": 680.5147140502929, | |
| "dead_code_fraction": 0.0711, | |
| "dead_code_fraction/layer0": 0.0711, | |
| "epoch": 0.18, | |
| "input_norm": 31.998323942820228, | |
| "input_norm/layer0": 31.998323942820228, | |
| "learning_rate": 0.0005, | |
| "loss": 2.4017, | |
| "max_norm": 44.204158782958984, | |
| "max_norm/layer0": 44.204158782958984, | |
| "mean_norm": 36.281328201293945, | |
| "mean_norm/layer0": 36.281328201293945, | |
| "multicode_k": 1, | |
| "output_norm": 15.21150853157043, | |
| "output_norm/layer0": 15.21150853157043, | |
| "step": 1800 | |
| }, | |
| { | |
| "MSE": 677.8235699462891, | |
| "MSE/layer0": 677.8235699462891, | |
| "dead_code_fraction": 0.0789, | |
| "dead_code_fraction/layer0": 0.0789, | |
| "epoch": 0.18, | |
| "input_norm": 31.99832211176553, | |
| "input_norm/layer0": 31.99832211176553, | |
| "learning_rate": 0.0005, | |
| "loss": 2.4204, | |
| "max_norm": 44.73421096801758, | |
| "max_norm/layer0": 44.73421096801758, | |
| "mean_norm": 36.41860580444336, | |
| "mean_norm/layer0": 36.41860580444336, | |
| "multicode_k": 1, | |
| "output_norm": 15.32913914521535, | |
| "output_norm/layer0": 15.32913914521535, | |
| "step": 1850 | |
| }, | |
| { | |
| "MSE": 674.8260657755535, | |
| "MSE/layer0": 674.8260657755535, | |
| "dead_code_fraction": 0.0859, | |
| "dead_code_fraction/layer0": 0.0859, | |
| "epoch": 0.19, | |
| "input_norm": 31.998327109018952, | |
| "input_norm/layer0": 31.998327109018952, | |
| "learning_rate": 0.0005, | |
| "loss": 2.4612, | |
| "max_norm": 45.264217376708984, | |
| "max_norm/layer0": 45.264217376708984, | |
| "mean_norm": 36.55377197265625, | |
| "mean_norm/layer0": 36.55377197265625, | |
| "multicode_k": 1, | |
| "output_norm": 15.449233846664427, | |
| "output_norm/layer0": 15.449233846664427, | |
| "step": 1900 | |
| }, | |
| { | |
| "MSE": 672.4308366902667, | |
| "MSE/layer0": 672.4308366902667, | |
| "dead_code_fraction": 0.08975, | |
| "dead_code_fraction/layer0": 0.08975, | |
| "epoch": 0.2, | |
| "input_norm": 31.998313461939492, | |
| "input_norm/layer0": 31.998313461939492, | |
| "learning_rate": 0.0005, | |
| "loss": 2.413, | |
| "max_norm": 45.7476692199707, | |
| "max_norm/layer0": 45.7476692199707, | |
| "mean_norm": 36.687320709228516, | |
| "mean_norm/layer0": 36.687320709228516, | |
| "multicode_k": 1, | |
| "output_norm": 15.564360074996952, | |
| "output_norm/layer0": 15.564360074996952, | |
| "step": 1950 | |
| }, | |
| { | |
| "MSE": 669.9350853474932, | |
| "MSE/layer0": 669.9350853474932, | |
| "dead_code_fraction": 0.09495, | |
| "dead_code_fraction/layer0": 0.09495, | |
| "epoch": 0.2, | |
| "input_norm": 31.998307892481467, | |
| "input_norm/layer0": 31.998307892481467, | |
| "learning_rate": 0.0005, | |
| "loss": 2.4197, | |
| "max_norm": 46.2595100402832, | |
| "max_norm/layer0": 46.2595100402832, | |
| "mean_norm": 36.82127571105957, | |
| "mean_norm/layer0": 36.82127571105957, | |
| "multicode_k": 1, | |
| "output_norm": 15.671763955752056, | |
| "output_norm/layer0": 15.671763955752056, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_MSE/layer0": 670.0254334077002, | |
| "eval_accuracy": 0.47584128742153486, | |
| "eval_dead_code_fraction/layer0": 0.0988, | |
| "eval_input_norm/layer0": 31.99830309178647, | |
| "eval_loss": 2.409283399581909, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 15.728763990528059, | |
| "eval_runtime": 158.0617, | |
| "eval_samples_per_second": 29.248, | |
| "eval_steps_per_second": 1.828, | |
| "step": 2000 | |
| }, | |
| { | |
| "MSE": 667.9600658162435, | |
| "MSE/layer0": 667.9600658162435, | |
| "dead_code_fraction": 0.09825, | |
| "dead_code_fraction/layer0": 0.09825, | |
| "epoch": 0.2, | |
| "input_norm": 31.99829890569051, | |
| "input_norm/layer0": 31.99829890569051, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3908, | |
| "max_norm": 46.76186752319336, | |
| "max_norm/layer0": 46.76186752319336, | |
| "mean_norm": 36.954044342041016, | |
| "mean_norm/layer0": 36.954044342041016, | |
| "multicode_k": 1, | |
| "output_norm": 15.786985732714339, | |
| "output_norm/layer0": 15.786985732714339, | |
| "step": 2050 | |
| }, | |
| { | |
| "MSE": 665.8677533976238, | |
| "MSE/layer0": 665.8677533976238, | |
| "dead_code_fraction": 0.10105, | |
| "dead_code_fraction/layer0": 0.10105, | |
| "epoch": 0.21, | |
| "input_norm": 31.998287776311233, | |
| "input_norm/layer0": 31.998287776311233, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3532, | |
| "max_norm": 47.23879623413086, | |
| "max_norm/layer0": 47.23879623413086, | |
| "mean_norm": 37.08414268493652, | |
| "mean_norm/layer0": 37.08414268493652, | |
| "multicode_k": 1, | |
| "output_norm": 15.887771523793544, | |
| "output_norm/layer0": 15.887771523793544, | |
| "step": 2100 | |
| }, | |
| { | |
| "MSE": 664.0484969075521, | |
| "MSE/layer0": 664.0484969075521, | |
| "dead_code_fraction": 0.10515, | |
| "dead_code_fraction/layer0": 0.10515, | |
| "epoch": 0.21, | |
| "input_norm": 31.998289143244435, | |
| "input_norm/layer0": 31.998289143244435, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3835, | |
| "max_norm": 47.72446823120117, | |
| "max_norm/layer0": 47.72446823120117, | |
| "mean_norm": 37.21368408203125, | |
| "mean_norm/layer0": 37.21368408203125, | |
| "multicode_k": 1, | |
| "output_norm": 15.987558364868171, | |
| "output_norm/layer0": 15.987558364868171, | |
| "step": 2150 | |
| }, | |
| { | |
| "MSE": 662.043323059082, | |
| "MSE/layer0": 662.043323059082, | |
| "dead_code_fraction": 0.11065, | |
| "dead_code_fraction/layer0": 0.11065, | |
| "epoch": 0.22, | |
| "input_norm": 31.998284489313747, | |
| "input_norm/layer0": 31.998284489313747, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3711, | |
| "max_norm": 48.21998596191406, | |
| "max_norm/layer0": 48.21998596191406, | |
| "mean_norm": 37.34214973449707, | |
| "mean_norm/layer0": 37.34214973449707, | |
| "multicode_k": 1, | |
| "output_norm": 16.084624527295432, | |
| "output_norm/layer0": 16.084624527295432, | |
| "step": 2200 | |
| }, | |
| { | |
| "MSE": 660.071201883952, | |
| "MSE/layer0": 660.071201883952, | |
| "dead_code_fraction": 0.1138, | |
| "dead_code_fraction/layer0": 0.1138, | |
| "epoch": 0.23, | |
| "input_norm": 31.998274552027382, | |
| "input_norm/layer0": 31.998274552027382, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3361, | |
| "max_norm": 48.656124114990234, | |
| "max_norm/layer0": 48.656124114990234, | |
| "mean_norm": 37.46707344055176, | |
| "mean_norm/layer0": 37.46707344055176, | |
| "multicode_k": 1, | |
| "output_norm": 16.1770029671987, | |
| "output_norm/layer0": 16.1770029671987, | |
| "step": 2250 | |
| }, | |
| { | |
| "MSE": 658.2848066202794, | |
| "MSE/layer0": 658.2848066202794, | |
| "dead_code_fraction": 0.11715, | |
| "dead_code_fraction/layer0": 0.11715, | |
| "epoch": 0.23, | |
| "input_norm": 31.998281342188513, | |
| "input_norm/layer0": 31.998281342188513, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3697, | |
| "max_norm": 49.14850616455078, | |
| "max_norm/layer0": 49.14850616455078, | |
| "mean_norm": 37.592119216918945, | |
| "mean_norm/layer0": 37.592119216918945, | |
| "multicode_k": 1, | |
| "output_norm": 16.273267321586616, | |
| "output_norm/layer0": 16.273267321586616, | |
| "step": 2300 | |
| }, | |
| { | |
| "MSE": 656.6614913940434, | |
| "MSE/layer0": 656.6614913940434, | |
| "dead_code_fraction": 0.1208, | |
| "dead_code_fraction/layer0": 0.1208, | |
| "epoch": 0.23, | |
| "input_norm": 31.99827545166017, | |
| "input_norm/layer0": 31.99827545166017, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3691, | |
| "max_norm": 49.611228942871094, | |
| "max_norm/layer0": 49.611228942871094, | |
| "mean_norm": 37.71496772766113, | |
| "mean_norm/layer0": 37.71496772766113, | |
| "multicode_k": 1, | |
| "output_norm": 16.361617434819536, | |
| "output_norm/layer0": 16.361617434819536, | |
| "step": 2350 | |
| }, | |
| { | |
| "MSE": 654.7551118977863, | |
| "MSE/layer0": 654.7551118977863, | |
| "dead_code_fraction": 0.12205, | |
| "dead_code_fraction/layer0": 0.12205, | |
| "epoch": 0.24, | |
| "input_norm": 31.998258228302007, | |
| "input_norm/layer0": 31.998258228302007, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3413, | |
| "max_norm": 50.082008361816406, | |
| "max_norm/layer0": 50.082008361816406, | |
| "mean_norm": 37.836740493774414, | |
| "mean_norm/layer0": 37.836740493774414, | |
| "multicode_k": 1, | |
| "output_norm": 16.442067163785307, | |
| "output_norm/layer0": 16.442067163785307, | |
| "step": 2400 | |
| }, | |
| { | |
| "MSE": 653.2320398966472, | |
| "MSE/layer0": 653.2320398966472, | |
| "dead_code_fraction": 0.1261, | |
| "dead_code_fraction/layer0": 0.1261, | |
| "epoch": 0.24, | |
| "input_norm": 31.99826599121093, | |
| "input_norm/layer0": 31.99826599121093, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3415, | |
| "max_norm": 50.542850494384766, | |
| "max_norm/layer0": 50.542850494384766, | |
| "mean_norm": 37.956573486328125, | |
| "mean_norm/layer0": 37.956573486328125, | |
| "multicode_k": 1, | |
| "output_norm": 16.545647277832018, | |
| "output_norm/layer0": 16.545647277832018, | |
| "step": 2450 | |
| }, | |
| { | |
| "MSE": 652.0689453124999, | |
| "MSE/layer0": 652.0689453124999, | |
| "dead_code_fraction": 0.1305, | |
| "dead_code_fraction/layer0": 0.1305, | |
| "epoch": 0.25, | |
| "input_norm": 31.998266054789227, | |
| "input_norm/layer0": 31.998266054789227, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3541, | |
| "max_norm": 50.972904205322266, | |
| "max_norm/layer0": 50.972904205322266, | |
| "mean_norm": 38.07469177246094, | |
| "mean_norm/layer0": 38.07469177246094, | |
| "multicode_k": 1, | |
| "output_norm": 16.614015088081356, | |
| "output_norm/layer0": 16.614015088081356, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_MSE/layer0": 651.1296869864225, | |
| "eval_accuracy": 0.48371217143066175, | |
| "eval_dead_code_fraction/layer0": 0.1337, | |
| "eval_input_norm/layer0": 31.998264631048162, | |
| "eval_loss": 2.340399742126465, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 16.66022368217996, | |
| "eval_runtime": 157.8946, | |
| "eval_samples_per_second": 29.279, | |
| "eval_steps_per_second": 1.83, | |
| "step": 2500 | |
| }, | |
| { | |
| "MSE": 650.5154676310221, | |
| "MSE/layer0": 650.5154676310221, | |
| "dead_code_fraction": 0.1312, | |
| "dead_code_fraction/layer0": 0.1312, | |
| "epoch": 0.26, | |
| "input_norm": 31.99826429367065, | |
| "input_norm/layer0": 31.99826429367065, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3374, | |
| "max_norm": 51.42794418334961, | |
| "max_norm/layer0": 51.42794418334961, | |
| "mean_norm": 38.19082260131836, | |
| "mean_norm/layer0": 38.19082260131836, | |
| "multicode_k": 1, | |
| "output_norm": 16.705677251815793, | |
| "output_norm/layer0": 16.705677251815793, | |
| "step": 2550 | |
| }, | |
| { | |
| "MSE": 649.4798397827149, | |
| "MSE/layer0": 649.4798397827149, | |
| "dead_code_fraction": 0.13625, | |
| "dead_code_fraction/layer0": 0.13625, | |
| "epoch": 0.26, | |
| "input_norm": 31.99826188405354, | |
| "input_norm/layer0": 31.99826188405354, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3364, | |
| "max_norm": 51.84079360961914, | |
| "max_norm/layer0": 51.84079360961914, | |
| "mean_norm": 38.306650161743164, | |
| "mean_norm/layer0": 38.306650161743164, | |
| "multicode_k": 1, | |
| "output_norm": 16.774758176803587, | |
| "output_norm/layer0": 16.774758176803587, | |
| "step": 2600 | |
| }, | |
| { | |
| "MSE": 648.4373052978513, | |
| "MSE/layer0": 648.4373052978513, | |
| "dead_code_fraction": 0.13795, | |
| "dead_code_fraction/layer0": 0.13795, | |
| "epoch": 0.27, | |
| "input_norm": 31.998252007166542, | |
| "input_norm/layer0": 31.998252007166542, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3162, | |
| "max_norm": 52.24661636352539, | |
| "max_norm/layer0": 52.24661636352539, | |
| "mean_norm": 38.41937828063965, | |
| "mean_norm/layer0": 38.41937828063965, | |
| "multicode_k": 1, | |
| "output_norm": 16.851604979832963, | |
| "output_norm/layer0": 16.851604979832963, | |
| "step": 2650 | |
| }, | |
| { | |
| "MSE": 647.0678014119467, | |
| "MSE/layer0": 647.0678014119467, | |
| "dead_code_fraction": 0.1397, | |
| "dead_code_fraction/layer0": 0.1397, | |
| "epoch": 0.27, | |
| "input_norm": 31.998265930811563, | |
| "input_norm/layer0": 31.998265930811563, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3497, | |
| "max_norm": 52.66170120239258, | |
| "max_norm/layer0": 52.66170120239258, | |
| "mean_norm": 38.53024482727051, | |
| "mean_norm/layer0": 38.53024482727051, | |
| "multicode_k": 1, | |
| "output_norm": 16.925416787465398, | |
| "output_norm/layer0": 16.925416787465398, | |
| "step": 2700 | |
| }, | |
| { | |
| "MSE": 646.4085242716471, | |
| "MSE/layer0": 646.4085242716471, | |
| "dead_code_fraction": 0.14125, | |
| "dead_code_fraction/layer0": 0.14125, | |
| "epoch": 0.28, | |
| "input_norm": 31.99825245221455, | |
| "input_norm/layer0": 31.99825245221455, | |
| "learning_rate": 0.0005, | |
| "loss": 2.301, | |
| "max_norm": 53.03037643432617, | |
| "max_norm/layer0": 53.03037643432617, | |
| "mean_norm": 38.63713836669922, | |
| "mean_norm/layer0": 38.63713836669922, | |
| "multicode_k": 1, | |
| "output_norm": 16.985576423009235, | |
| "output_norm/layer0": 16.985576423009235, | |
| "step": 2750 | |
| }, | |
| { | |
| "MSE": 644.7344170125325, | |
| "MSE/layer0": 644.7344170125325, | |
| "dead_code_fraction": 0.14415, | |
| "dead_code_fraction/layer0": 0.14415, | |
| "epoch": 0.28, | |
| "input_norm": 31.998260081609082, | |
| "input_norm/layer0": 31.998260081609082, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3395, | |
| "max_norm": 53.41487503051758, | |
| "max_norm/layer0": 53.41487503051758, | |
| "mean_norm": 38.74285697937012, | |
| "mean_norm/layer0": 38.74285697937012, | |
| "multicode_k": 1, | |
| "output_norm": 17.068980147043867, | |
| "output_norm/layer0": 17.068980147043867, | |
| "step": 2800 | |
| }, | |
| { | |
| "MSE": 644.636144104004, | |
| "MSE/layer0": 644.636144104004, | |
| "dead_code_fraction": 0.14565, | |
| "dead_code_fraction/layer0": 0.14565, | |
| "epoch": 0.28, | |
| "input_norm": 31.998243366877247, | |
| "input_norm/layer0": 31.998243366877247, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2757, | |
| "max_norm": 53.792579650878906, | |
| "max_norm/layer0": 53.792579650878906, | |
| "mean_norm": 38.84635543823242, | |
| "mean_norm/layer0": 38.84635543823242, | |
| "multicode_k": 1, | |
| "output_norm": 17.124992834726967, | |
| "output_norm/layer0": 17.124992834726967, | |
| "step": 2850 | |
| }, | |
| { | |
| "MSE": 643.8843309529623, | |
| "MSE/layer0": 643.8843309529623, | |
| "dead_code_fraction": 0.14495, | |
| "dead_code_fraction/layer0": 0.14495, | |
| "epoch": 0.29, | |
| "input_norm": 31.998242295583093, | |
| "input_norm/layer0": 31.998242295583093, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3057, | |
| "max_norm": 54.146453857421875, | |
| "max_norm/layer0": 54.146453857421875, | |
| "mean_norm": 38.947309494018555, | |
| "mean_norm/layer0": 38.947309494018555, | |
| "multicode_k": 1, | |
| "output_norm": 17.17694611549377, | |
| "output_norm/layer0": 17.17694611549377, | |
| "step": 2900 | |
| }, | |
| { | |
| "MSE": 642.6776557413741, | |
| "MSE/layer0": 642.6776557413741, | |
| "dead_code_fraction": 0.1504, | |
| "dead_code_fraction/layer0": 0.1504, | |
| "epoch": 0.29, | |
| "input_norm": 31.998272593816125, | |
| "input_norm/layer0": 31.998272593816125, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3545, | |
| "max_norm": 54.51527404785156, | |
| "max_norm/layer0": 54.51527404785156, | |
| "mean_norm": 39.047607421875, | |
| "mean_norm/layer0": 39.047607421875, | |
| "multicode_k": 1, | |
| "output_norm": 17.240235595703133, | |
| "output_norm/layer0": 17.240235595703133, | |
| "step": 2950 | |
| }, | |
| { | |
| "MSE": 643.1047460937498, | |
| "MSE/layer0": 643.1047460937498, | |
| "dead_code_fraction": 0.1483, | |
| "dead_code_fraction/layer0": 0.1483, | |
| "epoch": 0.3, | |
| "input_norm": 31.998249003092454, | |
| "input_norm/layer0": 31.998249003092454, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2742, | |
| "max_norm": 54.86568832397461, | |
| "max_norm/layer0": 54.86568832397461, | |
| "mean_norm": 39.14469337463379, | |
| "mean_norm/layer0": 39.14469337463379, | |
| "multicode_k": 1, | |
| "output_norm": 17.28876600265503, | |
| "output_norm/layer0": 17.28876600265503, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_MSE/layer0": 642.6360311704152, | |
| "eval_accuracy": 0.49030507287608877, | |
| "eval_dead_code_fraction/layer0": 0.14995, | |
| "eval_input_norm/layer0": 31.998255163205542, | |
| "eval_loss": 2.2907073497772217, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 17.324301861386118, | |
| "eval_runtime": 157.9262, | |
| "eval_samples_per_second": 29.273, | |
| "eval_steps_per_second": 1.83, | |
| "step": 3000 | |
| }, | |
| { | |
| "MSE": 641.9817254638668, | |
| "MSE/layer0": 641.9817254638668, | |
| "dead_code_fraction": 0.1511, | |
| "dead_code_fraction/layer0": 0.1511, | |
| "epoch": 0.3, | |
| "input_norm": 31.99826343536376, | |
| "input_norm/layer0": 31.99826343536376, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3422, | |
| "max_norm": 55.2226676940918, | |
| "max_norm/layer0": 55.2226676940918, | |
| "mean_norm": 39.23999786376953, | |
| "mean_norm/layer0": 39.23999786376953, | |
| "multicode_k": 1, | |
| "output_norm": 17.350644410451252, | |
| "output_norm/layer0": 17.350644410451252, | |
| "step": 3050 | |
| }, | |
| { | |
| "MSE": 641.9993333943689, | |
| "MSE/layer0": 641.9993333943689, | |
| "dead_code_fraction": 0.1504, | |
| "dead_code_fraction/layer0": 0.1504, | |
| "epoch": 0.31, | |
| "input_norm": 31.998250141143807, | |
| "input_norm/layer0": 31.998250141143807, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2814, | |
| "max_norm": 55.56163787841797, | |
| "max_norm/layer0": 55.56163787841797, | |
| "mean_norm": 39.33370780944824, | |
| "mean_norm/layer0": 39.33370780944824, | |
| "multicode_k": 1, | |
| "output_norm": 17.39312816301982, | |
| "output_norm/layer0": 17.39312816301982, | |
| "step": 3100 | |
| }, | |
| { | |
| "MSE": 641.5148900349936, | |
| "MSE/layer0": 641.5148900349936, | |
| "dead_code_fraction": 0.15185, | |
| "dead_code_fraction/layer0": 0.15185, | |
| "epoch": 0.32, | |
| "input_norm": 31.998260364532467, | |
| "input_norm/layer0": 31.998260364532467, | |
| "learning_rate": 0.0005, | |
| "loss": 2.3152, | |
| "max_norm": 55.8856315612793, | |
| "max_norm/layer0": 55.8856315612793, | |
| "mean_norm": 39.42481803894043, | |
| "mean_norm/layer0": 39.42481803894043, | |
| "multicode_k": 1, | |
| "output_norm": 17.44178107897441, | |
| "output_norm/layer0": 17.44178107897441, | |
| "step": 3150 | |
| }, | |
| { | |
| "MSE": 640.499552408854, | |
| "MSE/layer0": 640.499552408854, | |
| "dead_code_fraction": 0.1516, | |
| "dead_code_fraction/layer0": 0.1516, | |
| "epoch": 0.32, | |
| "input_norm": 31.99825292587281, | |
| "input_norm/layer0": 31.99825292587281, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2462, | |
| "max_norm": 56.21445846557617, | |
| "max_norm/layer0": 56.21445846557617, | |
| "mean_norm": 39.51395606994629, | |
| "mean_norm/layer0": 39.51395606994629, | |
| "multicode_k": 1, | |
| "output_norm": 17.50789775530497, | |
| "output_norm/layer0": 17.50789775530497, | |
| "step": 3200 | |
| }, | |
| { | |
| "MSE": 640.565166829427, | |
| "MSE/layer0": 640.565166829427, | |
| "dead_code_fraction": 0.15285, | |
| "dead_code_fraction/layer0": 0.15285, | |
| "epoch": 0.33, | |
| "input_norm": 31.998250306447353, | |
| "input_norm/layer0": 31.998250306447353, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2595, | |
| "max_norm": 56.526973724365234, | |
| "max_norm/layer0": 56.526973724365234, | |
| "mean_norm": 39.601173400878906, | |
| "mean_norm/layer0": 39.601173400878906, | |
| "multicode_k": 1, | |
| "output_norm": 17.54366443951924, | |
| "output_norm/layer0": 17.54366443951924, | |
| "step": 3250 | |
| }, | |
| { | |
| "MSE": 640.8991118367509, | |
| "MSE/layer0": 640.8991118367509, | |
| "dead_code_fraction": 0.1531, | |
| "dead_code_fraction/layer0": 0.1531, | |
| "epoch": 0.33, | |
| "input_norm": 31.998245798746755, | |
| "input_norm/layer0": 31.998245798746755, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2326, | |
| "max_norm": 56.82651138305664, | |
| "max_norm/layer0": 56.82651138305664, | |
| "mean_norm": 39.684635162353516, | |
| "mean_norm/layer0": 39.684635162353516, | |
| "multicode_k": 1, | |
| "output_norm": 17.578553660710664, | |
| "output_norm/layer0": 17.578553660710664, | |
| "step": 3300 | |
| }, | |
| { | |
| "MSE": 640.486218770345, | |
| "MSE/layer0": 640.486218770345, | |
| "dead_code_fraction": 0.15345, | |
| "dead_code_fraction/layer0": 0.15345, | |
| "epoch": 0.34, | |
| "input_norm": 31.998255780537924, | |
| "input_norm/layer0": 31.998255780537924, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2733, | |
| "max_norm": 57.12877655029297, | |
| "max_norm/layer0": 57.12877655029297, | |
| "mean_norm": 39.76711463928223, | |
| "mean_norm/layer0": 39.76711463928223, | |
| "multicode_k": 1, | |
| "output_norm": 17.619242086410516, | |
| "output_norm/layer0": 17.619242086410516, | |
| "step": 3350 | |
| }, | |
| { | |
| "MSE": 639.5240251668292, | |
| "MSE/layer0": 639.5240251668292, | |
| "dead_code_fraction": 0.15565, | |
| "dead_code_fraction/layer0": 0.15565, | |
| "epoch": 0.34, | |
| "input_norm": 31.998264500300095, | |
| "input_norm/layer0": 31.998264500300095, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2633, | |
| "max_norm": 57.42041778564453, | |
| "max_norm/layer0": 57.42041778564453, | |
| "mean_norm": 39.84800338745117, | |
| "mean_norm/layer0": 39.84800338745117, | |
| "multicode_k": 1, | |
| "output_norm": 17.667484652201342, | |
| "output_norm/layer0": 17.667484652201342, | |
| "step": 3400 | |
| }, | |
| { | |
| "MSE": 639.2691174316408, | |
| "MSE/layer0": 639.2691174316408, | |
| "dead_code_fraction": 0.15605, | |
| "dead_code_fraction/layer0": 0.15605, | |
| "epoch": 0.34, | |
| "input_norm": 31.99825723965962, | |
| "input_norm/layer0": 31.99825723965962, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2495, | |
| "max_norm": 57.706260681152344, | |
| "max_norm/layer0": 57.706260681152344, | |
| "mean_norm": 39.92698097229004, | |
| "mean_norm/layer0": 39.92698097229004, | |
| "multicode_k": 1, | |
| "output_norm": 17.705148900349947, | |
| "output_norm/layer0": 17.705148900349947, | |
| "step": 3450 | |
| }, | |
| { | |
| "MSE": 639.3908192952478, | |
| "MSE/layer0": 639.3908192952478, | |
| "dead_code_fraction": 0.15655, | |
| "dead_code_fraction/layer0": 0.15655, | |
| "epoch": 0.35, | |
| "input_norm": 31.9982618745168, | |
| "input_norm/layer0": 31.9982618745168, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2488, | |
| "max_norm": 57.98209762573242, | |
| "max_norm/layer0": 57.98209762573242, | |
| "mean_norm": 40.005022048950195, | |
| "mean_norm/layer0": 40.005022048950195, | |
| "multicode_k": 1, | |
| "output_norm": 17.73683495521545, | |
| "output_norm/layer0": 17.73683495521545, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_MSE/layer0": 640.3158307464355, | |
| "eval_accuracy": 0.49451074349024987, | |
| "eval_dead_code_fraction/layer0": 0.1575, | |
| "eval_input_norm/layer0": 31.99825158244007, | |
| "eval_loss": 2.2564537525177, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 17.756634140179678, | |
| "eval_runtime": 157.599, | |
| "eval_samples_per_second": 29.334, | |
| "eval_steps_per_second": 1.834, | |
| "step": 3500 | |
| }, | |
| { | |
| "MSE": 639.6838141886391, | |
| "MSE/layer0": 639.6838141886391, | |
| "dead_code_fraction": 0.157, | |
| "dead_code_fraction/layer0": 0.157, | |
| "epoch": 0.35, | |
| "input_norm": 31.99826737085978, | |
| "input_norm/layer0": 31.99826737085978, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2738, | |
| "max_norm": 58.24713897705078, | |
| "max_norm/layer0": 58.24713897705078, | |
| "mean_norm": 40.08023262023926, | |
| "mean_norm/layer0": 40.08023262023926, | |
| "multicode_k": 1, | |
| "output_norm": 17.755876312255864, | |
| "output_norm/layer0": 17.755876312255864, | |
| "step": 3550 | |
| }, | |
| { | |
| "MSE": 639.2954257202149, | |
| "MSE/layer0": 639.2954257202149, | |
| "dead_code_fraction": 0.1559, | |
| "dead_code_fraction/layer0": 0.1559, | |
| "epoch": 0.36, | |
| "input_norm": 31.998245531717938, | |
| "input_norm/layer0": 31.998245531717938, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2036, | |
| "max_norm": 58.50635528564453, | |
| "max_norm/layer0": 58.50635528564453, | |
| "mean_norm": 40.15370178222656, | |
| "mean_norm/layer0": 40.15370178222656, | |
| "multicode_k": 1, | |
| "output_norm": 17.812968953450515, | |
| "output_norm/layer0": 17.812968953450515, | |
| "step": 3600 | |
| }, | |
| { | |
| "MSE": 639.3338773600263, | |
| "MSE/layer0": 639.3338773600263, | |
| "dead_code_fraction": 0.15905, | |
| "dead_code_fraction/layer0": 0.15905, | |
| "epoch": 0.36, | |
| "input_norm": 31.99827084223429, | |
| "input_norm/layer0": 31.99827084223429, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2672, | |
| "max_norm": 58.76622009277344, | |
| "max_norm/layer0": 58.76622009277344, | |
| "mean_norm": 40.22719192504883, | |
| "mean_norm/layer0": 40.22719192504883, | |
| "multicode_k": 1, | |
| "output_norm": 17.821751413345332, | |
| "output_norm/layer0": 17.821751413345332, | |
| "step": 3650 | |
| }, | |
| { | |
| "MSE": 639.0531684366863, | |
| "MSE/layer0": 639.0531684366863, | |
| "dead_code_fraction": 0.15975, | |
| "dead_code_fraction/layer0": 0.15975, | |
| "epoch": 0.37, | |
| "input_norm": 31.99827636400858, | |
| "input_norm/layer0": 31.99827636400858, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2444, | |
| "max_norm": 59.02393341064453, | |
| "max_norm/layer0": 59.02393341064453, | |
| "mean_norm": 40.298166275024414, | |
| "mean_norm/layer0": 40.298166275024414, | |
| "multicode_k": 1, | |
| "output_norm": 17.85403926849365, | |
| "output_norm/layer0": 17.85403926849365, | |
| "step": 3700 | |
| }, | |
| { | |
| "MSE": 638.9355230712894, | |
| "MSE/layer0": 638.9355230712894, | |
| "dead_code_fraction": 0.1605, | |
| "dead_code_fraction/layer0": 0.1605, | |
| "epoch": 0.38, | |
| "input_norm": 31.99827863057454, | |
| "input_norm/layer0": 31.99827863057454, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2454, | |
| "max_norm": 59.28853225708008, | |
| "max_norm/layer0": 59.28853225708008, | |
| "mean_norm": 40.36880111694336, | |
| "mean_norm/layer0": 40.36880111694336, | |
| "multicode_k": 1, | |
| "output_norm": 17.88599282582601, | |
| "output_norm/layer0": 17.88599282582601, | |
| "step": 3750 | |
| }, | |
| { | |
| "MSE": 639.0086972045899, | |
| "MSE/layer0": 639.0086972045899, | |
| "dead_code_fraction": 0.16125, | |
| "dead_code_fraction/layer0": 0.16125, | |
| "epoch": 0.38, | |
| "input_norm": 31.9982850710551, | |
| "input_norm/layer0": 31.9982850710551, | |
| "learning_rate": 0.0005, | |
| "loss": 2.27, | |
| "max_norm": 59.546451568603516, | |
| "max_norm/layer0": 59.546451568603516, | |
| "mean_norm": 40.43776512145996, | |
| "mean_norm/layer0": 40.43776512145996, | |
| "multicode_k": 1, | |
| "output_norm": 17.90943570454915, | |
| "output_norm/layer0": 17.90943570454915, | |
| "step": 3800 | |
| }, | |
| { | |
| "MSE": 638.9462019856769, | |
| "MSE/layer0": 638.9462019856769, | |
| "dead_code_fraction": 0.1583, | |
| "dead_code_fraction/layer0": 0.1583, | |
| "epoch": 0.39, | |
| "input_norm": 31.998278980255122, | |
| "input_norm/layer0": 31.998278980255122, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2438, | |
| "max_norm": 59.80894470214844, | |
| "max_norm/layer0": 59.80894470214844, | |
| "mean_norm": 40.50556945800781, | |
| "mean_norm/layer0": 40.50556945800781, | |
| "multicode_k": 1, | |
| "output_norm": 17.947645209630338, | |
| "output_norm/layer0": 17.947645209630338, | |
| "step": 3850 | |
| }, | |
| { | |
| "MSE": 639.4130173746743, | |
| "MSE/layer0": 639.4130173746743, | |
| "dead_code_fraction": 0.16135, | |
| "dead_code_fraction/layer0": 0.16135, | |
| "epoch": 0.39, | |
| "input_norm": 31.998284943898526, | |
| "input_norm/layer0": 31.998284943898526, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2526, | |
| "max_norm": 60.04655075073242, | |
| "max_norm/layer0": 60.04655075073242, | |
| "mean_norm": 40.57136535644531, | |
| "mean_norm/layer0": 40.57136535644531, | |
| "multicode_k": 1, | |
| "output_norm": 17.960218969980872, | |
| "output_norm/layer0": 17.960218969980872, | |
| "step": 3900 | |
| }, | |
| { | |
| "MSE": 639.8756245930986, | |
| "MSE/layer0": 639.8756245930986, | |
| "dead_code_fraction": 0.15755, | |
| "dead_code_fraction/layer0": 0.15755, | |
| "epoch": 0.4, | |
| "input_norm": 31.998285398483272, | |
| "input_norm/layer0": 31.998285398483272, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2266, | |
| "max_norm": 60.29011154174805, | |
| "max_norm/layer0": 60.29011154174805, | |
| "mean_norm": 40.63625144958496, | |
| "mean_norm/layer0": 40.63625144958496, | |
| "multicode_k": 1, | |
| "output_norm": 17.97526204744974, | |
| "output_norm/layer0": 17.97526204744974, | |
| "step": 3950 | |
| }, | |
| { | |
| "MSE": 640.046054585775, | |
| "MSE/layer0": 640.046054585775, | |
| "dead_code_fraction": 0.1605, | |
| "dead_code_fraction/layer0": 0.1605, | |
| "epoch": 0.4, | |
| "input_norm": 31.998285433451336, | |
| "input_norm/layer0": 31.998285433451336, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2287, | |
| "max_norm": 60.52168655395508, | |
| "max_norm/layer0": 60.52168655395508, | |
| "mean_norm": 40.698753356933594, | |
| "mean_norm/layer0": 40.698753356933594, | |
| "multicode_k": 1, | |
| "output_norm": 17.997498016357426, | |
| "output_norm/layer0": 17.997498016357426, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_MSE/layer0": 638.8422855589264, | |
| "eval_accuracy": 0.49670513512593434, | |
| "eval_dead_code_fraction/layer0": 0.16135, | |
| "eval_input_norm/layer0": 31.99827300782795, | |
| "eval_loss": 2.2332887649536133, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 18.022313365115252, | |
| "eval_runtime": 158.1975, | |
| "eval_samples_per_second": 29.223, | |
| "eval_steps_per_second": 1.827, | |
| "step": 4000 | |
| }, | |
| { | |
| "MSE": 639.952128804525, | |
| "MSE/layer0": 639.952128804525, | |
| "dead_code_fraction": 0.16035, | |
| "dead_code_fraction/layer0": 0.16035, | |
| "epoch": 0.41, | |
| "input_norm": 31.998286927541105, | |
| "input_norm/layer0": 31.998286927541105, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2193, | |
| "max_norm": 60.76009750366211, | |
| "max_norm/layer0": 60.76009750366211, | |
| "mean_norm": 40.75992393493652, | |
| "mean_norm/layer0": 40.75992393493652, | |
| "multicode_k": 1, | |
| "output_norm": 18.024092137018826, | |
| "output_norm/layer0": 18.024092137018826, | |
| "step": 4050 | |
| }, | |
| { | |
| "MSE": 640.5730131022133, | |
| "MSE/layer0": 640.5730131022133, | |
| "dead_code_fraction": 0.1634, | |
| "dead_code_fraction/layer0": 0.1634, | |
| "epoch": 0.41, | |
| "input_norm": 31.99828769365946, | |
| "input_norm/layer0": 31.99828769365946, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2301, | |
| "max_norm": 60.98118591308594, | |
| "max_norm/layer0": 60.98118591308594, | |
| "mean_norm": 40.8208122253418, | |
| "mean_norm/layer0": 40.8208122253418, | |
| "multicode_k": 1, | |
| "output_norm": 18.02807092984518, | |
| "output_norm/layer0": 18.02807092984518, | |
| "step": 4100 | |
| }, | |
| { | |
| "MSE": 640.4258350626628, | |
| "MSE/layer0": 640.4258350626628, | |
| "dead_code_fraction": 0.1612, | |
| "dead_code_fraction/layer0": 0.1612, | |
| "epoch": 0.41, | |
| "input_norm": 31.998297268549607, | |
| "input_norm/layer0": 31.998297268549607, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2307, | |
| "max_norm": 61.19542694091797, | |
| "max_norm/layer0": 61.19542694091797, | |
| "mean_norm": 40.88128852844238, | |
| "mean_norm/layer0": 40.88128852844238, | |
| "multicode_k": 1, | |
| "output_norm": 18.04158842404684, | |
| "output_norm/layer0": 18.04158842404684, | |
| "step": 4150 | |
| }, | |
| { | |
| "MSE": 639.5022987874349, | |
| "MSE/layer0": 639.5022987874349, | |
| "dead_code_fraction": 0.16015, | |
| "dead_code_fraction/layer0": 0.16015, | |
| "epoch": 0.42, | |
| "input_norm": 31.99830362319948, | |
| "input_norm/layer0": 31.99830362319948, | |
| "learning_rate": 0.0005, | |
| "loss": 2.247, | |
| "max_norm": 61.4282341003418, | |
| "max_norm/layer0": 61.4282341003418, | |
| "mean_norm": 40.941017150878906, | |
| "mean_norm/layer0": 40.941017150878906, | |
| "multicode_k": 1, | |
| "output_norm": 18.079462760289516, | |
| "output_norm/layer0": 18.079462760289516, | |
| "step": 4200 | |
| }, | |
| { | |
| "MSE": 640.0252755737306, | |
| "MSE/layer0": 640.0252755737306, | |
| "dead_code_fraction": 0.1604, | |
| "dead_code_fraction/layer0": 0.1604, | |
| "epoch": 0.42, | |
| "input_norm": 31.99830138524374, | |
| "input_norm/layer0": 31.99830138524374, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2314, | |
| "max_norm": 61.648414611816406, | |
| "max_norm/layer0": 61.648414611816406, | |
| "mean_norm": 40.99977684020996, | |
| "mean_norm/layer0": 40.99977684020996, | |
| "multicode_k": 1, | |
| "output_norm": 18.09024664878845, | |
| "output_norm/layer0": 18.09024664878845, | |
| "step": 4250 | |
| }, | |
| { | |
| "MSE": 639.7621870930992, | |
| "MSE/layer0": 639.7621870930992, | |
| "dead_code_fraction": 0.16365, | |
| "dead_code_fraction/layer0": 0.16365, | |
| "epoch": 0.43, | |
| "input_norm": 31.99830169359842, | |
| "input_norm/layer0": 31.99830169359842, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2144, | |
| "max_norm": 61.86562728881836, | |
| "max_norm/layer0": 61.86562728881836, | |
| "mean_norm": 41.05688667297363, | |
| "mean_norm/layer0": 41.05688667297363, | |
| "multicode_k": 1, | |
| "output_norm": 18.11899041493734, | |
| "output_norm/layer0": 18.11899041493734, | |
| "step": 4300 | |
| }, | |
| { | |
| "MSE": 640.3955947875975, | |
| "MSE/layer0": 640.3955947875975, | |
| "dead_code_fraction": 0.1592, | |
| "dead_code_fraction/layer0": 0.1592, | |
| "epoch": 0.43, | |
| "input_norm": 31.998302787144976, | |
| "input_norm/layer0": 31.998302787144976, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2077, | |
| "max_norm": 62.060550689697266, | |
| "max_norm/layer0": 62.060550689697266, | |
| "mean_norm": 41.11246681213379, | |
| "mean_norm/layer0": 41.11246681213379, | |
| "multicode_k": 1, | |
| "output_norm": 18.121066271464024, | |
| "output_norm/layer0": 18.121066271464024, | |
| "step": 4350 | |
| }, | |
| { | |
| "MSE": 639.8066222127281, | |
| "MSE/layer0": 639.8066222127281, | |
| "dead_code_fraction": 0.1635, | |
| "dead_code_fraction/layer0": 0.1635, | |
| "epoch": 0.44, | |
| "input_norm": 31.998314228057872, | |
| "input_norm/layer0": 31.998314228057872, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2287, | |
| "max_norm": 62.275943756103516, | |
| "max_norm/layer0": 62.275943756103516, | |
| "mean_norm": 41.167396545410156, | |
| "mean_norm/layer0": 41.167396545410156, | |
| "multicode_k": 1, | |
| "output_norm": 18.142933632532753, | |
| "output_norm/layer0": 18.142933632532753, | |
| "step": 4400 | |
| }, | |
| { | |
| "MSE": 639.8160334269206, | |
| "MSE/layer0": 639.8160334269206, | |
| "dead_code_fraction": 0.16385, | |
| "dead_code_fraction/layer0": 0.16385, | |
| "epoch": 0.45, | |
| "input_norm": 31.99831516901653, | |
| "input_norm/layer0": 31.99831516901653, | |
| "learning_rate": 0.0005, | |
| "loss": 2.215, | |
| "max_norm": 62.486793518066406, | |
| "max_norm/layer0": 62.486793518066406, | |
| "mean_norm": 41.221702575683594, | |
| "mean_norm/layer0": 41.221702575683594, | |
| "multicode_k": 1, | |
| "output_norm": 18.167670075098677, | |
| "output_norm/layer0": 18.167670075098677, | |
| "step": 4450 | |
| }, | |
| { | |
| "MSE": 640.1416244506836, | |
| "MSE/layer0": 640.1416244506836, | |
| "dead_code_fraction": 0.16675, | |
| "dead_code_fraction/layer0": 0.16675, | |
| "epoch": 0.45, | |
| "input_norm": 31.998327512741074, | |
| "input_norm/layer0": 31.998327512741074, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2576, | |
| "max_norm": 62.67790222167969, | |
| "max_norm/layer0": 62.67790222167969, | |
| "mean_norm": 41.275705337524414, | |
| "mean_norm/layer0": 41.275705337524414, | |
| "multicode_k": 1, | |
| "output_norm": 18.162402251561495, | |
| "output_norm/layer0": 18.162402251561495, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_MSE/layer0": 639.7464034476376, | |
| "eval_accuracy": 0.49916912103175737, | |
| "eval_dead_code_fraction/layer0": 0.16755, | |
| "eval_input_norm/layer0": 31.998309449821527, | |
| "eval_loss": 2.215489387512207, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 18.191884751910905, | |
| "eval_runtime": 157.9108, | |
| "eval_samples_per_second": 29.276, | |
| "eval_steps_per_second": 1.83, | |
| "step": 4500 | |
| }, | |
| { | |
| "MSE": 640.4858755493162, | |
| "MSE/layer0": 640.4858755493162, | |
| "dead_code_fraction": 0.1633, | |
| "dead_code_fraction/layer0": 0.1633, | |
| "epoch": 0.46, | |
| "input_norm": 31.99831475257874, | |
| "input_norm/layer0": 31.99831475257874, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1869, | |
| "max_norm": 62.88029861450195, | |
| "max_norm/layer0": 62.88029861450195, | |
| "mean_norm": 41.32845115661621, | |
| "mean_norm/layer0": 41.32845115661621, | |
| "multicode_k": 1, | |
| "output_norm": 18.18791744550069, | |
| "output_norm/layer0": 18.18791744550069, | |
| "step": 4550 | |
| }, | |
| { | |
| "MSE": 640.7411174519859, | |
| "MSE/layer0": 640.7411174519859, | |
| "dead_code_fraction": 0.16375, | |
| "dead_code_fraction/layer0": 0.16375, | |
| "epoch": 0.46, | |
| "input_norm": 31.998337395985924, | |
| "input_norm/layer0": 31.998337395985924, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2426, | |
| "max_norm": 63.06687545776367, | |
| "max_norm/layer0": 63.06687545776367, | |
| "mean_norm": 41.38063049316406, | |
| "mean_norm/layer0": 41.38063049316406, | |
| "multicode_k": 1, | |
| "output_norm": 18.185693721771244, | |
| "output_norm/layer0": 18.185693721771244, | |
| "step": 4600 | |
| }, | |
| { | |
| "MSE": 640.3254055786131, | |
| "MSE/layer0": 640.3254055786131, | |
| "dead_code_fraction": 0.1637, | |
| "dead_code_fraction/layer0": 0.1637, | |
| "epoch": 0.47, | |
| "input_norm": 31.998331034978236, | |
| "input_norm/layer0": 31.998331034978236, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2103, | |
| "max_norm": 63.24494171142578, | |
| "max_norm/layer0": 63.24494171142578, | |
| "mean_norm": 41.4316463470459, | |
| "mean_norm/layer0": 41.4316463470459, | |
| "multicode_k": 1, | |
| "output_norm": 18.215761318206788, | |
| "output_norm/layer0": 18.215761318206788, | |
| "step": 4650 | |
| }, | |
| { | |
| "MSE": 640.0117889404299, | |
| "MSE/layer0": 640.0117889404299, | |
| "dead_code_fraction": 0.1653, | |
| "dead_code_fraction/layer0": 0.1653, | |
| "epoch": 0.47, | |
| "input_norm": 31.998331683476753, | |
| "input_norm/layer0": 31.998331683476753, | |
| "learning_rate": 0.0005, | |
| "loss": 2.189, | |
| "max_norm": 63.429969787597656, | |
| "max_norm/layer0": 63.429969787597656, | |
| "mean_norm": 41.481590270996094, | |
| "mean_norm/layer0": 41.481590270996094, | |
| "multicode_k": 1, | |
| "output_norm": 18.22781534512837, | |
| "output_norm/layer0": 18.22781534512837, | |
| "step": 4700 | |
| }, | |
| { | |
| "MSE": 640.034366455078, | |
| "MSE/layer0": 640.034366455078, | |
| "dead_code_fraction": 0.16355, | |
| "dead_code_fraction/layer0": 0.16355, | |
| "epoch": 0.47, | |
| "input_norm": 31.998335037231442, | |
| "input_norm/layer0": 31.998335037231442, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1746, | |
| "max_norm": 63.604644775390625, | |
| "max_norm/layer0": 63.604644775390625, | |
| "mean_norm": 41.530447006225586, | |
| "mean_norm/layer0": 41.530447006225586, | |
| "multicode_k": 1, | |
| "output_norm": 18.247568238576257, | |
| "output_norm/layer0": 18.247568238576257, | |
| "step": 4750 | |
| }, | |
| { | |
| "MSE": 641.3402144411094, | |
| "MSE/layer0": 641.3402144411094, | |
| "dead_code_fraction": 0.16465, | |
| "dead_code_fraction/layer0": 0.16465, | |
| "epoch": 1.0, | |
| "input_norm": 31.998328861016873, | |
| "input_norm/layer0": 31.998328861016873, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1589, | |
| "max_norm": 63.7794303894043, | |
| "max_norm/layer0": 63.7794303894043, | |
| "mean_norm": 41.577613830566406, | |
| "mean_norm/layer0": 41.577613830566406, | |
| "multicode_k": 1, | |
| "output_norm": 18.227145007068557, | |
| "output_norm/layer0": 18.227145007068557, | |
| "step": 4800 | |
| }, | |
| { | |
| "MSE": 640.0454110717772, | |
| "MSE/layer0": 640.0454110717772, | |
| "dead_code_fraction": 0.16635, | |
| "dead_code_fraction/layer0": 0.16635, | |
| "epoch": 1.01, | |
| "input_norm": 31.998361120224008, | |
| "input_norm/layer0": 31.998361120224008, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2585, | |
| "max_norm": 63.96126937866211, | |
| "max_norm/layer0": 63.96126937866211, | |
| "mean_norm": 41.62501525878906, | |
| "mean_norm/layer0": 41.62501525878906, | |
| "multicode_k": 1, | |
| "output_norm": 18.258941303888953, | |
| "output_norm/layer0": 18.258941303888953, | |
| "step": 4850 | |
| }, | |
| { | |
| "MSE": 640.0055624389651, | |
| "MSE/layer0": 640.0055624389651, | |
| "dead_code_fraction": 0.16515, | |
| "dead_code_fraction/layer0": 0.16515, | |
| "epoch": 1.01, | |
| "input_norm": 31.998340495427446, | |
| "input_norm/layer0": 31.998340495427446, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1578, | |
| "max_norm": 64.13137817382812, | |
| "max_norm/layer0": 64.13137817382812, | |
| "mean_norm": 41.672542572021484, | |
| "mean_norm/layer0": 41.672542572021484, | |
| "multicode_k": 1, | |
| "output_norm": 18.272732003529867, | |
| "output_norm/layer0": 18.272732003529867, | |
| "step": 4900 | |
| }, | |
| { | |
| "MSE": 640.108183898926, | |
| "MSE/layer0": 640.108183898926, | |
| "dead_code_fraction": 0.1668, | |
| "dead_code_fraction/layer0": 0.1668, | |
| "epoch": 1.02, | |
| "input_norm": 31.998351519902535, | |
| "input_norm/layer0": 31.998351519902535, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1809, | |
| "max_norm": 64.30120086669922, | |
| "max_norm/layer0": 64.30120086669922, | |
| "mean_norm": 41.71914291381836, | |
| "mean_norm/layer0": 41.71914291381836, | |
| "multicode_k": 1, | |
| "output_norm": 18.278290322621658, | |
| "output_norm/layer0": 18.278290322621658, | |
| "step": 4950 | |
| }, | |
| { | |
| "MSE": 639.8438139851887, | |
| "MSE/layer0": 639.8438139851887, | |
| "dead_code_fraction": 0.1671, | |
| "dead_code_fraction/layer0": 0.1671, | |
| "epoch": 1.02, | |
| "input_norm": 31.998358796437586, | |
| "input_norm/layer0": 31.998358796437586, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1901, | |
| "max_norm": 64.4720230102539, | |
| "max_norm/layer0": 64.4720230102539, | |
| "mean_norm": 41.76571464538574, | |
| "mean_norm/layer0": 41.76571464538574, | |
| "multicode_k": 1, | |
| "output_norm": 18.29636260350546, | |
| "output_norm/layer0": 18.29636260350546, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_MSE/layer0": 638.1766108092672, | |
| "eval_accuracy": 0.5013711247409516, | |
| "eval_dead_code_fraction/layer0": 0.16955, | |
| "eval_input_norm/layer0": 31.99836045128427, | |
| "eval_loss": 2.202561616897583, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 18.311866774487846, | |
| "eval_runtime": 158.3836, | |
| "eval_samples_per_second": 29.189, | |
| "eval_steps_per_second": 1.825, | |
| "step": 5000 | |
| }, | |
| { | |
| "MSE": 639.5863418579103, | |
| "MSE/layer0": 639.5863418579103, | |
| "dead_code_fraction": 0.1675, | |
| "dead_code_fraction/layer0": 0.1675, | |
| "epoch": 1.03, | |
| "input_norm": 31.99836014429728, | |
| "input_norm/layer0": 31.99836014429728, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1914, | |
| "max_norm": 64.65907287597656, | |
| "max_norm/layer0": 64.65907287597656, | |
| "mean_norm": 41.8120174407959, | |
| "mean_norm/layer0": 41.8120174407959, | |
| "multicode_k": 1, | |
| "output_norm": 18.301887426376346, | |
| "output_norm/layer0": 18.301887426376346, | |
| "step": 5050 | |
| }, | |
| { | |
| "MSE": 639.5830181884764, | |
| "MSE/layer0": 639.5830181884764, | |
| "dead_code_fraction": 0.16545, | |
| "dead_code_fraction/layer0": 0.16545, | |
| "epoch": 1.03, | |
| "input_norm": 31.998363596598292, | |
| "input_norm/layer0": 31.998363596598292, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1503, | |
| "max_norm": 64.83207702636719, | |
| "max_norm/layer0": 64.83207702636719, | |
| "mean_norm": 41.85700988769531, | |
| "mean_norm/layer0": 41.85700988769531, | |
| "multicode_k": 1, | |
| "output_norm": 18.3204355875651, | |
| "output_norm/layer0": 18.3204355875651, | |
| "step": 5100 | |
| }, | |
| { | |
| "MSE": 640.3749603271485, | |
| "MSE/layer0": 640.3749603271485, | |
| "dead_code_fraction": 0.16725, | |
| "dead_code_fraction/layer0": 0.16725, | |
| "epoch": 1.04, | |
| "input_norm": 31.9983703358968, | |
| "input_norm/layer0": 31.9983703358968, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1634, | |
| "max_norm": 65.003662109375, | |
| "max_norm/layer0": 65.003662109375, | |
| "mean_norm": 41.90180778503418, | |
| "mean_norm/layer0": 41.90180778503418, | |
| "multicode_k": 1, | |
| "output_norm": 18.316434319814057, | |
| "output_norm/layer0": 18.316434319814057, | |
| "step": 5150 | |
| }, | |
| { | |
| "MSE": 639.0211893717446, | |
| "MSE/layer0": 639.0211893717446, | |
| "dead_code_fraction": 0.16875, | |
| "dead_code_fraction/layer0": 0.16875, | |
| "epoch": 1.04, | |
| "input_norm": 31.998389561971024, | |
| "input_norm/layer0": 31.998389561971024, | |
| "learning_rate": 0.0005, | |
| "loss": 2.224, | |
| "max_norm": 65.19213104248047, | |
| "max_norm/layer0": 65.19213104248047, | |
| "mean_norm": 41.94645309448242, | |
| "mean_norm/layer0": 41.94645309448242, | |
| "multicode_k": 1, | |
| "output_norm": 18.33804360071819, | |
| "output_norm/layer0": 18.33804360071819, | |
| "step": 5200 | |
| }, | |
| { | |
| "MSE": 638.6207899983721, | |
| "MSE/layer0": 638.6207899983721, | |
| "dead_code_fraction": 0.17055, | |
| "dead_code_fraction/layer0": 0.17055, | |
| "epoch": 1.05, | |
| "input_norm": 31.998394203186038, | |
| "input_norm/layer0": 31.998394203186038, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2235, | |
| "max_norm": 65.36846160888672, | |
| "max_norm/layer0": 65.36846160888672, | |
| "mean_norm": 41.991315841674805, | |
| "mean_norm/layer0": 41.991315841674805, | |
| "multicode_k": 1, | |
| "output_norm": 18.346421286265045, | |
| "output_norm/layer0": 18.346421286265045, | |
| "step": 5250 | |
| }, | |
| { | |
| "MSE": 638.3484961954751, | |
| "MSE/layer0": 638.3484961954751, | |
| "dead_code_fraction": 0.1704, | |
| "dead_code_fraction/layer0": 0.1704, | |
| "epoch": 1.05, | |
| "input_norm": 31.998402004241942, | |
| "input_norm/layer0": 31.998402004241942, | |
| "learning_rate": 0.0005, | |
| "loss": 2.209, | |
| "max_norm": 65.53041076660156, | |
| "max_norm/layer0": 65.53041076660156, | |
| "mean_norm": 42.0357780456543, | |
| "mean_norm/layer0": 42.0357780456543, | |
| "multicode_k": 1, | |
| "output_norm": 18.351918992996215, | |
| "output_norm/layer0": 18.351918992996215, | |
| "step": 5300 | |
| }, | |
| { | |
| "MSE": 638.9349023437496, | |
| "MSE/layer0": 638.9349023437496, | |
| "dead_code_fraction": 0.1671, | |
| "dead_code_fraction/layer0": 0.1671, | |
| "epoch": 1.06, | |
| "input_norm": 31.998392171859756, | |
| "input_norm/layer0": 31.998392171859756, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1737, | |
| "max_norm": 65.69444274902344, | |
| "max_norm/layer0": 65.69444274902344, | |
| "mean_norm": 42.078935623168945, | |
| "mean_norm/layer0": 42.078935623168945, | |
| "multicode_k": 1, | |
| "output_norm": 18.365610707600908, | |
| "output_norm/layer0": 18.365610707600908, | |
| "step": 5350 | |
| }, | |
| { | |
| "MSE": 638.1850768025716, | |
| "MSE/layer0": 638.1850768025716, | |
| "dead_code_fraction": 0.17125, | |
| "dead_code_fraction/layer0": 0.17125, | |
| "epoch": 1.06, | |
| "input_norm": 31.99840373039246, | |
| "input_norm/layer0": 31.99840373039246, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1904, | |
| "max_norm": 65.84613037109375, | |
| "max_norm/layer0": 65.84613037109375, | |
| "mean_norm": 42.122589111328125, | |
| "mean_norm/layer0": 42.122589111328125, | |
| "multicode_k": 1, | |
| "output_norm": 18.371175734202062, | |
| "output_norm/layer0": 18.371175734202062, | |
| "step": 5400 | |
| }, | |
| { | |
| "MSE": 637.5771400960282, | |
| "MSE/layer0": 637.5771400960282, | |
| "dead_code_fraction": 0.17005, | |
| "dead_code_fraction/layer0": 0.17005, | |
| "epoch": 1.07, | |
| "input_norm": 31.998408838907892, | |
| "input_norm/layer0": 31.998408838907892, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2013, | |
| "max_norm": 66.00259399414062, | |
| "max_norm/layer0": 66.00259399414062, | |
| "mean_norm": 42.16551399230957, | |
| "mean_norm/layer0": 42.16551399230957, | |
| "multicode_k": 1, | |
| "output_norm": 18.396056934992465, | |
| "output_norm/layer0": 18.396056934992465, | |
| "step": 5450 | |
| }, | |
| { | |
| "MSE": 637.4973764038084, | |
| "MSE/layer0": 637.4973764038084, | |
| "dead_code_fraction": 0.17135, | |
| "dead_code_fraction/layer0": 0.17135, | |
| "epoch": 1.07, | |
| "input_norm": 31.998402996063238, | |
| "input_norm/layer0": 31.998402996063238, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1686, | |
| "max_norm": 66.15951538085938, | |
| "max_norm/layer0": 66.15951538085938, | |
| "mean_norm": 42.207963943481445, | |
| "mean_norm/layer0": 42.207963943481445, | |
| "multicode_k": 1, | |
| "output_norm": 18.402882191340133, | |
| "output_norm/layer0": 18.402882191340133, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_MSE/layer0": 638.6084431543663, | |
| "eval_accuracy": 0.5026125270625071, | |
| "eval_dead_code_fraction/layer0": 0.17165, | |
| "eval_input_norm/layer0": 31.99841410479916, | |
| "eval_loss": 2.1934523582458496, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 18.401259186926417, | |
| "eval_runtime": 158.4926, | |
| "eval_samples_per_second": 29.169, | |
| "eval_steps_per_second": 1.823, | |
| "step": 5500 | |
| }, | |
| { | |
| "MSE": 637.3816906738282, | |
| "MSE/layer0": 637.3816906738282, | |
| "dead_code_fraction": 0.17125, | |
| "dead_code_fraction/layer0": 0.17125, | |
| "epoch": 1.08, | |
| "input_norm": 31.998415158589676, | |
| "input_norm/layer0": 31.998415158589676, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2097, | |
| "max_norm": 66.32366180419922, | |
| "max_norm/layer0": 66.32366180419922, | |
| "mean_norm": 42.25027084350586, | |
| "mean_norm/layer0": 42.25027084350586, | |
| "multicode_k": 1, | |
| "output_norm": 18.40568763732911, | |
| "output_norm/layer0": 18.40568763732911, | |
| "step": 5550 | |
| }, | |
| { | |
| "MSE": 636.5928268432615, | |
| "MSE/layer0": 636.5928268432615, | |
| "dead_code_fraction": 0.1711, | |
| "dead_code_fraction/layer0": 0.1711, | |
| "epoch": 1.08, | |
| "input_norm": 31.99841807047526, | |
| "input_norm/layer0": 31.99841807047526, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1987, | |
| "max_norm": 66.49840545654297, | |
| "max_norm/layer0": 66.49840545654297, | |
| "mean_norm": 42.29284858703613, | |
| "mean_norm/layer0": 42.29284858703613, | |
| "multicode_k": 1, | |
| "output_norm": 18.424939454396565, | |
| "output_norm/layer0": 18.424939454396565, | |
| "step": 5600 | |
| }, | |
| { | |
| "MSE": 637.195534973145, | |
| "MSE/layer0": 637.195534973145, | |
| "dead_code_fraction": 0.17175, | |
| "dead_code_fraction/layer0": 0.17175, | |
| "epoch": 1.09, | |
| "input_norm": 31.99841377894082, | |
| "input_norm/layer0": 31.99841377894082, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1571, | |
| "max_norm": 66.6655502319336, | |
| "max_norm/layer0": 66.6655502319336, | |
| "mean_norm": 42.33401679992676, | |
| "mean_norm/layer0": 42.33401679992676, | |
| "multicode_k": 1, | |
| "output_norm": 18.427337226867675, | |
| "output_norm/layer0": 18.427337226867675, | |
| "step": 5650 | |
| }, | |
| { | |
| "MSE": 635.8865025838217, | |
| "MSE/layer0": 635.8865025838217, | |
| "dead_code_fraction": 0.1736, | |
| "dead_code_fraction/layer0": 0.1736, | |
| "epoch": 1.09, | |
| "input_norm": 31.998435058593753, | |
| "input_norm/layer0": 31.998435058593753, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2146, | |
| "max_norm": 66.82868957519531, | |
| "max_norm/layer0": 66.82868957519531, | |
| "mean_norm": 42.37582206726074, | |
| "mean_norm/layer0": 42.37582206726074, | |
| "multicode_k": 1, | |
| "output_norm": 18.443573204676298, | |
| "output_norm/layer0": 18.443573204676298, | |
| "step": 5700 | |
| }, | |
| { | |
| "MSE": 636.1581252034503, | |
| "MSE/layer0": 636.1581252034503, | |
| "dead_code_fraction": 0.17225, | |
| "dead_code_fraction/layer0": 0.17225, | |
| "epoch": 1.1, | |
| "input_norm": 31.998433354695635, | |
| "input_norm/layer0": 31.998433354695635, | |
| "learning_rate": 0.0005, | |
| "loss": 2.171, | |
| "max_norm": 66.9796371459961, | |
| "max_norm/layer0": 66.9796371459961, | |
| "mean_norm": 42.41728591918945, | |
| "mean_norm/layer0": 42.41728591918945, | |
| "multicode_k": 1, | |
| "output_norm": 18.440257479349775, | |
| "output_norm/layer0": 18.440257479349775, | |
| "step": 5750 | |
| }, | |
| { | |
| "MSE": 636.7286339314779, | |
| "MSE/layer0": 636.7286339314779, | |
| "dead_code_fraction": 0.1738, | |
| "dead_code_fraction/layer0": 0.1738, | |
| "epoch": 1.1, | |
| "input_norm": 31.998429416020713, | |
| "input_norm/layer0": 31.998429416020713, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1502, | |
| "max_norm": 67.13478088378906, | |
| "max_norm/layer0": 67.13478088378906, | |
| "mean_norm": 42.45817756652832, | |
| "mean_norm/layer0": 42.45817756652832, | |
| "multicode_k": 1, | |
| "output_norm": 18.442232058842986, | |
| "output_norm/layer0": 18.442232058842986, | |
| "step": 5800 | |
| }, | |
| { | |
| "MSE": 635.2576449584958, | |
| "MSE/layer0": 635.2576449584958, | |
| "dead_code_fraction": 0.17405, | |
| "dead_code_fraction/layer0": 0.17405, | |
| "epoch": 1.11, | |
| "input_norm": 31.99844219843547, | |
| "input_norm/layer0": 31.99844219843547, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2067, | |
| "max_norm": 67.28919982910156, | |
| "max_norm/layer0": 67.28919982910156, | |
| "mean_norm": 42.49948501586914, | |
| "mean_norm/layer0": 42.49948501586914, | |
| "multicode_k": 1, | |
| "output_norm": 18.46717386881511, | |
| "output_norm/layer0": 18.46717386881511, | |
| "step": 5850 | |
| }, | |
| { | |
| "MSE": 636.0759664916989, | |
| "MSE/layer0": 636.0759664916989, | |
| "dead_code_fraction": 0.17355, | |
| "dead_code_fraction/layer0": 0.17355, | |
| "epoch": 1.11, | |
| "input_norm": 31.998439470926915, | |
| "input_norm/layer0": 31.998439470926915, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1543, | |
| "max_norm": 67.44383239746094, | |
| "max_norm/layer0": 67.44383239746094, | |
| "mean_norm": 42.53946495056152, | |
| "mean_norm/layer0": 42.53946495056152, | |
| "multicode_k": 1, | |
| "output_norm": 18.469777971903483, | |
| "output_norm/layer0": 18.469777971903483, | |
| "step": 5900 | |
| }, | |
| { | |
| "MSE": 635.3813305664057, | |
| "MSE/layer0": 635.3813305664057, | |
| "dead_code_fraction": 0.17405, | |
| "dead_code_fraction/layer0": 0.17405, | |
| "epoch": 1.12, | |
| "input_norm": 31.99844372113545, | |
| "input_norm/layer0": 31.99844372113545, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1846, | |
| "max_norm": 67.59025573730469, | |
| "max_norm/layer0": 67.59025573730469, | |
| "mean_norm": 42.58071327209473, | |
| "mean_norm/layer0": 42.58071327209473, | |
| "multicode_k": 1, | |
| "output_norm": 18.477715517679847, | |
| "output_norm/layer0": 18.477715517679847, | |
| "step": 5950 | |
| }, | |
| { | |
| "MSE": 634.5524212646484, | |
| "MSE/layer0": 634.5524212646484, | |
| "dead_code_fraction": 0.17535, | |
| "dead_code_fraction/layer0": 0.17535, | |
| "epoch": 1.12, | |
| "input_norm": 31.998457225163776, | |
| "input_norm/layer0": 31.998457225163776, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2158, | |
| "max_norm": 67.7379379272461, | |
| "max_norm/layer0": 67.7379379272461, | |
| "mean_norm": 42.62178421020508, | |
| "mean_norm/layer0": 42.62178421020508, | |
| "multicode_k": 1, | |
| "output_norm": 18.489366165796913, | |
| "output_norm/layer0": 18.489366165796913, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_MSE/layer0": 632.9325560995336, | |
| "eval_accuracy": 0.5036799089257694, | |
| "eval_dead_code_fraction/layer0": 0.17795, | |
| "eval_input_norm/layer0": 31.998461353451354, | |
| "eval_loss": 2.1832942962646484, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 18.51493810096293, | |
| "eval_runtime": 158.8489, | |
| "eval_samples_per_second": 29.103, | |
| "eval_steps_per_second": 1.819, | |
| "step": 6000 | |
| }, | |
| { | |
| "MSE": 634.7784757486979, | |
| "MSE/layer0": 634.7784757486979, | |
| "dead_code_fraction": 0.1755, | |
| "dead_code_fraction/layer0": 0.1755, | |
| "epoch": 1.13, | |
| "input_norm": 31.99845712025961, | |
| "input_norm/layer0": 31.99845712025961, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1789, | |
| "max_norm": 67.8902816772461, | |
| "max_norm/layer0": 67.8902816772461, | |
| "mean_norm": 42.66269874572754, | |
| "mean_norm/layer0": 42.66269874572754, | |
| "multicode_k": 1, | |
| "output_norm": 18.49625307718913, | |
| "output_norm/layer0": 18.49625307718913, | |
| "step": 6050 | |
| }, | |
| { | |
| "MSE": 634.5078458658851, | |
| "MSE/layer0": 634.5078458658851, | |
| "dead_code_fraction": 0.17445, | |
| "dead_code_fraction/layer0": 0.17445, | |
| "epoch": 1.13, | |
| "input_norm": 31.99845917383831, | |
| "input_norm/layer0": 31.99845917383831, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2009, | |
| "max_norm": 68.04124450683594, | |
| "max_norm/layer0": 68.04124450683594, | |
| "mean_norm": 42.70250701904297, | |
| "mean_norm/layer0": 42.70250701904297, | |
| "multicode_k": 1, | |
| "output_norm": 18.514623686472582, | |
| "output_norm/layer0": 18.514623686472582, | |
| "step": 6100 | |
| }, | |
| { | |
| "MSE": 634.443066914876, | |
| "MSE/layer0": 634.443066914876, | |
| "dead_code_fraction": 0.17575, | |
| "dead_code_fraction/layer0": 0.17575, | |
| "epoch": 1.14, | |
| "input_norm": 31.99845913887024, | |
| "input_norm/layer0": 31.99845913887024, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1623, | |
| "max_norm": 68.17865753173828, | |
| "max_norm/layer0": 68.17865753173828, | |
| "mean_norm": 42.742488861083984, | |
| "mean_norm/layer0": 42.742488861083984, | |
| "multicode_k": 1, | |
| "output_norm": 18.513023862838743, | |
| "output_norm/layer0": 18.513023862838743, | |
| "step": 6150 | |
| }, | |
| { | |
| "MSE": 633.6522382609048, | |
| "MSE/layer0": 633.6522382609048, | |
| "dead_code_fraction": 0.17475, | |
| "dead_code_fraction/layer0": 0.17475, | |
| "epoch": 1.14, | |
| "input_norm": 31.998471844991045, | |
| "input_norm/layer0": 31.998471844991045, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1824, | |
| "max_norm": 68.31253051757812, | |
| "max_norm/layer0": 68.31253051757812, | |
| "mean_norm": 42.782148361206055, | |
| "mean_norm/layer0": 42.782148361206055, | |
| "multicode_k": 1, | |
| "output_norm": 18.529316590627033, | |
| "output_norm/layer0": 18.529316590627033, | |
| "step": 6200 | |
| }, | |
| { | |
| "MSE": 634.0474910481774, | |
| "MSE/layer0": 634.0474910481774, | |
| "dead_code_fraction": 0.1771, | |
| "dead_code_fraction/layer0": 0.1771, | |
| "epoch": 1.15, | |
| "input_norm": 31.998480736414585, | |
| "input_norm/layer0": 31.998480736414585, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1948, | |
| "max_norm": 68.44271850585938, | |
| "max_norm/layer0": 68.44271850585938, | |
| "mean_norm": 42.82079887390137, | |
| "mean_norm/layer0": 42.82079887390137, | |
| "multicode_k": 1, | |
| "output_norm": 18.524528849919633, | |
| "output_norm/layer0": 18.524528849919633, | |
| "step": 6250 | |
| }, | |
| { | |
| "MSE": 633.648407084147, | |
| "MSE/layer0": 633.648407084147, | |
| "dead_code_fraction": 0.1745, | |
| "dead_code_fraction/layer0": 0.1745, | |
| "epoch": 1.15, | |
| "input_norm": 31.998468182881673, | |
| "input_norm/layer0": 31.998468182881673, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1145, | |
| "max_norm": 68.57721710205078, | |
| "max_norm/layer0": 68.57721710205078, | |
| "mean_norm": 42.859825134277344, | |
| "mean_norm/layer0": 42.859825134277344, | |
| "multicode_k": 1, | |
| "output_norm": 18.540853935877482, | |
| "output_norm/layer0": 18.540853935877482, | |
| "step": 6300 | |
| }, | |
| { | |
| "MSE": 633.5945191446937, | |
| "MSE/layer0": 633.5945191446937, | |
| "dead_code_fraction": 0.17705, | |
| "dead_code_fraction/layer0": 0.17705, | |
| "epoch": 1.16, | |
| "input_norm": 31.99847273508707, | |
| "input_norm/layer0": 31.99847273508707, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1507, | |
| "max_norm": 68.7186050415039, | |
| "max_norm/layer0": 68.7186050415039, | |
| "mean_norm": 42.897830963134766, | |
| "mean_norm/layer0": 42.897830963134766, | |
| "multicode_k": 1, | |
| "output_norm": 18.55124579429626, | |
| "output_norm/layer0": 18.55124579429626, | |
| "step": 6350 | |
| }, | |
| { | |
| "MSE": 632.1478841145836, | |
| "MSE/layer0": 632.1478841145836, | |
| "dead_code_fraction": 0.1775, | |
| "dead_code_fraction/layer0": 0.1775, | |
| "epoch": 1.16, | |
| "input_norm": 31.9984964243571, | |
| "input_norm/layer0": 31.9984964243571, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1962, | |
| "max_norm": 68.85418701171875, | |
| "max_norm/layer0": 68.85418701171875, | |
| "mean_norm": 42.937448501586914, | |
| "mean_norm/layer0": 42.937448501586914, | |
| "multicode_k": 1, | |
| "output_norm": 18.5615934785207, | |
| "output_norm/layer0": 18.5615934785207, | |
| "step": 6400 | |
| }, | |
| { | |
| "MSE": 632.57952931722, | |
| "MSE/layer0": 632.57952931722, | |
| "dead_code_fraction": 0.1777, | |
| "dead_code_fraction/layer0": 0.1777, | |
| "epoch": 1.17, | |
| "input_norm": 31.998487294514977, | |
| "input_norm/layer0": 31.998487294514977, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1627, | |
| "max_norm": 69.0008316040039, | |
| "max_norm/layer0": 69.0008316040039, | |
| "mean_norm": 42.97622108459473, | |
| "mean_norm/layer0": 42.97622108459473, | |
| "multicode_k": 1, | |
| "output_norm": 18.57248200734457, | |
| "output_norm/layer0": 18.57248200734457, | |
| "step": 6450 | |
| }, | |
| { | |
| "MSE": 631.0360174560547, | |
| "MSE/layer0": 631.0360174560547, | |
| "dead_code_fraction": 0.1784, | |
| "dead_code_fraction/layer0": 0.1784, | |
| "epoch": 1.17, | |
| "input_norm": 31.998495709101356, | |
| "input_norm/layer0": 31.998495709101356, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1843, | |
| "max_norm": 69.13652038574219, | |
| "max_norm/layer0": 69.13652038574219, | |
| "mean_norm": 43.01558876037598, | |
| "mean_norm/layer0": 43.01558876037598, | |
| "multicode_k": 1, | |
| "output_norm": 18.591586551666268, | |
| "output_norm/layer0": 18.591586551666268, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_MSE/layer0": 631.2925020152297, | |
| "eval_accuracy": 0.5039093283634951, | |
| "eval_dead_code_fraction/layer0": 0.1797, | |
| "eval_input_norm/layer0": 31.99848882414009, | |
| "eval_loss": 2.175981044769287, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 18.598594732777567, | |
| "eval_runtime": 158.1453, | |
| "eval_samples_per_second": 29.233, | |
| "eval_steps_per_second": 1.827, | |
| "step": 6500 | |
| }, | |
| { | |
| "MSE": 631.294188741048, | |
| "MSE/layer0": 631.294188741048, | |
| "dead_code_fraction": 0.1796, | |
| "dead_code_fraction/layer0": 0.1796, | |
| "epoch": 1.18, | |
| "input_norm": 31.998505541483564, | |
| "input_norm/layer0": 31.998505541483564, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1855, | |
| "max_norm": 69.26646423339844, | |
| "max_norm/layer0": 69.26646423339844, | |
| "mean_norm": 43.0548152923584, | |
| "mean_norm/layer0": 43.0548152923584, | |
| "multicode_k": 1, | |
| "output_norm": 18.585241152445477, | |
| "output_norm/layer0": 18.585241152445477, | |
| "step": 6550 | |
| }, | |
| { | |
| "MSE": 631.297376505534, | |
| "MSE/layer0": 631.297376505534, | |
| "dead_code_fraction": 0.1779, | |
| "dead_code_fraction/layer0": 0.1779, | |
| "epoch": 1.18, | |
| "input_norm": 31.998487745920816, | |
| "input_norm/layer0": 31.998487745920816, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1197, | |
| "max_norm": 69.3987808227539, | |
| "max_norm/layer0": 69.3987808227539, | |
| "mean_norm": 43.093589782714844, | |
| "mean_norm/layer0": 43.093589782714844, | |
| "multicode_k": 1, | |
| "output_norm": 18.605287278493257, | |
| "output_norm/layer0": 18.605287278493257, | |
| "step": 6600 | |
| }, | |
| { | |
| "MSE": 630.8991915893555, | |
| "MSE/layer0": 630.8991915893555, | |
| "dead_code_fraction": 0.17815, | |
| "dead_code_fraction/layer0": 0.17815, | |
| "epoch": 1.19, | |
| "input_norm": 31.99848988215129, | |
| "input_norm/layer0": 31.99848988215129, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1248, | |
| "max_norm": 69.52507019042969, | |
| "max_norm/layer0": 69.52507019042969, | |
| "mean_norm": 43.132524490356445, | |
| "mean_norm/layer0": 43.132524490356445, | |
| "multicode_k": 1, | |
| "output_norm": 18.61235850652059, | |
| "output_norm/layer0": 18.61235850652059, | |
| "step": 6650 | |
| }, | |
| { | |
| "MSE": 629.604686584473, | |
| "MSE/layer0": 629.604686584473, | |
| "dead_code_fraction": 0.17965, | |
| "dead_code_fraction/layer0": 0.17965, | |
| "epoch": 1.19, | |
| "input_norm": 31.99852681477865, | |
| "input_norm/layer0": 31.99852681477865, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2265, | |
| "max_norm": 69.66030883789062, | |
| "max_norm/layer0": 69.66030883789062, | |
| "mean_norm": 43.17206573486328, | |
| "mean_norm/layer0": 43.17206573486328, | |
| "multicode_k": 1, | |
| "output_norm": 18.626948499679564, | |
| "output_norm/layer0": 18.626948499679564, | |
| "step": 6700 | |
| }, | |
| { | |
| "MSE": 629.7875715128578, | |
| "MSE/layer0": 629.7875715128578, | |
| "dead_code_fraction": 0.1802, | |
| "dead_code_fraction/layer0": 0.1802, | |
| "epoch": 1.2, | |
| "input_norm": 31.998509550094596, | |
| "input_norm/layer0": 31.998509550094596, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1432, | |
| "max_norm": 69.78119659423828, | |
| "max_norm/layer0": 69.78119659423828, | |
| "mean_norm": 43.21029472351074, | |
| "mean_norm/layer0": 43.21029472351074, | |
| "multicode_k": 1, | |
| "output_norm": 18.639319947560622, | |
| "output_norm/layer0": 18.639319947560622, | |
| "step": 6750 | |
| }, | |
| { | |
| "MSE": 629.3708419799802, | |
| "MSE/layer0": 629.3708419799802, | |
| "dead_code_fraction": 0.18015, | |
| "dead_code_fraction/layer0": 0.18015, | |
| "epoch": 1.2, | |
| "input_norm": 31.99851152102152, | |
| "input_norm/layer0": 31.99851152102152, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1606, | |
| "max_norm": 69.91252899169922, | |
| "max_norm/layer0": 69.91252899169922, | |
| "mean_norm": 43.24948692321777, | |
| "mean_norm/layer0": 43.24948692321777, | |
| "multicode_k": 1, | |
| "output_norm": 18.64606482187906, | |
| "output_norm/layer0": 18.64606482187906, | |
| "step": 6800 | |
| }, | |
| { | |
| "MSE": 628.4038922119142, | |
| "MSE/layer0": 628.4038922119142, | |
| "dead_code_fraction": 0.1806, | |
| "dead_code_fraction/layer0": 0.1806, | |
| "epoch": 1.21, | |
| "input_norm": 31.998516721725462, | |
| "input_norm/layer0": 31.998516721725462, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1582, | |
| "max_norm": 70.04332733154297, | |
| "max_norm/layer0": 70.04332733154297, | |
| "mean_norm": 43.28862762451172, | |
| "mean_norm/layer0": 43.28862762451172, | |
| "multicode_k": 1, | |
| "output_norm": 18.669758415222162, | |
| "output_norm/layer0": 18.669758415222162, | |
| "step": 6850 | |
| }, | |
| { | |
| "MSE": 628.1812467447919, | |
| "MSE/layer0": 628.1812467447919, | |
| "dead_code_fraction": 0.18055, | |
| "dead_code_fraction/layer0": 0.18055, | |
| "epoch": 1.21, | |
| "input_norm": 31.998515844345086, | |
| "input_norm/layer0": 31.998515844345086, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1433, | |
| "max_norm": 70.16979217529297, | |
| "max_norm/layer0": 70.16979217529297, | |
| "mean_norm": 43.327192306518555, | |
| "mean_norm/layer0": 43.327192306518555, | |
| "multicode_k": 1, | |
| "output_norm": 18.674684073130294, | |
| "output_norm/layer0": 18.674684073130294, | |
| "step": 6900 | |
| }, | |
| { | |
| "MSE": 628.1862957763672, | |
| "MSE/layer0": 628.1862957763672, | |
| "dead_code_fraction": 0.18045, | |
| "dead_code_fraction/layer0": 0.18045, | |
| "epoch": 1.22, | |
| "input_norm": 31.99852259953816, | |
| "input_norm/layer0": 31.99852259953816, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1458, | |
| "max_norm": 70.29747772216797, | |
| "max_norm/layer0": 70.29747772216797, | |
| "mean_norm": 43.36609077453613, | |
| "mean_norm/layer0": 43.36609077453613, | |
| "multicode_k": 1, | |
| "output_norm": 18.682749029795335, | |
| "output_norm/layer0": 18.682749029795335, | |
| "step": 6950 | |
| }, | |
| { | |
| "MSE": 627.7981392415361, | |
| "MSE/layer0": 627.7981392415361, | |
| "dead_code_fraction": 0.18045, | |
| "dead_code_fraction/layer0": 0.18045, | |
| "epoch": 1.22, | |
| "input_norm": 31.998523871103927, | |
| "input_norm/layer0": 31.998523871103927, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1339, | |
| "max_norm": 70.425537109375, | |
| "max_norm/layer0": 70.425537109375, | |
| "mean_norm": 43.40445899963379, | |
| "mean_norm/layer0": 43.40445899963379, | |
| "multicode_k": 1, | |
| "output_norm": 18.696380834579458, | |
| "output_norm/layer0": 18.696380834579458, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_MSE/layer0": 627.9790743019787, | |
| "eval_accuracy": 0.5048263717749389, | |
| "eval_dead_code_fraction/layer0": 0.1819, | |
| "eval_input_norm/layer0": 31.998524618592334, | |
| "eval_loss": 2.1696202754974365, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 18.705300997223095, | |
| "eval_runtime": 159.1692, | |
| "eval_samples_per_second": 29.045, | |
| "eval_steps_per_second": 1.816, | |
| "step": 7000 | |
| }, | |
| { | |
| "MSE": 627.3165437825519, | |
| "MSE/layer0": 627.3165437825519, | |
| "dead_code_fraction": 0.1822, | |
| "dead_code_fraction/layer0": 0.1822, | |
| "epoch": 1.23, | |
| "input_norm": 31.99852600097656, | |
| "input_norm/layer0": 31.99852600097656, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1483, | |
| "max_norm": 70.54450988769531, | |
| "max_norm/layer0": 70.54450988769531, | |
| "mean_norm": 43.442848205566406, | |
| "mean_norm/layer0": 43.442848205566406, | |
| "multicode_k": 1, | |
| "output_norm": 18.700957148869843, | |
| "output_norm/layer0": 18.700957148869843, | |
| "step": 7050 | |
| }, | |
| { | |
| "MSE": 626.7479965209961, | |
| "MSE/layer0": 626.7479965209961, | |
| "dead_code_fraction": 0.1804, | |
| "dead_code_fraction/layer0": 0.1804, | |
| "epoch": 1.23, | |
| "input_norm": 31.998541386922206, | |
| "input_norm/layer0": 31.998541386922206, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1512, | |
| "max_norm": 70.66608428955078, | |
| "max_norm/layer0": 70.66608428955078, | |
| "mean_norm": 43.48159599304199, | |
| "mean_norm/layer0": 43.48159599304199, | |
| "multicode_k": 1, | |
| "output_norm": 18.714396947224948, | |
| "output_norm/layer0": 18.714396947224948, | |
| "step": 7100 | |
| }, | |
| { | |
| "MSE": 626.4450497436519, | |
| "MSE/layer0": 626.4450497436519, | |
| "dead_code_fraction": 0.1823, | |
| "dead_code_fraction/layer0": 0.1823, | |
| "epoch": 1.24, | |
| "input_norm": 31.998545411427806, | |
| "input_norm/layer0": 31.998545411427806, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1586, | |
| "max_norm": 70.7937240600586, | |
| "max_norm/layer0": 70.7937240600586, | |
| "mean_norm": 43.5198860168457, | |
| "mean_norm/layer0": 43.5198860168457, | |
| "multicode_k": 1, | |
| "output_norm": 18.726943721771242, | |
| "output_norm/layer0": 18.726943721771242, | |
| "step": 7150 | |
| }, | |
| { | |
| "MSE": 626.1652618408202, | |
| "MSE/layer0": 626.1652618408202, | |
| "dead_code_fraction": 0.1814, | |
| "dead_code_fraction/layer0": 0.1814, | |
| "epoch": 1.24, | |
| "input_norm": 31.998541978200272, | |
| "input_norm/layer0": 31.998541978200272, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1552, | |
| "max_norm": 70.90862274169922, | |
| "max_norm/layer0": 70.90862274169922, | |
| "mean_norm": 43.55833053588867, | |
| "mean_norm/layer0": 43.55833053588867, | |
| "multicode_k": 1, | |
| "output_norm": 18.731371542612706, | |
| "output_norm/layer0": 18.731371542612706, | |
| "step": 7200 | |
| }, | |
| { | |
| "MSE": 625.2572497558597, | |
| "MSE/layer0": 625.2572497558597, | |
| "dead_code_fraction": 0.1839, | |
| "dead_code_fraction/layer0": 0.1839, | |
| "epoch": 1.25, | |
| "input_norm": 31.998552770614626, | |
| "input_norm/layer0": 31.998552770614626, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1673, | |
| "max_norm": 71.0332260131836, | |
| "max_norm/layer0": 71.0332260131836, | |
| "mean_norm": 43.5967960357666, | |
| "mean_norm/layer0": 43.5967960357666, | |
| "multicode_k": 1, | |
| "output_norm": 18.756609748204536, | |
| "output_norm/layer0": 18.756609748204536, | |
| "step": 7250 | |
| }, | |
| { | |
| "MSE": 624.7860372924804, | |
| "MSE/layer0": 624.7860372924804, | |
| "dead_code_fraction": 0.1831, | |
| "dead_code_fraction/layer0": 0.1831, | |
| "epoch": 1.25, | |
| "input_norm": 31.998555002212534, | |
| "input_norm/layer0": 31.998555002212534, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1575, | |
| "max_norm": 71.15364837646484, | |
| "max_norm/layer0": 71.15364837646484, | |
| "mean_norm": 43.63525199890137, | |
| "mean_norm/layer0": 43.63525199890137, | |
| "multicode_k": 1, | |
| "output_norm": 18.767410192489628, | |
| "output_norm/layer0": 18.767410192489628, | |
| "step": 7300 | |
| }, | |
| { | |
| "MSE": 624.7060753377278, | |
| "MSE/layer0": 624.7060753377278, | |
| "dead_code_fraction": 0.18335, | |
| "dead_code_fraction/layer0": 0.18335, | |
| "epoch": 1.26, | |
| "input_norm": 31.99856230099995, | |
| "input_norm/layer0": 31.99856230099995, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1622, | |
| "max_norm": 71.2812271118164, | |
| "max_norm/layer0": 71.2812271118164, | |
| "mean_norm": 43.67383575439453, | |
| "mean_norm/layer0": 43.67383575439453, | |
| "multicode_k": 1, | |
| "output_norm": 18.77556623776755, | |
| "output_norm/layer0": 18.77556623776755, | |
| "step": 7350 | |
| }, | |
| { | |
| "MSE": 623.9612900797528, | |
| "MSE/layer0": 623.9612900797528, | |
| "dead_code_fraction": 0.1834, | |
| "dead_code_fraction/layer0": 0.1834, | |
| "epoch": 1.26, | |
| "input_norm": 31.998564265569062, | |
| "input_norm/layer0": 31.998564265569062, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1721, | |
| "max_norm": 71.4082260131836, | |
| "max_norm/layer0": 71.4082260131836, | |
| "mean_norm": 43.71280097961426, | |
| "mean_norm/layer0": 43.71280097961426, | |
| "multicode_k": 1, | |
| "output_norm": 18.78839166323344, | |
| "output_norm/layer0": 18.78839166323344, | |
| "step": 7400 | |
| }, | |
| { | |
| "MSE": 623.9870674641929, | |
| "MSE/layer0": 623.9870674641929, | |
| "dead_code_fraction": 0.18355, | |
| "dead_code_fraction/layer0": 0.18355, | |
| "epoch": 1.27, | |
| "input_norm": 31.998560991287228, | |
| "input_norm/layer0": 31.998560991287228, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1424, | |
| "max_norm": 71.52973937988281, | |
| "max_norm/layer0": 71.52973937988281, | |
| "mean_norm": 43.75117111206055, | |
| "mean_norm/layer0": 43.75117111206055, | |
| "multicode_k": 1, | |
| "output_norm": 18.79942525227863, | |
| "output_norm/layer0": 18.79942525227863, | |
| "step": 7450 | |
| }, | |
| { | |
| "MSE": 622.7629538981118, | |
| "MSE/layer0": 622.7629538981118, | |
| "dead_code_fraction": 0.1844, | |
| "dead_code_fraction/layer0": 0.1844, | |
| "epoch": 1.27, | |
| "input_norm": 31.998580735524506, | |
| "input_norm/layer0": 31.998580735524506, | |
| "learning_rate": 0.0005, | |
| "loss": 2.187, | |
| "max_norm": 71.64968872070312, | |
| "max_norm/layer0": 71.64968872070312, | |
| "mean_norm": 43.790061950683594, | |
| "mean_norm/layer0": 43.790061950683594, | |
| "multicode_k": 1, | |
| "output_norm": 18.81509483655294, | |
| "output_norm/layer0": 18.81509483655294, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_MSE/layer0": 622.122652727573, | |
| "eval_accuracy": 0.5062701283839631, | |
| "eval_dead_code_fraction/layer0": 0.18665, | |
| "eval_input_norm/layer0": 31.998566619663464, | |
| "eval_loss": 2.1583967208862305, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 18.83381110374323, | |
| "eval_runtime": 158.6442, | |
| "eval_samples_per_second": 29.141, | |
| "eval_steps_per_second": 1.822, | |
| "step": 7500 | |
| }, | |
| { | |
| "MSE": 622.9042826334635, | |
| "MSE/layer0": 622.9042826334635, | |
| "dead_code_fraction": 0.1841, | |
| "dead_code_fraction/layer0": 0.1841, | |
| "epoch": 1.28, | |
| "input_norm": 31.998572101593023, | |
| "input_norm/layer0": 31.998572101593023, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1434, | |
| "max_norm": 71.76019287109375, | |
| "max_norm/layer0": 71.76019287109375, | |
| "mean_norm": 43.828460693359375, | |
| "mean_norm/layer0": 43.828460693359375, | |
| "multicode_k": 1, | |
| "output_norm": 18.82229045232136, | |
| "output_norm/layer0": 18.82229045232136, | |
| "step": 7550 | |
| }, | |
| { | |
| "MSE": 621.695281575521, | |
| "MSE/layer0": 621.695281575521, | |
| "dead_code_fraction": 0.1854, | |
| "dead_code_fraction/layer0": 0.1854, | |
| "epoch": 1.28, | |
| "input_norm": 31.998584995269773, | |
| "input_norm/layer0": 31.998584995269773, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1712, | |
| "max_norm": 71.87606048583984, | |
| "max_norm/layer0": 71.87606048583984, | |
| "mean_norm": 43.867136001586914, | |
| "mean_norm/layer0": 43.867136001586914, | |
| "multicode_k": 1, | |
| "output_norm": 18.84749958992006, | |
| "output_norm/layer0": 18.84749958992006, | |
| "step": 7600 | |
| }, | |
| { | |
| "MSE": 622.6274766031902, | |
| "MSE/layer0": 622.6274766031902, | |
| "dead_code_fraction": 0.18355, | |
| "dead_code_fraction/layer0": 0.18355, | |
| "epoch": 1.29, | |
| "input_norm": 31.998571812311802, | |
| "input_norm/layer0": 31.998571812311802, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1412, | |
| "max_norm": 71.98139953613281, | |
| "max_norm/layer0": 71.98139953613281, | |
| "mean_norm": 43.90544891357422, | |
| "mean_norm/layer0": 43.90544891357422, | |
| "multicode_k": 1, | |
| "output_norm": 18.83851943016053, | |
| "output_norm/layer0": 18.83851943016053, | |
| "step": 7650 | |
| }, | |
| { | |
| "MSE": 621.3046355183919, | |
| "MSE/layer0": 621.3046355183919, | |
| "dead_code_fraction": 0.18495, | |
| "dead_code_fraction/layer0": 0.18495, | |
| "epoch": 1.29, | |
| "input_norm": 31.998585087458295, | |
| "input_norm/layer0": 31.998585087458295, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1711, | |
| "max_norm": 72.08447265625, | |
| "max_norm/layer0": 72.08447265625, | |
| "mean_norm": 43.94407653808594, | |
| "mean_norm/layer0": 43.94407653808594, | |
| "multicode_k": 1, | |
| "output_norm": 18.86037411053976, | |
| "output_norm/layer0": 18.86037411053976, | |
| "step": 7700 | |
| }, | |
| { | |
| "MSE": 620.5873645019533, | |
| "MSE/layer0": 620.5873645019533, | |
| "dead_code_fraction": 0.18485, | |
| "dead_code_fraction/layer0": 0.18485, | |
| "epoch": 1.3, | |
| "input_norm": 31.998606751759848, | |
| "input_norm/layer0": 31.998606751759848, | |
| "learning_rate": 0.0005, | |
| "loss": 2.2069, | |
| "max_norm": 72.18034362792969, | |
| "max_norm/layer0": 72.18034362792969, | |
| "mean_norm": 43.9833927154541, | |
| "mean_norm/layer0": 43.9833927154541, | |
| "multicode_k": 1, | |
| "output_norm": 18.87507179578146, | |
| "output_norm/layer0": 18.87507179578146, | |
| "step": 7750 | |
| }, | |
| { | |
| "MSE": 621.2272378540041, | |
| "MSE/layer0": 621.2272378540041, | |
| "dead_code_fraction": 0.18385, | |
| "dead_code_fraction/layer0": 0.18385, | |
| "epoch": 1.3, | |
| "input_norm": 31.998583949406935, | |
| "input_norm/layer0": 31.998583949406935, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1217, | |
| "max_norm": 72.27928924560547, | |
| "max_norm/layer0": 72.27928924560547, | |
| "mean_norm": 44.021806716918945, | |
| "mean_norm/layer0": 44.021806716918945, | |
| "multicode_k": 1, | |
| "output_norm": 18.877027104695642, | |
| "output_norm/layer0": 18.877027104695642, | |
| "step": 7800 | |
| }, | |
| { | |
| "MSE": 620.067134602865, | |
| "MSE/layer0": 620.067134602865, | |
| "dead_code_fraction": 0.18535, | |
| "dead_code_fraction/layer0": 0.18535, | |
| "epoch": 1.31, | |
| "input_norm": 31.998594888051343, | |
| "input_norm/layer0": 31.998594888051343, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1753, | |
| "max_norm": 72.39033508300781, | |
| "max_norm/layer0": 72.39033508300781, | |
| "mean_norm": 44.060611724853516, | |
| "mean_norm/layer0": 44.060611724853516, | |
| "multicode_k": 1, | |
| "output_norm": 18.89820697466533, | |
| "output_norm/layer0": 18.89820697466533, | |
| "step": 7850 | |
| }, | |
| { | |
| "MSE": 620.6704218546549, | |
| "MSE/layer0": 620.6704218546549, | |
| "dead_code_fraction": 0.18735, | |
| "dead_code_fraction/layer0": 0.18735, | |
| "epoch": 1.31, | |
| "input_norm": 31.998597246805822, | |
| "input_norm/layer0": 31.998597246805822, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1778, | |
| "max_norm": 72.4916000366211, | |
| "max_norm/layer0": 72.4916000366211, | |
| "mean_norm": 44.09913635253906, | |
| "mean_norm/layer0": 44.09913635253906, | |
| "multicode_k": 1, | |
| "output_norm": 18.890051161448145, | |
| "output_norm/layer0": 18.890051161448145, | |
| "step": 7900 | |
| }, | |
| { | |
| "MSE": 619.2155123901367, | |
| "MSE/layer0": 619.2155123901367, | |
| "dead_code_fraction": 0.1863, | |
| "dead_code_fraction/layer0": 0.1863, | |
| "epoch": 1.32, | |
| "input_norm": 31.99860541343688, | |
| "input_norm/layer0": 31.99860541343688, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1684, | |
| "max_norm": 72.59037017822266, | |
| "max_norm/layer0": 72.59037017822266, | |
| "mean_norm": 44.13744926452637, | |
| "mean_norm/layer0": 44.13744926452637, | |
| "multicode_k": 1, | |
| "output_norm": 18.920912733078, | |
| "output_norm/layer0": 18.920912733078, | |
| "step": 7950 | |
| }, | |
| { | |
| "MSE": 618.8985408528646, | |
| "MSE/layer0": 618.8985408528646, | |
| "dead_code_fraction": 0.1867, | |
| "dead_code_fraction/layer0": 0.1867, | |
| "epoch": 1.32, | |
| "input_norm": 31.998596220016488, | |
| "input_norm/layer0": 31.998596220016488, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1302, | |
| "max_norm": 72.69281768798828, | |
| "max_norm/layer0": 72.69281768798828, | |
| "mean_norm": 44.176042556762695, | |
| "mean_norm/layer0": 44.176042556762695, | |
| "multicode_k": 1, | |
| "output_norm": 18.93559975624085, | |
| "output_norm/layer0": 18.93559975624085, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_MSE/layer0": 617.7161538934592, | |
| "eval_accuracy": 0.5071360017457022, | |
| "eval_dead_code_fraction/layer0": 0.18755, | |
| "eval_input_norm/layer0": 31.99860155017712, | |
| "eval_loss": 2.150786876678467, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 18.949325338731256, | |
| "eval_runtime": 158.4669, | |
| "eval_samples_per_second": 29.173, | |
| "eval_steps_per_second": 1.824, | |
| "step": 8000 | |
| }, | |
| { | |
| "MSE": 619.1937561035155, | |
| "MSE/layer0": 619.1937561035155, | |
| "dead_code_fraction": 0.18685, | |
| "dead_code_fraction/layer0": 0.18685, | |
| "epoch": 1.33, | |
| "input_norm": 31.998596970240285, | |
| "input_norm/layer0": 31.998596970240285, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1279, | |
| "max_norm": 72.79032135009766, | |
| "max_norm/layer0": 72.79032135009766, | |
| "mean_norm": 44.21445846557617, | |
| "mean_norm/layer0": 44.21445846557617, | |
| "multicode_k": 1, | |
| "output_norm": 18.93686810175578, | |
| "output_norm/layer0": 18.93686810175578, | |
| "step": 8050 | |
| }, | |
| { | |
| "MSE": 619.539402567546, | |
| "MSE/layer0": 619.539402567546, | |
| "dead_code_fraction": 0.18665, | |
| "dead_code_fraction/layer0": 0.18665, | |
| "epoch": 1.33, | |
| "input_norm": 31.998598492940268, | |
| "input_norm/layer0": 31.998598492940268, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1113, | |
| "max_norm": 72.88322448730469, | |
| "max_norm/layer0": 72.88322448730469, | |
| "mean_norm": 44.251609802246094, | |
| "mean_norm/layer0": 44.251609802246094, | |
| "multicode_k": 1, | |
| "output_norm": 18.939144274393726, | |
| "output_norm/layer0": 18.939144274393726, | |
| "step": 8100 | |
| }, | |
| { | |
| "MSE": 617.7248203531905, | |
| "MSE/layer0": 617.7248203531905, | |
| "dead_code_fraction": 0.18555, | |
| "dead_code_fraction/layer0": 0.18555, | |
| "epoch": 1.34, | |
| "input_norm": 31.99861437161764, | |
| "input_norm/layer0": 31.99861437161764, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1592, | |
| "max_norm": 72.97504425048828, | |
| "max_norm/layer0": 72.97504425048828, | |
| "mean_norm": 44.289913177490234, | |
| "mean_norm/layer0": 44.289913177490234, | |
| "multicode_k": 1, | |
| "output_norm": 18.963457323710102, | |
| "output_norm/layer0": 18.963457323710102, | |
| "step": 8150 | |
| }, | |
| { | |
| "MSE": 617.1626446533202, | |
| "MSE/layer0": 617.1626446533202, | |
| "dead_code_fraction": 0.1856, | |
| "dead_code_fraction/layer0": 0.1856, | |
| "epoch": 1.34, | |
| "input_norm": 31.998610553741443, | |
| "input_norm/layer0": 31.998610553741443, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1339, | |
| "max_norm": 73.06546020507812, | |
| "max_norm/layer0": 73.06546020507812, | |
| "mean_norm": 44.32819747924805, | |
| "mean_norm/layer0": 44.32819747924805, | |
| "multicode_k": 1, | |
| "output_norm": 18.980771627426144, | |
| "output_norm/layer0": 18.980771627426144, | |
| "step": 8200 | |
| }, | |
| { | |
| "MSE": 616.5359758504233, | |
| "MSE/layer0": 616.5359758504233, | |
| "dead_code_fraction": 0.18785, | |
| "dead_code_fraction/layer0": 0.18785, | |
| "epoch": 1.35, | |
| "input_norm": 31.99861484845479, | |
| "input_norm/layer0": 31.99861484845479, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1287, | |
| "max_norm": 73.1684341430664, | |
| "max_norm/layer0": 73.1684341430664, | |
| "mean_norm": 44.36627197265625, | |
| "mean_norm/layer0": 44.36627197265625, | |
| "multicode_k": 1, | |
| "output_norm": 19.002285525004055, | |
| "output_norm/layer0": 19.002285525004055, | |
| "step": 8250 | |
| }, | |
| { | |
| "MSE": 616.9324924723311, | |
| "MSE/layer0": 616.9324924723311, | |
| "dead_code_fraction": 0.18715, | |
| "dead_code_fraction/layer0": 0.18715, | |
| "epoch": 1.35, | |
| "input_norm": 31.998625895182286, | |
| "input_norm/layer0": 31.998625895182286, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1575, | |
| "max_norm": 73.259521484375, | |
| "max_norm/layer0": 73.259521484375, | |
| "mean_norm": 44.40446090698242, | |
| "mean_norm/layer0": 44.40446090698242, | |
| "multicode_k": 1, | |
| "output_norm": 18.992992315292362, | |
| "output_norm/layer0": 18.992992315292362, | |
| "step": 8300 | |
| }, | |
| { | |
| "MSE": 616.2650039672851, | |
| "MSE/layer0": 616.2650039672851, | |
| "dead_code_fraction": 0.18655, | |
| "dead_code_fraction/layer0": 0.18655, | |
| "epoch": 1.36, | |
| "input_norm": 31.99862662315369, | |
| "input_norm/layer0": 31.99862662315369, | |
| "learning_rate": 0.0005, | |
| "loss": 2.139, | |
| "max_norm": 73.36270141601562, | |
| "max_norm/layer0": 73.36270141601562, | |
| "mean_norm": 44.44254493713379, | |
| "mean_norm/layer0": 44.44254493713379, | |
| "multicode_k": 1, | |
| "output_norm": 19.00672375679015, | |
| "output_norm/layer0": 19.00672375679015, | |
| "step": 8350 | |
| }, | |
| { | |
| "MSE": 615.5159185791019, | |
| "MSE/layer0": 615.5159185791019, | |
| "dead_code_fraction": 0.18685, | |
| "dead_code_fraction/layer0": 0.18685, | |
| "epoch": 1.36, | |
| "input_norm": 31.998618663152055, | |
| "input_norm/layer0": 31.998618663152055, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1207, | |
| "max_norm": 73.45561981201172, | |
| "max_norm/layer0": 73.45561981201172, | |
| "mean_norm": 44.48077201843262, | |
| "mean_norm/layer0": 44.48077201843262, | |
| "multicode_k": 1, | |
| "output_norm": 19.030768597920748, | |
| "output_norm/layer0": 19.030768597920748, | |
| "step": 8400 | |
| }, | |
| { | |
| "MSE": 615.7112675984704, | |
| "MSE/layer0": 615.7112675984704, | |
| "dead_code_fraction": 0.18675, | |
| "dead_code_fraction/layer0": 0.18675, | |
| "epoch": 1.37, | |
| "input_norm": 31.99863114674885, | |
| "input_norm/layer0": 31.99863114674885, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1394, | |
| "max_norm": 73.54468536376953, | |
| "max_norm/layer0": 73.54468536376953, | |
| "mean_norm": 44.5194206237793, | |
| "mean_norm/layer0": 44.5194206237793, | |
| "multicode_k": 1, | |
| "output_norm": 19.03362373669942, | |
| "output_norm/layer0": 19.03362373669942, | |
| "step": 8450 | |
| }, | |
| { | |
| "MSE": 615.0864140828453, | |
| "MSE/layer0": 615.0864140828453, | |
| "dead_code_fraction": 0.1866, | |
| "dead_code_fraction/layer0": 0.1866, | |
| "epoch": 1.37, | |
| "input_norm": 31.9986399269104, | |
| "input_norm/layer0": 31.9986399269104, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1471, | |
| "max_norm": 73.64068603515625, | |
| "max_norm/layer0": 73.64068603515625, | |
| "mean_norm": 44.55780220031738, | |
| "mean_norm/layer0": 44.55780220031738, | |
| "multicode_k": 1, | |
| "output_norm": 19.04360143979391, | |
| "output_norm/layer0": 19.04360143979391, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_MSE/layer0": 613.7248421548741, | |
| "eval_accuracy": 0.5081896395873495, | |
| "eval_dead_code_fraction/layer0": 0.1885, | |
| "eval_input_norm/layer0": 31.998632826486393, | |
| "eval_loss": 2.1443779468536377, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 19.066619998676906, | |
| "eval_runtime": 158.5923, | |
| "eval_samples_per_second": 29.15, | |
| "eval_steps_per_second": 1.822, | |
| "step": 8500 | |
| }, | |
| { | |
| "MSE": 614.1585445149744, | |
| "MSE/layer0": 614.1585445149744, | |
| "dead_code_fraction": 0.18715, | |
| "dead_code_fraction/layer0": 0.18715, | |
| "epoch": 1.38, | |
| "input_norm": 31.99863867441813, | |
| "input_norm/layer0": 31.99863867441813, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1506, | |
| "max_norm": 73.73002624511719, | |
| "max_norm/layer0": 73.73002624511719, | |
| "mean_norm": 44.597002029418945, | |
| "mean_norm/layer0": 44.597002029418945, | |
| "multicode_k": 1, | |
| "output_norm": 19.06499721844991, | |
| "output_norm/layer0": 19.06499721844991, | |
| "step": 8550 | |
| }, | |
| { | |
| "MSE": 614.256539204915, | |
| "MSE/layer0": 614.256539204915, | |
| "dead_code_fraction": 0.1879, | |
| "dead_code_fraction/layer0": 0.1879, | |
| "epoch": 1.38, | |
| "input_norm": 31.998648173014317, | |
| "input_norm/layer0": 31.998648173014317, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1643, | |
| "max_norm": 73.80333709716797, | |
| "max_norm/layer0": 73.80333709716797, | |
| "mean_norm": 44.63543891906738, | |
| "mean_norm/layer0": 44.63543891906738, | |
| "multicode_k": 1, | |
| "output_norm": 19.078293412526467, | |
| "output_norm/layer0": 19.078293412526467, | |
| "step": 8600 | |
| }, | |
| { | |
| "MSE": 613.3546946207681, | |
| "MSE/layer0": 613.3546946207681, | |
| "dead_code_fraction": 0.1879, | |
| "dead_code_fraction/layer0": 0.1879, | |
| "epoch": 1.39, | |
| "input_norm": 31.99864864667257, | |
| "input_norm/layer0": 31.99864864667257, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1535, | |
| "max_norm": 73.89517974853516, | |
| "max_norm/layer0": 73.89517974853516, | |
| "mean_norm": 44.674211502075195, | |
| "mean_norm/layer0": 44.674211502075195, | |
| "multicode_k": 1, | |
| "output_norm": 19.09559381167095, | |
| "output_norm/layer0": 19.09559381167095, | |
| "step": 8650 | |
| }, | |
| { | |
| "MSE": 613.6053087361654, | |
| "MSE/layer0": 613.6053087361654, | |
| "dead_code_fraction": 0.18645, | |
| "dead_code_fraction/layer0": 0.18645, | |
| "epoch": 1.39, | |
| "input_norm": 31.998652140299477, | |
| "input_norm/layer0": 31.998652140299477, | |
| "learning_rate": 0.0005, | |
| "loss": 2.137, | |
| "max_norm": 73.9770736694336, | |
| "max_norm/layer0": 73.9770736694336, | |
| "mean_norm": 44.71265983581543, | |
| "mean_norm/layer0": 44.71265983581543, | |
| "multicode_k": 1, | |
| "output_norm": 19.098618446985878, | |
| "output_norm/layer0": 19.098618446985878, | |
| "step": 8700 | |
| }, | |
| { | |
| "MSE": 613.292506408691, | |
| "MSE/layer0": 613.292506408691, | |
| "dead_code_fraction": 0.1876, | |
| "dead_code_fraction/layer0": 0.1876, | |
| "epoch": 1.4, | |
| "input_norm": 31.998654588063562, | |
| "input_norm/layer0": 31.998654588063562, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1482, | |
| "max_norm": 74.05269622802734, | |
| "max_norm/layer0": 74.05269622802734, | |
| "mean_norm": 44.750946044921875, | |
| "mean_norm/layer0": 44.750946044921875, | |
| "multicode_k": 1, | |
| "output_norm": 19.104494848251342, | |
| "output_norm/layer0": 19.104494848251342, | |
| "step": 8750 | |
| }, | |
| { | |
| "MSE": 613.8824895222986, | |
| "MSE/layer0": 613.8824895222986, | |
| "dead_code_fraction": 0.1868, | |
| "dead_code_fraction/layer0": 0.1868, | |
| "epoch": 1.4, | |
| "input_norm": 31.998655049006146, | |
| "input_norm/layer0": 31.998655049006146, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1331, | |
| "max_norm": 74.12651824951172, | |
| "max_norm/layer0": 74.12651824951172, | |
| "mean_norm": 44.7886848449707, | |
| "mean_norm/layer0": 44.7886848449707, | |
| "multicode_k": 1, | |
| "output_norm": 19.110120385487882, | |
| "output_norm/layer0": 19.110120385487882, | |
| "step": 8800 | |
| }, | |
| { | |
| "MSE": 613.8568901570636, | |
| "MSE/layer0": 613.8568901570636, | |
| "dead_code_fraction": 0.18675, | |
| "dead_code_fraction/layer0": 0.18675, | |
| "epoch": 1.41, | |
| "input_norm": 31.99864878336588, | |
| "input_norm/layer0": 31.99864878336588, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1038, | |
| "max_norm": 74.20288848876953, | |
| "max_norm/layer0": 74.20288848876953, | |
| "mean_norm": 44.82563400268555, | |
| "mean_norm/layer0": 44.82563400268555, | |
| "multicode_k": 1, | |
| "output_norm": 19.120709832509363, | |
| "output_norm/layer0": 19.120709832509363, | |
| "step": 8850 | |
| }, | |
| { | |
| "MSE": 612.8203454589843, | |
| "MSE/layer0": 612.8203454589843, | |
| "dead_code_fraction": 0.18635, | |
| "dead_code_fraction/layer0": 0.18635, | |
| "epoch": 1.41, | |
| "input_norm": 31.99866209030152, | |
| "input_norm/layer0": 31.99866209030152, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1619, | |
| "max_norm": 74.27029418945312, | |
| "max_norm/layer0": 74.27029418945312, | |
| "mean_norm": 44.863847732543945, | |
| "mean_norm/layer0": 44.863847732543945, | |
| "multicode_k": 1, | |
| "output_norm": 19.13362557093303, | |
| "output_norm/layer0": 19.13362557093303, | |
| "step": 8900 | |
| }, | |
| { | |
| "MSE": 612.7508836873369, | |
| "MSE/layer0": 612.7508836873369, | |
| "dead_code_fraction": 0.1865, | |
| "dead_code_fraction/layer0": 0.1865, | |
| "epoch": 1.42, | |
| "input_norm": 31.998662964502977, | |
| "input_norm/layer0": 31.998662964502977, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1274, | |
| "max_norm": 74.35165405273438, | |
| "max_norm/layer0": 74.35165405273438, | |
| "mean_norm": 44.90276908874512, | |
| "mean_norm/layer0": 44.90276908874512, | |
| "multicode_k": 1, | |
| "output_norm": 19.13368027687074, | |
| "output_norm/layer0": 19.13368027687074, | |
| "step": 8950 | |
| }, | |
| { | |
| "MSE": 611.3088948567707, | |
| "MSE/layer0": 611.3088948567707, | |
| "dead_code_fraction": 0.18625, | |
| "dead_code_fraction/layer0": 0.18625, | |
| "epoch": 1.42, | |
| "input_norm": 31.998670199712116, | |
| "input_norm/layer0": 31.998670199712116, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1556, | |
| "max_norm": 74.43575286865234, | |
| "max_norm/layer0": 74.43575286865234, | |
| "mean_norm": 44.94179916381836, | |
| "mean_norm/layer0": 44.94179916381836, | |
| "multicode_k": 1, | |
| "output_norm": 19.165478760401413, | |
| "output_norm/layer0": 19.165478760401413, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_MSE/layer0": 610.3757424029645, | |
| "eval_accuracy": 0.5087341142897861, | |
| "eval_dead_code_fraction/layer0": 0.18805, | |
| "eval_input_norm/layer0": 31.998659288421646, | |
| "eval_loss": 2.139230489730835, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 19.181722183648382, | |
| "eval_runtime": 158.0526, | |
| "eval_samples_per_second": 29.25, | |
| "eval_steps_per_second": 1.829, | |
| "step": 9000 | |
| }, | |
| { | |
| "MSE": 611.2356985473632, | |
| "MSE/layer0": 611.2356985473632, | |
| "dead_code_fraction": 0.1879, | |
| "dead_code_fraction/layer0": 0.1879, | |
| "epoch": 1.43, | |
| "input_norm": 31.998666836420703, | |
| "input_norm/layer0": 31.998666836420703, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1388, | |
| "max_norm": 74.51050567626953, | |
| "max_norm/layer0": 74.51050567626953, | |
| "mean_norm": 44.98063850402832, | |
| "mean_norm/layer0": 44.98063850402832, | |
| "multicode_k": 1, | |
| "output_norm": 19.177389281590777, | |
| "output_norm/layer0": 19.177389281590777, | |
| "step": 9050 | |
| }, | |
| { | |
| "MSE": 610.8344569905598, | |
| "MSE/layer0": 610.8344569905598, | |
| "dead_code_fraction": 0.18865, | |
| "dead_code_fraction/layer0": 0.18865, | |
| "epoch": 1.43, | |
| "input_norm": 31.99867141723631, | |
| "input_norm/layer0": 31.99867141723631, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1328, | |
| "max_norm": 74.59440612792969, | |
| "max_norm/layer0": 74.59440612792969, | |
| "mean_norm": 45.01910400390625, | |
| "mean_norm/layer0": 45.01910400390625, | |
| "multicode_k": 1, | |
| "output_norm": 19.185275354385375, | |
| "output_norm/layer0": 19.185275354385375, | |
| "step": 9100 | |
| }, | |
| { | |
| "MSE": 610.7402758789062, | |
| "MSE/layer0": 610.7402758789062, | |
| "dead_code_fraction": 0.1871, | |
| "dead_code_fraction/layer0": 0.1871, | |
| "epoch": 1.44, | |
| "input_norm": 31.99866997400921, | |
| "input_norm/layer0": 31.99866997400921, | |
| "learning_rate": 0.0005, | |
| "loss": 2.117, | |
| "max_norm": 74.67122650146484, | |
| "max_norm/layer0": 74.67122650146484, | |
| "mean_norm": 45.05727577209473, | |
| "mean_norm/layer0": 45.05727577209473, | |
| "multicode_k": 1, | |
| "output_norm": 19.190109596252437, | |
| "output_norm/layer0": 19.190109596252437, | |
| "step": 9150 | |
| }, | |
| { | |
| "MSE": 610.1339531453451, | |
| "MSE/layer0": 610.1339531453451, | |
| "dead_code_fraction": 0.18745, | |
| "dead_code_fraction/layer0": 0.18745, | |
| "epoch": 1.44, | |
| "input_norm": 31.998679358164473, | |
| "input_norm/layer0": 31.998679358164473, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1459, | |
| "max_norm": 74.7430419921875, | |
| "max_norm/layer0": 74.7430419921875, | |
| "mean_norm": 45.095571517944336, | |
| "mean_norm/layer0": 45.095571517944336, | |
| "multicode_k": 1, | |
| "output_norm": 19.203376553853335, | |
| "output_norm/layer0": 19.203376553853335, | |
| "step": 9200 | |
| }, | |
| { | |
| "MSE": 609.6957601928709, | |
| "MSE/layer0": 609.6957601928709, | |
| "dead_code_fraction": 0.1878, | |
| "dead_code_fraction/layer0": 0.1878, | |
| "epoch": 1.45, | |
| "input_norm": 31.99868172009785, | |
| "input_norm/layer0": 31.99868172009785, | |
| "learning_rate": 0.0005, | |
| "loss": 2.142, | |
| "max_norm": 74.8177490234375, | |
| "max_norm/layer0": 74.8177490234375, | |
| "mean_norm": 45.133853912353516, | |
| "mean_norm/layer0": 45.133853912353516, | |
| "multicode_k": 1, | |
| "output_norm": 19.22210531552632, | |
| "output_norm/layer0": 19.22210531552632, | |
| "step": 9250 | |
| }, | |
| { | |
| "MSE": 609.5997785441082, | |
| "MSE/layer0": 609.5997785441082, | |
| "dead_code_fraction": 0.18805, | |
| "dead_code_fraction/layer0": 0.18805, | |
| "epoch": 1.45, | |
| "input_norm": 31.998693205515544, | |
| "input_norm/layer0": 31.998693205515544, | |
| "learning_rate": 0.0005, | |
| "loss": 2.18, | |
| "max_norm": 74.87744140625, | |
| "max_norm/layer0": 74.87744140625, | |
| "mean_norm": 45.172555923461914, | |
| "mean_norm/layer0": 45.172555923461914, | |
| "multicode_k": 1, | |
| "output_norm": 19.226630802154542, | |
| "output_norm/layer0": 19.226630802154542, | |
| "step": 9300 | |
| }, | |
| { | |
| "MSE": 609.8342389933271, | |
| "MSE/layer0": 609.8342389933271, | |
| "dead_code_fraction": 0.18735, | |
| "dead_code_fraction/layer0": 0.18735, | |
| "epoch": 1.46, | |
| "input_norm": 31.998687505722053, | |
| "input_norm/layer0": 31.998687505722053, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1164, | |
| "max_norm": 74.94609069824219, | |
| "max_norm/layer0": 74.94609069824219, | |
| "mean_norm": 45.21059799194336, | |
| "mean_norm/layer0": 45.21059799194336, | |
| "multicode_k": 1, | |
| "output_norm": 19.234882882436114, | |
| "output_norm/layer0": 19.234882882436114, | |
| "step": 9350 | |
| }, | |
| { | |
| "MSE": 609.2034523518882, | |
| "MSE/layer0": 609.2034523518882, | |
| "dead_code_fraction": 0.1869, | |
| "dead_code_fraction/layer0": 0.1869, | |
| "epoch": 1.46, | |
| "input_norm": 31.99869050979616, | |
| "input_norm/layer0": 31.99869050979616, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1316, | |
| "max_norm": 75.01142883300781, | |
| "max_norm/layer0": 75.01142883300781, | |
| "mean_norm": 45.248979568481445, | |
| "mean_norm/layer0": 45.248979568481445, | |
| "multicode_k": 1, | |
| "output_norm": 19.247848326365144, | |
| "output_norm/layer0": 19.247848326365144, | |
| "step": 9400 | |
| }, | |
| { | |
| "MSE": 609.0324313354497, | |
| "MSE/layer0": 609.0324313354497, | |
| "dead_code_fraction": 0.18745, | |
| "dead_code_fraction/layer0": 0.18745, | |
| "epoch": 1.47, | |
| "input_norm": 31.99869132041931, | |
| "input_norm/layer0": 31.99869132041931, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1214, | |
| "max_norm": 75.07112121582031, | |
| "max_norm/layer0": 75.07112121582031, | |
| "mean_norm": 45.287214279174805, | |
| "mean_norm/layer0": 45.287214279174805, | |
| "multicode_k": 1, | |
| "output_norm": 19.25519768079122, | |
| "output_norm/layer0": 19.25519768079122, | |
| "step": 9450 | |
| }, | |
| { | |
| "MSE": 607.8594933064783, | |
| "MSE/layer0": 607.8594933064783, | |
| "dead_code_fraction": 0.18835, | |
| "dead_code_fraction/layer0": 0.18835, | |
| "epoch": 1.47, | |
| "input_norm": 31.998687744140625, | |
| "input_norm/layer0": 31.998687744140625, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1067, | |
| "max_norm": 75.15766143798828, | |
| "max_norm/layer0": 75.15766143798828, | |
| "mean_norm": 45.32560920715332, | |
| "mean_norm/layer0": 45.32560920715332, | |
| "multicode_k": 1, | |
| "output_norm": 19.27704188664754, | |
| "output_norm/layer0": 19.27704188664754, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_MSE/layer0": 608.6866096036146, | |
| "eval_accuracy": 0.5090880757079915, | |
| "eval_dead_code_fraction/layer0": 0.18755, | |
| "eval_input_norm/layer0": 31.998685899710146, | |
| "eval_loss": 2.1350600719451904, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 19.283631281241068, | |
| "eval_runtime": 158.1797, | |
| "eval_samples_per_second": 29.226, | |
| "eval_steps_per_second": 1.827, | |
| "step": 9500 | |
| }, | |
| { | |
| "MSE": 607.5302533983886, | |
| "MSE/layer0": 607.5302533983886, | |
| "dead_code_fraction": 0.1872, | |
| "dead_code_fraction/layer0": 0.1872, | |
| "epoch": 1.48, | |
| "input_norm": 31.99869025141972, | |
| "input_norm/layer0": 31.99869025141972, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1075, | |
| "max_norm": 75.2263412475586, | |
| "max_norm/layer0": 75.2263412475586, | |
| "mean_norm": 45.363752365112305, | |
| "mean_norm/layer0": 45.363752365112305, | |
| "multicode_k": 1, | |
| "output_norm": 19.2927733112995, | |
| "output_norm/layer0": 19.2927733112995, | |
| "step": 9550 | |
| }, | |
| { | |
| "MSE": 608.902215973978, | |
| "MSE/layer0": 608.902215973978, | |
| "dead_code_fraction": 0.187, | |
| "dead_code_fraction/layer0": 0.187, | |
| "epoch": 2.0, | |
| "input_norm": 31.998686492629858, | |
| "input_norm/layer0": 31.998686492629858, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1013, | |
| "max_norm": 75.294677734375, | |
| "max_norm/layer0": 75.294677734375, | |
| "mean_norm": 45.40024948120117, | |
| "mean_norm/layer0": 45.40024948120117, | |
| "multicode_k": 1, | |
| "output_norm": 19.268582361188244, | |
| "output_norm/layer0": 19.268582361188244, | |
| "step": 9600 | |
| }, | |
| { | |
| "MSE": 606.3796120198567, | |
| "MSE/layer0": 606.3796120198567, | |
| "dead_code_fraction": 0.18715, | |
| "dead_code_fraction/layer0": 0.18715, | |
| "epoch": 2.01, | |
| "input_norm": 31.998710851669312, | |
| "input_norm/layer0": 31.998710851669312, | |
| "learning_rate": 0.0005, | |
| "loss": 2.17, | |
| "max_norm": 75.35186004638672, | |
| "max_norm/layer0": 75.35186004638672, | |
| "mean_norm": 45.4382266998291, | |
| "mean_norm/layer0": 45.4382266998291, | |
| "multicode_k": 1, | |
| "output_norm": 19.314183537165327, | |
| "output_norm/layer0": 19.314183537165327, | |
| "step": 9650 | |
| }, | |
| { | |
| "MSE": 606.9239878336591, | |
| "MSE/layer0": 606.9239878336591, | |
| "dead_code_fraction": 0.1877, | |
| "dead_code_fraction/layer0": 0.1877, | |
| "epoch": 2.01, | |
| "input_norm": 31.99869126637776, | |
| "input_norm/layer0": 31.99869126637776, | |
| "learning_rate": 0.0005, | |
| "loss": 2.0661, | |
| "max_norm": 75.44601440429688, | |
| "max_norm/layer0": 75.44601440429688, | |
| "mean_norm": 45.47653579711914, | |
| "mean_norm/layer0": 45.47653579711914, | |
| "multicode_k": 1, | |
| "output_norm": 19.313949975967407, | |
| "output_norm/layer0": 19.313949975967407, | |
| "step": 9700 | |
| }, | |
| { | |
| "MSE": 606.1468785603844, | |
| "MSE/layer0": 606.1468785603844, | |
| "dead_code_fraction": 0.18755, | |
| "dead_code_fraction/layer0": 0.18755, | |
| "epoch": 2.02, | |
| "input_norm": 31.998706903457652, | |
| "input_norm/layer0": 31.998706903457652, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1325, | |
| "max_norm": 75.6237564086914, | |
| "max_norm/layer0": 75.6237564086914, | |
| "mean_norm": 45.51473808288574, | |
| "mean_norm/layer0": 45.51473808288574, | |
| "multicode_k": 1, | |
| "output_norm": 19.331538470586143, | |
| "output_norm/layer0": 19.331538470586143, | |
| "step": 9750 | |
| }, | |
| { | |
| "MSE": 606.2908910115561, | |
| "MSE/layer0": 606.2908910115561, | |
| "dead_code_fraction": 0.18715, | |
| "dead_code_fraction/layer0": 0.18715, | |
| "epoch": 2.02, | |
| "input_norm": 31.998702777226768, | |
| "input_norm/layer0": 31.998702777226768, | |
| "learning_rate": 0.0005, | |
| "loss": 2.0999, | |
| "max_norm": 75.77623748779297, | |
| "max_norm/layer0": 75.77623748779297, | |
| "mean_norm": 45.55307388305664, | |
| "mean_norm/layer0": 45.55307388305664, | |
| "multicode_k": 1, | |
| "output_norm": 19.340178826649982, | |
| "output_norm/layer0": 19.340178826649982, | |
| "step": 9800 | |
| }, | |
| { | |
| "MSE": 605.7215723673501, | |
| "MSE/layer0": 605.7215723673501, | |
| "dead_code_fraction": 0.18635, | |
| "dead_code_fraction/layer0": 0.18635, | |
| "epoch": 2.03, | |
| "input_norm": 31.998708073298122, | |
| "input_norm/layer0": 31.998708073298122, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1015, | |
| "max_norm": 75.92095184326172, | |
| "max_norm/layer0": 75.92095184326172, | |
| "mean_norm": 45.591548919677734, | |
| "mean_norm/layer0": 45.591548919677734, | |
| "multicode_k": 1, | |
| "output_norm": 19.351260058085124, | |
| "output_norm/layer0": 19.351260058085124, | |
| "step": 9850 | |
| }, | |
| { | |
| "MSE": 605.7307819620769, | |
| "MSE/layer0": 605.7307819620769, | |
| "dead_code_fraction": 0.1879, | |
| "dead_code_fraction/layer0": 0.1879, | |
| "epoch": 2.03, | |
| "input_norm": 31.99871432304383, | |
| "input_norm/layer0": 31.99871432304383, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1079, | |
| "max_norm": 76.06104278564453, | |
| "max_norm/layer0": 76.06104278564453, | |
| "mean_norm": 45.62945747375488, | |
| "mean_norm/layer0": 45.62945747375488, | |
| "multicode_k": 1, | |
| "output_norm": 19.36078415234882, | |
| "output_norm/layer0": 19.36078415234882, | |
| "step": 9900 | |
| }, | |
| { | |
| "MSE": 605.7736006673174, | |
| "MSE/layer0": 605.7736006673174, | |
| "dead_code_fraction": 0.1873, | |
| "dead_code_fraction/layer0": 0.1873, | |
| "epoch": 2.04, | |
| "input_norm": 31.99871180534363, | |
| "input_norm/layer0": 31.99871180534363, | |
| "learning_rate": 0.0005, | |
| "loss": 2.102, | |
| "max_norm": 76.22486877441406, | |
| "max_norm/layer0": 76.22486877441406, | |
| "mean_norm": 45.66733360290527, | |
| "mean_norm/layer0": 45.66733360290527, | |
| "multicode_k": 1, | |
| "output_norm": 19.36815209388733, | |
| "output_norm/layer0": 19.36815209388733, | |
| "step": 9950 | |
| }, | |
| { | |
| "MSE": 604.9809751383466, | |
| "MSE/layer0": 604.9809751383466, | |
| "dead_code_fraction": 0.1872, | |
| "dead_code_fraction/layer0": 0.1872, | |
| "epoch": 2.04, | |
| "input_norm": 31.998728539148978, | |
| "input_norm/layer0": 31.998728539148978, | |
| "learning_rate": 0.0005, | |
| "loss": 2.1536, | |
| "max_norm": 76.40007019042969, | |
| "max_norm/layer0": 76.40007019042969, | |
| "mean_norm": 45.70543670654297, | |
| "mean_norm/layer0": 45.70543670654297, | |
| "multicode_k": 1, | |
| "output_norm": 19.38911464373271, | |
| "output_norm/layer0": 19.38911464373271, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_MSE/layer0": 604.5096733395267, | |
| "eval_accuracy": 0.5091345939349958, | |
| "eval_dead_code_fraction/layer0": 0.18795, | |
| "eval_input_norm/layer0": 31.99872850438308, | |
| "eval_loss": 2.132894992828369, | |
| "eval_multicode_k": 1, | |
| "eval_output_norm/layer0": 19.389702240368152, | |
| "eval_runtime": 158.9177, | |
| "eval_samples_per_second": 29.091, | |
| "eval_steps_per_second": 1.819, | |
| "step": 10000 | |
| }, | |
| { | |
| "MSE": 0.0, | |
| "MSE/layer0": 0.0, | |
| "dead_code_fraction": 1.0, | |
| "dead_code_fraction/layer0": 1.0, | |
| "epoch": 2.04, | |
| "input_norm": 0.0, | |
| "input_norm/layer0": 0.0, | |
| "max_norm": 76.40007019042969, | |
| "max_norm/layer0": 76.40007019042969, | |
| "mean_norm": 45.70543670654297, | |
| "mean_norm/layer0": 45.70543670654297, | |
| "multicode_k": 1, | |
| "output_norm": 0.0, | |
| "output_norm/layer0": 0.0, | |
| "step": 10000, | |
| "total_flos": 7.43098011353088e+16, | |
| "train_loss": 2.325971780395508, | |
| "train_runtime": 15639.0026, | |
| "train_samples_per_second": 61.385, | |
| "train_steps_per_second": 0.639 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 10000, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 500, | |
| "total_flos": 7.43098011353088e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |