| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 50.0, | |
| "global_step": 3200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 30.29538917541504, | |
| "eval_mae": 3.445791244506836, | |
| "eval_mse": 30.29538917541504, | |
| "eval_rmse": 5.504124641418457, | |
| "eval_runtime": 0.1613, | |
| "eval_samples_per_second": 396.717, | |
| "eval_steps_per_second": 49.59, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 22.2762508392334, | |
| "eval_mae": 2.9041895866394043, | |
| "eval_mse": 22.276248931884766, | |
| "eval_rmse": 4.7197723388671875, | |
| "eval_runtime": 0.1462, | |
| "eval_samples_per_second": 437.674, | |
| "eval_steps_per_second": 54.709, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 19.444108963012695, | |
| "eval_mae": 2.726284980773926, | |
| "eval_mse": 19.444108963012695, | |
| "eval_rmse": 4.409547328948975, | |
| "eval_runtime": 0.2087, | |
| "eval_samples_per_second": 306.646, | |
| "eval_steps_per_second": 38.331, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 17.12045669555664, | |
| "eval_mae": 2.5535168647766113, | |
| "eval_mse": 17.12045669555664, | |
| "eval_rmse": 4.1376872062683105, | |
| "eval_runtime": 0.3272, | |
| "eval_samples_per_second": 195.59, | |
| "eval_steps_per_second": 24.449, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 16.316720962524414, | |
| "eval_mae": 2.692227840423584, | |
| "eval_mse": 16.316720962524414, | |
| "eval_rmse": 4.039396286010742, | |
| "eval_runtime": 0.4287, | |
| "eval_samples_per_second": 149.284, | |
| "eval_steps_per_second": 18.661, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 15.293220520019531, | |
| "eval_mae": 2.601778268814087, | |
| "eval_mse": 15.293218612670898, | |
| "eval_rmse": 3.9106545448303223, | |
| "eval_runtime": 0.2956, | |
| "eval_samples_per_second": 216.483, | |
| "eval_steps_per_second": 27.06, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 14.56900691986084, | |
| "eval_mae": 2.5558180809020996, | |
| "eval_mse": 14.569008827209473, | |
| "eval_rmse": 3.816937208175659, | |
| "eval_runtime": 0.4164, | |
| "eval_samples_per_second": 153.681, | |
| "eval_steps_per_second": 19.21, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 8.4375e-06, | |
| "loss": 3.9689, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 13.64460277557373, | |
| "eval_mae": 2.39548397064209, | |
| "eval_mse": 13.64460277557373, | |
| "eval_rmse": 3.6938600540161133, | |
| "eval_runtime": 0.4311, | |
| "eval_samples_per_second": 148.456, | |
| "eval_steps_per_second": 18.557, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 13.105853080749512, | |
| "eval_mae": 2.4013423919677734, | |
| "eval_mse": 13.105853080749512, | |
| "eval_rmse": 3.6202006340026855, | |
| "eval_runtime": 0.4297, | |
| "eval_samples_per_second": 148.951, | |
| "eval_steps_per_second": 18.619, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 13.048660278320312, | |
| "eval_mae": 2.3826305866241455, | |
| "eval_mse": 13.048660278320312, | |
| "eval_rmse": 3.612293004989624, | |
| "eval_runtime": 0.2978, | |
| "eval_samples_per_second": 214.88, | |
| "eval_steps_per_second": 26.86, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 12.558533668518066, | |
| "eval_mae": 2.3683643341064453, | |
| "eval_mse": 12.55853271484375, | |
| "eval_rmse": 3.54380202293396, | |
| "eval_runtime": 0.4369, | |
| "eval_samples_per_second": 146.483, | |
| "eval_steps_per_second": 18.31, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 12.03190803527832, | |
| "eval_mae": 2.2633650302886963, | |
| "eval_mse": 12.03190803527832, | |
| "eval_rmse": 3.4687039852142334, | |
| "eval_runtime": 0.4152, | |
| "eval_samples_per_second": 154.131, | |
| "eval_steps_per_second": 19.266, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 12.05549144744873, | |
| "eval_mae": 2.288928508758545, | |
| "eval_mse": 12.05549144744873, | |
| "eval_rmse": 3.472101926803589, | |
| "eval_runtime": 0.4311, | |
| "eval_samples_per_second": 148.454, | |
| "eval_steps_per_second": 18.557, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 11.724568367004395, | |
| "eval_mae": 2.2129979133605957, | |
| "eval_mse": 11.724568367004395, | |
| "eval_rmse": 3.4241156578063965, | |
| "eval_runtime": 0.4413, | |
| "eval_samples_per_second": 145.038, | |
| "eval_steps_per_second": 18.13, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 11.610068321228027, | |
| "eval_mae": 2.238194227218628, | |
| "eval_mse": 11.610069274902344, | |
| "eval_rmse": 3.4073550701141357, | |
| "eval_runtime": 0.41, | |
| "eval_samples_per_second": 156.087, | |
| "eval_steps_per_second": 19.511, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 15.62, | |
| "learning_rate": 6.875e-06, | |
| "loss": 0.8847, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 11.409879684448242, | |
| "eval_mae": 2.171018123626709, | |
| "eval_mse": 11.40987777709961, | |
| "eval_rmse": 3.3778510093688965, | |
| "eval_runtime": 0.4279, | |
| "eval_samples_per_second": 149.579, | |
| "eval_steps_per_second": 18.697, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 11.323513984680176, | |
| "eval_mae": 2.1948208808898926, | |
| "eval_mse": 11.323514938354492, | |
| "eval_rmse": 3.3650431632995605, | |
| "eval_runtime": 0.4359, | |
| "eval_samples_per_second": 146.809, | |
| "eval_steps_per_second": 18.351, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 10.922426223754883, | |
| "eval_mae": 2.1065478324890137, | |
| "eval_mse": 10.9224271774292, | |
| "eval_rmse": 3.3049094676971436, | |
| "eval_runtime": 0.4318, | |
| "eval_samples_per_second": 148.218, | |
| "eval_steps_per_second": 18.527, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 10.802040100097656, | |
| "eval_mae": 2.1256909370422363, | |
| "eval_mse": 10.80203914642334, | |
| "eval_rmse": 3.2866456508636475, | |
| "eval_runtime": 0.2822, | |
| "eval_samples_per_second": 226.81, | |
| "eval_steps_per_second": 28.351, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 10.668560981750488, | |
| "eval_mae": 2.102822780609131, | |
| "eval_mse": 10.668560981750488, | |
| "eval_rmse": 3.2662763595581055, | |
| "eval_runtime": 0.4297, | |
| "eval_samples_per_second": 148.924, | |
| "eval_steps_per_second": 18.616, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 10.928054809570312, | |
| "eval_mae": 2.201329231262207, | |
| "eval_mse": 10.928054809570312, | |
| "eval_rmse": 3.3057608604431152, | |
| "eval_runtime": 0.4133, | |
| "eval_samples_per_second": 154.835, | |
| "eval_steps_per_second": 19.354, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 10.920714378356934, | |
| "eval_mae": 2.1609740257263184, | |
| "eval_mse": 10.920713424682617, | |
| "eval_rmse": 3.30465030670166, | |
| "eval_runtime": 0.4362, | |
| "eval_samples_per_second": 146.723, | |
| "eval_steps_per_second": 18.34, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 10.914674758911133, | |
| "eval_mae": 2.1464970111846924, | |
| "eval_mse": 10.914674758911133, | |
| "eval_rmse": 3.303736448287964, | |
| "eval_runtime": 0.3154, | |
| "eval_samples_per_second": 202.948, | |
| "eval_steps_per_second": 25.369, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 23.44, | |
| "learning_rate": 5.3125e-06, | |
| "loss": 0.6489, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 10.826024055480957, | |
| "eval_mae": 2.166719436645508, | |
| "eval_mse": 10.826024055480957, | |
| "eval_rmse": 3.290292501449585, | |
| "eval_runtime": 0.4282, | |
| "eval_samples_per_second": 149.463, | |
| "eval_steps_per_second": 18.683, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 10.914568901062012, | |
| "eval_mae": 2.1764001846313477, | |
| "eval_mse": 10.914569854736328, | |
| "eval_rmse": 3.303720712661743, | |
| "eval_runtime": 0.4401, | |
| "eval_samples_per_second": 145.428, | |
| "eval_steps_per_second": 18.178, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 11.035606384277344, | |
| "eval_mae": 2.2225704193115234, | |
| "eval_mse": 11.035605430603027, | |
| "eval_rmse": 3.321988105773926, | |
| "eval_runtime": 0.4326, | |
| "eval_samples_per_second": 147.959, | |
| "eval_steps_per_second": 18.495, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 11.038973808288574, | |
| "eval_mae": 2.2142393589019775, | |
| "eval_mse": 11.038971900939941, | |
| "eval_rmse": 3.3224947452545166, | |
| "eval_runtime": 0.435, | |
| "eval_samples_per_second": 147.12, | |
| "eval_steps_per_second": 18.39, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 11.178277015686035, | |
| "eval_mae": 2.2388522624969482, | |
| "eval_mse": 11.178277015686035, | |
| "eval_rmse": 3.343393087387085, | |
| "eval_runtime": 0.4403, | |
| "eval_samples_per_second": 145.349, | |
| "eval_steps_per_second": 18.169, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 10.676009178161621, | |
| "eval_mae": 2.163010358810425, | |
| "eval_mse": 10.676008224487305, | |
| "eval_rmse": 3.26741623878479, | |
| "eval_runtime": 0.4243, | |
| "eval_samples_per_second": 150.831, | |
| "eval_steps_per_second": 18.854, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 10.856060028076172, | |
| "eval_mae": 2.1758837699890137, | |
| "eval_mse": 10.856060981750488, | |
| "eval_rmse": 3.294853687286377, | |
| "eval_runtime": 0.4294, | |
| "eval_samples_per_second": 149.044, | |
| "eval_steps_per_second": 18.63, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 10.435661315917969, | |
| "eval_mae": 2.1428065299987793, | |
| "eval_mse": 10.435661315917969, | |
| "eval_rmse": 3.2304275035858154, | |
| "eval_runtime": 0.4438, | |
| "eval_samples_per_second": 144.195, | |
| "eval_steps_per_second": 18.024, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.5499, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 10.568000793457031, | |
| "eval_mae": 2.1557540893554688, | |
| "eval_mse": 10.568000793457031, | |
| "eval_rmse": 3.2508461475372314, | |
| "eval_runtime": 0.4281, | |
| "eval_samples_per_second": 149.484, | |
| "eval_steps_per_second": 18.686, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 10.61816120147705, | |
| "eval_mae": 2.1454195976257324, | |
| "eval_mse": 10.618160247802734, | |
| "eval_rmse": 3.258551836013794, | |
| "eval_runtime": 0.418, | |
| "eval_samples_per_second": 153.1, | |
| "eval_steps_per_second": 19.137, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 10.70760726928711, | |
| "eval_mae": 2.185828924179077, | |
| "eval_mse": 10.707606315612793, | |
| "eval_rmse": 3.272247791290283, | |
| "eval_runtime": 0.4346, | |
| "eval_samples_per_second": 147.25, | |
| "eval_steps_per_second": 18.406, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 10.65221118927002, | |
| "eval_mae": 2.1475348472595215, | |
| "eval_mse": 10.65221118927002, | |
| "eval_rmse": 3.263772487640381, | |
| "eval_runtime": 0.4365, | |
| "eval_samples_per_second": 146.633, | |
| "eval_steps_per_second": 18.329, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 10.60755443572998, | |
| "eval_mae": 2.1597788333892822, | |
| "eval_mse": 10.607553482055664, | |
| "eval_rmse": 3.2569239139556885, | |
| "eval_runtime": 0.3132, | |
| "eval_samples_per_second": 204.312, | |
| "eval_steps_per_second": 25.539, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 10.558968544006348, | |
| "eval_mae": 2.1506295204162598, | |
| "eval_mse": 10.558967590332031, | |
| "eval_rmse": 3.2494564056396484, | |
| "eval_runtime": 0.4253, | |
| "eval_samples_per_second": 150.493, | |
| "eval_steps_per_second": 18.812, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 10.674619674682617, | |
| "eval_mae": 2.1740453243255615, | |
| "eval_mse": 10.674619674682617, | |
| "eval_rmse": 3.2672035694122314, | |
| "eval_runtime": 0.4355, | |
| "eval_samples_per_second": 146.966, | |
| "eval_steps_per_second": 18.371, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 10.524478912353516, | |
| "eval_mae": 2.1458582878112793, | |
| "eval_mse": 10.5244779586792, | |
| "eval_rmse": 3.244145154953003, | |
| "eval_runtime": 0.4374, | |
| "eval_samples_per_second": 146.309, | |
| "eval_steps_per_second": 18.289, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 39.06, | |
| "learning_rate": 2.1875000000000002e-06, | |
| "loss": 0.5012, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 10.667997360229492, | |
| "eval_mae": 2.155714273452759, | |
| "eval_mse": 10.667997360229492, | |
| "eval_rmse": 3.2661900520324707, | |
| "eval_runtime": 0.3418, | |
| "eval_samples_per_second": 187.219, | |
| "eval_steps_per_second": 23.402, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 10.641304016113281, | |
| "eval_mae": 2.174771547317505, | |
| "eval_mse": 10.641304016113281, | |
| "eval_rmse": 3.262101173400879, | |
| "eval_runtime": 0.4419, | |
| "eval_samples_per_second": 144.834, | |
| "eval_steps_per_second": 18.104, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 10.72380542755127, | |
| "eval_mae": 2.2047784328460693, | |
| "eval_mse": 10.723804473876953, | |
| "eval_rmse": 3.274722099304199, | |
| "eval_runtime": 0.4025, | |
| "eval_samples_per_second": 158.999, | |
| "eval_steps_per_second": 19.875, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 10.794100761413574, | |
| "eval_mae": 2.212890863418579, | |
| "eval_mse": 10.79410171508789, | |
| "eval_rmse": 3.285437822341919, | |
| "eval_runtime": 0.4371, | |
| "eval_samples_per_second": 146.435, | |
| "eval_steps_per_second": 18.304, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 10.679245948791504, | |
| "eval_mae": 2.199676513671875, | |
| "eval_mse": 10.679245948791504, | |
| "eval_rmse": 3.267911672592163, | |
| "eval_runtime": 0.4079, | |
| "eval_samples_per_second": 156.894, | |
| "eval_steps_per_second": 19.612, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 10.524163246154785, | |
| "eval_mae": 2.1674587726593018, | |
| "eval_mse": 10.524163246154785, | |
| "eval_rmse": 3.2440967559814453, | |
| "eval_runtime": 0.2984, | |
| "eval_samples_per_second": 214.505, | |
| "eval_steps_per_second": 26.813, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 10.532914161682129, | |
| "eval_mae": 2.177356481552124, | |
| "eval_mse": 10.532913208007812, | |
| "eval_rmse": 3.2454450130462646, | |
| "eval_runtime": 0.4463, | |
| "eval_samples_per_second": 143.398, | |
| "eval_steps_per_second": 17.925, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 46.88, | |
| "learning_rate": 6.25e-07, | |
| "loss": 0.471, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 10.552495002746582, | |
| "eval_mae": 2.182786464691162, | |
| "eval_mse": 10.552494049072266, | |
| "eval_rmse": 3.248460292816162, | |
| "eval_runtime": 0.3828, | |
| "eval_samples_per_second": 167.191, | |
| "eval_steps_per_second": 20.899, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 10.577668190002441, | |
| "eval_mae": 2.177499294281006, | |
| "eval_mse": 10.577667236328125, | |
| "eval_rmse": 3.2523326873779297, | |
| "eval_runtime": 0.4436, | |
| "eval_samples_per_second": 144.276, | |
| "eval_steps_per_second": 18.035, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_loss": 10.599947929382324, | |
| "eval_mae": 2.184239625930786, | |
| "eval_mse": 10.59994888305664, | |
| "eval_rmse": 3.255756378173828, | |
| "eval_runtime": 0.2975, | |
| "eval_samples_per_second": 215.1, | |
| "eval_steps_per_second": 26.888, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 10.600202560424805, | |
| "eval_mae": 2.1849870681762695, | |
| "eval_mse": 10.600202560424805, | |
| "eval_rmse": 3.2557952404022217, | |
| "eval_runtime": 0.4398, | |
| "eval_samples_per_second": 145.517, | |
| "eval_steps_per_second": 18.19, | |
| "step": 3200 | |
| } | |
| ], | |
| "max_steps": 3200, | |
| "num_train_epochs": 50, | |
| "total_flos": 1692195608947200.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |