| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 21.70479763169194, | |
| "global_step": 30000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0002777777777777778, | |
| "loss": 7.7491, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0005555555555555556, | |
| "loss": 6.5663, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.0008333333333333334, | |
| "loss": 6.293, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.0009929078014184398, | |
| "loss": 5.636, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.000975177304964539, | |
| "loss": 4.3729, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 0.0009574468085106384, | |
| "loss": 3.5407, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 0.0009397163120567376, | |
| "loss": 3.1107, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 0.0009219858156028368, | |
| "loss": 2.8753, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 0.0009042553191489362, | |
| "loss": 2.7162, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 0.0008865248226950354, | |
| "loss": 2.6016, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 0.0008687943262411348, | |
| "loss": 2.5154, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 0.000851063829787234, | |
| "loss": 2.4472, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 0.0008333333333333334, | |
| "loss": 2.3923, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 0.0008156028368794326, | |
| "loss": 2.3473, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 0.0007978723404255319, | |
| "loss": 2.3071, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 0.0007801418439716312, | |
| "loss": 2.2735, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 0.0007624113475177306, | |
| "loss": 2.2441, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 0.0007446808510638298, | |
| "loss": 2.2174, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 0.0007269503546099291, | |
| "loss": 2.194, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 0.0007092198581560284, | |
| "loss": 2.1728, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 0.0006914893617021278, | |
| "loss": 2.1535, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 0.0006737588652482269, | |
| "loss": 2.1346, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 0.0006560283687943263, | |
| "loss": 2.1183, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "learning_rate": 0.0006382978723404256, | |
| "loss": 2.1035, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 0.0006205673758865247, | |
| "loss": 2.0897, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "learning_rate": 0.0006028368794326241, | |
| "loss": 2.076, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 0.0005851063829787234, | |
| "loss": 2.065, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 10.13, | |
| "learning_rate": 0.0005673758865248228, | |
| "loss": 2.0528, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "learning_rate": 0.0005496453900709219, | |
| "loss": 2.0432, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 10.85, | |
| "learning_rate": 0.0005319148936170213, | |
| "loss": 2.0335, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "learning_rate": 0.0005141843971631206, | |
| "loss": 2.0241, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 11.58, | |
| "learning_rate": 0.0004964539007092199, | |
| "loss": 2.0153, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "learning_rate": 0.0004787234042553192, | |
| "loss": 2.0066, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 12.3, | |
| "learning_rate": 0.0004609929078014184, | |
| "loss": 1.9987, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 12.66, | |
| "learning_rate": 0.0004432624113475177, | |
| "loss": 1.9909, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 13.02, | |
| "learning_rate": 0.000425531914893617, | |
| "loss": 1.9842, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 13.38, | |
| "learning_rate": 0.0004078014184397163, | |
| "loss": 1.9768, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 13.75, | |
| "learning_rate": 0.0003900709219858156, | |
| "loss": 1.9705, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "learning_rate": 0.0003723404255319149, | |
| "loss": 1.9638, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 14.46, | |
| "learning_rate": 0.0003546099290780142, | |
| "loss": 1.9587, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 14.82, | |
| "learning_rate": 0.00033687943262411345, | |
| "loss": 1.9549, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 15.18, | |
| "learning_rate": 0.0003191489361702128, | |
| "loss": 1.9493, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 15.55, | |
| "learning_rate": 0.00030141843971631205, | |
| "loss": 1.9424, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 15.91, | |
| "learning_rate": 0.0002836879432624114, | |
| "loss": 1.9393, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 16.27, | |
| "learning_rate": 0.00026595744680851064, | |
| "loss": 1.9334, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 16.63, | |
| "learning_rate": 0.00024822695035460994, | |
| "loss": 1.9294, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "learning_rate": 0.0002304964539007092, | |
| "loss": 1.9229, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 17.35, | |
| "learning_rate": 0.0002127659574468085, | |
| "loss": 1.9193, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 17.73, | |
| "learning_rate": 0.0001950354609929078, | |
| "loss": 1.9133, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 18.09, | |
| "learning_rate": 0.0001773049645390071, | |
| "loss": 1.9082, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 18.45, | |
| "learning_rate": 0.0001595744680851064, | |
| "loss": 1.9038, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 18.81, | |
| "learning_rate": 0.0001418439716312057, | |
| "loss": 1.9004, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 19.17, | |
| "learning_rate": 0.00012411347517730497, | |
| "loss": 1.8966, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 19.54, | |
| "learning_rate": 0.00010638297872340425, | |
| "loss": 1.8934, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 19.9, | |
| "learning_rate": 8.865248226950355e-05, | |
| "loss": 1.8888, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 20.26, | |
| "learning_rate": 7.092198581560285e-05, | |
| "loss": 1.8852, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 20.62, | |
| "learning_rate": 5.319148936170213e-05, | |
| "loss": 1.8816, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 20.98, | |
| "learning_rate": 3.5460992907801425e-05, | |
| "loss": 1.8789, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 21.34, | |
| "learning_rate": 1.7730496453900712e-05, | |
| "loss": 1.8754, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 21.7, | |
| "learning_rate": 0.0, | |
| "loss": 1.8724, | |
| "step": 30000 | |
| } | |
| ], | |
| "max_steps": 30000, | |
| "num_train_epochs": 22, | |
| "total_flos": 1.6544861361662853e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |