| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.962822997027284, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.904275808936826e-05, | |
| "loss": 0.3678, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.807973805855162e-05, | |
| "loss": 0.3141, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.7116718027734977e-05, | |
| "loss": 0.2928, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.6153697996918337e-05, | |
| "loss": 0.2788, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.5190677966101697e-05, | |
| "loss": 0.2664, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.4227657935285056e-05, | |
| "loss": 0.2551, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.3264637904468413e-05, | |
| "loss": 0.2465, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.2301617873651772e-05, | |
| "loss": 0.2373, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.1338597842835134e-05, | |
| "loss": 0.2305, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.037557781201849e-05, | |
| "loss": 0.2229, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.416409861325117e-06, | |
| "loss": 0.2154, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.455315870570109e-06, | |
| "loss": 0.2104, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.492295839753467e-06, | |
| "loss": 0.2043, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 6.529275808936827e-06, | |
| "loss": 0.1985, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 5.568181818181818e-06, | |
| "loss": 0.1953, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.605161787365178e-06, | |
| "loss": 0.1903, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.645993836671803e-06, | |
| "loss": 0.186, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.682973805855162e-06, | |
| "loss": 0.1837, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.7238058551617876e-06, | |
| "loss": 0.1803, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 7.607858243451465e-07, | |
| "loss": 0.1775, | |
| "step": 10000 | |
| } | |
| ], | |
| "max_steps": 10386, | |
| "num_train_epochs": 1, | |
| "total_flos": 1.1258147859595264e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |