{
  "best_metric": 0.8050541516245487,
  "best_model_checkpoint": "./fp32_1e_5/models/rte-roberta-base/checkpoint-1000",
  "epoch": 12.820512820512821,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.64,
      "learning_rate": 5.319148936170213e-06,
      "loss": 0.6932,
      "step": 50
    },
    {
      "epoch": 0.64,
      "eval_accuracy": 0.4729241877256318,
      "eval_loss": 0.6963141560554504,
      "eval_runtime": 2.0901,
      "eval_samples_per_second": 132.533,
      "eval_steps_per_second": 4.306,
      "step": 50
    },
    {
      "epoch": 1.28,
      "learning_rate": 9.959072305593452e-06,
      "loss": 0.6957,
      "step": 100
    },
    {
      "epoch": 1.28,
      "eval_accuracy": 0.4729241877256318,
      "eval_loss": 0.69448322057724,
      "eval_runtime": 0.7491,
      "eval_samples_per_second": 369.778,
      "eval_steps_per_second": 12.014,
      "step": 100
    },
    {
      "epoch": 1.92,
      "learning_rate": 9.618008185538881e-06,
      "loss": 0.6869,
      "step": 150
    },
    {
      "epoch": 1.92,
      "eval_accuracy": 0.6245487364620939,
      "eval_loss": 0.6610168218612671,
      "eval_runtime": 0.7857,
      "eval_samples_per_second": 352.54,
      "eval_steps_per_second": 11.454,
      "step": 150
    },
    {
      "epoch": 2.56,
      "learning_rate": 9.276944065484312e-06,
      "loss": 0.6292,
      "step": 200
    },
    {
      "epoch": 2.56,
      "eval_accuracy": 0.6570397111913358,
      "eval_loss": 0.6132365465164185,
      "eval_runtime": 0.7843,
      "eval_samples_per_second": 353.202,
      "eval_steps_per_second": 11.476,
      "step": 200
    },
    {
      "epoch": 3.21,
      "learning_rate": 8.935879945429742e-06,
      "loss": 0.5473,
      "step": 250
    },
    {
      "epoch": 3.21,
      "eval_accuracy": 0.7075812274368231,
      "eval_loss": 0.6135809421539307,
      "eval_runtime": 0.8034,
      "eval_samples_per_second": 344.792,
      "eval_steps_per_second": 11.203,
      "step": 250
    },
    {
      "epoch": 3.85,
      "learning_rate": 8.59481582537517e-06,
      "loss": 0.4867,
      "step": 300
    },
    {
      "epoch": 3.85,
      "eval_accuracy": 0.7256317689530686,
      "eval_loss": 0.5372898578643799,
      "eval_runtime": 0.8424,
      "eval_samples_per_second": 328.815,
      "eval_steps_per_second": 10.684,
      "step": 300
    },
    {
      "epoch": 4.49,
      "learning_rate": 8.253751705320601e-06,
      "loss": 0.3673,
      "step": 350
    },
    {
      "epoch": 4.49,
      "eval_accuracy": 0.7292418772563177,
      "eval_loss": 0.6152286529541016,
      "eval_runtime": 0.7525,
      "eval_samples_per_second": 368.117,
      "eval_steps_per_second": 11.96,
      "step": 350
    },
    {
      "epoch": 5.13,
      "learning_rate": 7.912687585266032e-06,
      "loss": 0.3246,
      "step": 400
    },
    {
      "epoch": 5.13,
      "eval_accuracy": 0.7545126353790613,
      "eval_loss": 0.6094390153884888,
      "eval_runtime": 0.7684,
      "eval_samples_per_second": 360.479,
      "eval_steps_per_second": 11.712,
      "step": 400
    },
    {
      "epoch": 5.77,
      "learning_rate": 7.5716234652114605e-06,
      "loss": 0.2585,
      "step": 450
    },
    {
      "epoch": 5.77,
      "eval_accuracy": 0.7725631768953068,
      "eval_loss": 0.6251689791679382,
      "eval_runtime": 0.7559,
      "eval_samples_per_second": 366.46,
      "eval_steps_per_second": 11.907,
      "step": 450
    },
    {
      "epoch": 6.41,
      "learning_rate": 7.23055934515689e-06,
      "loss": 0.1975,
      "step": 500
    },
    {
      "epoch": 6.41,
      "eval_accuracy": 0.7436823104693141,
      "eval_loss": 0.7674959301948547,
      "eval_runtime": 0.766,
      "eval_samples_per_second": 361.606,
      "eval_steps_per_second": 11.749,
      "step": 500
    },
    {
      "epoch": 7.05,
      "learning_rate": 6.88949522510232e-06,
      "loss": 0.19,
      "step": 550
    },
    {
      "epoch": 7.05,
      "eval_accuracy": 0.7617328519855595,
      "eval_loss": 0.7783448696136475,
      "eval_runtime": 0.7892,
      "eval_samples_per_second": 350.981,
      "eval_steps_per_second": 11.404,
      "step": 550
    },
    {
      "epoch": 7.69,
      "learning_rate": 6.54843110504775e-06,
      "loss": 0.1266,
      "step": 600
    },
    {
      "epoch": 7.69,
      "eval_accuracy": 0.7545126353790613,
      "eval_loss": 1.0359293222427368,
      "eval_runtime": 0.7751,
      "eval_samples_per_second": 357.395,
      "eval_steps_per_second": 11.612,
      "step": 600
    },
    {
      "epoch": 8.33,
      "learning_rate": 6.20736698499318e-06,
      "loss": 0.1222,
      "step": 650
    },
    {
      "epoch": 8.33,
      "eval_accuracy": 0.7833935018050542,
      "eval_loss": 0.8435311913490295,
      "eval_runtime": 1.3166,
      "eval_samples_per_second": 210.387,
      "eval_steps_per_second": 6.836,
      "step": 650
    },
    {
      "epoch": 8.97,
      "learning_rate": 5.8663028649386085e-06,
      "loss": 0.1146,
      "step": 700
    },
    {
      "epoch": 8.97,
      "eval_accuracy": 0.7833935018050542,
      "eval_loss": 0.941527247428894,
      "eval_runtime": 0.749,
      "eval_samples_per_second": 369.818,
      "eval_steps_per_second": 12.016,
      "step": 700
    },
    {
      "epoch": 9.62,
      "learning_rate": 5.525238744884038e-06,
      "loss": 0.1131,
      "step": 750
    },
    {
      "epoch": 9.62,
      "eval_accuracy": 0.7833935018050542,
      "eval_loss": 0.9660640358924866,
      "eval_runtime": 0.7502,
      "eval_samples_per_second": 369.243,
      "eval_steps_per_second": 11.997,
      "step": 750
    },
    {
      "epoch": 10.26,
      "learning_rate": 5.1841746248294686e-06,
      "loss": 0.0719,
      "step": 800
    },
    {
      "epoch": 10.26,
      "eval_accuracy": 0.7906137184115524,
      "eval_loss": 1.0486385822296143,
      "eval_runtime": 0.7951,
      "eval_samples_per_second": 348.395,
      "eval_steps_per_second": 11.32,
      "step": 800
    },
    {
      "epoch": 10.9,
      "learning_rate": 4.843110504774898e-06,
      "loss": 0.1002,
      "step": 850
    },
    {
      "epoch": 10.9,
      "eval_accuracy": 0.776173285198556,
      "eval_loss": 1.1470500230789185,
      "eval_runtime": 0.7467,
      "eval_samples_per_second": 370.982,
      "eval_steps_per_second": 12.054,
      "step": 850
    },
    {
      "epoch": 11.54,
      "learning_rate": 4.502046384720328e-06,
      "loss": 0.0515,
      "step": 900
    },
    {
      "epoch": 11.54,
      "eval_accuracy": 0.7906137184115524,
      "eval_loss": 1.0903133153915405,
      "eval_runtime": 0.7707,
      "eval_samples_per_second": 359.412,
      "eval_steps_per_second": 11.678,
      "step": 900
    },
    {
      "epoch": 12.18,
      "learning_rate": 4.160982264665757e-06,
      "loss": 0.0732,
      "step": 950
    },
    {
      "epoch": 12.18,
      "eval_accuracy": 0.7978339350180506,
      "eval_loss": 1.0761293172836304,
      "eval_runtime": 0.8013,
      "eval_samples_per_second": 345.69,
      "eval_steps_per_second": 11.232,
      "step": 950
    },
    {
      "epoch": 12.82,
      "learning_rate": 3.819918144611187e-06,
      "loss": 0.0644,
      "step": 1000
    },
    {
      "epoch": 12.82,
      "eval_accuracy": 0.8050541516245487,
      "eval_loss": 1.118359088897705,
      "eval_runtime": 0.7549,
      "eval_samples_per_second": 366.937,
      "eval_steps_per_second": 11.922,
      "step": 1000
    }
  ],
  "max_steps": 1560,
  "num_train_epochs": 20,
  "total_flos": 2100152443883520.0,
  "trial_name": null,
  "trial_params": null
}