{ "best_metric": 0.8050541516245487, "best_model_checkpoint": "./fp32_1e_5/models/rte-roberta-base/checkpoint-1000", "epoch": 12.820512820512821, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.64, "learning_rate": 5.319148936170213e-06, "loss": 0.6932, "step": 50 }, { "epoch": 0.64, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.6963141560554504, "eval_runtime": 2.0901, "eval_samples_per_second": 132.533, "eval_steps_per_second": 4.306, "step": 50 }, { "epoch": 1.28, "learning_rate": 9.959072305593452e-06, "loss": 0.6957, "step": 100 }, { "epoch": 1.28, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.69448322057724, "eval_runtime": 0.7491, "eval_samples_per_second": 369.778, "eval_steps_per_second": 12.014, "step": 100 }, { "epoch": 1.92, "learning_rate": 9.618008185538881e-06, "loss": 0.6869, "step": 150 }, { "epoch": 1.92, "eval_accuracy": 0.6245487364620939, "eval_loss": 0.6610168218612671, "eval_runtime": 0.7857, "eval_samples_per_second": 352.54, "eval_steps_per_second": 11.454, "step": 150 }, { "epoch": 2.56, "learning_rate": 9.276944065484312e-06, "loss": 0.6292, "step": 200 }, { "epoch": 2.56, "eval_accuracy": 0.6570397111913358, "eval_loss": 0.6132365465164185, "eval_runtime": 0.7843, "eval_samples_per_second": 353.202, "eval_steps_per_second": 11.476, "step": 200 }, { "epoch": 3.21, "learning_rate": 8.935879945429742e-06, "loss": 0.5473, "step": 250 }, { "epoch": 3.21, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.6135809421539307, "eval_runtime": 0.8034, "eval_samples_per_second": 344.792, "eval_steps_per_second": 11.203, "step": 250 }, { "epoch": 3.85, "learning_rate": 8.59481582537517e-06, "loss": 0.4867, "step": 300 }, { "epoch": 3.85, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.5372898578643799, "eval_runtime": 0.8424, "eval_samples_per_second": 328.815, "eval_steps_per_second": 10.684, "step": 300 }, { "epoch": 4.49, "learning_rate": 8.253751705320601e-06, "loss": 0.3673, "step": 350 }, { "epoch": 4.49, "eval_accuracy": 0.7292418772563177, "eval_loss": 0.6152286529541016, "eval_runtime": 0.7525, "eval_samples_per_second": 368.117, "eval_steps_per_second": 11.96, "step": 350 }, { "epoch": 5.13, "learning_rate": 7.912687585266032e-06, "loss": 0.3246, "step": 400 }, { "epoch": 5.13, "eval_accuracy": 0.7545126353790613, "eval_loss": 0.6094390153884888, "eval_runtime": 0.7684, "eval_samples_per_second": 360.479, "eval_steps_per_second": 11.712, "step": 400 }, { "epoch": 5.77, "learning_rate": 7.5716234652114605e-06, "loss": 0.2585, "step": 450 }, { "epoch": 5.77, "eval_accuracy": 0.7725631768953068, "eval_loss": 0.6251689791679382, "eval_runtime": 0.7559, "eval_samples_per_second": 366.46, "eval_steps_per_second": 11.907, "step": 450 }, { "epoch": 6.41, "learning_rate": 7.23055934515689e-06, "loss": 0.1975, "step": 500 }, { "epoch": 6.41, "eval_accuracy": 0.7436823104693141, "eval_loss": 0.7674959301948547, "eval_runtime": 0.766, "eval_samples_per_second": 361.606, "eval_steps_per_second": 11.749, "step": 500 }, { "epoch": 7.05, "learning_rate": 6.88949522510232e-06, "loss": 0.19, "step": 550 }, { "epoch": 7.05, "eval_accuracy": 0.7617328519855595, "eval_loss": 0.7783448696136475, "eval_runtime": 0.7892, "eval_samples_per_second": 350.981, "eval_steps_per_second": 11.404, "step": 550 }, { "epoch": 7.69, "learning_rate": 6.54843110504775e-06, "loss": 0.1266, "step": 600 }, { "epoch": 7.69, "eval_accuracy": 0.7545126353790613, "eval_loss": 1.0359293222427368, "eval_runtime": 0.7751, "eval_samples_per_second": 357.395, "eval_steps_per_second": 11.612, "step": 600 }, { "epoch": 8.33, "learning_rate": 6.20736698499318e-06, "loss": 0.1222, "step": 650 }, { "epoch": 8.33, "eval_accuracy": 0.7833935018050542, "eval_loss": 0.8435311913490295, "eval_runtime": 1.3166, "eval_samples_per_second": 210.387, "eval_steps_per_second": 6.836, "step": 650 }, { "epoch": 8.97, "learning_rate": 5.8663028649386085e-06, "loss": 0.1146, "step": 700 }, { "epoch": 8.97, "eval_accuracy": 0.7833935018050542, "eval_loss": 0.941527247428894, "eval_runtime": 0.749, "eval_samples_per_second": 369.818, "eval_steps_per_second": 12.016, "step": 700 }, { "epoch": 9.62, "learning_rate": 5.525238744884038e-06, "loss": 0.1131, "step": 750 }, { "epoch": 9.62, "eval_accuracy": 0.7833935018050542, "eval_loss": 0.9660640358924866, "eval_runtime": 0.7502, "eval_samples_per_second": 369.243, "eval_steps_per_second": 11.997, "step": 750 }, { "epoch": 10.26, "learning_rate": 5.1841746248294686e-06, "loss": 0.0719, "step": 800 }, { "epoch": 10.26, "eval_accuracy": 0.7906137184115524, "eval_loss": 1.0486385822296143, "eval_runtime": 0.7951, "eval_samples_per_second": 348.395, "eval_steps_per_second": 11.32, "step": 800 }, { "epoch": 10.9, "learning_rate": 4.843110504774898e-06, "loss": 0.1002, "step": 850 }, { "epoch": 10.9, "eval_accuracy": 0.776173285198556, "eval_loss": 1.1470500230789185, "eval_runtime": 0.7467, "eval_samples_per_second": 370.982, "eval_steps_per_second": 12.054, "step": 850 }, { "epoch": 11.54, "learning_rate": 4.502046384720328e-06, "loss": 0.0515, "step": 900 }, { "epoch": 11.54, "eval_accuracy": 0.7906137184115524, "eval_loss": 1.0903133153915405, "eval_runtime": 0.7707, "eval_samples_per_second": 359.412, "eval_steps_per_second": 11.678, "step": 900 }, { "epoch": 12.18, "learning_rate": 4.160982264665757e-06, "loss": 0.0732, "step": 950 }, { "epoch": 12.18, "eval_accuracy": 0.7978339350180506, "eval_loss": 1.0761293172836304, "eval_runtime": 0.8013, "eval_samples_per_second": 345.69, "eval_steps_per_second": 11.232, "step": 950 }, { "epoch": 12.82, "learning_rate": 3.819918144611187e-06, "loss": 0.0644, "step": 1000 }, { "epoch": 12.82, "eval_accuracy": 0.8050541516245487, "eval_loss": 1.118359088897705, "eval_runtime": 0.7549, "eval_samples_per_second": 366.937, "eval_steps_per_second": 11.922, "step": 1000 } ], "max_steps": 1560, "num_train_epochs": 20, "total_flos": 2100152443883520.0, "trial_name": null, "trial_params": null }