{
  "best_metric": 0.9160277022013357,
  "best_model_checkpoint": "./fp32_2e_5/models/qqp-roberta-base/checkpoint-67000",
  "epoch": 5.892181866150734,
  "global_step": 67000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 2.9312619082515024e-06,
      "loss": 0.5476,
      "step": 1000
    },
    {
      "epoch": 0.09,
      "eval_accuracy": 0.8186000494682166,
      "eval_combined_score": 0.781458719476822,
      "eval_f1": 0.7443173894854274,
      "eval_loss": 0.3999478816986084,
      "eval_runtime": 100.5489,
      "eval_samples_per_second": 402.093,
      "eval_steps_per_second": 12.571,
      "step": 1000
    },
    {
      "epoch": 0.18,
      "learning_rate": 5.862523816503005e-06,
      "loss": 0.3783,
      "step": 2000
    },
    {
      "epoch": 0.18,
      "eval_accuracy": 0.8439772446203314,
      "eval_combined_score": 0.8249961873510662,
      "eval_f1": 0.8060151300818009,
      "eval_loss": 0.3368555009365082,
      "eval_runtime": 105.3628,
      "eval_samples_per_second": 383.722,
      "eval_steps_per_second": 11.997,
      "step": 2000
    },
    {
      "epoch": 0.26,
      "learning_rate": 8.793785724754508e-06,
      "loss": 0.3465,
      "step": 3000
    },
    {
      "epoch": 0.26,
      "eval_accuracy": 0.8580262181548355,
      "eval_combined_score": 0.8396217180601624,
      "eval_f1": 0.8212172179654894,
      "eval_loss": 0.30989906191825867,
      "eval_runtime": 107.6992,
      "eval_samples_per_second": 375.397,
      "eval_steps_per_second": 11.736,
      "step": 3000
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.172504763300601e-05,
      "loss": 0.3284,
      "step": 4000
    },
    {
      "epoch": 0.35,
      "eval_accuracy": 0.8592134553549344,
      "eval_combined_score": 0.8436663967196221,
      "eval_f1": 0.8281193380843096,
      "eval_loss": 0.30866414308547974,
      "eval_runtime": 104.8651,
      "eval_samples_per_second": 385.543,
      "eval_steps_per_second": 12.054,
      "step": 4000
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.4656309541257511e-05,
      "loss": 0.3244,
      "step": 5000
    },
    {
      "epoch": 0.44,
      "eval_accuracy": 0.8497897600791492,
      "eval_combined_score": 0.835341100584005,
      "eval_f1": 0.8208924410888607,
      "eval_loss": 0.32785937190055847,
      "eval_runtime": 101.6619,
      "eval_samples_per_second": 397.691,
      "eval_steps_per_second": 12.433,
      "step": 5000
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.7587571449509016e-05,
      "loss": 0.3087,
      "step": 6000
    },
    {
      "epoch": 0.53,
      "eval_accuracy": 0.8763047242146921,
      "eval_combined_score": 0.8556630011071151,
      "eval_f1": 0.8350212779995382,
      "eval_loss": 0.2904255986213684,
      "eval_runtime": 100.5764,
      "eval_samples_per_second": 401.983,
      "eval_steps_per_second": 12.568,
      "step": 6000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.9966880911616945e-05,
      "loss": 0.2958,
      "step": 7000
    },
    {
      "epoch": 0.62,
      "eval_accuracy": 0.8771209497897601,
      "eval_combined_score": 0.8548056701204515,
      "eval_f1": 0.8324903904511429,
      "eval_loss": 0.2824174165725708,
      "eval_runtime": 109.1783,
      "eval_samples_per_second": 370.312,
      "eval_steps_per_second": 11.577,
      "step": 7000
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.9779767417927343e-05,
      "loss": 0.2985,
      "step": 8000
    },
    {
      "epoch": 0.7,
      "eval_accuracy": 0.8826861241652239,
      "eval_combined_score": 0.8620260280680964,
      "eval_f1": 0.8413659319709689,
      "eval_loss": 0.2692563235759735,
      "eval_runtime": 109.9785,
      "eval_samples_per_second": 367.617,
      "eval_steps_per_second": 11.493,
      "step": 8000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.959265392423775e-05,
      "loss": 0.2854,
      "step": 9000
    },
    {
      "epoch": 0.79,
      "eval_accuracy": 0.8790749443482563,
      "eval_combined_score": 0.8634582963003614,
      "eval_f1": 0.8478416482524664,
      "eval_loss": 0.2772486209869385,
      "eval_runtime": 104.6366,
      "eval_samples_per_second": 386.385,
      "eval_steps_per_second": 12.08,
      "step": 9000
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.940554043054815e-05,
      "loss": 0.279,
      "step": 10000
    },
    {
      "epoch": 0.88,
      "eval_accuracy": 0.8851842691070987,
      "eval_combined_score": 0.8687501900384056,
      "eval_f1": 0.8523161109697124,
      "eval_loss": 0.26354455947875977,
      "eval_runtime": 102.343,
      "eval_samples_per_second": 395.044,
      "eval_steps_per_second": 12.351,
      "step": 10000
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.9218426936858555e-05,
      "loss": 0.2728,
      "step": 11000
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.8893643334157804,
      "eval_combined_score": 0.8714862904192304,
      "eval_f1": 0.8536082474226804,
      "eval_loss": 0.2556052803993225,
      "eval_runtime": 100.7949,
      "eval_samples_per_second": 401.112,
      "eval_steps_per_second": 12.54,
      "step": 11000
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.9031313443168953e-05,
      "loss": 0.2474,
      "step": 12000
    },
    {
      "epoch": 1.06,
      "eval_accuracy": 0.8884986396240415,
      "eval_combined_score": 0.8716473743117953,
      "eval_f1": 0.854796108999549,
      "eval_loss": 0.26520249247550964,
      "eval_runtime": 102.8555,
      "eval_samples_per_second": 393.076,
      "eval_steps_per_second": 12.289,
      "step": 12000
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.884419994947936e-05,
      "loss": 0.2382,
      "step": 13000
    },
    {
      "epoch": 1.14,
      "eval_accuracy": 0.8925302992827109,
      "eval_combined_score": 0.8776649412356591,
      "eval_f1": 0.8627995831886073,
      "eval_loss": 0.25566014647483826,
      "eval_runtime": 99.8698,
      "eval_samples_per_second": 404.827,
      "eval_steps_per_second": 12.656,
      "step": 13000
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.865708645578976e-05,
      "loss": 0.2437,
      "step": 14000
    },
    {
      "epoch": 1.23,
      "eval_accuracy": 0.8940143457828346,
      "eval_combined_score": 0.8742345089194055,
      "eval_f1": 0.8544546720559764,
      "eval_loss": 0.25206559896469116,
      "eval_runtime": 98.7673,
      "eval_samples_per_second": 409.346,
      "eval_steps_per_second": 12.798,
      "step": 14000
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.8469972962100165e-05,
      "loss": 0.2314,
      "step": 15000
    },
    {
      "epoch": 1.32,
      "eval_accuracy": 0.8962651496413554,
      "eval_combined_score": 0.8779659416144943,
      "eval_f1": 0.859666733587633,
      "eval_loss": 0.2604176104068756,
      "eval_runtime": 99.873,
      "eval_samples_per_second": 404.814,
      "eval_steps_per_second": 12.656,
      "step": 15000
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.8282859468410567e-05,
      "loss": 0.2347,
      "step": 16000
    },
    {
      "epoch": 1.41,
      "eval_accuracy": 0.897872866683156,
      "eval_combined_score": 0.8811078037131652,
      "eval_f1": 0.8643427407431743,
      "eval_loss": 0.2466404139995575,
      "eval_runtime": 99.5334,
      "eval_samples_per_second": 406.195,
      "eval_steps_per_second": 12.699,
      "step": 16000
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.809574597472097e-05,
      "loss": 0.2322,
      "step": 17000
    },
    {
      "epoch": 1.5,
      "eval_accuracy": 0.8983675488498639,
      "eval_combined_score": 0.8828389557469845,
      "eval_f1": 0.867310362644105,
      "eval_loss": 0.24636423587799072,
      "eval_runtime": 100.203,
      "eval_samples_per_second": 403.481,
      "eval_steps_per_second": 12.614,
      "step": 17000
    },
    {
      "epoch": 1.58,
      "learning_rate": 1.790863248103137e-05,
      "loss": 0.2373,
      "step": 18000
    },
    {
      "epoch": 1.58,
      "eval_accuracy": 0.8971061093247589,
      "eval_combined_score": 0.883107204810262,
      "eval_f1": 0.8691083002957649,
      "eval_loss": 0.2558729350566864,
      "eval_runtime": 102.812,
      "eval_samples_per_second": 393.242,
      "eval_steps_per_second": 12.294,
      "step": 18000
    },
    {
      "epoch": 1.67,
      "learning_rate": 1.7721518987341772e-05,
      "loss": 0.2295,
      "step": 19000
    },
    {
      "epoch": 1.67,
      "eval_accuracy": 0.8999752658916647,
      "eval_combined_score": 0.8847323563693139,
      "eval_f1": 0.8694894468469633,
      "eval_loss": 0.25413957238197327,
      "eval_runtime": 101.1101,
      "eval_samples_per_second": 399.861,
      "eval_steps_per_second": 12.501,
      "step": 19000
    },
    {
      "epoch": 1.76,
      "learning_rate": 1.7534405493652177e-05,
      "loss": 0.2295,
      "step": 20000
    },
    {
      "epoch": 1.76,
      "eval_accuracy": 0.9001978728666832,
      "eval_combined_score": 0.8849152590077334,
      "eval_f1": 0.8696326451487837,
      "eval_loss": 0.24725091457366943,
      "eval_runtime": 102.8761,
      "eval_samples_per_second": 392.997,
      "eval_steps_per_second": 12.287,
      "step": 20000
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.734729199996258e-05,
      "loss": 0.2281,
      "step": 21000
    },
    {
      "epoch": 1.85,
      "eval_accuracy": 0.8977491961414791,
      "eval_combined_score": 0.8776085657993287,
      "eval_f1": 0.8574679354571784,
      "eval_loss": 0.2688974142074585,
      "eval_runtime": 99.2566,
      "eval_samples_per_second": 407.328,
      "eval_steps_per_second": 12.735,
      "step": 21000
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.716017850627298e-05,
      "loss": 0.228,
      "step": 22000
    },
    {
      "epoch": 1.93,
      "eval_accuracy": 0.9038090526836507,
      "eval_combined_score": 0.888468647308778,
      "eval_f1": 0.8731282419339053,
      "eval_loss": 0.23130430281162262,
      "eval_runtime": 99.2642,
      "eval_samples_per_second": 407.297,
      "eval_steps_per_second": 12.734,
      "step": 22000
    },
    {
      "epoch": 2.02,
      "learning_rate": 1.6973065012583382e-05,
      "loss": 0.2116,
      "step": 23000
    },
    {
      "epoch": 2.02,
      "eval_accuracy": 0.903067029433589,
      "eval_combined_score": 0.8880802586693867,
      "eval_f1": 0.8730934879051844,
      "eval_loss": 0.24339179694652557,
      "eval_runtime": 99.8059,
      "eval_samples_per_second": 405.086,
      "eval_steps_per_second": 12.665,
      "step": 23000
    },
    {
      "epoch": 2.11,
      "learning_rate": 1.6785951518893787e-05,
      "loss": 0.179,
      "step": 24000
    },
    {
      "epoch": 2.11,
      "eval_accuracy": 0.9035617116002969,
      "eval_combined_score": 0.8877347307631893,
      "eval_f1": 0.8719077499260817,
      "eval_loss": 0.2653352916240692,
      "eval_runtime": 98.8508,
      "eval_samples_per_second": 409.0,
      "eval_steps_per_second": 12.787,
      "step": 24000
    },
    {
      "epoch": 2.2,
      "learning_rate": 1.659883802520419e-05,
      "loss": 0.1752,
      "step": 25000
    },
    {
      "epoch": 2.2,
      "eval_accuracy": 0.9018303240168192,
      "eval_combined_score": 0.8860091930795462,
      "eval_f1": 0.8701880621422732,
      "eval_loss": 0.2552090883255005,
      "eval_runtime": 99.1983,
      "eval_samples_per_second": 407.568,
      "eval_steps_per_second": 12.742,
      "step": 25000
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.6411724531514594e-05,
      "loss": 0.1822,
      "step": 26000
    },
    {
      "epoch": 2.29,
      "eval_accuracy": 0.9064803363838734,
      "eval_combined_score": 0.8916663806141756,
      "eval_f1": 0.8768524248444777,
      "eval_loss": 0.2510685920715332,
      "eval_runtime": 98.8214,
      "eval_samples_per_second": 409.122,
      "eval_steps_per_second": 12.791,
      "step": 26000
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.6224611037824992e-05,
      "loss": 0.1826,
      "step": 27000
    },
    {
      "epoch": 2.37,
      "eval_accuracy": 0.9051199604254266,
      "eval_combined_score": 0.8899457206149071,
      "eval_f1": 0.8747714808043876,
      "eval_loss": 0.24112574756145477,
      "eval_runtime": 100.4418,
      "eval_samples_per_second": 402.521,
      "eval_steps_per_second": 12.584,
      "step": 27000
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.6037497544135397e-05,
      "loss": 0.1807,
      "step": 28000
    },
    {
      "epoch": 2.46,
      "eval_accuracy": 0.9052683650754391,
      "eval_combined_score": 0.8889197827506554,
      "eval_f1": 0.8725712004258717,
      "eval_loss": 0.28508177399635315,
      "eval_runtime": 99.6721,
      "eval_samples_per_second": 405.63,
      "eval_steps_per_second": 12.682,
      "step": 28000
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.58503840504458e-05,
      "loss": 0.1829,
      "step": 29000
    },
    {
      "epoch": 2.55,
      "eval_accuracy": 0.903512243383626,
      "eval_combined_score": 0.888178430111253,
      "eval_f1": 0.8728446168388799,
      "eval_loss": 0.24632664024829865,
      "eval_runtime": 98.7974,
      "eval_samples_per_second": 409.221,
      "eval_steps_per_second": 12.794,
      "step": 29000
    },
    {
      "epoch": 2.64,
      "learning_rate": 1.5663270556756204e-05,
      "loss": 0.1832,
      "step": 30000
    },
    {
      "epoch": 2.64,
      "eval_accuracy": 0.9063071976255256,
      "eval_combined_score": 0.8893352822146093,
      "eval_f1": 0.8723633668036931,
      "eval_loss": 0.2510886788368225,
      "eval_runtime": 99.841,
      "eval_samples_per_second": 404.944,
      "eval_steps_per_second": 12.66,
      "step": 30000
    },
    {
      "epoch": 2.73,
      "learning_rate": 1.5476157063066602e-05,
      "loss": 0.1752,
      "step": 31000
    },
    {
      "epoch": 2.73,
      "eval_accuracy": 0.9047984170170665,
      "eval_combined_score": 0.8909587334582507,
      "eval_f1": 0.8771190498994349,
      "eval_loss": 0.25542038679122925,
      "eval_runtime": 98.5241,
      "eval_samples_per_second": 410.356,
      "eval_steps_per_second": 12.829,
      "step": 31000
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.5289043569377007e-05,
      "loss": 0.1822,
      "step": 32000
    },
    {
      "epoch": 2.81,
      "eval_accuracy": 0.9059361860004946,
      "eval_combined_score": 0.8898770813542074,
      "eval_f1": 0.87381797670792,
      "eval_loss": 0.2384674996137619,
      "eval_runtime": 104.6367,
      "eval_samples_per_second": 386.384,
      "eval_steps_per_second": 12.08,
      "step": 32000
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.510193007568741e-05,
      "loss": 0.1834,
      "step": 33000
    },
    {
      "epoch": 2.9,
      "eval_accuracy": 0.9094484293841207,
      "eval_combined_score": 0.895065771906153,
      "eval_f1": 0.8806831144281851,
      "eval_loss": 0.25289300084114075,
      "eval_runtime": 106.9571,
      "eval_samples_per_second": 378.002,
      "eval_steps_per_second": 11.818,
      "step": 33000
    },
    {
      "epoch": 2.99,
      "learning_rate": 1.4914816581997812e-05,
      "loss": 0.1829,
      "step": 34000
    },
    {
      "epoch": 2.99,
      "eval_accuracy": 0.9079643828839971,
      "eval_combined_score": 0.8944023406718317,
      "eval_f1": 0.8808402984596663,
      "eval_loss": 0.25022605061531067,
      "eval_runtime": 99.5394,
      "eval_samples_per_second": 406.171,
      "eval_steps_per_second": 12.698,
      "step": 34000
    },
    {
      "epoch": 3.08,
      "learning_rate": 1.4727703088308214e-05,
      "loss": 0.1477,
      "step": 35000
    },
    {
      "epoch": 3.08,
      "eval_accuracy": 0.9075686371506307,
      "eval_combined_score": 0.892660304050797,
      "eval_f1": 0.8777519709509634,
      "eval_loss": 0.27075424790382385,
      "eval_runtime": 99.0291,
      "eval_samples_per_second": 408.264,
      "eval_steps_per_second": 12.764,
      "step": 35000
    },
    {
      "epoch": 3.17,
      "learning_rate": 1.4540589594618617e-05,
      "loss": 0.1429,
      "step": 36000
    },
    {
      "epoch": 3.17,
      "eval_accuracy": 0.908780608459065,
      "eval_combined_score": 0.8936844838450184,
      "eval_f1": 0.8785883592309719,
      "eval_loss": 0.2728239893913269,
      "eval_runtime": 99.8083,
      "eval_samples_per_second": 405.076,
      "eval_steps_per_second": 12.664,
      "step": 36000
    },
    {
      "epoch": 3.25,
      "learning_rate": 1.435347610092902e-05,
      "loss": 0.1424,
      "step": 37000
    },
    {
      "epoch": 3.25,
      "eval_accuracy": 0.9095226317091268,
      "eval_combined_score": 0.8944181947724124,
      "eval_f1": 0.8793137578356978,
      "eval_loss": 0.3181060254573822,
      "eval_runtime": 99.1709,
      "eval_samples_per_second": 407.68,
      "eval_steps_per_second": 12.746,
      "step": 37000
    },
    {
      "epoch": 3.34,
      "learning_rate": 1.416636260723942e-05,
      "loss": 0.1421,
      "step": 38000
    },
    {
      "epoch": 3.34,
      "eval_accuracy": 0.9079891169923324,
      "eval_combined_score": 0.8935460697083859,
      "eval_f1": 0.8791030224244393,
      "eval_loss": 0.25459086894989014,
      "eval_runtime": 101.0313,
      "eval_samples_per_second": 400.173,
      "eval_steps_per_second": 12.511,
      "step": 38000
    },
    {
      "epoch": 3.43,
      "learning_rate": 1.3979249113549824e-05,
      "loss": 0.1453,
      "step": 39000
    },
    {
      "epoch": 3.43,
      "eval_accuracy": 0.9072965619589414,
      "eval_combined_score": 0.8937684241287549,
      "eval_f1": 0.8802402862985685,
      "eval_loss": 0.2652507722377777,
      "eval_runtime": 98.7666,
      "eval_samples_per_second": 409.349,
      "eval_steps_per_second": 12.798,
      "step": 39000
    },
    {
      "epoch": 3.52,
      "learning_rate": 1.3792135619860227e-05,
      "loss": 0.1442,
      "step": 40000
    },
    {
      "epoch": 3.52,
      "eval_accuracy": 0.9106109324758842,
      "eval_combined_score": 0.896522122060974,
      "eval_f1": 0.8824333116460638,
      "eval_loss": 0.27689746022224426,
      "eval_runtime": 98.8771,
      "eval_samples_per_second": 408.891,
      "eval_steps_per_second": 12.784,
      "step": 40000
    },
    {
      "epoch": 3.61,
      "learning_rate": 1.360502212617063e-05,
      "loss": 0.1485,
      "step": 41000
    },
    {
      "epoch": 3.61,
      "eval_accuracy": 0.911031412317586,
      "eval_combined_score": 0.8962277549473212,
      "eval_f1": 0.8814240975770563,
      "eval_loss": 0.2877097427845001,
      "eval_runtime": 99.1266,
      "eval_samples_per_second": 407.862,
      "eval_steps_per_second": 12.751,
      "step": 41000
    },
    {
      "epoch": 3.69,
      "learning_rate": 1.341790863248103e-05,
      "loss": 0.1453,
      "step": 42000
    },
    {
      "epoch": 3.69,
      "eval_accuracy": 0.9098194410091516,
      "eval_combined_score": 0.895775167500813,
      "eval_f1": 0.8817308939924743,
      "eval_loss": 0.27044087648391724,
      "eval_runtime": 99.2706,
      "eval_samples_per_second": 407.271,
      "eval_steps_per_second": 12.733,
      "step": 42000
    },
    {
      "epoch": 3.78,
      "learning_rate": 1.3230795138791434e-05,
      "loss": 0.1492,
      "step": 43000
    },
    {
      "epoch": 3.78,
      "eval_accuracy": 0.91348008904279,
      "eval_combined_score": 0.898925685140197,
      "eval_f1": 0.8843712812376041,
      "eval_loss": 0.25266170501708984,
      "eval_runtime": 100.9281,
      "eval_samples_per_second": 400.582,
      "eval_steps_per_second": 12.524,
      "step": 43000
    },
    {
      "epoch": 3.87,
      "learning_rate": 1.3043681645101837e-05,
      "loss": 0.1444,
      "step": 44000
    },
    {
      "epoch": 3.87,
      "eval_accuracy": 0.911847637892654,
      "eval_combined_score": 0.8970186225956724,
      "eval_f1": 0.882189607298691,
      "eval_loss": 0.2621923089027405,
      "eval_runtime": 98.9247,
      "eval_samples_per_second": 408.695,
      "eval_steps_per_second": 12.777,
      "step": 44000
    },
    {
      "epoch": 3.96,
      "learning_rate": 1.285656815141224e-05,
      "loss": 0.1433,
      "step": 45000
    },
    {
      "epoch": 3.96,
      "eval_accuracy": 0.9120949789760079,
      "eval_combined_score": 0.8979680019732961,
      "eval_f1": 0.8838410249705844,
      "eval_loss": 0.27234378457069397,
      "eval_runtime": 102.783,
      "eval_samples_per_second": 393.353,
      "eval_steps_per_second": 12.298,
      "step": 45000
    },
    {
      "epoch": 4.05,
      "learning_rate": 1.2669454657722643e-05,
      "loss": 0.1315,
      "step": 46000
    },
    {
      "epoch": 4.05,
      "eval_accuracy": 0.9134306208261193,
      "eval_combined_score": 0.8995033318593784,
      "eval_f1": 0.8855760428926376,
      "eval_loss": 0.29698267579078674,
      "eval_runtime": 104.3977,
      "eval_samples_per_second": 387.269,
      "eval_steps_per_second": 12.108,
      "step": 46000
    },
    {
      "epoch": 4.13,
      "learning_rate": 1.2482341164033044e-05,
      "loss": 0.1135,
      "step": 47000
    },
    {
      "epoch": 4.13,
      "eval_accuracy": 0.9112540192926045,
      "eval_combined_score": 0.8968766821639864,
      "eval_f1": 0.8824993450353682,
      "eval_loss": 0.30147695541381836,
      "eval_runtime": 100.9744,
      "eval_samples_per_second": 400.398,
      "eval_steps_per_second": 12.518,
      "step": 47000
    },
    {
      "epoch": 4.22,
      "learning_rate": 1.2295227670343448e-05,
      "loss": 0.1178,
      "step": 48000
    },
    {
      "epoch": 4.22,
      "eval_accuracy": 0.9088548107840713,
      "eval_combined_score": 0.8948400699242306,
      "eval_f1": 0.8808253290643899,
      "eval_loss": 0.30377110838890076,
      "eval_runtime": 101.3426,
      "eval_samples_per_second": 398.944,
      "eval_steps_per_second": 12.473,
      "step": 48000
    },
    {
      "epoch": 4.31,
      "learning_rate": 1.2108114176653851e-05,
      "loss": 0.1192,
      "step": 49000
    },
    {
      "epoch": 4.31,
      "eval_accuracy": 0.9117487014593124,
      "eval_combined_score": 0.8974361955933878,
      "eval_f1": 0.8831236897274632,
      "eval_loss": 0.3054348826408386,
      "eval_runtime": 102.6949,
      "eval_samples_per_second": 393.691,
      "eval_steps_per_second": 12.308,
      "step": 49000
    },
    {
      "epoch": 4.4,
      "learning_rate": 1.1921000682964253e-05,
      "loss": 0.1113,
      "step": 50000
    },
    {
      "epoch": 4.4,
      "eval_accuracy": 0.910784071234232,
      "eval_combined_score": 0.8965060853118298,
      "eval_f1": 0.8822280993894277,
      "eval_loss": 0.31508392095565796,
      "eval_runtime": 100.8416,
      "eval_samples_per_second": 400.926,
      "eval_steps_per_second": 12.535,
      "step": 50000
    },
    {
      "epoch": 4.49,
      "learning_rate": 1.1733887189274656e-05,
      "loss": 0.1176,
      "step": 51000
    },
    {
      "epoch": 4.49,
      "eval_accuracy": 0.9117487014593124,
      "eval_combined_score": 0.8964195750181319,
      "eval_f1": 0.8810904485769512,
      "eval_loss": 0.3410942554473877,
      "eval_runtime": 101.1296,
      "eval_samples_per_second": 399.784,
      "eval_steps_per_second": 12.499,
      "step": 51000
    },
    {
      "epoch": 4.57,
      "learning_rate": 1.1546773695585058e-05,
      "loss": 0.1245,
      "step": 52000
    },
    {
      "epoch": 4.57,
      "eval_accuracy": 0.9097947069008162,
      "eval_combined_score": 0.8954127669229248,
      "eval_f1": 0.8810308269450335,
      "eval_loss": 0.3030332028865814,
      "eval_runtime": 100.675,
      "eval_samples_per_second": 401.589,
      "eval_steps_per_second": 12.555,
      "step": 52000
    },
    {
      "epoch": 4.66,
      "learning_rate": 1.1359660201895461e-05,
      "loss": 0.1197,
      "step": 53000
    },
    {
      "epoch": 4.66,
      "eval_accuracy": 0.9111056146425921,
      "eval_combined_score": 0.8957604335940053,
      "eval_f1": 0.8804152525454183,
      "eval_loss": 0.3057613670825958,
      "eval_runtime": 102.3073,
      "eval_samples_per_second": 395.182,
      "eval_steps_per_second": 12.355,
      "step": 53000
    },
    {
      "epoch": 4.75,
      "learning_rate": 1.1172546708205863e-05,
      "loss": 0.1226,
      "step": 54000
    },
    {
      "epoch": 4.75,
      "eval_accuracy": 0.9141479099678457,
      "eval_combined_score": 0.8996355965526801,
      "eval_f1": 0.8851232831375145,
      "eval_loss": 0.31216293573379517,
      "eval_runtime": 101.2062,
      "eval_samples_per_second": 399.481,
      "eval_steps_per_second": 12.489,
      "step": 54000
    },
    {
      "epoch": 4.84,
      "learning_rate": 1.0985433214516266e-05,
      "loss": 0.1177,
      "step": 55000
    },
    {
      "epoch": 4.84,
      "eval_accuracy": 0.9131338115260945,
      "eval_combined_score": 0.8982512840839825,
      "eval_f1": 0.8833687566418704,
      "eval_loss": 0.32634615898132324,
      "eval_runtime": 101.5822,
      "eval_samples_per_second": 398.003,
      "eval_steps_per_second": 12.443,
      "step": 55000
    },
    {
      "epoch": 4.92,
      "learning_rate": 1.079831972082667e-05,
      "loss": 0.1172,
      "step": 56000
    },
    {
      "epoch": 4.92,
      "eval_accuracy": 0.912663863467722,
      "eval_combined_score": 0.8966402794364677,
      "eval_f1": 0.8806166954052135,
      "eval_loss": 0.3014249801635742,
      "eval_runtime": 101.6983,
      "eval_samples_per_second": 397.549,
      "eval_steps_per_second": 12.429,
      "step": 56000
    },
    {
      "epoch": 5.01,
      "learning_rate": 1.0611206227137073e-05,
      "loss": 0.1123,
      "step": 57000
    },
    {
      "epoch": 5.01,
      "eval_accuracy": 0.914296314617858,
      "eval_combined_score": 0.8996530522745814,
      "eval_f1": 0.8850097899313046,
      "eval_loss": 0.3819635510444641,
      "eval_runtime": 101.5331,
      "eval_samples_per_second": 398.195,
      "eval_steps_per_second": 12.449,
      "step": 57000
    },
    {
      "epoch": 5.1,
      "learning_rate": 1.0424092733447473e-05,
      "loss": 0.0951,
      "step": 58000
    },
    {
      "epoch": 5.1,
      "eval_accuracy": 0.9105614642592135,
      "eval_combined_score": 0.8972352717469176,
      "eval_f1": 0.8839090792346218,
      "eval_loss": 0.3715880215167999,
      "eval_runtime": 101.3316,
      "eval_samples_per_second": 398.987,
      "eval_steps_per_second": 12.474,
      "step": 58000
    },
    {
      "epoch": 5.19,
      "learning_rate": 1.0236979239757876e-05,
      "loss": 0.0991,
      "step": 59000
    },
    {
      "epoch": 5.19,
      "eval_accuracy": 0.9139995053178332,
      "eval_combined_score": 0.8992518702473571,
      "eval_f1": 0.8845042351768809,
      "eval_loss": 0.37594878673553467,
      "eval_runtime": 101.3841,
      "eval_samples_per_second": 398.781,
      "eval_steps_per_second": 12.467,
      "step": 59000
    },
    {
      "epoch": 5.28,
      "learning_rate": 1.004986574606828e-05,
      "loss": 0.1035,
      "step": 60000
    },
    {
      "epoch": 5.28,
      "eval_accuracy": 0.9140242394261687,
      "eval_combined_score": 0.8998636256975642,
      "eval_f1": 0.8857030119689596,
      "eval_loss": 0.3245335817337036,
      "eval_runtime": 102.513,
      "eval_samples_per_second": 394.389,
      "eval_steps_per_second": 12.33,
      "step": 60000
    },
    {
      "epoch": 5.36,
      "learning_rate": 9.862752252378681e-06,
      "loss": 0.0967,
      "step": 61000
    },
    {
      "epoch": 5.36,
      "eval_accuracy": 0.9130101409844175,
      "eval_combined_score": 0.8977728972181029,
      "eval_f1": 0.8825356534517885,
      "eval_loss": 0.3583969473838806,
      "eval_runtime": 104.7234,
      "eval_samples_per_second": 386.065,
      "eval_steps_per_second": 12.07,
      "step": 61000
    },
    {
      "epoch": 5.45,
      "learning_rate": 9.675638758689083e-06,
      "loss": 0.1001,
      "step": 62000
    },
    {
      "epoch": 5.45,
      "eval_accuracy": 0.9139995053178332,
      "eval_combined_score": 0.8993361485592781,
      "eval_f1": 0.8846727918007231,
      "eval_loss": 0.3541596829891205,
      "eval_runtime": 101.634,
      "eval_samples_per_second": 397.8,
      "eval_steps_per_second": 12.437,
      "step": 62000
    },
    {
      "epoch": 5.54,
      "learning_rate": 9.488525264999486e-06,
      "loss": 0.0999,
      "step": 63000
    },
    {
      "epoch": 5.54,
      "eval_accuracy": 0.9132327479594361,
      "eval_combined_score": 0.8982896913513594,
      "eval_f1": 0.8833466347432828,
      "eval_loss": 0.36085495352745056,
      "eval_runtime": 103.5986,
      "eval_samples_per_second": 390.256,
      "eval_steps_per_second": 12.201,
      "step": 63000
    },
    {
      "epoch": 5.63,
      "learning_rate": 9.301411771309888e-06,
      "loss": 0.0961,
      "step": 64000
    },
    {
      "epoch": 5.63,
      "eval_accuracy": 0.9137026960178085,
      "eval_combined_score": 0.8989772147109578,
      "eval_f1": 0.884251733404107,
      "eval_loss": 0.3343105912208557,
      "eval_runtime": 101.6502,
      "eval_samples_per_second": 397.736,
      "eval_steps_per_second": 12.435,
      "step": 64000
    },
    {
      "epoch": 5.72,
      "learning_rate": 9.114298277620291e-06,
      "loss": 0.0992,
      "step": 65000
    },
    {
      "epoch": 5.72,
      "eval_accuracy": 0.9153846153846154,
      "eval_combined_score": 0.9020065310865193,
      "eval_f1": 0.8886284467884233,
      "eval_loss": 0.34594395756721497,
      "eval_runtime": 102.9906,
      "eval_samples_per_second": 392.56,
      "eval_steps_per_second": 12.273,
      "step": 65000
    },
    {
      "epoch": 5.8,
      "learning_rate": 8.927184783930693e-06,
      "loss": 0.0991,
      "step": 66000
    },
    {
      "epoch": 5.8,
      "eval_accuracy": 0.9145189215928766,
      "eval_combined_score": 0.9006334285510589,
      "eval_f1": 0.886747935509241,
      "eval_loss": 0.31652504205703735,
      "eval_runtime": 103.8354,
      "eval_samples_per_second": 389.366,
      "eval_steps_per_second": 12.173,
      "step": 66000
    },
    {
      "epoch": 5.89,
      "learning_rate": 8.740071290241096e-06,
      "loss": 0.1012,
      "step": 67000
    },
    {
      "epoch": 5.89,
      "eval_accuracy": 0.9160277022013357,
      "eval_combined_score": 0.9019111310689396,
      "eval_f1": 0.8877945599365435,
      "eval_loss": 0.3151375651359558,
      "eval_runtime": 100.9327,
      "eval_samples_per_second": 400.564,
      "eval_steps_per_second": 12.523,
      "step": 67000
    }
  ],
  "max_steps": 113710,
  "num_train_epochs": 10,
  "total_flos": 1.410189745636608e+17,
  "trial_name": null,
  "trial_params": null
}