{ "best_metric": 0.9099219196343554, "best_model_checkpoint": "./output//roberta-large_ipc1_D_5_32_5e-6_0.01_0.06_07-08-22_05-38/checkpoint-54000", "epoch": 3.8676404571981076, "global_step": 56000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 2.3020257826887664e-06, "loss": 0.2633, "step": 2000 }, { "epoch": 0.14, "eval_accuracy": 0.782186065915728, "eval_f1": 0.8434414112545581, "eval_loss": 0.12234517931938171, "eval_roc_auc": 0.8943263793686275, "eval_runtime": 707.0346, "eval_samples_per_second": 33.902, "eval_steps_per_second": 2.12, "step": 2000 }, { "epoch": 0.28, "learning_rate": 4.604051565377533e-06, "loss": 0.1135, "step": 4000 }, { "epoch": 0.28, "eval_accuracy": 0.81543596161869, "eval_f1": 0.8686181075561606, "eval_loss": 0.09953225404024124, "eval_roc_auc": 0.9126753468236911, "eval_runtime": 707.1318, "eval_samples_per_second": 33.897, "eval_steps_per_second": 2.12, "step": 4000 }, { "epoch": 0.41, "learning_rate": 4.878326549205743e-06, "loss": 0.0975, "step": 6000 }, { "epoch": 0.41, "eval_accuracy": 0.827784730913642, "eval_f1": 0.8807810494379399, "eval_loss": 0.08744705468416214, "eval_roc_auc": 0.9195269073883311, "eval_runtime": 707.2934, "eval_samples_per_second": 33.89, "eval_steps_per_second": 2.119, "step": 6000 }, { "epoch": 0.55, "learning_rate": 4.731377937135384e-06, "loss": 0.0897, "step": 8000 }, { "epoch": 0.55, "eval_accuracy": 0.8280350438047559, "eval_f1": 0.883428263546798, "eval_loss": 0.08566972613334656, "eval_roc_auc": 0.9246573113358266, "eval_runtime": 707.7071, "eval_samples_per_second": 33.87, "eval_steps_per_second": 2.118, "step": 8000 }, { "epoch": 0.69, "learning_rate": 4.584429325065025e-06, "loss": 0.0845, "step": 10000 }, { "epoch": 0.69, "eval_accuracy": 0.8415936587400917, "eval_f1": 0.8920891085286283, "eval_loss": 0.07975932210683823, "eval_roc_auc": 0.928458970107628, "eval_runtime": 707.3951, "eval_samples_per_second": 33.885, "eval_steps_per_second": 2.119, "step": 10000 }, { "epoch": 0.83, "learning_rate": 4.437480712994666e-06, "loss": 0.0818, "step": 12000 }, { "epoch": 0.83, "eval_accuracy": 0.8427200667501042, "eval_f1": 0.8958713920350749, "eval_loss": 0.0771109014749527, "eval_roc_auc": 0.9319582466091877, "eval_runtime": 707.8327, "eval_samples_per_second": 33.864, "eval_steps_per_second": 2.118, "step": 12000 }, { "epoch": 0.97, "learning_rate": 4.2905321009243076e-06, "loss": 0.0799, "step": 14000 }, { "epoch": 0.97, "eval_accuracy": 0.8450146015853149, "eval_f1": 0.8957159726658537, "eval_loss": 0.07565668225288391, "eval_roc_auc": 0.9295314687703254, "eval_runtime": 707.6287, "eval_samples_per_second": 33.874, "eval_steps_per_second": 2.118, "step": 14000 }, { "epoch": 1.11, "learning_rate": 4.143583488853948e-06, "loss": 0.0732, "step": 16000 }, { "epoch": 1.11, "eval_accuracy": 0.8493950771798081, "eval_f1": 0.8984909514907395, "eval_loss": 0.07519076764583588, "eval_roc_auc": 0.9326589314853615, "eval_runtime": 707.9951, "eval_samples_per_second": 33.856, "eval_steps_per_second": 2.117, "step": 16000 }, { "epoch": 1.24, "learning_rate": 3.996634876783589e-06, "loss": 0.0726, "step": 18000 }, { "epoch": 1.24, "eval_accuracy": 0.8492699207342511, "eval_f1": 0.8975535762903036, "eval_loss": 0.07450989633798599, "eval_roc_auc": 0.9289754929708756, "eval_runtime": 708.1958, "eval_samples_per_second": 33.847, "eval_steps_per_second": 2.117, "step": 18000 }, { "epoch": 1.38, "learning_rate": 3.84968626471323e-06, "loss": 0.0716, "step": 20000 }, { "epoch": 1.38, "eval_accuracy": 0.8525657071339174, "eval_f1": 0.9005084354055927, "eval_loss": 0.07325474917888641, "eval_roc_auc": 0.9340545363189772, "eval_runtime": 707.8752, "eval_samples_per_second": 33.862, "eval_steps_per_second": 2.118, "step": 20000 }, { "epoch": 1.52, "learning_rate": 3.702737652642871e-06, "loss": 0.0708, "step": 22000 }, { "epoch": 1.52, "eval_accuracy": 0.8506466416353775, "eval_f1": 0.9017078523041694, "eval_loss": 0.0716293528676033, "eval_roc_auc": 0.9380172851232347, "eval_runtime": 707.8218, "eval_samples_per_second": 33.864, "eval_steps_per_second": 2.118, "step": 22000 }, { "epoch": 1.66, "learning_rate": 3.555789040572512e-06, "loss": 0.0695, "step": 24000 }, { "epoch": 1.66, "eval_accuracy": 0.8547350855235711, "eval_f1": 0.9044931999464413, "eval_loss": 0.07056614011526108, "eval_roc_auc": 0.9387482370760931, "eval_runtime": 708.0549, "eval_samples_per_second": 33.853, "eval_steps_per_second": 2.117, "step": 24000 }, { "epoch": 1.8, "learning_rate": 3.4088404285021533e-06, "loss": 0.069, "step": 26000 }, { "epoch": 1.8, "eval_accuracy": 0.8569044639132248, "eval_f1": 0.9042787144565763, "eval_loss": 0.07104314118623734, "eval_roc_auc": 0.9356039344541693, "eval_runtime": 707.8507, "eval_samples_per_second": 33.863, "eval_steps_per_second": 2.118, "step": 26000 }, { "epoch": 1.93, "learning_rate": 3.261891816431794e-06, "loss": 0.0682, "step": 28000 }, { "epoch": 1.93, "eval_accuracy": 0.8599499374217772, "eval_f1": 0.9060724534406717, "eval_loss": 0.06967160105705261, "eval_roc_auc": 0.9372038441063302, "eval_runtime": 708.2055, "eval_samples_per_second": 33.846, "eval_steps_per_second": 2.117, "step": 28000 }, { "epoch": 2.07, "learning_rate": 3.1149432043614354e-06, "loss": 0.0639, "step": 30000 }, { "epoch": 2.07, "eval_accuracy": 0.854276178556529, "eval_f1": 0.9025946730864763, "eval_loss": 0.07231773436069489, "eval_roc_auc": 0.9380671303178707, "eval_runtime": 708.1848, "eval_samples_per_second": 33.847, "eval_steps_per_second": 2.117, "step": 30000 }, { "epoch": 2.21, "learning_rate": 2.9679945922910757e-06, "loss": 0.0595, "step": 32000 }, { "epoch": 2.21, "eval_accuracy": 0.8584897788902796, "eval_f1": 0.9064987752602572, "eval_loss": 0.07001405209302902, "eval_roc_auc": 0.9397355166673759, "eval_runtime": 708.0437, "eval_samples_per_second": 33.854, "eval_steps_per_second": 2.117, "step": 32000 }, { "epoch": 2.35, "learning_rate": 2.821045980220717e-06, "loss": 0.0609, "step": 34000 }, { "epoch": 2.35, "eval_accuracy": 0.8602002503128912, "eval_f1": 0.9077644010972264, "eval_loss": 0.06918834149837494, "eval_roc_auc": 0.9421116895582996, "eval_runtime": 707.9146, "eval_samples_per_second": 33.86, "eval_steps_per_second": 2.117, "step": 34000 }, { "epoch": 2.49, "learning_rate": 2.674097368150358e-06, "loss": 0.0612, "step": 36000 }, { "epoch": 2.49, "eval_accuracy": 0.8603671255736337, "eval_f1": 0.9078671395339969, "eval_loss": 0.06887050718069077, "eval_roc_auc": 0.9418260414584333, "eval_runtime": 708.0484, "eval_samples_per_second": 33.854, "eval_steps_per_second": 2.117, "step": 36000 }, { "epoch": 2.62, "learning_rate": 2.527148756079999e-06, "loss": 0.0607, "step": 38000 }, { "epoch": 2.62, "eval_accuracy": 0.8638715060492282, "eval_f1": 0.9094039329711531, "eval_loss": 0.0678349956870079, "eval_roc_auc": 0.941532162317987, "eval_runtime": 707.9126, "eval_samples_per_second": 33.86, "eval_steps_per_second": 2.117, "step": 38000 }, { "epoch": 2.76, "learning_rate": 2.38020014400964e-06, "loss": 0.0598, "step": 40000 }, { "epoch": 2.76, "eval_accuracy": 0.8620775969962453, "eval_f1": 0.9084244866628287, "eval_loss": 0.06894223392009735, "eval_roc_auc": 0.9398278292003405, "eval_runtime": 708.1682, "eval_samples_per_second": 33.848, "eval_steps_per_second": 2.117, "step": 40000 }, { "epoch": 2.9, "learning_rate": 2.233251531939281e-06, "loss": 0.0589, "step": 42000 }, { "epoch": 2.9, "eval_accuracy": 0.8625782227784731, "eval_f1": 0.9087424782492046, "eval_loss": 0.06813088804483414, "eval_roc_auc": 0.9405057223889542, "eval_runtime": 708.1038, "eval_samples_per_second": 33.851, "eval_steps_per_second": 2.117, "step": 42000 }, { "epoch": 3.04, "learning_rate": 2.086302919868922e-06, "loss": 0.0584, "step": 44000 }, { "epoch": 3.04, "eval_accuracy": 0.863537755527743, "eval_f1": 0.9081979164668188, "eval_loss": 0.07025768607854843, "eval_roc_auc": 0.9397884820006503, "eval_runtime": 707.4211, "eval_samples_per_second": 33.884, "eval_steps_per_second": 2.119, "step": 44000 }, { "epoch": 3.18, "learning_rate": 1.939354307798563e-06, "loss": 0.0521, "step": 46000 }, { "epoch": 3.18, "eval_accuracy": 0.8621610346266166, "eval_f1": 0.9091812742530537, "eval_loss": 0.0701606273651123, "eval_roc_auc": 0.941660515050063, "eval_runtime": 707.6705, "eval_samples_per_second": 33.872, "eval_steps_per_second": 2.118, "step": 46000 }, { "epoch": 3.32, "learning_rate": 1.792405695728204e-06, "loss": 0.0528, "step": 48000 }, { "epoch": 3.32, "eval_accuracy": 0.8632874426366292, "eval_f1": 0.9089131684876366, "eval_loss": 0.06941932439804077, "eval_roc_auc": 0.9405201937068114, "eval_runtime": 707.419, "eval_samples_per_second": 33.884, "eval_steps_per_second": 2.119, "step": 48000 }, { "epoch": 3.45, "learning_rate": 1.6454570836578452e-06, "loss": 0.0527, "step": 50000 }, { "epoch": 3.45, "eval_accuracy": 0.8617855652899458, "eval_f1": 0.9090562440419447, "eval_loss": 0.069430410861969, "eval_roc_auc": 0.9425349573751873, "eval_runtime": 707.7016, "eval_samples_per_second": 33.87, "eval_steps_per_second": 2.118, "step": 50000 }, { "epoch": 3.59, "learning_rate": 1.498508471587486e-06, "loss": 0.053, "step": 52000 }, { "epoch": 3.59, "eval_accuracy": 0.8624530663329162, "eval_f1": 0.9087446678854357, "eval_loss": 0.06918352842330933, "eval_roc_auc": 0.9427842849830317, "eval_runtime": 707.4215, "eval_samples_per_second": 33.884, "eval_steps_per_second": 2.119, "step": 52000 }, { "epoch": 3.73, "learning_rate": 1.351559859517127e-06, "loss": 0.0522, "step": 54000 }, { "epoch": 3.73, "eval_accuracy": 0.8645390070921986, "eval_f1": 0.9099219196343554, "eval_loss": 0.0693092793226242, "eval_roc_auc": 0.943445813623303, "eval_runtime": 717.1057, "eval_samples_per_second": 33.426, "eval_steps_per_second": 2.09, "step": 54000 }, { "epoch": 3.87, "learning_rate": 1.2046112474467679e-06, "loss": 0.0521, "step": 56000 }, { "epoch": 3.87, "eval_accuracy": 0.8630788485607008, "eval_f1": 0.9097205045758101, "eval_loss": 0.06953160464763641, "eval_roc_auc": 0.9436689409083443, "eval_runtime": 707.5176, "eval_samples_per_second": 33.879, "eval_steps_per_second": 2.119, "step": 56000 } ], "max_steps": 72395, "num_train_epochs": 5, "total_flos": 1.6700995925936701e+18, "trial_name": null, "trial_params": null }