ipc_level1_D / trainer_state.json
mapama247's picture
upload ipc_level1_D model
bf389b6
{
"best_metric": 0.9099219196343554,
"best_model_checkpoint": "./output//roberta-large_ipc1_D_5_32_5e-6_0.01_0.06_07-08-22_05-38/checkpoint-54000",
"epoch": 3.8676404571981076,
"global_step": 56000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"learning_rate": 2.3020257826887664e-06,
"loss": 0.2633,
"step": 2000
},
{
"epoch": 0.14,
"eval_accuracy": 0.782186065915728,
"eval_f1": 0.8434414112545581,
"eval_loss": 0.12234517931938171,
"eval_roc_auc": 0.8943263793686275,
"eval_runtime": 707.0346,
"eval_samples_per_second": 33.902,
"eval_steps_per_second": 2.12,
"step": 2000
},
{
"epoch": 0.28,
"learning_rate": 4.604051565377533e-06,
"loss": 0.1135,
"step": 4000
},
{
"epoch": 0.28,
"eval_accuracy": 0.81543596161869,
"eval_f1": 0.8686181075561606,
"eval_loss": 0.09953225404024124,
"eval_roc_auc": 0.9126753468236911,
"eval_runtime": 707.1318,
"eval_samples_per_second": 33.897,
"eval_steps_per_second": 2.12,
"step": 4000
},
{
"epoch": 0.41,
"learning_rate": 4.878326549205743e-06,
"loss": 0.0975,
"step": 6000
},
{
"epoch": 0.41,
"eval_accuracy": 0.827784730913642,
"eval_f1": 0.8807810494379399,
"eval_loss": 0.08744705468416214,
"eval_roc_auc": 0.9195269073883311,
"eval_runtime": 707.2934,
"eval_samples_per_second": 33.89,
"eval_steps_per_second": 2.119,
"step": 6000
},
{
"epoch": 0.55,
"learning_rate": 4.731377937135384e-06,
"loss": 0.0897,
"step": 8000
},
{
"epoch": 0.55,
"eval_accuracy": 0.8280350438047559,
"eval_f1": 0.883428263546798,
"eval_loss": 0.08566972613334656,
"eval_roc_auc": 0.9246573113358266,
"eval_runtime": 707.7071,
"eval_samples_per_second": 33.87,
"eval_steps_per_second": 2.118,
"step": 8000
},
{
"epoch": 0.69,
"learning_rate": 4.584429325065025e-06,
"loss": 0.0845,
"step": 10000
},
{
"epoch": 0.69,
"eval_accuracy": 0.8415936587400917,
"eval_f1": 0.8920891085286283,
"eval_loss": 0.07975932210683823,
"eval_roc_auc": 0.928458970107628,
"eval_runtime": 707.3951,
"eval_samples_per_second": 33.885,
"eval_steps_per_second": 2.119,
"step": 10000
},
{
"epoch": 0.83,
"learning_rate": 4.437480712994666e-06,
"loss": 0.0818,
"step": 12000
},
{
"epoch": 0.83,
"eval_accuracy": 0.8427200667501042,
"eval_f1": 0.8958713920350749,
"eval_loss": 0.0771109014749527,
"eval_roc_auc": 0.9319582466091877,
"eval_runtime": 707.8327,
"eval_samples_per_second": 33.864,
"eval_steps_per_second": 2.118,
"step": 12000
},
{
"epoch": 0.97,
"learning_rate": 4.2905321009243076e-06,
"loss": 0.0799,
"step": 14000
},
{
"epoch": 0.97,
"eval_accuracy": 0.8450146015853149,
"eval_f1": 0.8957159726658537,
"eval_loss": 0.07565668225288391,
"eval_roc_auc": 0.9295314687703254,
"eval_runtime": 707.6287,
"eval_samples_per_second": 33.874,
"eval_steps_per_second": 2.118,
"step": 14000
},
{
"epoch": 1.11,
"learning_rate": 4.143583488853948e-06,
"loss": 0.0732,
"step": 16000
},
{
"epoch": 1.11,
"eval_accuracy": 0.8493950771798081,
"eval_f1": 0.8984909514907395,
"eval_loss": 0.07519076764583588,
"eval_roc_auc": 0.9326589314853615,
"eval_runtime": 707.9951,
"eval_samples_per_second": 33.856,
"eval_steps_per_second": 2.117,
"step": 16000
},
{
"epoch": 1.24,
"learning_rate": 3.996634876783589e-06,
"loss": 0.0726,
"step": 18000
},
{
"epoch": 1.24,
"eval_accuracy": 0.8492699207342511,
"eval_f1": 0.8975535762903036,
"eval_loss": 0.07450989633798599,
"eval_roc_auc": 0.9289754929708756,
"eval_runtime": 708.1958,
"eval_samples_per_second": 33.847,
"eval_steps_per_second": 2.117,
"step": 18000
},
{
"epoch": 1.38,
"learning_rate": 3.84968626471323e-06,
"loss": 0.0716,
"step": 20000
},
{
"epoch": 1.38,
"eval_accuracy": 0.8525657071339174,
"eval_f1": 0.9005084354055927,
"eval_loss": 0.07325474917888641,
"eval_roc_auc": 0.9340545363189772,
"eval_runtime": 707.8752,
"eval_samples_per_second": 33.862,
"eval_steps_per_second": 2.118,
"step": 20000
},
{
"epoch": 1.52,
"learning_rate": 3.702737652642871e-06,
"loss": 0.0708,
"step": 22000
},
{
"epoch": 1.52,
"eval_accuracy": 0.8506466416353775,
"eval_f1": 0.9017078523041694,
"eval_loss": 0.0716293528676033,
"eval_roc_auc": 0.9380172851232347,
"eval_runtime": 707.8218,
"eval_samples_per_second": 33.864,
"eval_steps_per_second": 2.118,
"step": 22000
},
{
"epoch": 1.66,
"learning_rate": 3.555789040572512e-06,
"loss": 0.0695,
"step": 24000
},
{
"epoch": 1.66,
"eval_accuracy": 0.8547350855235711,
"eval_f1": 0.9044931999464413,
"eval_loss": 0.07056614011526108,
"eval_roc_auc": 0.9387482370760931,
"eval_runtime": 708.0549,
"eval_samples_per_second": 33.853,
"eval_steps_per_second": 2.117,
"step": 24000
},
{
"epoch": 1.8,
"learning_rate": 3.4088404285021533e-06,
"loss": 0.069,
"step": 26000
},
{
"epoch": 1.8,
"eval_accuracy": 0.8569044639132248,
"eval_f1": 0.9042787144565763,
"eval_loss": 0.07104314118623734,
"eval_roc_auc": 0.9356039344541693,
"eval_runtime": 707.8507,
"eval_samples_per_second": 33.863,
"eval_steps_per_second": 2.118,
"step": 26000
},
{
"epoch": 1.93,
"learning_rate": 3.261891816431794e-06,
"loss": 0.0682,
"step": 28000
},
{
"epoch": 1.93,
"eval_accuracy": 0.8599499374217772,
"eval_f1": 0.9060724534406717,
"eval_loss": 0.06967160105705261,
"eval_roc_auc": 0.9372038441063302,
"eval_runtime": 708.2055,
"eval_samples_per_second": 33.846,
"eval_steps_per_second": 2.117,
"step": 28000
},
{
"epoch": 2.07,
"learning_rate": 3.1149432043614354e-06,
"loss": 0.0639,
"step": 30000
},
{
"epoch": 2.07,
"eval_accuracy": 0.854276178556529,
"eval_f1": 0.9025946730864763,
"eval_loss": 0.07231773436069489,
"eval_roc_auc": 0.9380671303178707,
"eval_runtime": 708.1848,
"eval_samples_per_second": 33.847,
"eval_steps_per_second": 2.117,
"step": 30000
},
{
"epoch": 2.21,
"learning_rate": 2.9679945922910757e-06,
"loss": 0.0595,
"step": 32000
},
{
"epoch": 2.21,
"eval_accuracy": 0.8584897788902796,
"eval_f1": 0.9064987752602572,
"eval_loss": 0.07001405209302902,
"eval_roc_auc": 0.9397355166673759,
"eval_runtime": 708.0437,
"eval_samples_per_second": 33.854,
"eval_steps_per_second": 2.117,
"step": 32000
},
{
"epoch": 2.35,
"learning_rate": 2.821045980220717e-06,
"loss": 0.0609,
"step": 34000
},
{
"epoch": 2.35,
"eval_accuracy": 0.8602002503128912,
"eval_f1": 0.9077644010972264,
"eval_loss": 0.06918834149837494,
"eval_roc_auc": 0.9421116895582996,
"eval_runtime": 707.9146,
"eval_samples_per_second": 33.86,
"eval_steps_per_second": 2.117,
"step": 34000
},
{
"epoch": 2.49,
"learning_rate": 2.674097368150358e-06,
"loss": 0.0612,
"step": 36000
},
{
"epoch": 2.49,
"eval_accuracy": 0.8603671255736337,
"eval_f1": 0.9078671395339969,
"eval_loss": 0.06887050718069077,
"eval_roc_auc": 0.9418260414584333,
"eval_runtime": 708.0484,
"eval_samples_per_second": 33.854,
"eval_steps_per_second": 2.117,
"step": 36000
},
{
"epoch": 2.62,
"learning_rate": 2.527148756079999e-06,
"loss": 0.0607,
"step": 38000
},
{
"epoch": 2.62,
"eval_accuracy": 0.8638715060492282,
"eval_f1": 0.9094039329711531,
"eval_loss": 0.0678349956870079,
"eval_roc_auc": 0.941532162317987,
"eval_runtime": 707.9126,
"eval_samples_per_second": 33.86,
"eval_steps_per_second": 2.117,
"step": 38000
},
{
"epoch": 2.76,
"learning_rate": 2.38020014400964e-06,
"loss": 0.0598,
"step": 40000
},
{
"epoch": 2.76,
"eval_accuracy": 0.8620775969962453,
"eval_f1": 0.9084244866628287,
"eval_loss": 0.06894223392009735,
"eval_roc_auc": 0.9398278292003405,
"eval_runtime": 708.1682,
"eval_samples_per_second": 33.848,
"eval_steps_per_second": 2.117,
"step": 40000
},
{
"epoch": 2.9,
"learning_rate": 2.233251531939281e-06,
"loss": 0.0589,
"step": 42000
},
{
"epoch": 2.9,
"eval_accuracy": 0.8625782227784731,
"eval_f1": 0.9087424782492046,
"eval_loss": 0.06813088804483414,
"eval_roc_auc": 0.9405057223889542,
"eval_runtime": 708.1038,
"eval_samples_per_second": 33.851,
"eval_steps_per_second": 2.117,
"step": 42000
},
{
"epoch": 3.04,
"learning_rate": 2.086302919868922e-06,
"loss": 0.0584,
"step": 44000
},
{
"epoch": 3.04,
"eval_accuracy": 0.863537755527743,
"eval_f1": 0.9081979164668188,
"eval_loss": 0.07025768607854843,
"eval_roc_auc": 0.9397884820006503,
"eval_runtime": 707.4211,
"eval_samples_per_second": 33.884,
"eval_steps_per_second": 2.119,
"step": 44000
},
{
"epoch": 3.18,
"learning_rate": 1.939354307798563e-06,
"loss": 0.0521,
"step": 46000
},
{
"epoch": 3.18,
"eval_accuracy": 0.8621610346266166,
"eval_f1": 0.9091812742530537,
"eval_loss": 0.0701606273651123,
"eval_roc_auc": 0.941660515050063,
"eval_runtime": 707.6705,
"eval_samples_per_second": 33.872,
"eval_steps_per_second": 2.118,
"step": 46000
},
{
"epoch": 3.32,
"learning_rate": 1.792405695728204e-06,
"loss": 0.0528,
"step": 48000
},
{
"epoch": 3.32,
"eval_accuracy": 0.8632874426366292,
"eval_f1": 0.9089131684876366,
"eval_loss": 0.06941932439804077,
"eval_roc_auc": 0.9405201937068114,
"eval_runtime": 707.419,
"eval_samples_per_second": 33.884,
"eval_steps_per_second": 2.119,
"step": 48000
},
{
"epoch": 3.45,
"learning_rate": 1.6454570836578452e-06,
"loss": 0.0527,
"step": 50000
},
{
"epoch": 3.45,
"eval_accuracy": 0.8617855652899458,
"eval_f1": 0.9090562440419447,
"eval_loss": 0.069430410861969,
"eval_roc_auc": 0.9425349573751873,
"eval_runtime": 707.7016,
"eval_samples_per_second": 33.87,
"eval_steps_per_second": 2.118,
"step": 50000
},
{
"epoch": 3.59,
"learning_rate": 1.498508471587486e-06,
"loss": 0.053,
"step": 52000
},
{
"epoch": 3.59,
"eval_accuracy": 0.8624530663329162,
"eval_f1": 0.9087446678854357,
"eval_loss": 0.06918352842330933,
"eval_roc_auc": 0.9427842849830317,
"eval_runtime": 707.4215,
"eval_samples_per_second": 33.884,
"eval_steps_per_second": 2.119,
"step": 52000
},
{
"epoch": 3.73,
"learning_rate": 1.351559859517127e-06,
"loss": 0.0522,
"step": 54000
},
{
"epoch": 3.73,
"eval_accuracy": 0.8645390070921986,
"eval_f1": 0.9099219196343554,
"eval_loss": 0.0693092793226242,
"eval_roc_auc": 0.943445813623303,
"eval_runtime": 717.1057,
"eval_samples_per_second": 33.426,
"eval_steps_per_second": 2.09,
"step": 54000
},
{
"epoch": 3.87,
"learning_rate": 1.2046112474467679e-06,
"loss": 0.0521,
"step": 56000
},
{
"epoch": 3.87,
"eval_accuracy": 0.8630788485607008,
"eval_f1": 0.9097205045758101,
"eval_loss": 0.06953160464763641,
"eval_roc_auc": 0.9436689409083443,
"eval_runtime": 707.5176,
"eval_samples_per_second": 33.879,
"eval_steps_per_second": 2.119,
"step": 56000
}
],
"max_steps": 72395,
"num_train_epochs": 5,
"total_flos": 1.6700995925936701e+18,
"trial_name": null,
"trial_params": null
}