roberta-base-rte / trainer_state.json
ZhangYunchenY
[Model] roberta-base-rte
4780863
{
"best_metric": 0.8050541516245487,
"best_model_checkpoint": "./fp32_1e_5/models/rte-roberta-base/checkpoint-1000",
"epoch": 12.820512820512821,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.64,
"learning_rate": 5.319148936170213e-06,
"loss": 0.6932,
"step": 50
},
{
"epoch": 0.64,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.6963141560554504,
"eval_runtime": 2.0901,
"eval_samples_per_second": 132.533,
"eval_steps_per_second": 4.306,
"step": 50
},
{
"epoch": 1.28,
"learning_rate": 9.959072305593452e-06,
"loss": 0.6957,
"step": 100
},
{
"epoch": 1.28,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.69448322057724,
"eval_runtime": 0.7491,
"eval_samples_per_second": 369.778,
"eval_steps_per_second": 12.014,
"step": 100
},
{
"epoch": 1.92,
"learning_rate": 9.618008185538881e-06,
"loss": 0.6869,
"step": 150
},
{
"epoch": 1.92,
"eval_accuracy": 0.6245487364620939,
"eval_loss": 0.6610168218612671,
"eval_runtime": 0.7857,
"eval_samples_per_second": 352.54,
"eval_steps_per_second": 11.454,
"step": 150
},
{
"epoch": 2.56,
"learning_rate": 9.276944065484312e-06,
"loss": 0.6292,
"step": 200
},
{
"epoch": 2.56,
"eval_accuracy": 0.6570397111913358,
"eval_loss": 0.6132365465164185,
"eval_runtime": 0.7843,
"eval_samples_per_second": 353.202,
"eval_steps_per_second": 11.476,
"step": 200
},
{
"epoch": 3.21,
"learning_rate": 8.935879945429742e-06,
"loss": 0.5473,
"step": 250
},
{
"epoch": 3.21,
"eval_accuracy": 0.7075812274368231,
"eval_loss": 0.6135809421539307,
"eval_runtime": 0.8034,
"eval_samples_per_second": 344.792,
"eval_steps_per_second": 11.203,
"step": 250
},
{
"epoch": 3.85,
"learning_rate": 8.59481582537517e-06,
"loss": 0.4867,
"step": 300
},
{
"epoch": 3.85,
"eval_accuracy": 0.7256317689530686,
"eval_loss": 0.5372898578643799,
"eval_runtime": 0.8424,
"eval_samples_per_second": 328.815,
"eval_steps_per_second": 10.684,
"step": 300
},
{
"epoch": 4.49,
"learning_rate": 8.253751705320601e-06,
"loss": 0.3673,
"step": 350
},
{
"epoch": 4.49,
"eval_accuracy": 0.7292418772563177,
"eval_loss": 0.6152286529541016,
"eval_runtime": 0.7525,
"eval_samples_per_second": 368.117,
"eval_steps_per_second": 11.96,
"step": 350
},
{
"epoch": 5.13,
"learning_rate": 7.912687585266032e-06,
"loss": 0.3246,
"step": 400
},
{
"epoch": 5.13,
"eval_accuracy": 0.7545126353790613,
"eval_loss": 0.6094390153884888,
"eval_runtime": 0.7684,
"eval_samples_per_second": 360.479,
"eval_steps_per_second": 11.712,
"step": 400
},
{
"epoch": 5.77,
"learning_rate": 7.5716234652114605e-06,
"loss": 0.2585,
"step": 450
},
{
"epoch": 5.77,
"eval_accuracy": 0.7725631768953068,
"eval_loss": 0.6251689791679382,
"eval_runtime": 0.7559,
"eval_samples_per_second": 366.46,
"eval_steps_per_second": 11.907,
"step": 450
},
{
"epoch": 6.41,
"learning_rate": 7.23055934515689e-06,
"loss": 0.1975,
"step": 500
},
{
"epoch": 6.41,
"eval_accuracy": 0.7436823104693141,
"eval_loss": 0.7674959301948547,
"eval_runtime": 0.766,
"eval_samples_per_second": 361.606,
"eval_steps_per_second": 11.749,
"step": 500
},
{
"epoch": 7.05,
"learning_rate": 6.88949522510232e-06,
"loss": 0.19,
"step": 550
},
{
"epoch": 7.05,
"eval_accuracy": 0.7617328519855595,
"eval_loss": 0.7783448696136475,
"eval_runtime": 0.7892,
"eval_samples_per_second": 350.981,
"eval_steps_per_second": 11.404,
"step": 550
},
{
"epoch": 7.69,
"learning_rate": 6.54843110504775e-06,
"loss": 0.1266,
"step": 600
},
{
"epoch": 7.69,
"eval_accuracy": 0.7545126353790613,
"eval_loss": 1.0359293222427368,
"eval_runtime": 0.7751,
"eval_samples_per_second": 357.395,
"eval_steps_per_second": 11.612,
"step": 600
},
{
"epoch": 8.33,
"learning_rate": 6.20736698499318e-06,
"loss": 0.1222,
"step": 650
},
{
"epoch": 8.33,
"eval_accuracy": 0.7833935018050542,
"eval_loss": 0.8435311913490295,
"eval_runtime": 1.3166,
"eval_samples_per_second": 210.387,
"eval_steps_per_second": 6.836,
"step": 650
},
{
"epoch": 8.97,
"learning_rate": 5.8663028649386085e-06,
"loss": 0.1146,
"step": 700
},
{
"epoch": 8.97,
"eval_accuracy": 0.7833935018050542,
"eval_loss": 0.941527247428894,
"eval_runtime": 0.749,
"eval_samples_per_second": 369.818,
"eval_steps_per_second": 12.016,
"step": 700
},
{
"epoch": 9.62,
"learning_rate": 5.525238744884038e-06,
"loss": 0.1131,
"step": 750
},
{
"epoch": 9.62,
"eval_accuracy": 0.7833935018050542,
"eval_loss": 0.9660640358924866,
"eval_runtime": 0.7502,
"eval_samples_per_second": 369.243,
"eval_steps_per_second": 11.997,
"step": 750
},
{
"epoch": 10.26,
"learning_rate": 5.1841746248294686e-06,
"loss": 0.0719,
"step": 800
},
{
"epoch": 10.26,
"eval_accuracy": 0.7906137184115524,
"eval_loss": 1.0486385822296143,
"eval_runtime": 0.7951,
"eval_samples_per_second": 348.395,
"eval_steps_per_second": 11.32,
"step": 800
},
{
"epoch": 10.9,
"learning_rate": 4.843110504774898e-06,
"loss": 0.1002,
"step": 850
},
{
"epoch": 10.9,
"eval_accuracy": 0.776173285198556,
"eval_loss": 1.1470500230789185,
"eval_runtime": 0.7467,
"eval_samples_per_second": 370.982,
"eval_steps_per_second": 12.054,
"step": 850
},
{
"epoch": 11.54,
"learning_rate": 4.502046384720328e-06,
"loss": 0.0515,
"step": 900
},
{
"epoch": 11.54,
"eval_accuracy": 0.7906137184115524,
"eval_loss": 1.0903133153915405,
"eval_runtime": 0.7707,
"eval_samples_per_second": 359.412,
"eval_steps_per_second": 11.678,
"step": 900
},
{
"epoch": 12.18,
"learning_rate": 4.160982264665757e-06,
"loss": 0.0732,
"step": 950
},
{
"epoch": 12.18,
"eval_accuracy": 0.7978339350180506,
"eval_loss": 1.0761293172836304,
"eval_runtime": 0.8013,
"eval_samples_per_second": 345.69,
"eval_steps_per_second": 11.232,
"step": 950
},
{
"epoch": 12.82,
"learning_rate": 3.819918144611187e-06,
"loss": 0.0644,
"step": 1000
},
{
"epoch": 12.82,
"eval_accuracy": 0.8050541516245487,
"eval_loss": 1.118359088897705,
"eval_runtime": 0.7549,
"eval_samples_per_second": 366.937,
"eval_steps_per_second": 11.922,
"step": 1000
}
],
"max_steps": 1560,
"num_train_epochs": 20,
"total_flos": 2100152443883520.0,
"trial_name": null,
"trial_params": null
}