{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.666666666666668, "global_step": 1300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.28, "learning_rate": 2.941571524513096e-05, "loss": 0.682, "step": 100 }, { "epoch": 1.28, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.6075819134712219, "eval_runtime": 3.25, "eval_samples_per_second": 85.23, "eval_steps_per_second": 2.769, "step": 100 }, { "epoch": 2.56, "learning_rate": 2.7400940228341167e-05, "loss": 0.5556, "step": 200 }, { "epoch": 2.56, "eval_accuracy": 0.6967509025270758, "eval_loss": 0.5837834477424622, "eval_runtime": 1.0394, "eval_samples_per_second": 266.512, "eval_steps_per_second": 8.659, "step": 200 }, { "epoch": 3.85, "learning_rate": 2.538616521155138e-05, "loss": 0.3956, "step": 300 }, { "epoch": 3.85, "eval_accuracy": 0.7653429602888087, "eval_loss": 0.5577418208122253, "eval_runtime": 1.0269, "eval_samples_per_second": 269.751, "eval_steps_per_second": 8.764, "step": 300 }, { "epoch": 5.13, "learning_rate": 2.3371390194761586e-05, "loss": 0.2705, "step": 400 }, { "epoch": 5.13, "eval_accuracy": 0.7472924187725631, "eval_loss": 0.7243108153343201, "eval_runtime": 1.0048, "eval_samples_per_second": 275.666, "eval_steps_per_second": 8.957, "step": 400 }, { "epoch": 6.41, "learning_rate": 2.1356615177971793e-05, "loss": 0.1431, "step": 500 }, { "epoch": 6.41, "eval_accuracy": 0.7581227436823105, "eval_loss": 1.279666781425476, "eval_runtime": 1.01, "eval_samples_per_second": 274.262, "eval_steps_per_second": 8.911, "step": 500 }, { "epoch": 7.69, "learning_rate": 1.9341840161182e-05, "loss": 0.1032, "step": 600 }, { "epoch": 7.69, "eval_accuracy": 0.7617328519855595, "eval_loss": 1.1039319038391113, "eval_runtime": 1.0386, "eval_samples_per_second": 266.696, "eval_steps_per_second": 8.665, "step": 600 }, { "epoch": 8.97, "learning_rate": 1.7327065144392212e-05, "loss": 0.0787, "step": 700 }, { "epoch": 8.97, "eval_accuracy": 0.7653429602888087, "eval_loss": 1.4732542037963867, "eval_runtime": 1.4811, "eval_samples_per_second": 187.028, "eval_steps_per_second": 6.077, "step": 700 }, { "epoch": 10.26, "learning_rate": 1.531229012760242e-05, "loss": 0.0543, "step": 800 }, { "epoch": 10.26, "eval_accuracy": 0.776173285198556, "eval_loss": 1.4965362548828125, "eval_runtime": 0.9908, "eval_samples_per_second": 279.574, "eval_steps_per_second": 9.084, "step": 800 }, { "epoch": 11.54, "learning_rate": 1.3297515110812627e-05, "loss": 0.0411, "step": 900 }, { "epoch": 11.54, "eval_accuracy": 0.7725631768953068, "eval_loss": 1.4953675270080566, "eval_runtime": 1.0282, "eval_samples_per_second": 269.415, "eval_steps_per_second": 8.754, "step": 900 }, { "epoch": 12.82, "learning_rate": 1.1282740094022835e-05, "loss": 0.0335, "step": 1000 }, { "epoch": 12.82, "eval_accuracy": 0.7689530685920578, "eval_loss": 1.7958177328109741, "eval_runtime": 1.0152, "eval_samples_per_second": 272.858, "eval_steps_per_second": 8.865, "step": 1000 }, { "epoch": 14.1, "learning_rate": 9.267965077233043e-06, "loss": 0.0236, "step": 1100 }, { "epoch": 14.1, "eval_accuracy": 0.779783393501805, "eval_loss": 1.6150606870651245, "eval_runtime": 2.5651, "eval_samples_per_second": 107.987, "eval_steps_per_second": 3.509, "step": 1100 }, { "epoch": 15.38, "learning_rate": 7.253190060443251e-06, "loss": 0.0132, "step": 1200 }, { "epoch": 15.38, "eval_accuracy": 0.776173285198556, "eval_loss": 2.066054582595825, "eval_runtime": 1.0235, "eval_samples_per_second": 270.642, "eval_steps_per_second": 8.793, "step": 1200 }, { "epoch": 16.67, "learning_rate": 5.238415043653459e-06, "loss": 0.0233, "step": 1300 }, { "epoch": 16.67, "eval_accuracy": 0.7906137184115524, "eval_loss": 1.6857606172561646, "eval_runtime": 2.6037, "eval_samples_per_second": 106.386, "eval_steps_per_second": 3.457, "step": 1300 } ], "max_steps": 1560, "num_train_epochs": 20, "total_flos": 3182186931732480.0, "trial_name": null, "trial_params": null }