| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.034348828741352, | |
| "global_step": 25000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.045307443365696e-06, | |
| "loss": 3.818, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_HasAns_exact": 44.97300944669366, | |
| "eval_HasAns_f1": 52.01439218879365, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 50.647603027754414, | |
| "eval_NoAns_f1": 50.647603027754414, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 50.09685841825992, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 51.885203441071, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 47.81436873578708, | |
| "eval_f1": 51.33001911018023, | |
| "eval_total": 11873, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 8.090614886731393e-06, | |
| "loss": 1.5859, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_HasAns_exact": 69.04520917678812, | |
| "eval_HasAns_f1": 77.70845080310447, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 45.853658536585364, | |
| "eval_NoAns_f1": 45.853658536585364, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 57.48336561947276, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 61.802768280079185, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 57.43283079255453, | |
| "eval_f1": 61.758249503984274, | |
| "eval_total": 11873, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 9.863646928182218e-06, | |
| "loss": 1.3077, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_HasAns_exact": 70.58029689608637, | |
| "eval_HasAns_f1": 78.40109850512528, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 64.8780487804878, | |
| "eval_NoAns_f1": 64.8780487804878, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 67.73351301271794, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 71.63831482678222, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 67.7250905415649, | |
| "eval_f1": 71.62989235562912, | |
| "eval_total": 11873, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 9.605402473981872e-06, | |
| "loss": 1.1543, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_HasAns_exact": 65.24966261808368, | |
| "eval_HasAns_f1": 70.75967293392448, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 85.26492851135409, | |
| "eval_NoAns_f1": 85.26492851135409, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 75.27162469468541, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 78.02268518085604, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 75.27162469468541, | |
| "eval_f1": 78.02268518085617, | |
| "eval_total": 11873, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 9.347158019781525e-06, | |
| "loss": 1.083, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_HasAns_exact": 72.62145748987854, | |
| "eval_HasAns_f1": 78.57932116933335, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 75.47518923465097, | |
| "eval_NoAns_f1": 75.47518923465097, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 74.05036637749515, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 77.02503292274977, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 74.05036637749515, | |
| "eval_f1": 77.02503292274983, | |
| "eval_total": 11873, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 9.08891356558118e-06, | |
| "loss": 1.0232, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_HasAns_exact": 75.87719298245614, | |
| "eval_HasAns_f1": 82.4856147328558, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 75.1892346509672, | |
| "eval_NoAns_f1": 75.1892346509672, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 75.53272130042954, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 78.83220114009663, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 75.53272130042954, | |
| "eval_f1": 78.83220114009681, | |
| "eval_total": 11873, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 8.830669111380835e-06, | |
| "loss": 0.9925, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_HasAns_exact": 76.16396761133603, | |
| "eval_HasAns_f1": 82.92856292587923, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 76.53490328006728, | |
| "eval_NoAns_f1": 76.53490328006728, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.34970100227407, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 79.72715581778914, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.34970100227407, | |
| "eval_f1": 79.72715581778931, | |
| "eval_total": 11873, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.572424657180489e-06, | |
| "loss": 0.9409, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_HasAns_exact": 76.87246963562752, | |
| "eval_HasAns_f1": 83.28420126895146, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 77.32548359966358, | |
| "eval_NoAns_f1": 77.32548359966358, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 77.0993009348943, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 80.3005765284547, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 77.0993009348943, | |
| "eval_f1": 80.30057652845481, | |
| "eval_total": 11873, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 8.314180202980142e-06, | |
| "loss": 0.8459, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_HasAns_exact": 79.47031039136303, | |
| "eval_HasAns_f1": 86.27241463084697, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 70.64760302775441, | |
| "eval_NoAns_f1": 70.64760302775441, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 75.05264044470648, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 78.44882286967555, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 75.05264044470648, | |
| "eval_f1": 78.44882286967575, | |
| "eval_total": 11873, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 8.055935748779795e-06, | |
| "loss": 0.7936, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_HasAns_exact": 73.66734143049932, | |
| "eval_HasAns_f1": 79.21121792689594, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 83.28006728343145, | |
| "eval_NoAns_f1": 83.28006728343145, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 78.48058620399225, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 81.24855553530176, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 78.48058620399225, | |
| "eval_f1": 81.24855553530186, | |
| "eval_total": 11873, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 7.797691294579448e-06, | |
| "loss": 0.801, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_HasAns_exact": 74.07219973009447, | |
| "eval_HasAns_f1": 79.92037997936747, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 84.52481076534903, | |
| "eval_NoAns_f1": 84.52481076534903, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 79.3059883769898, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 82.22589173062302, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 79.3059883769898, | |
| "eval_f1": 82.2258917306232, | |
| "eval_total": 11873, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 7.539446840379103e-06, | |
| "loss": 0.8088, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_HasAns_exact": 76.06275303643724, | |
| "eval_HasAns_f1": 82.31177335930619, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 80.40370058873002, | |
| "eval_NoAns_f1": 80.40370058873002, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 78.2363345405542, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 81.35637096554916, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 78.2363345405542, | |
| "eval_f1": 81.3563709655493, | |
| "eval_total": 11873, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 7.281202386178758e-06, | |
| "loss": 0.8092, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_HasAns_exact": 78.7280701754386, | |
| "eval_HasAns_f1": 85.24409087886156, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 76.31623212783852, | |
| "eval_NoAns_f1": 76.31623212783852, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 77.52042449254611, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 80.77376995956291, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 77.52042449254611, | |
| "eval_f1": 80.77376995956293, | |
| "eval_total": 11873, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 7.0229579319784115e-06, | |
| "loss": 0.7684, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_HasAns_exact": 78.0195681511471, | |
| "eval_HasAns_f1": 84.35251514019012, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 79.76450798990749, | |
| "eval_NoAns_f1": 79.76450798990749, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 78.89328729049103, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 82.05522696462938, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 78.89328729049103, | |
| "eval_f1": 82.05522696462947, | |
| "eval_total": 11873, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 6.764713477778065e-06, | |
| "loss": 0.7751, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_HasAns_exact": 78.69433198380567, | |
| "eval_HasAns_f1": 84.83678017806832, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 78.16652649285113, | |
| "eval_NoAns_f1": 78.16652649285113, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 78.43005137707404, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 81.49687803382365, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 78.43005137707404, | |
| "eval_f1": 81.49687803382375, | |
| "eval_total": 11873, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 6.506469023577719e-06, | |
| "loss": 0.7746, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_HasAns_exact": 78.40755735492577, | |
| "eval_HasAns_f1": 84.57986037447542, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 78.67115222876367, | |
| "eval_NoAns_f1": 78.67115222876367, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 78.53954350206351, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 81.62127619808709, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 78.53954350206351, | |
| "eval_f1": 81.62127619808716, | |
| "eval_total": 11873, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 6.248224569377374e-06, | |
| "loss": 0.6995, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_HasAns_exact": 79.25101214574899, | |
| "eval_HasAns_f1": 85.85300684093066, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 76.93860386879732, | |
| "eval_NoAns_f1": 76.93860386879732, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 78.09315253095258, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 81.38942344420408, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 78.09315253095258, | |
| "eval_f1": 81.38942344420423, | |
| "eval_total": 11873, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 5.9899801151770276e-06, | |
| "loss": 0.6657, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_HasAns_exact": 79.1497975708502, | |
| "eval_HasAns_f1": 85.40764464218985, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 78.01513877207738, | |
| "eval_NoAns_f1": 78.01513877207738, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 78.5816558578287, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 81.70609933790104, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 78.5816558578287, | |
| "eval_f1": 81.70609933790118, | |
| "eval_total": 11873, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 5.7317356609766805e-06, | |
| "loss": 0.643, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_HasAns_exact": 73.6842105263158, | |
| "eval_HasAns_f1": 79.63974257623968, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 84.60891505466779, | |
| "eval_NoAns_f1": 84.60891505466779, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 79.15438389623516, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 82.12788629596118, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 79.15438389623516, | |
| "eval_f1": 82.12788629596123, | |
| "eval_total": 11873, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 5.473491206776334e-06, | |
| "loss": 0.6494, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_HasAns_exact": 77.10863697705803, | |
| "eval_HasAns_f1": 83.39536168287856, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 80.89150546677881, | |
| "eval_NoAns_f1": 80.89150546677881, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 79.0027794154805, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 82.14164103900457, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 79.0027794154805, | |
| "eval_f1": 82.14164103900465, | |
| "eval_total": 11873, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 5.215246752575989e-06, | |
| "loss": 0.6326, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_HasAns_exact": 79.28475033738192, | |
| "eval_HasAns_f1": 85.68401857002343, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 78.33473507148865, | |
| "eval_NoAns_f1": 78.33473507148865, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 78.80906257896066, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 82.00411539485366, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 78.80906257896066, | |
| "eval_f1": 82.00411539485363, | |
| "eval_total": 11873, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 4.957002298375643e-06, | |
| "loss": 0.6236, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_HasAns_exact": 79.77395411605939, | |
| "eval_HasAns_f1": 85.76130612988482, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 78.45248107653491, | |
| "eval_NoAns_f1": 78.45248107653491, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 79.11227154046998, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 82.10166114191487, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 79.11227154046998, | |
| "eval_f1": 82.10166114191493, | |
| "eval_total": 11873, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 4.698757844175297e-06, | |
| "loss": 0.6177, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_HasAns_exact": 80.17881241565452, | |
| "eval_HasAns_f1": 86.16732437952784, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 78.33473507148865, | |
| "eval_NoAns_f1": 78.33473507148865, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 79.25545355007159, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 82.24542229612054, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 79.25545355007159, | |
| "eval_f1": 82.24542229612064, | |
| "eval_total": 11873, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 4.4405133899749504e-06, | |
| "loss": 0.6171, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_HasAns_exact": 79.25101214574899, | |
| "eval_HasAns_f1": 85.3060014254297, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 79.3103448275862, | |
| "eval_NoAns_f1": 79.3103448275862, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 79.2807209635307, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 82.30388077570497, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 79.2807209635307, | |
| "eval_f1": 82.303880775705, | |
| "eval_total": 11873, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 4.182268935774604e-06, | |
| "loss": 0.5992, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "eval_HasAns_exact": 76.51821862348179, | |
| "eval_HasAns_f1": 82.59682578958576, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 84.00336417157276, | |
| "eval_NoAns_f1": 84.00336417157276, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 80.26615008843595, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 83.30110193553955, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 80.26615008843595, | |
| "eval_f1": 83.30110193553969, | |
| "eval_total": 11873, | |
| "step": 25000 | |
| } | |
| ], | |
| "max_steps": 41195, | |
| "num_train_epochs": 5, | |
| "total_flos": 7.838843910309734e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |