{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.684270423671435, "eval_steps": 500, "global_step": 47500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014405693129924946, "grad_norm": 27.26666831970215, "learning_rate": 8.641382621219396e-08, "loss": 4.9, "step": 10 }, { "epoch": 0.0002881138625984989, "grad_norm": 27.764198303222656, "learning_rate": 1.8242918867018726e-07, "loss": 4.9161, "step": 20 }, { "epoch": 0.0004321707938977484, "grad_norm": 28.83149528503418, "learning_rate": 2.7844455112818053e-07, "loss": 4.9883, "step": 30 }, { "epoch": 0.0005762277251969978, "grad_norm": 27.213775634765625, "learning_rate": 3.744599135861738e-07, "loss": 4.9025, "step": 40 }, { "epoch": 0.0007202846564962473, "grad_norm": 24.665821075439453, "learning_rate": 4.7047527604416713e-07, "loss": 4.8487, "step": 50 }, { "epoch": 0.0008643415877954968, "grad_norm": 21.37822914123535, "learning_rate": 5.664906385021604e-07, "loss": 4.728, "step": 60 }, { "epoch": 0.0010083985190947463, "grad_norm": 16.232444763183594, "learning_rate": 6.625060009601537e-07, "loss": 4.5959, "step": 70 }, { "epoch": 0.0011524554503939957, "grad_norm": 14.029967308044434, "learning_rate": 7.585213634181469e-07, "loss": 4.4123, "step": 80 }, { "epoch": 0.0012965123816932453, "grad_norm": 10.092180252075195, "learning_rate": 8.545367258761402e-07, "loss": 4.2001, "step": 90 }, { "epoch": 0.0014405693129924946, "grad_norm": 7.05848503112793, "learning_rate": 9.505520883341334e-07, "loss": 4.0065, "step": 100 }, { "epoch": 0.0015846262442917442, "grad_norm": 5.146378517150879, "learning_rate": 1.0465674507921267e-06, "loss": 3.7921, "step": 110 }, { "epoch": 0.0017286831755909935, "grad_norm": 4.485132217407227, "learning_rate": 1.1425828132501202e-06, "loss": 3.637, "step": 120 }, { "epoch": 0.001872740106890243, "grad_norm": 3.7661523818969727, "learning_rate": 1.2385981757081133e-06, "loss": 3.5115, "step": 130 }, { "epoch": 0.0020167970381894927, "grad_norm": 3.7206203937530518, "learning_rate": 1.3346135381661066e-06, "loss": 3.3966, "step": 140 }, { "epoch": 0.002160853969488742, "grad_norm": 3.2748324871063232, "learning_rate": 1.4306289006241e-06, "loss": 3.1868, "step": 150 }, { "epoch": 0.0023049109007879914, "grad_norm": 2.8945841789245605, "learning_rate": 1.5266442630820932e-06, "loss": 3.0535, "step": 160 }, { "epoch": 0.002448967832087241, "grad_norm": 2.8431167602539062, "learning_rate": 1.6226596255400865e-06, "loss": 2.8823, "step": 170 }, { "epoch": 0.0025930247633864905, "grad_norm": 2.856003522872925, "learning_rate": 1.7186749879980798e-06, "loss": 2.7049, "step": 180 }, { "epoch": 0.0027370816946857396, "grad_norm": 2.8247039318084717, "learning_rate": 1.8146903504560731e-06, "loss": 2.6149, "step": 190 }, { "epoch": 0.002881138625984989, "grad_norm": 2.9072368144989014, "learning_rate": 1.9107057129140664e-06, "loss": 2.4336, "step": 200 }, { "epoch": 0.0030251955572842388, "grad_norm": 2.8501107692718506, "learning_rate": 2.0067210753720597e-06, "loss": 2.3022, "step": 210 }, { "epoch": 0.0031692524885834883, "grad_norm": 3.5651540756225586, "learning_rate": 2.102736437830053e-06, "loss": 2.1635, "step": 220 }, { "epoch": 0.0033133094198827375, "grad_norm": 3.2838971614837646, "learning_rate": 2.1987518002880463e-06, "loss": 2.0627, "step": 230 }, { "epoch": 0.003457366351181987, "grad_norm": 4.433910369873047, "learning_rate": 2.2947671627460396e-06, "loss": 1.9501, "step": 240 }, { "epoch": 0.0036014232824812366, "grad_norm": 3.1078832149505615, "learning_rate": 2.390782525204033e-06, "loss": 1.8411, "step": 250 }, { "epoch": 0.003745480213780486, "grad_norm": 3.1979434490203857, "learning_rate": 2.4867978876620263e-06, "loss": 1.7389, "step": 260 }, { "epoch": 0.0038895371450797353, "grad_norm": 3.8683359622955322, "learning_rate": 2.582813250120019e-06, "loss": 1.6616, "step": 270 }, { "epoch": 0.004033594076378985, "grad_norm": 3.9330086708068848, "learning_rate": 2.678828612578013e-06, "loss": 1.6143, "step": 280 }, { "epoch": 0.004177651007678234, "grad_norm": 5.806529521942139, "learning_rate": 2.774843975036006e-06, "loss": 1.5516, "step": 290 }, { "epoch": 0.004321707938977484, "grad_norm": 3.495863676071167, "learning_rate": 2.870859337493999e-06, "loss": 1.5236, "step": 300 }, { "epoch": 0.004465764870276733, "grad_norm": 2.784912347793579, "learning_rate": 2.9668746999519928e-06, "loss": 1.4801, "step": 310 }, { "epoch": 0.004609821801575983, "grad_norm": 3.510164737701416, "learning_rate": 3.062890062409986e-06, "loss": 1.4932, "step": 320 }, { "epoch": 0.004753878732875232, "grad_norm": 3.2866201400756836, "learning_rate": 3.158905424867979e-06, "loss": 1.4565, "step": 330 }, { "epoch": 0.004897935664174482, "grad_norm": 4.239346981048584, "learning_rate": 3.2549207873259727e-06, "loss": 1.4323, "step": 340 }, { "epoch": 0.0050419925954737314, "grad_norm": 3.097536563873291, "learning_rate": 3.350936149783966e-06, "loss": 1.4196, "step": 350 }, { "epoch": 0.005186049526772981, "grad_norm": 4.322780132293701, "learning_rate": 3.446951512241959e-06, "loss": 1.3871, "step": 360 }, { "epoch": 0.005330106458072231, "grad_norm": 3.513322591781616, "learning_rate": 3.5429668746999526e-06, "loss": 1.3872, "step": 370 }, { "epoch": 0.005474163389371479, "grad_norm": 3.922034740447998, "learning_rate": 3.6389822371579454e-06, "loss": 1.3878, "step": 380 }, { "epoch": 0.005618220320670729, "grad_norm": 2.89475154876709, "learning_rate": 3.7349975996159387e-06, "loss": 1.377, "step": 390 }, { "epoch": 0.005762277251969978, "grad_norm": 2.8162641525268555, "learning_rate": 3.831012962073932e-06, "loss": 1.3718, "step": 400 }, { "epoch": 0.005906334183269228, "grad_norm": 3.8730788230895996, "learning_rate": 3.927028324531925e-06, "loss": 1.3513, "step": 410 }, { "epoch": 0.0060503911145684776, "grad_norm": 2.5291128158569336, "learning_rate": 4.023043686989919e-06, "loss": 1.3291, "step": 420 }, { "epoch": 0.006194448045867727, "grad_norm": 2.768787384033203, "learning_rate": 4.119059049447912e-06, "loss": 1.3187, "step": 430 }, { "epoch": 0.006338504977166977, "grad_norm": 3.5093281269073486, "learning_rate": 4.215074411905905e-06, "loss": 1.313, "step": 440 }, { "epoch": 0.006482561908466226, "grad_norm": 3.6907739639282227, "learning_rate": 4.3110897743638986e-06, "loss": 1.2778, "step": 450 }, { "epoch": 0.006626618839765475, "grad_norm": 3.0370869636535645, "learning_rate": 4.407105136821892e-06, "loss": 1.3173, "step": 460 }, { "epoch": 0.0067706757710647245, "grad_norm": 3.6910996437072754, "learning_rate": 4.503120499279885e-06, "loss": 1.3183, "step": 470 }, { "epoch": 0.006914732702363974, "grad_norm": 3.107311487197876, "learning_rate": 4.5991358617378785e-06, "loss": 1.2866, "step": 480 }, { "epoch": 0.007058789633663224, "grad_norm": 2.9197614192962646, "learning_rate": 4.695151224195872e-06, "loss": 1.2928, "step": 490 }, { "epoch": 0.007202846564962473, "grad_norm": 3.9091696739196777, "learning_rate": 4.791166586653865e-06, "loss": 1.2397, "step": 500 }, { "epoch": 0.007346903496261723, "grad_norm": 3.2482357025146484, "learning_rate": 4.887181949111858e-06, "loss": 1.2641, "step": 510 }, { "epoch": 0.007490960427560972, "grad_norm": 3.3543107509613037, "learning_rate": 4.983197311569852e-06, "loss": 1.2643, "step": 520 }, { "epoch": 0.007635017358860222, "grad_norm": 2.6477134227752686, "learning_rate": 5.079212674027845e-06, "loss": 1.2904, "step": 530 }, { "epoch": 0.007779074290159471, "grad_norm": 2.9984657764434814, "learning_rate": 5.175228036485837e-06, "loss": 1.2355, "step": 540 }, { "epoch": 0.007923131221458721, "grad_norm": 2.8463642597198486, "learning_rate": 5.2712433989438316e-06, "loss": 1.2643, "step": 550 }, { "epoch": 0.00806718815275797, "grad_norm": 3.5982484817504883, "learning_rate": 5.367258761401825e-06, "loss": 1.2626, "step": 560 }, { "epoch": 0.00821124508405722, "grad_norm": 2.690351724624634, "learning_rate": 5.463274123859817e-06, "loss": 1.2533, "step": 570 }, { "epoch": 0.008355302015356468, "grad_norm": 2.9995064735412598, "learning_rate": 5.5592894863178115e-06, "loss": 1.2195, "step": 580 }, { "epoch": 0.008499358946655718, "grad_norm": 3.226581335067749, "learning_rate": 5.655304848775805e-06, "loss": 1.2339, "step": 590 }, { "epoch": 0.008643415877954967, "grad_norm": 4.1260833740234375, "learning_rate": 5.751320211233797e-06, "loss": 1.2249, "step": 600 }, { "epoch": 0.008787472809254217, "grad_norm": 2.9851315021514893, "learning_rate": 5.847335573691791e-06, "loss": 1.2367, "step": 610 }, { "epoch": 0.008931529740553466, "grad_norm": 3.313911199569702, "learning_rate": 5.943350936149785e-06, "loss": 1.2336, "step": 620 }, { "epoch": 0.009075586671852716, "grad_norm": 3.2980895042419434, "learning_rate": 6.039366298607777e-06, "loss": 1.2055, "step": 630 }, { "epoch": 0.009219643603151965, "grad_norm": 2.9993834495544434, "learning_rate": 6.135381661065771e-06, "loss": 1.193, "step": 640 }, { "epoch": 0.009363700534451215, "grad_norm": 3.1403629779815674, "learning_rate": 6.231397023523765e-06, "loss": 1.223, "step": 650 }, { "epoch": 0.009507757465750465, "grad_norm": 2.9803543090820312, "learning_rate": 6.327412385981757e-06, "loss": 1.1861, "step": 660 }, { "epoch": 0.009651814397049714, "grad_norm": 3.8311474323272705, "learning_rate": 6.423427748439751e-06, "loss": 1.1753, "step": 670 }, { "epoch": 0.009795871328348964, "grad_norm": 3.520228147506714, "learning_rate": 6.5194431108977445e-06, "loss": 1.1899, "step": 680 }, { "epoch": 0.009939928259648213, "grad_norm": 3.1205902099609375, "learning_rate": 6.615458473355737e-06, "loss": 1.1884, "step": 690 }, { "epoch": 0.010083985190947463, "grad_norm": 3.5261614322662354, "learning_rate": 6.711473835813731e-06, "loss": 1.1966, "step": 700 }, { "epoch": 0.010228042122246712, "grad_norm": 3.649890184402466, "learning_rate": 6.8074891982717235e-06, "loss": 1.1988, "step": 710 }, { "epoch": 0.010372099053545962, "grad_norm": 2.4982945919036865, "learning_rate": 6.903504560729717e-06, "loss": 1.2032, "step": 720 }, { "epoch": 0.010516155984845212, "grad_norm": 3.6677379608154297, "learning_rate": 6.999519923187711e-06, "loss": 1.2, "step": 730 }, { "epoch": 0.010660212916144461, "grad_norm": 2.5797760486602783, "learning_rate": 7.0955352856457035e-06, "loss": 1.1846, "step": 740 }, { "epoch": 0.010804269847443709, "grad_norm": 2.922659158706665, "learning_rate": 7.191550648103697e-06, "loss": 1.1787, "step": 750 }, { "epoch": 0.010948326778742959, "grad_norm": 2.6787455081939697, "learning_rate": 7.287566010561691e-06, "loss": 1.167, "step": 760 }, { "epoch": 0.011092383710042208, "grad_norm": 3.1619741916656494, "learning_rate": 7.383581373019683e-06, "loss": 1.1754, "step": 770 }, { "epoch": 0.011236440641341458, "grad_norm": 3.1390509605407715, "learning_rate": 7.479596735477677e-06, "loss": 1.1876, "step": 780 }, { "epoch": 0.011380497572640707, "grad_norm": 3.50462007522583, "learning_rate": 7.575612097935671e-06, "loss": 1.1585, "step": 790 }, { "epoch": 0.011524554503939957, "grad_norm": 2.9821434020996094, "learning_rate": 7.671627460393664e-06, "loss": 1.1848, "step": 800 }, { "epoch": 0.011668611435239206, "grad_norm": 3.2226595878601074, "learning_rate": 7.767642822851656e-06, "loss": 1.152, "step": 810 }, { "epoch": 0.011812668366538456, "grad_norm": 2.48160719871521, "learning_rate": 7.86365818530965e-06, "loss": 1.1387, "step": 820 }, { "epoch": 0.011956725297837706, "grad_norm": 2.464972496032715, "learning_rate": 7.959673547767644e-06, "loss": 1.1585, "step": 830 }, { "epoch": 0.012100782229136955, "grad_norm": 2.104072093963623, "learning_rate": 8.055688910225636e-06, "loss": 1.1385, "step": 840 }, { "epoch": 0.012244839160436205, "grad_norm": 2.9877710342407227, "learning_rate": 8.15170427268363e-06, "loss": 1.1372, "step": 850 }, { "epoch": 0.012388896091735454, "grad_norm": 2.8173460960388184, "learning_rate": 8.247719635141624e-06, "loss": 1.152, "step": 860 }, { "epoch": 0.012532953023034704, "grad_norm": 2.934769630432129, "learning_rate": 8.343734997599616e-06, "loss": 1.1609, "step": 870 }, { "epoch": 0.012677009954333953, "grad_norm": 2.321155309677124, "learning_rate": 8.43975036005761e-06, "loss": 1.1276, "step": 880 }, { "epoch": 0.012821066885633203, "grad_norm": 3.0589613914489746, "learning_rate": 8.535765722515604e-06, "loss": 1.1481, "step": 890 }, { "epoch": 0.012965123816932453, "grad_norm": 3.533726215362549, "learning_rate": 8.631781084973595e-06, "loss": 1.1312, "step": 900 }, { "epoch": 0.0131091807482317, "grad_norm": 2.3042759895324707, "learning_rate": 8.72779644743159e-06, "loss": 1.1595, "step": 910 }, { "epoch": 0.01325323767953095, "grad_norm": 2.614858388900757, "learning_rate": 8.823811809889584e-06, "loss": 1.1179, "step": 920 }, { "epoch": 0.0133972946108302, "grad_norm": 2.8544540405273438, "learning_rate": 8.919827172347575e-06, "loss": 1.1117, "step": 930 }, { "epoch": 0.013541351542129449, "grad_norm": 2.6770823001861572, "learning_rate": 9.01584253480557e-06, "loss": 1.1287, "step": 940 }, { "epoch": 0.013685408473428699, "grad_norm": 1.9468642473220825, "learning_rate": 9.111857897263564e-06, "loss": 1.1345, "step": 950 }, { "epoch": 0.013829465404727948, "grad_norm": 2.8672447204589844, "learning_rate": 9.207873259721555e-06, "loss": 1.1124, "step": 960 }, { "epoch": 0.013973522336027198, "grad_norm": 2.024662733078003, "learning_rate": 9.30388862217955e-06, "loss": 1.1246, "step": 970 }, { "epoch": 0.014117579267326447, "grad_norm": 2.2646453380584717, "learning_rate": 9.399903984637544e-06, "loss": 1.1344, "step": 980 }, { "epoch": 0.014261636198625697, "grad_norm": 3.0018582344055176, "learning_rate": 9.495919347095535e-06, "loss": 1.1436, "step": 990 }, { "epoch": 0.014405693129924946, "grad_norm": 2.3975377082824707, "learning_rate": 9.59193470955353e-06, "loss": 1.1309, "step": 1000 }, { "epoch": 0.014549750061224196, "grad_norm": 2.0984370708465576, "learning_rate": 9.687950072011523e-06, "loss": 1.1325, "step": 1010 }, { "epoch": 0.014693806992523446, "grad_norm": 2.1143908500671387, "learning_rate": 9.783965434469515e-06, "loss": 1.1405, "step": 1020 }, { "epoch": 0.014837863923822695, "grad_norm": 2.5381438732147217, "learning_rate": 9.87998079692751e-06, "loss": 1.1473, "step": 1030 }, { "epoch": 0.014981920855121945, "grad_norm": 2.9158403873443604, "learning_rate": 9.975996159385503e-06, "loss": 1.1215, "step": 1040 }, { "epoch": 0.015125977786421194, "grad_norm": 3.113672971725464, "learning_rate": 1.0072011521843497e-05, "loss": 1.1221, "step": 1050 }, { "epoch": 0.015270034717720444, "grad_norm": 2.7869443893432617, "learning_rate": 1.016802688430149e-05, "loss": 1.1025, "step": 1060 }, { "epoch": 0.015414091649019693, "grad_norm": 2.0424559116363525, "learning_rate": 1.0264042246759483e-05, "loss": 1.1074, "step": 1070 }, { "epoch": 0.015558148580318941, "grad_norm": 2.2862319946289062, "learning_rate": 1.0360057609217475e-05, "loss": 1.1281, "step": 1080 }, { "epoch": 0.015702205511618193, "grad_norm": 2.280027389526367, "learning_rate": 1.0456072971675468e-05, "loss": 1.1067, "step": 1090 }, { "epoch": 0.015846262442917442, "grad_norm": 2.6573803424835205, "learning_rate": 1.0552088334133461e-05, "loss": 1.1035, "step": 1100 }, { "epoch": 0.01599031937421669, "grad_norm": 1.6579245328903198, "learning_rate": 1.0648103696591456e-05, "loss": 1.1411, "step": 1110 }, { "epoch": 0.01613437630551594, "grad_norm": 2.261164426803589, "learning_rate": 1.074411905904945e-05, "loss": 1.1015, "step": 1120 }, { "epoch": 0.01627843323681519, "grad_norm": 2.3432836532592773, "learning_rate": 1.0840134421507443e-05, "loss": 1.1097, "step": 1130 }, { "epoch": 0.01642249016811444, "grad_norm": 2.1592226028442383, "learning_rate": 1.0936149783965435e-05, "loss": 1.1143, "step": 1140 }, { "epoch": 0.01656654709941369, "grad_norm": 2.1739590167999268, "learning_rate": 1.1032165146423428e-05, "loss": 1.1129, "step": 1150 }, { "epoch": 0.016710604030712936, "grad_norm": 1.7060655355453491, "learning_rate": 1.1128180508881421e-05, "loss": 1.0962, "step": 1160 }, { "epoch": 0.016854660962012186, "grad_norm": 3.007624387741089, "learning_rate": 1.1224195871339416e-05, "loss": 1.1294, "step": 1170 }, { "epoch": 0.016998717893311435, "grad_norm": 2.0622236728668213, "learning_rate": 1.132021123379741e-05, "loss": 1.1212, "step": 1180 }, { "epoch": 0.017142774824610685, "grad_norm": 2.5541915893554688, "learning_rate": 1.1416226596255401e-05, "loss": 1.1295, "step": 1190 }, { "epoch": 0.017286831755909934, "grad_norm": 2.384983539581299, "learning_rate": 1.1512241958713394e-05, "loss": 1.1233, "step": 1200 }, { "epoch": 0.017430888687209184, "grad_norm": 2.4423985481262207, "learning_rate": 1.1608257321171388e-05, "loss": 1.1184, "step": 1210 }, { "epoch": 0.017574945618508434, "grad_norm": 2.1598997116088867, "learning_rate": 1.1704272683629381e-05, "loss": 1.1226, "step": 1220 }, { "epoch": 0.017719002549807683, "grad_norm": 1.8650798797607422, "learning_rate": 1.1800288046087376e-05, "loss": 1.1236, "step": 1230 }, { "epoch": 0.017863059481106933, "grad_norm": 2.189276695251465, "learning_rate": 1.189630340854537e-05, "loss": 1.1205, "step": 1240 }, { "epoch": 0.018007116412406182, "grad_norm": 2.8257031440734863, "learning_rate": 1.1992318771003361e-05, "loss": 1.1289, "step": 1250 }, { "epoch": 0.018151173343705432, "grad_norm": 2.3813562393188477, "learning_rate": 1.2088334133461354e-05, "loss": 1.1262, "step": 1260 }, { "epoch": 0.01829523027500468, "grad_norm": 2.005934238433838, "learning_rate": 1.2184349495919348e-05, "loss": 1.1085, "step": 1270 }, { "epoch": 0.01843928720630393, "grad_norm": 2.3918726444244385, "learning_rate": 1.2280364858377341e-05, "loss": 1.1314, "step": 1280 }, { "epoch": 0.01858334413760318, "grad_norm": 1.9000368118286133, "learning_rate": 1.2376380220835336e-05, "loss": 1.1034, "step": 1290 }, { "epoch": 0.01872740106890243, "grad_norm": 2.176527976989746, "learning_rate": 1.247239558329333e-05, "loss": 1.0931, "step": 1300 }, { "epoch": 0.01887145800020168, "grad_norm": 2.615513563156128, "learning_rate": 1.256841094575132e-05, "loss": 1.1062, "step": 1310 }, { "epoch": 0.01901551493150093, "grad_norm": 1.7527588605880737, "learning_rate": 1.2664426308209314e-05, "loss": 1.1152, "step": 1320 }, { "epoch": 0.01915957186280018, "grad_norm": 2.300352096557617, "learning_rate": 1.2760441670667307e-05, "loss": 1.1291, "step": 1330 }, { "epoch": 0.01930362879409943, "grad_norm": 2.247837543487549, "learning_rate": 1.28564570331253e-05, "loss": 1.1031, "step": 1340 }, { "epoch": 0.019447685725398678, "grad_norm": 1.7947758436203003, "learning_rate": 1.2952472395583296e-05, "loss": 1.1037, "step": 1350 }, { "epoch": 0.019591742656697927, "grad_norm": 1.8685263395309448, "learning_rate": 1.3048487758041289e-05, "loss": 1.1063, "step": 1360 }, { "epoch": 0.019735799587997177, "grad_norm": 2.6336042881011963, "learning_rate": 1.314450312049928e-05, "loss": 1.1082, "step": 1370 }, { "epoch": 0.019879856519296427, "grad_norm": 2.208981990814209, "learning_rate": 1.3240518482957274e-05, "loss": 1.1055, "step": 1380 }, { "epoch": 0.020023913450595676, "grad_norm": 1.8364813327789307, "learning_rate": 1.3336533845415267e-05, "loss": 1.117, "step": 1390 }, { "epoch": 0.020167970381894926, "grad_norm": 2.072045087814331, "learning_rate": 1.343254920787326e-05, "loss": 1.1358, "step": 1400 }, { "epoch": 0.020312027313194175, "grad_norm": 1.5558465719223022, "learning_rate": 1.3528564570331255e-05, "loss": 1.1104, "step": 1410 }, { "epoch": 0.020456084244493425, "grad_norm": 2.4063785076141357, "learning_rate": 1.3624579932789249e-05, "loss": 1.0778, "step": 1420 }, { "epoch": 0.020600141175792674, "grad_norm": 1.9792841672897339, "learning_rate": 1.372059529524724e-05, "loss": 1.1243, "step": 1430 }, { "epoch": 0.020744198107091924, "grad_norm": 1.7703602313995361, "learning_rate": 1.3816610657705234e-05, "loss": 1.1126, "step": 1440 }, { "epoch": 0.020888255038391174, "grad_norm": 1.716643214225769, "learning_rate": 1.3912626020163227e-05, "loss": 1.0767, "step": 1450 }, { "epoch": 0.021032311969690423, "grad_norm": 2.2212705612182617, "learning_rate": 1.400864138262122e-05, "loss": 1.1305, "step": 1460 }, { "epoch": 0.021176368900989673, "grad_norm": 1.8742339611053467, "learning_rate": 1.4104656745079215e-05, "loss": 1.1, "step": 1470 }, { "epoch": 0.021320425832288922, "grad_norm": 1.9609624147415161, "learning_rate": 1.4200672107537209e-05, "loss": 1.125, "step": 1480 }, { "epoch": 0.02146448276358817, "grad_norm": 1.5962414741516113, "learning_rate": 1.42966874699952e-05, "loss": 1.1115, "step": 1490 }, { "epoch": 0.021608539694887418, "grad_norm": 1.8134044408798218, "learning_rate": 1.4392702832453194e-05, "loss": 1.1039, "step": 1500 }, { "epoch": 0.021752596626186668, "grad_norm": 2.0705580711364746, "learning_rate": 1.4488718194911187e-05, "loss": 1.0849, "step": 1510 }, { "epoch": 0.021896653557485917, "grad_norm": 1.7956775426864624, "learning_rate": 1.458473355736918e-05, "loss": 1.1061, "step": 1520 }, { "epoch": 0.022040710488785167, "grad_norm": 1.871857762336731, "learning_rate": 1.4680748919827175e-05, "loss": 1.0928, "step": 1530 }, { "epoch": 0.022184767420084416, "grad_norm": 1.7649261951446533, "learning_rate": 1.4776764282285168e-05, "loss": 1.0978, "step": 1540 }, { "epoch": 0.022328824351383666, "grad_norm": 1.5898739099502563, "learning_rate": 1.487277964474316e-05, "loss": 1.1193, "step": 1550 }, { "epoch": 0.022472881282682915, "grad_norm": 1.7966949939727783, "learning_rate": 1.4968795007201153e-05, "loss": 1.1104, "step": 1560 }, { "epoch": 0.022616938213982165, "grad_norm": 1.7936170101165771, "learning_rate": 1.5064810369659147e-05, "loss": 1.1049, "step": 1570 }, { "epoch": 0.022760995145281415, "grad_norm": 2.4540610313415527, "learning_rate": 1.516082573211714e-05, "loss": 1.1066, "step": 1580 }, { "epoch": 0.022905052076580664, "grad_norm": 2.32489013671875, "learning_rate": 1.5256841094575132e-05, "loss": 1.1095, "step": 1590 }, { "epoch": 0.023049109007879914, "grad_norm": 1.5011610984802246, "learning_rate": 1.5352856457033127e-05, "loss": 1.12, "step": 1600 }, { "epoch": 0.023193165939179163, "grad_norm": 1.724061369895935, "learning_rate": 1.544887181949112e-05, "loss": 1.0904, "step": 1610 }, { "epoch": 0.023337222870478413, "grad_norm": 2.0318827629089355, "learning_rate": 1.5544887181949113e-05, "loss": 1.0939, "step": 1620 }, { "epoch": 0.023481279801777662, "grad_norm": 1.449752926826477, "learning_rate": 1.5640902544407106e-05, "loss": 1.1056, "step": 1630 }, { "epoch": 0.023625336733076912, "grad_norm": 1.4921005964279175, "learning_rate": 1.57369179068651e-05, "loss": 1.1037, "step": 1640 }, { "epoch": 0.02376939366437616, "grad_norm": 1.6666138172149658, "learning_rate": 1.5832933269323093e-05, "loss": 1.1198, "step": 1650 }, { "epoch": 0.02391345059567541, "grad_norm": 1.498950481414795, "learning_rate": 1.5928948631781086e-05, "loss": 1.1133, "step": 1660 }, { "epoch": 0.02405750752697466, "grad_norm": 2.2030534744262695, "learning_rate": 1.602496399423908e-05, "loss": 1.1035, "step": 1670 }, { "epoch": 0.02420156445827391, "grad_norm": 1.7890610694885254, "learning_rate": 1.6120979356697073e-05, "loss": 1.1097, "step": 1680 }, { "epoch": 0.02434562138957316, "grad_norm": 1.7875065803527832, "learning_rate": 1.6216994719155066e-05, "loss": 1.1045, "step": 1690 }, { "epoch": 0.02448967832087241, "grad_norm": 1.5518572330474854, "learning_rate": 1.631301008161306e-05, "loss": 1.1103, "step": 1700 }, { "epoch": 0.02463373525217166, "grad_norm": 1.769522786140442, "learning_rate": 1.6409025444071053e-05, "loss": 1.0833, "step": 1710 }, { "epoch": 0.02477779218347091, "grad_norm": 2.2341511249542236, "learning_rate": 1.6505040806529046e-05, "loss": 1.0817, "step": 1720 }, { "epoch": 0.024921849114770158, "grad_norm": 1.615538239479065, "learning_rate": 1.660105616898704e-05, "loss": 1.1174, "step": 1730 }, { "epoch": 0.025065906046069408, "grad_norm": 1.597995400428772, "learning_rate": 1.6697071531445033e-05, "loss": 1.0972, "step": 1740 }, { "epoch": 0.025209962977368657, "grad_norm": 1.4337286949157715, "learning_rate": 1.6793086893903026e-05, "loss": 1.1342, "step": 1750 }, { "epoch": 0.025354019908667907, "grad_norm": 1.727091908454895, "learning_rate": 1.688910225636102e-05, "loss": 1.1004, "step": 1760 }, { "epoch": 0.025498076839967156, "grad_norm": 1.6202733516693115, "learning_rate": 1.6985117618819013e-05, "loss": 1.1225, "step": 1770 }, { "epoch": 0.025642133771266406, "grad_norm": 2.443167209625244, "learning_rate": 1.7081132981277006e-05, "loss": 1.1007, "step": 1780 }, { "epoch": 0.025786190702565655, "grad_norm": 1.505105972290039, "learning_rate": 1.7177148343735e-05, "loss": 1.1251, "step": 1790 }, { "epoch": 0.025930247633864905, "grad_norm": 1.6193865537643433, "learning_rate": 1.7273163706192993e-05, "loss": 1.0994, "step": 1800 }, { "epoch": 0.026074304565164155, "grad_norm": 2.123363733291626, "learning_rate": 1.7369179068650986e-05, "loss": 1.0792, "step": 1810 }, { "epoch": 0.0262183614964634, "grad_norm": 1.483593225479126, "learning_rate": 1.746519443110898e-05, "loss": 1.1105, "step": 1820 }, { "epoch": 0.02636241842776265, "grad_norm": 1.5574554204940796, "learning_rate": 1.7561209793566972e-05, "loss": 1.1109, "step": 1830 }, { "epoch": 0.0265064753590619, "grad_norm": 1.426472544670105, "learning_rate": 1.7657225156024966e-05, "loss": 1.0843, "step": 1840 }, { "epoch": 0.02665053229036115, "grad_norm": 1.7228853702545166, "learning_rate": 1.775324051848296e-05, "loss": 1.0837, "step": 1850 }, { "epoch": 0.0267945892216604, "grad_norm": 1.8484386205673218, "learning_rate": 1.7849255880940952e-05, "loss": 1.116, "step": 1860 }, { "epoch": 0.02693864615295965, "grad_norm": 1.4843940734863281, "learning_rate": 1.7945271243398946e-05, "loss": 1.0948, "step": 1870 }, { "epoch": 0.027082703084258898, "grad_norm": 1.2204471826553345, "learning_rate": 1.804128660585694e-05, "loss": 1.0927, "step": 1880 }, { "epoch": 0.027226760015558148, "grad_norm": 1.3783999681472778, "learning_rate": 1.8137301968314932e-05, "loss": 1.099, "step": 1890 }, { "epoch": 0.027370816946857397, "grad_norm": 1.6829112768173218, "learning_rate": 1.8233317330772926e-05, "loss": 1.1028, "step": 1900 }, { "epoch": 0.027514873878156647, "grad_norm": 1.10300612449646, "learning_rate": 1.832933269323092e-05, "loss": 1.0999, "step": 1910 }, { "epoch": 0.027658930809455896, "grad_norm": 1.9120315313339233, "learning_rate": 1.8425348055688912e-05, "loss": 1.1231, "step": 1920 }, { "epoch": 0.027802987740755146, "grad_norm": 1.7644262313842773, "learning_rate": 1.8521363418146905e-05, "loss": 1.1207, "step": 1930 }, { "epoch": 0.027947044672054396, "grad_norm": 1.7623440027236938, "learning_rate": 1.86173787806049e-05, "loss": 1.0908, "step": 1940 }, { "epoch": 0.028091101603353645, "grad_norm": 1.6724064350128174, "learning_rate": 1.8713394143062892e-05, "loss": 1.0977, "step": 1950 }, { "epoch": 0.028235158534652895, "grad_norm": 1.154085397720337, "learning_rate": 1.8809409505520885e-05, "loss": 1.0658, "step": 1960 }, { "epoch": 0.028379215465952144, "grad_norm": 1.228071689605713, "learning_rate": 1.890542486797888e-05, "loss": 1.1198, "step": 1970 }, { "epoch": 0.028523272397251394, "grad_norm": 1.41117525100708, "learning_rate": 1.9001440230436872e-05, "loss": 1.0918, "step": 1980 }, { "epoch": 0.028667329328550643, "grad_norm": 1.3081333637237549, "learning_rate": 1.9097455592894865e-05, "loss": 1.1058, "step": 1990 }, { "epoch": 0.028811386259849893, "grad_norm": 1.481508731842041, "learning_rate": 1.919347095535286e-05, "loss": 1.0802, "step": 2000 }, { "epoch": 0.028955443191149143, "grad_norm": 1.1865828037261963, "learning_rate": 1.9289486317810852e-05, "loss": 1.0995, "step": 2010 }, { "epoch": 0.029099500122448392, "grad_norm": 1.5778357982635498, "learning_rate": 1.9385501680268845e-05, "loss": 1.0992, "step": 2020 }, { "epoch": 0.02924355705374764, "grad_norm": 1.2929495573043823, "learning_rate": 1.948151704272684e-05, "loss": 1.1211, "step": 2030 }, { "epoch": 0.02938761398504689, "grad_norm": 1.2938451766967773, "learning_rate": 1.9577532405184832e-05, "loss": 1.0747, "step": 2040 }, { "epoch": 0.02953167091634614, "grad_norm": 1.5689811706542969, "learning_rate": 1.9673547767642825e-05, "loss": 1.1022, "step": 2050 }, { "epoch": 0.02967572784764539, "grad_norm": 1.4578216075897217, "learning_rate": 1.976956313010082e-05, "loss": 1.1003, "step": 2060 }, { "epoch": 0.02981978477894464, "grad_norm": 0.896359384059906, "learning_rate": 1.986557849255881e-05, "loss": 1.118, "step": 2070 }, { "epoch": 0.02996384171024389, "grad_norm": 1.6154524087905884, "learning_rate": 1.9961593855016805e-05, "loss": 1.088, "step": 2080 }, { "epoch": 0.03010789864154314, "grad_norm": 1.3850620985031128, "learning_rate": 1.9999999608164827e-05, "loss": 1.0875, "step": 2090 }, { "epoch": 0.03025195557284239, "grad_norm": 1.1587663888931274, "learning_rate": 1.9999997213616654e-05, "loss": 1.1036, "step": 2100 }, { "epoch": 0.030396012504141638, "grad_norm": 1.6226013898849487, "learning_rate": 1.999999264220704e-05, "loss": 1.1045, "step": 2110 }, { "epoch": 0.030540069435440888, "grad_norm": 1.5852738618850708, "learning_rate": 1.9999985893936977e-05, "loss": 1.1033, "step": 2120 }, { "epoch": 0.030684126366740137, "grad_norm": 1.262050986289978, "learning_rate": 1.9999976968807936e-05, "loss": 1.102, "step": 2130 }, { "epoch": 0.030828183298039387, "grad_norm": 1.405041217803955, "learning_rate": 1.999996586682186e-05, "loss": 1.1035, "step": 2140 }, { "epoch": 0.030972240229338633, "grad_norm": 1.2300574779510498, "learning_rate": 1.9999952587981158e-05, "loss": 1.0992, "step": 2150 }, { "epoch": 0.031116297160637883, "grad_norm": 0.9463424682617188, "learning_rate": 1.999993713228873e-05, "loss": 1.0986, "step": 2160 }, { "epoch": 0.031260354091937136, "grad_norm": 1.373226284980774, "learning_rate": 1.9999919499747938e-05, "loss": 1.1031, "step": 2170 }, { "epoch": 0.031404411023236385, "grad_norm": 1.2275688648223877, "learning_rate": 1.999989969036262e-05, "loss": 1.1126, "step": 2180 }, { "epoch": 0.031548467954535635, "grad_norm": 0.9869627356529236, "learning_rate": 1.9999877704137088e-05, "loss": 1.0908, "step": 2190 }, { "epoch": 0.031692524885834884, "grad_norm": 1.3384692668914795, "learning_rate": 1.9999853541076132e-05, "loss": 1.1195, "step": 2200 }, { "epoch": 0.031836581817134134, "grad_norm": 1.0576239824295044, "learning_rate": 1.9999827201185004e-05, "loss": 1.1007, "step": 2210 }, { "epoch": 0.03198063874843338, "grad_norm": 0.9131837487220764, "learning_rate": 1.999979868446944e-05, "loss": 1.0917, "step": 2220 }, { "epoch": 0.03212469567973263, "grad_norm": 0.8731379508972168, "learning_rate": 1.9999767990935653e-05, "loss": 1.1014, "step": 2230 }, { "epoch": 0.03226875261103188, "grad_norm": 1.5275228023529053, "learning_rate": 1.999973512059032e-05, "loss": 1.0953, "step": 2240 }, { "epoch": 0.03241280954233113, "grad_norm": 1.290822982788086, "learning_rate": 1.99997000734406e-05, "loss": 1.0837, "step": 2250 }, { "epoch": 0.03255686647363038, "grad_norm": 1.1078766584396362, "learning_rate": 1.999966284949412e-05, "loss": 1.0982, "step": 2260 }, { "epoch": 0.03270092340492963, "grad_norm": 1.3421151638031006, "learning_rate": 1.9999623448758977e-05, "loss": 1.0839, "step": 2270 }, { "epoch": 0.03284498033622888, "grad_norm": 1.0649149417877197, "learning_rate": 1.999958187124376e-05, "loss": 1.0729, "step": 2280 }, { "epoch": 0.03298903726752813, "grad_norm": 1.1525821685791016, "learning_rate": 1.9999538116957514e-05, "loss": 1.1201, "step": 2290 }, { "epoch": 0.03313309419882738, "grad_norm": 1.1011182069778442, "learning_rate": 1.999949218590976e-05, "loss": 1.0785, "step": 2300 }, { "epoch": 0.03327715113012662, "grad_norm": 1.1103689670562744, "learning_rate": 1.9999444078110503e-05, "loss": 1.0915, "step": 2310 }, { "epoch": 0.03342120806142587, "grad_norm": 1.0673400163650513, "learning_rate": 1.999939379357021e-05, "loss": 1.0906, "step": 2320 }, { "epoch": 0.03356526499272512, "grad_norm": 1.1162818670272827, "learning_rate": 1.9999341332299833e-05, "loss": 1.0902, "step": 2330 }, { "epoch": 0.03370932192402437, "grad_norm": 0.9606404304504395, "learning_rate": 1.9999286694310788e-05, "loss": 1.0721, "step": 2340 }, { "epoch": 0.03385337885532362, "grad_norm": 0.931309163570404, "learning_rate": 1.9999229879614967e-05, "loss": 1.1225, "step": 2350 }, { "epoch": 0.03399743578662287, "grad_norm": 1.082837462425232, "learning_rate": 1.9999170888224743e-05, "loss": 1.0968, "step": 2360 }, { "epoch": 0.03414149271792212, "grad_norm": 1.1223711967468262, "learning_rate": 1.9999109720152956e-05, "loss": 1.104, "step": 2370 }, { "epoch": 0.03428554964922137, "grad_norm": 0.9647048115730286, "learning_rate": 1.999904637541292e-05, "loss": 1.0784, "step": 2380 }, { "epoch": 0.03442960658052062, "grad_norm": 1.4086133241653442, "learning_rate": 1.9998980854018426e-05, "loss": 1.105, "step": 2390 }, { "epoch": 0.03457366351181987, "grad_norm": 1.022999882698059, "learning_rate": 1.999891315598373e-05, "loss": 1.0812, "step": 2400 }, { "epoch": 0.03471772044311912, "grad_norm": 1.5496896505355835, "learning_rate": 1.9998843281323586e-05, "loss": 1.0823, "step": 2410 }, { "epoch": 0.03486177737441837, "grad_norm": 1.1944360733032227, "learning_rate": 1.9998771230053186e-05, "loss": 1.0809, "step": 2420 }, { "epoch": 0.03500583430571762, "grad_norm": 1.2143431901931763, "learning_rate": 1.9998697002188226e-05, "loss": 1.0923, "step": 2430 }, { "epoch": 0.03514989123701687, "grad_norm": 1.3194953203201294, "learning_rate": 1.999862059774486e-05, "loss": 1.0672, "step": 2440 }, { "epoch": 0.03529394816831612, "grad_norm": 1.2011752128601074, "learning_rate": 1.9998542016739716e-05, "loss": 1.0682, "step": 2450 }, { "epoch": 0.035438005099615366, "grad_norm": 1.190067172050476, "learning_rate": 1.999846125918991e-05, "loss": 1.0769, "step": 2460 }, { "epoch": 0.035582062030914616, "grad_norm": 1.3442232608795166, "learning_rate": 1.9998378325113015e-05, "loss": 1.0927, "step": 2470 }, { "epoch": 0.035726118962213865, "grad_norm": 1.5200982093811035, "learning_rate": 1.9998293214527088e-05, "loss": 1.0985, "step": 2480 }, { "epoch": 0.035870175893513115, "grad_norm": 0.9945452809333801, "learning_rate": 1.9998205927450653e-05, "loss": 1.0696, "step": 2490 }, { "epoch": 0.036014232824812364, "grad_norm": 1.065801739692688, "learning_rate": 1.999811646390271e-05, "loss": 1.0953, "step": 2500 }, { "epoch": 0.036158289756111614, "grad_norm": 1.2544124126434326, "learning_rate": 1.9998024823902744e-05, "loss": 1.0668, "step": 2510 }, { "epoch": 0.036302346687410864, "grad_norm": 0.8284255862236023, "learning_rate": 1.999793100747069e-05, "loss": 1.1062, "step": 2520 }, { "epoch": 0.03644640361871011, "grad_norm": 1.414483666419983, "learning_rate": 1.999783501462698e-05, "loss": 1.0956, "step": 2530 }, { "epoch": 0.03659046055000936, "grad_norm": 1.02438485622406, "learning_rate": 1.999773684539251e-05, "loss": 1.0935, "step": 2540 }, { "epoch": 0.03673451748130861, "grad_norm": 1.4005661010742188, "learning_rate": 1.9997636499788645e-05, "loss": 1.0925, "step": 2550 }, { "epoch": 0.03687857441260786, "grad_norm": 1.299225091934204, "learning_rate": 1.999753397783723e-05, "loss": 1.0794, "step": 2560 }, { "epoch": 0.03702263134390711, "grad_norm": 1.0879071950912476, "learning_rate": 1.9997429279560587e-05, "loss": 1.1003, "step": 2570 }, { "epoch": 0.03716668827520636, "grad_norm": 1.0114030838012695, "learning_rate": 1.99973224049815e-05, "loss": 1.0797, "step": 2580 }, { "epoch": 0.03731074520650561, "grad_norm": 0.9800271987915039, "learning_rate": 1.9997213354123246e-05, "loss": 1.0991, "step": 2590 }, { "epoch": 0.03745480213780486, "grad_norm": 1.0941202640533447, "learning_rate": 1.9997102127009552e-05, "loss": 1.0637, "step": 2600 }, { "epoch": 0.03759885906910411, "grad_norm": 1.0339268445968628, "learning_rate": 1.9996988723664637e-05, "loss": 1.0646, "step": 2610 }, { "epoch": 0.03774291600040336, "grad_norm": 1.1409887075424194, "learning_rate": 1.9996873144113184e-05, "loss": 1.0968, "step": 2620 }, { "epoch": 0.03788697293170261, "grad_norm": 1.3173432350158691, "learning_rate": 1.9996755388380353e-05, "loss": 1.074, "step": 2630 }, { "epoch": 0.03803102986300186, "grad_norm": 1.7910503149032593, "learning_rate": 1.999663545649178e-05, "loss": 1.0889, "step": 2640 }, { "epoch": 0.03817508679430111, "grad_norm": 1.238196849822998, "learning_rate": 1.9996513348473572e-05, "loss": 1.0823, "step": 2650 }, { "epoch": 0.03831914372560036, "grad_norm": 1.1959824562072754, "learning_rate": 1.9996389064352312e-05, "loss": 1.0768, "step": 2660 }, { "epoch": 0.03846320065689961, "grad_norm": 1.093868613243103, "learning_rate": 1.999626260415505e-05, "loss": 1.0639, "step": 2670 }, { "epoch": 0.03860725758819886, "grad_norm": 1.0971081256866455, "learning_rate": 1.999613396790932e-05, "loss": 1.0773, "step": 2680 }, { "epoch": 0.038751314519498106, "grad_norm": 0.9527555108070374, "learning_rate": 1.999600315564312e-05, "loss": 1.0591, "step": 2690 }, { "epoch": 0.038895371450797356, "grad_norm": 1.4275157451629639, "learning_rate": 1.999587016738493e-05, "loss": 1.0783, "step": 2700 }, { "epoch": 0.039039428382096605, "grad_norm": 0.8318230509757996, "learning_rate": 1.9995735003163693e-05, "loss": 1.0682, "step": 2710 }, { "epoch": 0.039183485313395855, "grad_norm": 0.9934574365615845, "learning_rate": 1.999559766300884e-05, "loss": 1.0946, "step": 2720 }, { "epoch": 0.039327542244695105, "grad_norm": 1.059827446937561, "learning_rate": 1.9995458146950266e-05, "loss": 1.0873, "step": 2730 }, { "epoch": 0.039471599175994354, "grad_norm": 0.9701938033103943, "learning_rate": 1.9995316455018342e-05, "loss": 1.0807, "step": 2740 }, { "epoch": 0.039615656107293604, "grad_norm": 0.9101185202598572, "learning_rate": 1.999517258724391e-05, "loss": 1.0832, "step": 2750 }, { "epoch": 0.03975971303859285, "grad_norm": 1.1547293663024902, "learning_rate": 1.999502654365829e-05, "loss": 1.0885, "step": 2760 }, { "epoch": 0.0399037699698921, "grad_norm": 0.9609155058860779, "learning_rate": 1.9994878324293266e-05, "loss": 1.1198, "step": 2770 }, { "epoch": 0.04004782690119135, "grad_norm": 1.024674415588379, "learning_rate": 1.999472792918112e-05, "loss": 1.0841, "step": 2780 }, { "epoch": 0.0401918838324906, "grad_norm": 1.2069023847579956, "learning_rate": 1.9994575358354576e-05, "loss": 1.0905, "step": 2790 }, { "epoch": 0.04033594076378985, "grad_norm": 1.0754157304763794, "learning_rate": 1.999442061184685e-05, "loss": 1.0699, "step": 2800 }, { "epoch": 0.0404799976950891, "grad_norm": 1.038758635520935, "learning_rate": 1.9994263689691635e-05, "loss": 1.0587, "step": 2810 }, { "epoch": 0.04062405462638835, "grad_norm": 0.8334084749221802, "learning_rate": 1.9994104591923086e-05, "loss": 1.0726, "step": 2820 }, { "epoch": 0.0407681115576876, "grad_norm": 0.8922152519226074, "learning_rate": 1.999394331857583e-05, "loss": 1.0784, "step": 2830 }, { "epoch": 0.04091216848898685, "grad_norm": 1.0895780324935913, "learning_rate": 1.9993779869684988e-05, "loss": 1.1066, "step": 2840 }, { "epoch": 0.0410562254202861, "grad_norm": 0.9029639363288879, "learning_rate": 1.9993614245286125e-05, "loss": 1.0741, "step": 2850 }, { "epoch": 0.04120028235158535, "grad_norm": 0.9833770990371704, "learning_rate": 1.9993446445415308e-05, "loss": 1.1026, "step": 2860 }, { "epoch": 0.0413443392828846, "grad_norm": 1.0057151317596436, "learning_rate": 1.9993276470109054e-05, "loss": 1.065, "step": 2870 }, { "epoch": 0.04148839621418385, "grad_norm": 0.981825590133667, "learning_rate": 1.9993104319404375e-05, "loss": 1.0984, "step": 2880 }, { "epoch": 0.0416324531454831, "grad_norm": 1.2358293533325195, "learning_rate": 1.9992929993338738e-05, "loss": 1.0701, "step": 2890 }, { "epoch": 0.04177651007678235, "grad_norm": 1.2199301719665527, "learning_rate": 1.9992753491950096e-05, "loss": 1.0765, "step": 2900 }, { "epoch": 0.0419205670080816, "grad_norm": 0.9787526726722717, "learning_rate": 1.9992574815276867e-05, "loss": 1.0774, "step": 2910 }, { "epoch": 0.042064623939380846, "grad_norm": 0.8597845435142517, "learning_rate": 1.999239396335795e-05, "loss": 1.0801, "step": 2920 }, { "epoch": 0.042208680870680096, "grad_norm": 1.1765086650848389, "learning_rate": 1.999221093623271e-05, "loss": 1.082, "step": 2930 }, { "epoch": 0.042352737801979345, "grad_norm": 1.0328409671783447, "learning_rate": 1.9992025733940994e-05, "loss": 1.0755, "step": 2940 }, { "epoch": 0.042496794733278595, "grad_norm": 1.0411959886550903, "learning_rate": 1.9991838356523114e-05, "loss": 1.0912, "step": 2950 }, { "epoch": 0.042640851664577845, "grad_norm": 1.1128629446029663, "learning_rate": 1.9991648804019867e-05, "loss": 1.0777, "step": 2960 }, { "epoch": 0.04278490859587709, "grad_norm": 1.0264414548873901, "learning_rate": 1.9991457076472502e-05, "loss": 1.0742, "step": 2970 }, { "epoch": 0.04292896552717634, "grad_norm": 0.8396328687667847, "learning_rate": 1.9991263173922767e-05, "loss": 1.0939, "step": 2980 }, { "epoch": 0.043073022458475586, "grad_norm": 1.1300128698349, "learning_rate": 1.999106709641287e-05, "loss": 1.0691, "step": 2990 }, { "epoch": 0.043217079389774836, "grad_norm": 0.9339667558670044, "learning_rate": 1.999086884398549e-05, "loss": 1.0568, "step": 3000 }, { "epoch": 0.043361136321074085, "grad_norm": 1.1959909200668335, "learning_rate": 1.9990668416683788e-05, "loss": 1.0842, "step": 3010 }, { "epoch": 0.043505193252373335, "grad_norm": 0.9370341897010803, "learning_rate": 1.9990465814551397e-05, "loss": 1.0924, "step": 3020 }, { "epoch": 0.043649250183672585, "grad_norm": 0.9694163203239441, "learning_rate": 1.999026103763241e-05, "loss": 1.0971, "step": 3030 }, { "epoch": 0.043793307114971834, "grad_norm": 0.8307744264602661, "learning_rate": 1.9990054085971416e-05, "loss": 1.089, "step": 3040 }, { "epoch": 0.043937364046271084, "grad_norm": 1.1763337850570679, "learning_rate": 1.9989844959613458e-05, "loss": 1.0734, "step": 3050 }, { "epoch": 0.04408142097757033, "grad_norm": 0.9161782264709473, "learning_rate": 1.9989633658604064e-05, "loss": 1.0719, "step": 3060 }, { "epoch": 0.04422547790886958, "grad_norm": 0.9989141821861267, "learning_rate": 1.9989420182989227e-05, "loss": 1.0672, "step": 3070 }, { "epoch": 0.04436953484016883, "grad_norm": 0.9473924040794373, "learning_rate": 1.9989204532815422e-05, "loss": 1.0845, "step": 3080 }, { "epoch": 0.04451359177146808, "grad_norm": 0.7662175297737122, "learning_rate": 1.9988986708129593e-05, "loss": 1.0826, "step": 3090 }, { "epoch": 0.04465764870276733, "grad_norm": 0.8995963335037231, "learning_rate": 1.998876670897915e-05, "loss": 1.0918, "step": 3100 }, { "epoch": 0.04480170563406658, "grad_norm": 1.0424665212631226, "learning_rate": 1.9988544535411996e-05, "loss": 1.075, "step": 3110 }, { "epoch": 0.04494576256536583, "grad_norm": 1.1500542163848877, "learning_rate": 1.9988320187476483e-05, "loss": 1.0634, "step": 3120 }, { "epoch": 0.04508981949666508, "grad_norm": 1.0494294166564941, "learning_rate": 1.998809366522146e-05, "loss": 1.0811, "step": 3130 }, { "epoch": 0.04523387642796433, "grad_norm": 0.932778000831604, "learning_rate": 1.998786496869623e-05, "loss": 1.0786, "step": 3140 }, { "epoch": 0.04537793335926358, "grad_norm": 1.0902131795883179, "learning_rate": 1.9987634097950576e-05, "loss": 1.0769, "step": 3150 }, { "epoch": 0.04552199029056283, "grad_norm": 1.001160979270935, "learning_rate": 1.998740105303476e-05, "loss": 1.0606, "step": 3160 }, { "epoch": 0.04566604722186208, "grad_norm": 1.0201925039291382, "learning_rate": 1.998716583399951e-05, "loss": 1.0732, "step": 3170 }, { "epoch": 0.04581010415316133, "grad_norm": 0.8125157952308655, "learning_rate": 1.9986928440896034e-05, "loss": 1.0875, "step": 3180 }, { "epoch": 0.04595416108446058, "grad_norm": 0.8411962985992432, "learning_rate": 1.9986688873776003e-05, "loss": 1.0784, "step": 3190 }, { "epoch": 0.04609821801575983, "grad_norm": 1.008968472480774, "learning_rate": 1.9986447132691575e-05, "loss": 1.061, "step": 3200 }, { "epoch": 0.04624227494705908, "grad_norm": 0.8611114025115967, "learning_rate": 1.9986203217695365e-05, "loss": 1.0628, "step": 3210 }, { "epoch": 0.046386331878358326, "grad_norm": 1.017021656036377, "learning_rate": 1.9985957128840478e-05, "loss": 1.0681, "step": 3220 }, { "epoch": 0.046530388809657576, "grad_norm": 0.9527968168258667, "learning_rate": 1.9985708866180475e-05, "loss": 1.0733, "step": 3230 }, { "epoch": 0.046674445740956826, "grad_norm": 1.0302634239196777, "learning_rate": 1.998545842976941e-05, "loss": 1.0769, "step": 3240 }, { "epoch": 0.046818502672256075, "grad_norm": 1.3760242462158203, "learning_rate": 1.998520581966179e-05, "loss": 1.0727, "step": 3250 }, { "epoch": 0.046962559603555325, "grad_norm": 1.0351104736328125, "learning_rate": 1.9984951035912613e-05, "loss": 1.0802, "step": 3260 }, { "epoch": 0.047106616534854574, "grad_norm": 0.9689775109291077, "learning_rate": 1.9984694078577334e-05, "loss": 1.0666, "step": 3270 }, { "epoch": 0.047250673466153824, "grad_norm": 0.8249936699867249, "learning_rate": 1.99844349477119e-05, "loss": 1.0926, "step": 3280 }, { "epoch": 0.04739473039745307, "grad_norm": 0.8944165110588074, "learning_rate": 1.9984173643372705e-05, "loss": 1.0789, "step": 3290 }, { "epoch": 0.04753878732875232, "grad_norm": 0.8791295289993286, "learning_rate": 1.9983910165616643e-05, "loss": 1.0879, "step": 3300 }, { "epoch": 0.04768284426005157, "grad_norm": 0.7624006867408752, "learning_rate": 1.998364451450107e-05, "loss": 1.0806, "step": 3310 }, { "epoch": 0.04782690119135082, "grad_norm": 0.9690932035446167, "learning_rate": 1.9983376690083804e-05, "loss": 1.0838, "step": 3320 }, { "epoch": 0.04797095812265007, "grad_norm": 0.7400982975959778, "learning_rate": 1.9983106692423154e-05, "loss": 1.0778, "step": 3330 }, { "epoch": 0.04811501505394932, "grad_norm": 1.065669298171997, "learning_rate": 1.99828345215779e-05, "loss": 1.0902, "step": 3340 }, { "epoch": 0.04825907198524857, "grad_norm": 0.9505540132522583, "learning_rate": 1.998256017760728e-05, "loss": 1.099, "step": 3350 }, { "epoch": 0.04840312891654782, "grad_norm": 0.7896109223365784, "learning_rate": 1.9982283660571018e-05, "loss": 1.0589, "step": 3360 }, { "epoch": 0.04854718584784707, "grad_norm": 0.8883846998214722, "learning_rate": 1.998200497052931e-05, "loss": 1.0563, "step": 3370 }, { "epoch": 0.04869124277914632, "grad_norm": 1.1594328880310059, "learning_rate": 1.998172410754282e-05, "loss": 1.0897, "step": 3380 }, { "epoch": 0.04883529971044557, "grad_norm": 0.9646573066711426, "learning_rate": 1.9981441071672693e-05, "loss": 1.1009, "step": 3390 }, { "epoch": 0.04897935664174482, "grad_norm": 0.7927548885345459, "learning_rate": 1.9981155862980536e-05, "loss": 1.1053, "step": 3400 }, { "epoch": 0.04912341357304407, "grad_norm": 0.8204102516174316, "learning_rate": 1.998086848152844e-05, "loss": 1.0492, "step": 3410 }, { "epoch": 0.04926747050434332, "grad_norm": 0.8601950407028198, "learning_rate": 1.998057892737896e-05, "loss": 1.0984, "step": 3420 }, { "epoch": 0.04941152743564257, "grad_norm": 0.9040478467941284, "learning_rate": 1.998028720059513e-05, "loss": 1.0773, "step": 3430 }, { "epoch": 0.04955558436694182, "grad_norm": 0.9031913876533508, "learning_rate": 1.9979993301240453e-05, "loss": 1.0797, "step": 3440 }, { "epoch": 0.049699641298241067, "grad_norm": 0.8928804993629456, "learning_rate": 1.997969722937891e-05, "loss": 1.0806, "step": 3450 }, { "epoch": 0.049843698229540316, "grad_norm": 1.0597736835479736, "learning_rate": 1.9979398985074952e-05, "loss": 1.0711, "step": 3460 }, { "epoch": 0.049987755160839566, "grad_norm": 0.7926428914070129, "learning_rate": 1.9979098568393498e-05, "loss": 1.0664, "step": 3470 }, { "epoch": 0.050131812092138815, "grad_norm": 0.9089264869689941, "learning_rate": 1.9978795979399947e-05, "loss": 1.081, "step": 3480 }, { "epoch": 0.050275869023438065, "grad_norm": 0.8083300590515137, "learning_rate": 1.9978491218160173e-05, "loss": 1.0699, "step": 3490 }, { "epoch": 0.050419925954737314, "grad_norm": 0.854659914970398, "learning_rate": 1.997818428474051e-05, "loss": 1.0734, "step": 3500 }, { "epoch": 0.050563982886036564, "grad_norm": 0.8330906629562378, "learning_rate": 1.997787517920778e-05, "loss": 1.0931, "step": 3510 }, { "epoch": 0.050708039817335814, "grad_norm": 0.7009166479110718, "learning_rate": 1.9977563901629268e-05, "loss": 1.0709, "step": 3520 }, { "epoch": 0.05085209674863506, "grad_norm": 1.5810503959655762, "learning_rate": 1.9977250452072732e-05, "loss": 1.0725, "step": 3530 }, { "epoch": 0.05099615367993431, "grad_norm": 0.7954139709472656, "learning_rate": 1.9976934830606415e-05, "loss": 1.0927, "step": 3540 }, { "epoch": 0.05114021061123356, "grad_norm": 0.6684684753417969, "learning_rate": 1.9976617037299012e-05, "loss": 1.0656, "step": 3550 }, { "epoch": 0.05128426754253281, "grad_norm": 0.9031051993370056, "learning_rate": 1.997629707221971e-05, "loss": 1.0786, "step": 3560 }, { "epoch": 0.05142832447383206, "grad_norm": 0.8937859535217285, "learning_rate": 1.9975974935438158e-05, "loss": 1.0773, "step": 3570 }, { "epoch": 0.05157238140513131, "grad_norm": 0.8457878232002258, "learning_rate": 1.9975650627024476e-05, "loss": 1.0728, "step": 3580 }, { "epoch": 0.05171643833643056, "grad_norm": 0.938657820224762, "learning_rate": 1.9975324147049274e-05, "loss": 1.0856, "step": 3590 }, { "epoch": 0.05186049526772981, "grad_norm": 0.7948675155639648, "learning_rate": 1.997499549558361e-05, "loss": 1.0388, "step": 3600 }, { "epoch": 0.05200455219902906, "grad_norm": 0.8567399382591248, "learning_rate": 1.9974664672699037e-05, "loss": 1.0829, "step": 3610 }, { "epoch": 0.05214860913032831, "grad_norm": 0.8215539455413818, "learning_rate": 1.997433167846756e-05, "loss": 1.0899, "step": 3620 }, { "epoch": 0.05229266606162755, "grad_norm": 0.7473692893981934, "learning_rate": 1.9973996512961677e-05, "loss": 1.0815, "step": 3630 }, { "epoch": 0.0524367229929268, "grad_norm": 0.7624854445457458, "learning_rate": 1.9973659176254342e-05, "loss": 1.0752, "step": 3640 }, { "epoch": 0.05258077992422605, "grad_norm": 0.755251944065094, "learning_rate": 1.997331966841899e-05, "loss": 1.0708, "step": 3650 }, { "epoch": 0.0527248368555253, "grad_norm": 1.3446722030639648, "learning_rate": 1.997297798952953e-05, "loss": 1.0519, "step": 3660 }, { "epoch": 0.05286889378682455, "grad_norm": 1.1751680374145508, "learning_rate": 1.997263413966034e-05, "loss": 1.0863, "step": 3670 }, { "epoch": 0.0530129507181238, "grad_norm": 0.9749715924263, "learning_rate": 1.9972288118886264e-05, "loss": 1.0808, "step": 3680 }, { "epoch": 0.05315700764942305, "grad_norm": 0.6592453122138977, "learning_rate": 1.997193992728264e-05, "loss": 1.0786, "step": 3690 }, { "epoch": 0.0533010645807223, "grad_norm": 0.891697347164154, "learning_rate": 1.9971589564925253e-05, "loss": 1.06, "step": 3700 }, { "epoch": 0.05344512151202155, "grad_norm": 0.7699964642524719, "learning_rate": 1.9971237031890374e-05, "loss": 1.06, "step": 3710 }, { "epoch": 0.0535891784433208, "grad_norm": 1.3091051578521729, "learning_rate": 1.9970882328254754e-05, "loss": 1.0784, "step": 3720 }, { "epoch": 0.05373323537462005, "grad_norm": 0.7992281317710876, "learning_rate": 1.9970525454095596e-05, "loss": 1.0922, "step": 3730 }, { "epoch": 0.0538772923059193, "grad_norm": 1.0404248237609863, "learning_rate": 1.9970166409490588e-05, "loss": 1.0586, "step": 3740 }, { "epoch": 0.05402134923721855, "grad_norm": 0.7184622883796692, "learning_rate": 1.996980519451789e-05, "loss": 1.0805, "step": 3750 }, { "epoch": 0.054165406168517796, "grad_norm": 0.7246914505958557, "learning_rate": 1.996944180925614e-05, "loss": 1.0846, "step": 3760 }, { "epoch": 0.054309463099817046, "grad_norm": 0.9941462278366089, "learning_rate": 1.9969076253784433e-05, "loss": 1.0596, "step": 3770 }, { "epoch": 0.054453520031116295, "grad_norm": 1.0598793029785156, "learning_rate": 1.996870852818235e-05, "loss": 1.0835, "step": 3780 }, { "epoch": 0.054597576962415545, "grad_norm": 0.9485270380973816, "learning_rate": 1.9968338632529935e-05, "loss": 1.0558, "step": 3790 }, { "epoch": 0.054741633893714794, "grad_norm": 0.6576977968215942, "learning_rate": 1.9967966566907716e-05, "loss": 1.0767, "step": 3800 }, { "epoch": 0.054885690825014044, "grad_norm": 0.8786500096321106, "learning_rate": 1.9967592331396687e-05, "loss": 1.0547, "step": 3810 }, { "epoch": 0.055029747756313294, "grad_norm": 1.036960244178772, "learning_rate": 1.9967215926078304e-05, "loss": 1.0436, "step": 3820 }, { "epoch": 0.05517380468761254, "grad_norm": 0.8549398183822632, "learning_rate": 1.9966837351034517e-05, "loss": 1.0723, "step": 3830 }, { "epoch": 0.05531786161891179, "grad_norm": 0.8665590286254883, "learning_rate": 1.9966456606347728e-05, "loss": 1.0872, "step": 3840 }, { "epoch": 0.05546191855021104, "grad_norm": 0.7687617540359497, "learning_rate": 1.9966073692100824e-05, "loss": 1.0543, "step": 3850 }, { "epoch": 0.05560597548151029, "grad_norm": 0.831766664981842, "learning_rate": 1.996568860837716e-05, "loss": 1.0872, "step": 3860 }, { "epoch": 0.05575003241280954, "grad_norm": 0.9214545488357544, "learning_rate": 1.9965301355260563e-05, "loss": 1.0597, "step": 3870 }, { "epoch": 0.05589408934410879, "grad_norm": 0.68256014585495, "learning_rate": 1.996491193283533e-05, "loss": 1.0837, "step": 3880 }, { "epoch": 0.05603814627540804, "grad_norm": 0.9160231351852417, "learning_rate": 1.9964520341186236e-05, "loss": 1.0835, "step": 3890 }, { "epoch": 0.05618220320670729, "grad_norm": 0.7571538686752319, "learning_rate": 1.9964126580398528e-05, "loss": 1.0703, "step": 3900 }, { "epoch": 0.05632626013800654, "grad_norm": 0.8873952031135559, "learning_rate": 1.9963730650557918e-05, "loss": 1.0631, "step": 3910 }, { "epoch": 0.05647031706930579, "grad_norm": 0.8825143575668335, "learning_rate": 1.996333255175059e-05, "loss": 1.0535, "step": 3920 }, { "epoch": 0.05661437400060504, "grad_norm": 0.8717725872993469, "learning_rate": 1.9962932284063215e-05, "loss": 1.0596, "step": 3930 }, { "epoch": 0.05675843093190429, "grad_norm": 0.8045423626899719, "learning_rate": 1.9962529847582916e-05, "loss": 1.0463, "step": 3940 }, { "epoch": 0.05690248786320354, "grad_norm": 0.9340243339538574, "learning_rate": 1.9962125242397307e-05, "loss": 1.0637, "step": 3950 }, { "epoch": 0.05704654479450279, "grad_norm": 0.7398734092712402, "learning_rate": 1.9961718468594457e-05, "loss": 1.0579, "step": 3960 }, { "epoch": 0.05719060172580204, "grad_norm": 0.8126267790794373, "learning_rate": 1.996130952626292e-05, "loss": 1.0624, "step": 3970 }, { "epoch": 0.05733465865710129, "grad_norm": 1.2689749002456665, "learning_rate": 1.9960898415491712e-05, "loss": 1.0704, "step": 3980 }, { "epoch": 0.057478715588400536, "grad_norm": 1.1170166730880737, "learning_rate": 1.9960485136370335e-05, "loss": 1.0836, "step": 3990 }, { "epoch": 0.057622772519699786, "grad_norm": 0.98178631067276, "learning_rate": 1.9960069688988746e-05, "loss": 1.0561, "step": 4000 }, { "epoch": 0.057766829450999035, "grad_norm": 1.0303664207458496, "learning_rate": 1.9959652073437384e-05, "loss": 1.0828, "step": 4010 }, { "epoch": 0.057910886382298285, "grad_norm": 0.8844668865203857, "learning_rate": 1.995923228980716e-05, "loss": 1.0663, "step": 4020 }, { "epoch": 0.058054943313597535, "grad_norm": 0.8205824494361877, "learning_rate": 1.9958810338189458e-05, "loss": 1.0771, "step": 4030 }, { "epoch": 0.058199000244896784, "grad_norm": 0.6665905714035034, "learning_rate": 1.995838621867612e-05, "loss": 1.0525, "step": 4040 }, { "epoch": 0.058343057176196034, "grad_norm": 0.9651935696601868, "learning_rate": 1.9957959931359484e-05, "loss": 1.054, "step": 4050 }, { "epoch": 0.05848711410749528, "grad_norm": 0.7814925909042358, "learning_rate": 1.995753147633234e-05, "loss": 1.0635, "step": 4060 }, { "epoch": 0.05863117103879453, "grad_norm": 0.9259180426597595, "learning_rate": 1.9957100853687955e-05, "loss": 1.0841, "step": 4070 }, { "epoch": 0.05877522797009378, "grad_norm": 0.988832414150238, "learning_rate": 1.9956668063520075e-05, "loss": 1.0687, "step": 4080 }, { "epoch": 0.05891928490139303, "grad_norm": 0.8639126420021057, "learning_rate": 1.9956233105922907e-05, "loss": 1.0352, "step": 4090 }, { "epoch": 0.05906334183269228, "grad_norm": 0.8149707913398743, "learning_rate": 1.9955795980991143e-05, "loss": 1.065, "step": 4100 }, { "epoch": 0.05920739876399153, "grad_norm": 0.8967310786247253, "learning_rate": 1.995535668881993e-05, "loss": 1.0655, "step": 4110 }, { "epoch": 0.05935145569529078, "grad_norm": 0.7974560856819153, "learning_rate": 1.9954915229504904e-05, "loss": 1.0549, "step": 4120 }, { "epoch": 0.05949551262659003, "grad_norm": 0.8725034594535828, "learning_rate": 1.9954471603142157e-05, "loss": 1.0668, "step": 4130 }, { "epoch": 0.05963956955788928, "grad_norm": 0.8084957003593445, "learning_rate": 1.9954025809828266e-05, "loss": 1.0524, "step": 4140 }, { "epoch": 0.05978362648918853, "grad_norm": 0.8285936713218689, "learning_rate": 1.995357784966027e-05, "loss": 1.08, "step": 4150 }, { "epoch": 0.05992768342048778, "grad_norm": 0.8037678003311157, "learning_rate": 1.9953127722735688e-05, "loss": 1.0634, "step": 4160 }, { "epoch": 0.06007174035178703, "grad_norm": 0.7721096277236938, "learning_rate": 1.9952675429152503e-05, "loss": 1.0701, "step": 4170 }, { "epoch": 0.06021579728308628, "grad_norm": 0.6830259561538696, "learning_rate": 1.9952220969009175e-05, "loss": 1.0651, "step": 4180 }, { "epoch": 0.06035985421438553, "grad_norm": 1.0105340480804443, "learning_rate": 1.995176434240463e-05, "loss": 1.073, "step": 4190 }, { "epoch": 0.06050391114568478, "grad_norm": 1.1359127759933472, "learning_rate": 1.995130554943828e-05, "loss": 1.0713, "step": 4200 }, { "epoch": 0.06064796807698403, "grad_norm": 0.9534348249435425, "learning_rate": 1.9950844590209982e-05, "loss": 1.0618, "step": 4210 }, { "epoch": 0.060792025008283276, "grad_norm": 0.7248373031616211, "learning_rate": 1.9950381464820093e-05, "loss": 1.0526, "step": 4220 }, { "epoch": 0.060936081939582526, "grad_norm": 0.7521722912788391, "learning_rate": 1.9949916173369424e-05, "loss": 1.0809, "step": 4230 }, { "epoch": 0.061080138870881776, "grad_norm": 0.7272984385490417, "learning_rate": 1.994944871595926e-05, "loss": 1.0616, "step": 4240 }, { "epoch": 0.061224195802181025, "grad_norm": 0.9190437197685242, "learning_rate": 1.994897909269137e-05, "loss": 1.0691, "step": 4250 }, { "epoch": 0.061368252733480275, "grad_norm": 0.8137868046760559, "learning_rate": 1.9948507303667972e-05, "loss": 1.0591, "step": 4260 }, { "epoch": 0.061512309664779524, "grad_norm": 0.8312646746635437, "learning_rate": 1.9948033348991777e-05, "loss": 1.0482, "step": 4270 }, { "epoch": 0.061656366596078774, "grad_norm": 0.9318916201591492, "learning_rate": 1.994755722876595e-05, "loss": 1.0838, "step": 4280 }, { "epoch": 0.061800423527378016, "grad_norm": 0.7599692344665527, "learning_rate": 1.9947078943094148e-05, "loss": 1.0376, "step": 4290 }, { "epoch": 0.061944480458677266, "grad_norm": 0.8444345593452454, "learning_rate": 1.9946598492080475e-05, "loss": 1.092, "step": 4300 }, { "epoch": 0.062088537389976516, "grad_norm": 0.8249942064285278, "learning_rate": 1.9946115875829526e-05, "loss": 1.0936, "step": 4310 }, { "epoch": 0.062232594321275765, "grad_norm": 0.7922937870025635, "learning_rate": 1.9945631094446355e-05, "loss": 1.0707, "step": 4320 }, { "epoch": 0.062376651252575015, "grad_norm": 1.1079844236373901, "learning_rate": 1.99451441480365e-05, "loss": 1.0788, "step": 4330 }, { "epoch": 0.06252070818387427, "grad_norm": 0.7890607118606567, "learning_rate": 1.9944655036705952e-05, "loss": 1.0795, "step": 4340 }, { "epoch": 0.06266476511517352, "grad_norm": 0.7317211031913757, "learning_rate": 1.994416376056119e-05, "loss": 1.0614, "step": 4350 }, { "epoch": 0.06280882204647277, "grad_norm": 0.8626124262809753, "learning_rate": 1.9943670319709162e-05, "loss": 1.0846, "step": 4360 }, { "epoch": 0.06295287897777202, "grad_norm": 0.8770148158073425, "learning_rate": 1.994317471425727e-05, "loss": 1.0782, "step": 4370 }, { "epoch": 0.06309693590907127, "grad_norm": 0.9757588505744934, "learning_rate": 1.9942676944313416e-05, "loss": 1.0663, "step": 4380 }, { "epoch": 0.06324099284037052, "grad_norm": 0.8641564249992371, "learning_rate": 1.9942177009985947e-05, "loss": 1.0705, "step": 4390 }, { "epoch": 0.06338504977166977, "grad_norm": 0.9755296111106873, "learning_rate": 1.99416749113837e-05, "loss": 1.0659, "step": 4400 }, { "epoch": 0.06352910670296902, "grad_norm": 0.9701070785522461, "learning_rate": 1.9941170648615963e-05, "loss": 1.0398, "step": 4410 }, { "epoch": 0.06367316363426827, "grad_norm": 0.8417096138000488, "learning_rate": 1.994066422179252e-05, "loss": 1.0635, "step": 4420 }, { "epoch": 0.06381722056556752, "grad_norm": 0.7876541018486023, "learning_rate": 1.9940155631023604e-05, "loss": 1.0863, "step": 4430 }, { "epoch": 0.06396127749686677, "grad_norm": 0.8045748472213745, "learning_rate": 1.9939644876419934e-05, "loss": 1.0734, "step": 4440 }, { "epoch": 0.06410533442816602, "grad_norm": 0.7743977904319763, "learning_rate": 1.993913195809269e-05, "loss": 1.0511, "step": 4450 }, { "epoch": 0.06424939135946527, "grad_norm": 0.6236658692359924, "learning_rate": 1.9938616876153533e-05, "loss": 1.044, "step": 4460 }, { "epoch": 0.06439344829076452, "grad_norm": 0.8350549936294556, "learning_rate": 1.9938099630714582e-05, "loss": 1.0797, "step": 4470 }, { "epoch": 0.06453750522206377, "grad_norm": 0.8814065456390381, "learning_rate": 1.9937580221888438e-05, "loss": 1.0769, "step": 4480 }, { "epoch": 0.06468156215336301, "grad_norm": 0.8198302984237671, "learning_rate": 1.9937058649788167e-05, "loss": 1.0507, "step": 4490 }, { "epoch": 0.06482561908466226, "grad_norm": 0.8173579573631287, "learning_rate": 1.9936534914527312e-05, "loss": 1.0905, "step": 4500 }, { "epoch": 0.06496967601596151, "grad_norm": 0.9264322519302368, "learning_rate": 1.9936009016219883e-05, "loss": 1.041, "step": 4510 }, { "epoch": 0.06511373294726076, "grad_norm": 0.8179509043693542, "learning_rate": 1.9935480954980352e-05, "loss": 1.0739, "step": 4520 }, { "epoch": 0.06525778987856001, "grad_norm": 0.8666450381278992, "learning_rate": 1.993495073092368e-05, "loss": 1.0975, "step": 4530 }, { "epoch": 0.06540184680985926, "grad_norm": 0.7168770432472229, "learning_rate": 1.9934418344165288e-05, "loss": 1.0763, "step": 4540 }, { "epoch": 0.06554590374115851, "grad_norm": 0.6926397681236267, "learning_rate": 1.9933883794821066e-05, "loss": 1.0664, "step": 4550 }, { "epoch": 0.06568996067245776, "grad_norm": 0.70762038230896, "learning_rate": 1.9933347083007382e-05, "loss": 1.0484, "step": 4560 }, { "epoch": 0.06583401760375701, "grad_norm": 1.0127503871917725, "learning_rate": 1.993280820884107e-05, "loss": 1.0619, "step": 4570 }, { "epoch": 0.06597807453505626, "grad_norm": 0.7766719460487366, "learning_rate": 1.993226717243943e-05, "loss": 1.0684, "step": 4580 }, { "epoch": 0.06612213146635551, "grad_norm": 0.7988908886909485, "learning_rate": 1.9931723973920245e-05, "loss": 1.0403, "step": 4590 }, { "epoch": 0.06626618839765476, "grad_norm": 0.7570084929466248, "learning_rate": 1.9931178613401758e-05, "loss": 1.0252, "step": 4600 }, { "epoch": 0.06641024532895401, "grad_norm": 0.7890056371688843, "learning_rate": 1.993063109100269e-05, "loss": 1.0672, "step": 4610 }, { "epoch": 0.06655430226025325, "grad_norm": 0.8622692823410034, "learning_rate": 1.9930081406842222e-05, "loss": 1.0702, "step": 4620 }, { "epoch": 0.0666983591915525, "grad_norm": 0.7100027203559875, "learning_rate": 1.992952956104002e-05, "loss": 1.0532, "step": 4630 }, { "epoch": 0.06684241612285174, "grad_norm": 0.6402877569198608, "learning_rate": 1.992897555371621e-05, "loss": 1.0754, "step": 4640 }, { "epoch": 0.066986473054151, "grad_norm": 0.9325397610664368, "learning_rate": 1.992841938499139e-05, "loss": 1.0463, "step": 4650 }, { "epoch": 0.06713052998545024, "grad_norm": 0.816155731678009, "learning_rate": 1.9927861054986634e-05, "loss": 1.0635, "step": 4660 }, { "epoch": 0.0672745869167495, "grad_norm": 0.6558541059494019, "learning_rate": 1.9927300563823485e-05, "loss": 1.066, "step": 4670 }, { "epoch": 0.06741864384804874, "grad_norm": 0.7550584077835083, "learning_rate": 1.9926737911623947e-05, "loss": 1.0356, "step": 4680 }, { "epoch": 0.06756270077934799, "grad_norm": 0.8332971334457397, "learning_rate": 1.9926173098510503e-05, "loss": 1.0542, "step": 4690 }, { "epoch": 0.06770675771064724, "grad_norm": 0.7391961812973022, "learning_rate": 1.9925606124606113e-05, "loss": 1.0795, "step": 4700 }, { "epoch": 0.06785081464194649, "grad_norm": 0.8095934391021729, "learning_rate": 1.992503699003419e-05, "loss": 1.0627, "step": 4710 }, { "epoch": 0.06799487157324574, "grad_norm": 0.8879664540290833, "learning_rate": 1.992446569491863e-05, "loss": 1.0445, "step": 4720 }, { "epoch": 0.06813892850454499, "grad_norm": 0.8452285528182983, "learning_rate": 1.99238922393838e-05, "loss": 1.0752, "step": 4730 }, { "epoch": 0.06828298543584424, "grad_norm": 0.8504966497421265, "learning_rate": 1.9923316623554522e-05, "loss": 1.0659, "step": 4740 }, { "epoch": 0.06842704236714349, "grad_norm": 0.8374481201171875, "learning_rate": 1.992273884755611e-05, "loss": 1.0657, "step": 4750 }, { "epoch": 0.06857109929844274, "grad_norm": 0.7134926319122314, "learning_rate": 1.992215891151434e-05, "loss": 1.0682, "step": 4760 }, { "epoch": 0.06871515622974199, "grad_norm": 0.7555819749832153, "learning_rate": 1.9921576815555448e-05, "loss": 1.0763, "step": 4770 }, { "epoch": 0.06885921316104124, "grad_norm": 0.6944034695625305, "learning_rate": 1.992099255980615e-05, "loss": 1.0558, "step": 4780 }, { "epoch": 0.06900327009234049, "grad_norm": 0.7401833534240723, "learning_rate": 1.9920406144393635e-05, "loss": 1.0475, "step": 4790 }, { "epoch": 0.06914732702363974, "grad_norm": 1.1073486804962158, "learning_rate": 1.991981756944555e-05, "loss": 1.0552, "step": 4800 }, { "epoch": 0.06929138395493899, "grad_norm": 0.7874646186828613, "learning_rate": 1.9919226835090025e-05, "loss": 1.0692, "step": 4810 }, { "epoch": 0.06943544088623824, "grad_norm": 0.7678020596504211, "learning_rate": 1.991863394145566e-05, "loss": 1.0719, "step": 4820 }, { "epoch": 0.06957949781753749, "grad_norm": 0.7626875638961792, "learning_rate": 1.9918038888671507e-05, "loss": 1.054, "step": 4830 }, { "epoch": 0.06972355474883674, "grad_norm": 0.8412059545516968, "learning_rate": 1.991744167686711e-05, "loss": 1.0566, "step": 4840 }, { "epoch": 0.06986761168013599, "grad_norm": 0.8255806565284729, "learning_rate": 1.991684230617247e-05, "loss": 1.0711, "step": 4850 }, { "epoch": 0.07001166861143523, "grad_norm": 0.7971453070640564, "learning_rate": 1.9916240776718063e-05, "loss": 1.0303, "step": 4860 }, { "epoch": 0.07015572554273448, "grad_norm": 0.7085686326026917, "learning_rate": 1.9915637088634836e-05, "loss": 1.0476, "step": 4870 }, { "epoch": 0.07029978247403373, "grad_norm": 0.7923370599746704, "learning_rate": 1.99150312420542e-05, "loss": 1.0587, "step": 4880 }, { "epoch": 0.07044383940533298, "grad_norm": 0.827341616153717, "learning_rate": 1.991442323710804e-05, "loss": 1.064, "step": 4890 }, { "epoch": 0.07058789633663223, "grad_norm": 0.7372671961784363, "learning_rate": 1.9913813073928708e-05, "loss": 1.0889, "step": 4900 }, { "epoch": 0.07073195326793148, "grad_norm": 0.7749466300010681, "learning_rate": 1.9913200752649033e-05, "loss": 1.0859, "step": 4910 }, { "epoch": 0.07087601019923073, "grad_norm": 0.9195302724838257, "learning_rate": 1.9912586273402308e-05, "loss": 1.0787, "step": 4920 }, { "epoch": 0.07102006713052998, "grad_norm": 0.7387667894363403, "learning_rate": 1.9911969636322297e-05, "loss": 1.0395, "step": 4930 }, { "epoch": 0.07116412406182923, "grad_norm": 0.7745783925056458, "learning_rate": 1.9911350841543225e-05, "loss": 1.0518, "step": 4940 }, { "epoch": 0.07130818099312848, "grad_norm": 0.9240629076957703, "learning_rate": 1.991072988919981e-05, "loss": 1.0497, "step": 4950 }, { "epoch": 0.07145223792442773, "grad_norm": 0.7768181562423706, "learning_rate": 1.9910106779427213e-05, "loss": 1.0539, "step": 4960 }, { "epoch": 0.07159629485572698, "grad_norm": 0.6498637795448303, "learning_rate": 1.9909481512361083e-05, "loss": 1.0643, "step": 4970 }, { "epoch": 0.07174035178702623, "grad_norm": 0.8959274291992188, "learning_rate": 1.9908854088137525e-05, "loss": 1.0548, "step": 4980 }, { "epoch": 0.07188440871832548, "grad_norm": 0.9232971668243408, "learning_rate": 1.9908224506893124e-05, "loss": 1.0458, "step": 4990 }, { "epoch": 0.07202846564962473, "grad_norm": 0.599284827709198, "learning_rate": 1.9907592768764935e-05, "loss": 1.0692, "step": 5000 }, { "epoch": 0.07217252258092398, "grad_norm": 0.7615419626235962, "learning_rate": 1.9906958873890474e-05, "loss": 1.0357, "step": 5010 }, { "epoch": 0.07231657951222323, "grad_norm": 0.9292043447494507, "learning_rate": 1.9906322822407734e-05, "loss": 1.0751, "step": 5020 }, { "epoch": 0.07246063644352248, "grad_norm": 0.6925828456878662, "learning_rate": 1.990568461445517e-05, "loss": 1.0652, "step": 5030 }, { "epoch": 0.07260469337482173, "grad_norm": 0.8829536437988281, "learning_rate": 1.990504425017172e-05, "loss": 1.0627, "step": 5040 }, { "epoch": 0.07274875030612098, "grad_norm": 0.967322826385498, "learning_rate": 1.9904401729696775e-05, "loss": 1.05, "step": 5050 }, { "epoch": 0.07289280723742023, "grad_norm": 0.8069252371788025, "learning_rate": 1.9903757053170203e-05, "loss": 1.06, "step": 5060 }, { "epoch": 0.07303686416871948, "grad_norm": 0.9855362772941589, "learning_rate": 1.9903110220732344e-05, "loss": 1.0571, "step": 5070 }, { "epoch": 0.07318092110001873, "grad_norm": 0.764899492263794, "learning_rate": 1.9902461232524003e-05, "loss": 1.0853, "step": 5080 }, { "epoch": 0.07332497803131797, "grad_norm": 0.7716745734214783, "learning_rate": 1.9901810088686457e-05, "loss": 1.0688, "step": 5090 }, { "epoch": 0.07346903496261722, "grad_norm": 0.768663227558136, "learning_rate": 1.990115678936145e-05, "loss": 1.0718, "step": 5100 }, { "epoch": 0.07361309189391647, "grad_norm": 0.859196126461029, "learning_rate": 1.9900501334691194e-05, "loss": 1.0867, "step": 5110 }, { "epoch": 0.07375714882521572, "grad_norm": 0.7254055142402649, "learning_rate": 1.9899843724818378e-05, "loss": 1.059, "step": 5120 }, { "epoch": 0.07390120575651497, "grad_norm": 0.7900877594947815, "learning_rate": 1.9899183959886153e-05, "loss": 1.0737, "step": 5130 }, { "epoch": 0.07404526268781422, "grad_norm": 0.7576618790626526, "learning_rate": 1.9898522040038136e-05, "loss": 1.0433, "step": 5140 }, { "epoch": 0.07418931961911347, "grad_norm": 0.7771943807601929, "learning_rate": 1.9897857965418423e-05, "loss": 1.0622, "step": 5150 }, { "epoch": 0.07433337655041272, "grad_norm": 0.6736606955528259, "learning_rate": 1.989719173617157e-05, "loss": 1.0559, "step": 5160 }, { "epoch": 0.07447743348171197, "grad_norm": 0.8990259170532227, "learning_rate": 1.9896523352442608e-05, "loss": 1.0605, "step": 5170 }, { "epoch": 0.07462149041301122, "grad_norm": 0.7779847979545593, "learning_rate": 1.9895852814377034e-05, "loss": 1.0855, "step": 5180 }, { "epoch": 0.07476554734431047, "grad_norm": 0.8152965903282166, "learning_rate": 1.989518012212082e-05, "loss": 1.0682, "step": 5190 }, { "epoch": 0.07490960427560972, "grad_norm": 0.7711450457572937, "learning_rate": 1.9894505275820393e-05, "loss": 1.0408, "step": 5200 }, { "epoch": 0.07505366120690897, "grad_norm": 0.7221505641937256, "learning_rate": 1.9893828275622663e-05, "loss": 1.0547, "step": 5210 }, { "epoch": 0.07519771813820822, "grad_norm": 1.1068379878997803, "learning_rate": 1.9893149121675004e-05, "loss": 1.0609, "step": 5220 }, { "epoch": 0.07534177506950747, "grad_norm": 0.8413456678390503, "learning_rate": 1.9892467814125256e-05, "loss": 1.0622, "step": 5230 }, { "epoch": 0.07548583200080672, "grad_norm": 0.8818542957305908, "learning_rate": 1.9891784353121735e-05, "loss": 1.0667, "step": 5240 }, { "epoch": 0.07562988893210597, "grad_norm": 0.7753092646598816, "learning_rate": 1.9891098738813214e-05, "loss": 1.0343, "step": 5250 }, { "epoch": 0.07577394586340522, "grad_norm": 0.6563664078712463, "learning_rate": 1.9890410971348947e-05, "loss": 1.0512, "step": 5260 }, { "epoch": 0.07591800279470447, "grad_norm": 0.7327760457992554, "learning_rate": 1.9889721050878648e-05, "loss": 1.0648, "step": 5270 }, { "epoch": 0.07606205972600372, "grad_norm": 0.7993945479393005, "learning_rate": 1.9889028977552507e-05, "loss": 1.0633, "step": 5280 }, { "epoch": 0.07620611665730297, "grad_norm": 0.7560327649116516, "learning_rate": 1.9888334751521174e-05, "loss": 1.0747, "step": 5290 }, { "epoch": 0.07635017358860222, "grad_norm": 0.7332075834274292, "learning_rate": 1.988763837293578e-05, "loss": 1.034, "step": 5300 }, { "epoch": 0.07649423051990147, "grad_norm": 0.7934123873710632, "learning_rate": 1.9886939841947908e-05, "loss": 1.064, "step": 5310 }, { "epoch": 0.07663828745120072, "grad_norm": 0.7081010341644287, "learning_rate": 1.9886239158709623e-05, "loss": 1.056, "step": 5320 }, { "epoch": 0.07678234438249996, "grad_norm": 0.7036541700363159, "learning_rate": 1.988553632337346e-05, "loss": 1.0861, "step": 5330 }, { "epoch": 0.07692640131379921, "grad_norm": 0.7646002769470215, "learning_rate": 1.9884831336092402e-05, "loss": 1.0845, "step": 5340 }, { "epoch": 0.07707045824509846, "grad_norm": 0.7128923535346985, "learning_rate": 1.988412419701993e-05, "loss": 1.0437, "step": 5350 }, { "epoch": 0.07721451517639771, "grad_norm": 0.8597097396850586, "learning_rate": 1.9883414906309968e-05, "loss": 1.043, "step": 5360 }, { "epoch": 0.07735857210769696, "grad_norm": 0.8186230063438416, "learning_rate": 1.9882703464116925e-05, "loss": 1.0641, "step": 5370 }, { "epoch": 0.07750262903899621, "grad_norm": 0.6041313409805298, "learning_rate": 1.988198987059567e-05, "loss": 1.0499, "step": 5380 }, { "epoch": 0.07764668597029546, "grad_norm": 0.6971914172172546, "learning_rate": 1.988127412590154e-05, "loss": 1.0587, "step": 5390 }, { "epoch": 0.07779074290159471, "grad_norm": 0.7470895648002625, "learning_rate": 1.9880556230190345e-05, "loss": 1.0452, "step": 5400 }, { "epoch": 0.07793479983289396, "grad_norm": 0.9479622840881348, "learning_rate": 1.987983618361836e-05, "loss": 1.0646, "step": 5410 }, { "epoch": 0.07807885676419321, "grad_norm": 0.7283429503440857, "learning_rate": 1.9879113986342335e-05, "loss": 1.0643, "step": 5420 }, { "epoch": 0.07822291369549246, "grad_norm": 0.7103187441825867, "learning_rate": 1.9878389638519473e-05, "loss": 1.0577, "step": 5430 }, { "epoch": 0.07836697062679171, "grad_norm": 0.7464463114738464, "learning_rate": 1.987766314030746e-05, "loss": 1.044, "step": 5440 }, { "epoch": 0.07851102755809096, "grad_norm": 0.7824941277503967, "learning_rate": 1.9876934491864442e-05, "loss": 1.0483, "step": 5450 }, { "epoch": 0.07865508448939021, "grad_norm": 0.6849684119224548, "learning_rate": 1.9876203693349044e-05, "loss": 1.0661, "step": 5460 }, { "epoch": 0.07879914142068946, "grad_norm": 0.7476369142532349, "learning_rate": 1.987547074492034e-05, "loss": 1.0597, "step": 5470 }, { "epoch": 0.07894319835198871, "grad_norm": 0.818276584148407, "learning_rate": 1.987473564673789e-05, "loss": 1.0407, "step": 5480 }, { "epoch": 0.07908725528328796, "grad_norm": 0.7284795641899109, "learning_rate": 1.9873998398961708e-05, "loss": 1.06, "step": 5490 }, { "epoch": 0.07923131221458721, "grad_norm": 0.7441176176071167, "learning_rate": 1.9873259001752284e-05, "loss": 1.0547, "step": 5500 }, { "epoch": 0.07937536914588646, "grad_norm": 0.922211766242981, "learning_rate": 1.9872517455270583e-05, "loss": 1.0634, "step": 5510 }, { "epoch": 0.0795194260771857, "grad_norm": 0.7370014190673828, "learning_rate": 1.987177375967802e-05, "loss": 1.0553, "step": 5520 }, { "epoch": 0.07966348300848496, "grad_norm": 0.7166247963905334, "learning_rate": 1.987102791513649e-05, "loss": 1.0467, "step": 5530 }, { "epoch": 0.0798075399397842, "grad_norm": 0.7569652199745178, "learning_rate": 1.9870279921808356e-05, "loss": 1.0489, "step": 5540 }, { "epoch": 0.07995159687108346, "grad_norm": 0.7443186044692993, "learning_rate": 1.9869529779856448e-05, "loss": 1.062, "step": 5550 }, { "epoch": 0.0800956538023827, "grad_norm": 0.7470227479934692, "learning_rate": 1.9868777489444052e-05, "loss": 1.055, "step": 5560 }, { "epoch": 0.08023971073368195, "grad_norm": 1.0605957508087158, "learning_rate": 1.9868023050734935e-05, "loss": 1.0612, "step": 5570 }, { "epoch": 0.0803837676649812, "grad_norm": 0.7970819473266602, "learning_rate": 1.986726646389333e-05, "loss": 1.0296, "step": 5580 }, { "epoch": 0.08052782459628045, "grad_norm": 0.7080398201942444, "learning_rate": 1.9866507729083937e-05, "loss": 1.0654, "step": 5590 }, { "epoch": 0.0806718815275797, "grad_norm": 0.7669221758842468, "learning_rate": 1.986574684647192e-05, "loss": 1.0545, "step": 5600 }, { "epoch": 0.08081593845887895, "grad_norm": 0.9112254977226257, "learning_rate": 1.986498381622291e-05, "loss": 1.0693, "step": 5610 }, { "epoch": 0.0809599953901782, "grad_norm": 0.851102888584137, "learning_rate": 1.9864218638503012e-05, "loss": 1.0597, "step": 5620 }, { "epoch": 0.08110405232147745, "grad_norm": 0.764011561870575, "learning_rate": 1.9863451313478796e-05, "loss": 1.0503, "step": 5630 }, { "epoch": 0.0812481092527767, "grad_norm": 0.6748458743095398, "learning_rate": 1.986268184131729e-05, "loss": 1.0549, "step": 5640 }, { "epoch": 0.08139216618407595, "grad_norm": 0.6882784366607666, "learning_rate": 1.9861910222186006e-05, "loss": 1.0698, "step": 5650 }, { "epoch": 0.0815362231153752, "grad_norm": 0.7472461462020874, "learning_rate": 1.9861136456252912e-05, "loss": 1.0662, "step": 5660 }, { "epoch": 0.08168028004667445, "grad_norm": 0.7899004817008972, "learning_rate": 1.986036054368645e-05, "loss": 1.0491, "step": 5670 }, { "epoch": 0.0818243369779737, "grad_norm": 0.8129688501358032, "learning_rate": 1.9859582484655515e-05, "loss": 1.0488, "step": 5680 }, { "epoch": 0.08196839390927295, "grad_norm": 0.8368383049964905, "learning_rate": 1.9858802279329486e-05, "loss": 1.0577, "step": 5690 }, { "epoch": 0.0821124508405722, "grad_norm": 0.7655535936355591, "learning_rate": 1.9858019927878204e-05, "loss": 1.0549, "step": 5700 }, { "epoch": 0.08225650777187145, "grad_norm": 0.8787673711776733, "learning_rate": 1.985723543047198e-05, "loss": 1.0851, "step": 5710 }, { "epoch": 0.0824005647031707, "grad_norm": 0.6964054703712463, "learning_rate": 1.9856448787281576e-05, "loss": 1.0309, "step": 5720 }, { "epoch": 0.08254462163446995, "grad_norm": 0.7971175909042358, "learning_rate": 1.9855659998478245e-05, "loss": 1.0492, "step": 5730 }, { "epoch": 0.0826886785657692, "grad_norm": 0.8056570887565613, "learning_rate": 1.9854869064233694e-05, "loss": 1.0761, "step": 5740 }, { "epoch": 0.08283273549706845, "grad_norm": 0.8324787020683289, "learning_rate": 1.985407598472009e-05, "loss": 1.0696, "step": 5750 }, { "epoch": 0.0829767924283677, "grad_norm": 0.863656222820282, "learning_rate": 1.9853280760110084e-05, "loss": 1.0613, "step": 5760 }, { "epoch": 0.08312084935966695, "grad_norm": 0.7228241562843323, "learning_rate": 1.985248339057678e-05, "loss": 1.0698, "step": 5770 }, { "epoch": 0.0832649062909662, "grad_norm": 0.7174868583679199, "learning_rate": 1.985168387629376e-05, "loss": 1.0546, "step": 5780 }, { "epoch": 0.08340896322226544, "grad_norm": 0.9294284582138062, "learning_rate": 1.9850882217435063e-05, "loss": 1.0556, "step": 5790 }, { "epoch": 0.0835530201535647, "grad_norm": 0.8399975299835205, "learning_rate": 1.9850078414175203e-05, "loss": 1.0577, "step": 5800 }, { "epoch": 0.08369707708486394, "grad_norm": 0.6481358408927917, "learning_rate": 1.984927246668915e-05, "loss": 1.0627, "step": 5810 }, { "epoch": 0.0838411340161632, "grad_norm": 0.7580750584602356, "learning_rate": 1.9848464375152356e-05, "loss": 1.0733, "step": 5820 }, { "epoch": 0.08398519094746244, "grad_norm": 0.5967349410057068, "learning_rate": 1.984765413974072e-05, "loss": 1.0292, "step": 5830 }, { "epoch": 0.08412924787876169, "grad_norm": 0.7164834141731262, "learning_rate": 1.9846841760630633e-05, "loss": 1.0483, "step": 5840 }, { "epoch": 0.08427330481006094, "grad_norm": 0.755739688873291, "learning_rate": 1.984602723799893e-05, "loss": 1.0507, "step": 5850 }, { "epoch": 0.08441736174136019, "grad_norm": 1.1750564575195312, "learning_rate": 1.9845210572022923e-05, "loss": 1.0873, "step": 5860 }, { "epoch": 0.08456141867265944, "grad_norm": 0.7422271966934204, "learning_rate": 1.984439176288039e-05, "loss": 1.0529, "step": 5870 }, { "epoch": 0.08470547560395869, "grad_norm": 0.8092758059501648, "learning_rate": 1.984357081074957e-05, "loss": 1.0675, "step": 5880 }, { "epoch": 0.08484953253525794, "grad_norm": 0.8689402341842651, "learning_rate": 1.984274771580918e-05, "loss": 1.0583, "step": 5890 }, { "epoch": 0.08499358946655719, "grad_norm": 0.7116698622703552, "learning_rate": 1.9841922478238394e-05, "loss": 1.0674, "step": 5900 }, { "epoch": 0.08513764639785644, "grad_norm": 0.7035679817199707, "learning_rate": 1.9841095098216853e-05, "loss": 1.0587, "step": 5910 }, { "epoch": 0.08528170332915569, "grad_norm": 0.6325827240943909, "learning_rate": 1.9840265575924665e-05, "loss": 1.0675, "step": 5920 }, { "epoch": 0.08542576026045492, "grad_norm": 0.7157877087593079, "learning_rate": 1.9839433911542413e-05, "loss": 1.0396, "step": 5930 }, { "epoch": 0.08556981719175417, "grad_norm": 0.7110846042633057, "learning_rate": 1.9838600105251128e-05, "loss": 1.0472, "step": 5940 }, { "epoch": 0.08571387412305342, "grad_norm": 0.7248791456222534, "learning_rate": 1.9837764157232327e-05, "loss": 1.072, "step": 5950 }, { "epoch": 0.08585793105435267, "grad_norm": 0.7378261685371399, "learning_rate": 1.983692606766798e-05, "loss": 1.0503, "step": 5960 }, { "epoch": 0.08600198798565192, "grad_norm": 0.9149982333183289, "learning_rate": 1.9836085836740525e-05, "loss": 1.0747, "step": 5970 }, { "epoch": 0.08614604491695117, "grad_norm": 0.6886637806892395, "learning_rate": 1.9835243464632876e-05, "loss": 1.0571, "step": 5980 }, { "epoch": 0.08629010184825042, "grad_norm": 0.7848793268203735, "learning_rate": 1.9834398951528402e-05, "loss": 1.084, "step": 5990 }, { "epoch": 0.08643415877954967, "grad_norm": 0.7105270624160767, "learning_rate": 1.9833552297610943e-05, "loss": 1.0671, "step": 6000 }, { "epoch": 0.08657821571084892, "grad_norm": 0.7114749550819397, "learning_rate": 1.98327035030648e-05, "loss": 1.0806, "step": 6010 }, { "epoch": 0.08672227264214817, "grad_norm": 0.8656984567642212, "learning_rate": 1.9831852568074746e-05, "loss": 1.0563, "step": 6020 }, { "epoch": 0.08686632957344742, "grad_norm": 0.8152972459793091, "learning_rate": 1.9830999492826023e-05, "loss": 1.069, "step": 6030 }, { "epoch": 0.08701038650474667, "grad_norm": 0.6695161461830139, "learning_rate": 1.9830144277504323e-05, "loss": 1.0601, "step": 6040 }, { "epoch": 0.08715444343604592, "grad_norm": 0.6562400460243225, "learning_rate": 1.9829286922295824e-05, "loss": 1.0528, "step": 6050 }, { "epoch": 0.08729850036734517, "grad_norm": 0.7877181172370911, "learning_rate": 1.982842742738716e-05, "loss": 1.0629, "step": 6060 }, { "epoch": 0.08744255729864442, "grad_norm": 0.6838402152061462, "learning_rate": 1.9827565792965422e-05, "loss": 1.0629, "step": 6070 }, { "epoch": 0.08758661422994367, "grad_norm": 0.8361284136772156, "learning_rate": 1.9826702019218188e-05, "loss": 1.054, "step": 6080 }, { "epoch": 0.08773067116124292, "grad_norm": 0.6967858076095581, "learning_rate": 1.982583610633348e-05, "loss": 1.0565, "step": 6090 }, { "epoch": 0.08787472809254217, "grad_norm": 0.8265675902366638, "learning_rate": 1.98249680544998e-05, "loss": 1.054, "step": 6100 }, { "epoch": 0.08801878502384142, "grad_norm": 0.7994257211685181, "learning_rate": 1.9824097863906112e-05, "loss": 1.0718, "step": 6110 }, { "epoch": 0.08816284195514067, "grad_norm": 0.6750326752662659, "learning_rate": 1.982322553474184e-05, "loss": 1.0732, "step": 6120 }, { "epoch": 0.08830689888643992, "grad_norm": 0.6326549649238586, "learning_rate": 1.982235106719688e-05, "loss": 1.0518, "step": 6130 }, { "epoch": 0.08845095581773917, "grad_norm": 0.7757358551025391, "learning_rate": 1.9821474461461593e-05, "loss": 1.0456, "step": 6140 }, { "epoch": 0.08859501274903842, "grad_norm": 0.7172616124153137, "learning_rate": 1.9820595717726806e-05, "loss": 1.0514, "step": 6150 }, { "epoch": 0.08873906968033766, "grad_norm": 0.7145668864250183, "learning_rate": 1.9819714836183805e-05, "loss": 1.0576, "step": 6160 }, { "epoch": 0.08888312661163691, "grad_norm": 0.601446270942688, "learning_rate": 1.9818831817024344e-05, "loss": 1.0352, "step": 6170 }, { "epoch": 0.08902718354293616, "grad_norm": 0.9036988019943237, "learning_rate": 1.9817946660440647e-05, "loss": 1.0571, "step": 6180 }, { "epoch": 0.08917124047423541, "grad_norm": 0.7691981792449951, "learning_rate": 1.9817059366625406e-05, "loss": 1.0452, "step": 6190 }, { "epoch": 0.08931529740553466, "grad_norm": 0.8221045732498169, "learning_rate": 1.9816169935771765e-05, "loss": 1.0567, "step": 6200 }, { "epoch": 0.08945935433683391, "grad_norm": 0.8755362629890442, "learning_rate": 1.9815278368073342e-05, "loss": 1.056, "step": 6210 }, { "epoch": 0.08960341126813316, "grad_norm": 0.8427380323410034, "learning_rate": 1.981438466372422e-05, "loss": 1.0557, "step": 6220 }, { "epoch": 0.08974746819943241, "grad_norm": 0.7444628477096558, "learning_rate": 1.9813488822918946e-05, "loss": 1.0886, "step": 6230 }, { "epoch": 0.08989152513073166, "grad_norm": 0.6707512736320496, "learning_rate": 1.9812590845852532e-05, "loss": 1.059, "step": 6240 }, { "epoch": 0.09003558206203091, "grad_norm": 0.7399901151657104, "learning_rate": 1.981169073272046e-05, "loss": 1.0475, "step": 6250 }, { "epoch": 0.09017963899333016, "grad_norm": 0.7267374396324158, "learning_rate": 1.9810788483718663e-05, "loss": 1.0647, "step": 6260 }, { "epoch": 0.09032369592462941, "grad_norm": 0.7055925130844116, "learning_rate": 1.9809884099043553e-05, "loss": 1.053, "step": 6270 }, { "epoch": 0.09046775285592866, "grad_norm": 0.7201550602912903, "learning_rate": 1.9808977578892006e-05, "loss": 1.0635, "step": 6280 }, { "epoch": 0.09061180978722791, "grad_norm": 0.9184608459472656, "learning_rate": 1.9808068923461354e-05, "loss": 1.082, "step": 6290 }, { "epoch": 0.09075586671852716, "grad_norm": 0.6973926424980164, "learning_rate": 1.9807158132949398e-05, "loss": 1.07, "step": 6300 }, { "epoch": 0.09089992364982641, "grad_norm": 0.6459714770317078, "learning_rate": 1.9806245207554405e-05, "loss": 1.0435, "step": 6310 }, { "epoch": 0.09104398058112566, "grad_norm": 0.8179060220718384, "learning_rate": 1.980533014747511e-05, "loss": 1.0539, "step": 6320 }, { "epoch": 0.09118803751242491, "grad_norm": 0.7989359498023987, "learning_rate": 1.9804412952910706e-05, "loss": 1.0561, "step": 6330 }, { "epoch": 0.09133209444372416, "grad_norm": 0.6431265473365784, "learning_rate": 1.9803493624060857e-05, "loss": 1.0448, "step": 6340 }, { "epoch": 0.0914761513750234, "grad_norm": 0.7607747316360474, "learning_rate": 1.980257216112568e-05, "loss": 1.0649, "step": 6350 }, { "epoch": 0.09162020830632266, "grad_norm": 0.6712044477462769, "learning_rate": 1.9801648564305772e-05, "loss": 1.0598, "step": 6360 }, { "epoch": 0.0917642652376219, "grad_norm": 0.8743002414703369, "learning_rate": 1.9800722833802186e-05, "loss": 1.0421, "step": 6370 }, { "epoch": 0.09190832216892116, "grad_norm": 0.7426406145095825, "learning_rate": 1.979979496981644e-05, "loss": 1.0655, "step": 6380 }, { "epoch": 0.0920523791002204, "grad_norm": 0.7609308958053589, "learning_rate": 1.9798864972550517e-05, "loss": 1.0717, "step": 6390 }, { "epoch": 0.09219643603151965, "grad_norm": 0.7155079245567322, "learning_rate": 1.9797932842206865e-05, "loss": 1.0535, "step": 6400 }, { "epoch": 0.0923404929628189, "grad_norm": 0.6351951956748962, "learning_rate": 1.9796998578988397e-05, "loss": 1.0336, "step": 6410 }, { "epoch": 0.09248454989411815, "grad_norm": 0.6947046518325806, "learning_rate": 1.9796062183098485e-05, "loss": 1.0268, "step": 6420 }, { "epoch": 0.0926286068254174, "grad_norm": 0.7363710999488831, "learning_rate": 1.9795123654740977e-05, "loss": 1.0436, "step": 6430 }, { "epoch": 0.09277266375671665, "grad_norm": 1.0118937492370605, "learning_rate": 1.979418299412017e-05, "loss": 1.0365, "step": 6440 }, { "epoch": 0.0929167206880159, "grad_norm": 0.7479383945465088, "learning_rate": 1.9793240201440835e-05, "loss": 1.0552, "step": 6450 }, { "epoch": 0.09306077761931515, "grad_norm": 0.6502164602279663, "learning_rate": 1.9792295276908206e-05, "loss": 1.0555, "step": 6460 }, { "epoch": 0.0932048345506144, "grad_norm": 0.6710230708122253, "learning_rate": 1.9791348220727983e-05, "loss": 1.0578, "step": 6470 }, { "epoch": 0.09334889148191365, "grad_norm": 0.7776168584823608, "learning_rate": 1.9790399033106323e-05, "loss": 1.0504, "step": 6480 }, { "epoch": 0.0934929484132129, "grad_norm": 0.7499729990959167, "learning_rate": 1.9789447714249853e-05, "loss": 1.0592, "step": 6490 }, { "epoch": 0.09363700534451215, "grad_norm": 0.6763216853141785, "learning_rate": 1.978849426436566e-05, "loss": 1.0794, "step": 6500 }, { "epoch": 0.0937810622758114, "grad_norm": 0.700145959854126, "learning_rate": 1.9787538683661295e-05, "loss": 1.0542, "step": 6510 }, { "epoch": 0.09392511920711065, "grad_norm": 0.632504403591156, "learning_rate": 1.9786580972344784e-05, "loss": 1.0477, "step": 6520 }, { "epoch": 0.0940691761384099, "grad_norm": 0.7052755951881409, "learning_rate": 1.9785621130624596e-05, "loss": 1.0513, "step": 6530 }, { "epoch": 0.09421323306970915, "grad_norm": 0.7767089009284973, "learning_rate": 1.9784659158709683e-05, "loss": 1.0499, "step": 6540 }, { "epoch": 0.0943572900010084, "grad_norm": 0.7191714644432068, "learning_rate": 1.9783695056809447e-05, "loss": 1.0624, "step": 6550 }, { "epoch": 0.09450134693230765, "grad_norm": 0.890355110168457, "learning_rate": 1.9782728825133767e-05, "loss": 1.0658, "step": 6560 }, { "epoch": 0.0946454038636069, "grad_norm": 0.6689414978027344, "learning_rate": 1.9781760463892974e-05, "loss": 1.0637, "step": 6570 }, { "epoch": 0.09478946079490615, "grad_norm": 0.7338350415229797, "learning_rate": 1.978078997329787e-05, "loss": 1.0445, "step": 6580 }, { "epoch": 0.0949335177262054, "grad_norm": 0.6850501298904419, "learning_rate": 1.977981735355971e-05, "loss": 1.064, "step": 6590 }, { "epoch": 0.09507757465750465, "grad_norm": 0.7931393980979919, "learning_rate": 1.977884260489023e-05, "loss": 1.0421, "step": 6600 }, { "epoch": 0.0952216315888039, "grad_norm": 0.727503776550293, "learning_rate": 1.977786572750161e-05, "loss": 1.0398, "step": 6610 }, { "epoch": 0.09536568852010315, "grad_norm": 0.7935442328453064, "learning_rate": 1.9776886721606506e-05, "loss": 1.0414, "step": 6620 }, { "epoch": 0.0955097454514024, "grad_norm": 0.8451200127601624, "learning_rate": 1.977590558741804e-05, "loss": 1.0521, "step": 6630 }, { "epoch": 0.09565380238270164, "grad_norm": 0.6927206516265869, "learning_rate": 1.977492232514978e-05, "loss": 1.0659, "step": 6640 }, { "epoch": 0.0957978593140009, "grad_norm": 0.6532142162322998, "learning_rate": 1.977393693501578e-05, "loss": 1.0525, "step": 6650 }, { "epoch": 0.09594191624530014, "grad_norm": 0.8253028988838196, "learning_rate": 1.977294941723054e-05, "loss": 1.0569, "step": 6660 }, { "epoch": 0.09608597317659939, "grad_norm": 0.7183331847190857, "learning_rate": 1.9771959772009028e-05, "loss": 1.0755, "step": 6670 }, { "epoch": 0.09623003010789864, "grad_norm": 0.6303145885467529, "learning_rate": 1.977096799956668e-05, "loss": 1.0589, "step": 6680 }, { "epoch": 0.09637408703919789, "grad_norm": 1.0233185291290283, "learning_rate": 1.9769974100119383e-05, "loss": 1.0494, "step": 6690 }, { "epoch": 0.09651814397049714, "grad_norm": 0.7218311429023743, "learning_rate": 1.9768978073883506e-05, "loss": 1.0553, "step": 6700 }, { "epoch": 0.09666220090179639, "grad_norm": 0.7105904817581177, "learning_rate": 1.976797992107587e-05, "loss": 1.0477, "step": 6710 }, { "epoch": 0.09680625783309564, "grad_norm": 0.8148890733718872, "learning_rate": 1.9766979641913746e-05, "loss": 1.0741, "step": 6720 }, { "epoch": 0.09695031476439489, "grad_norm": 0.766864538192749, "learning_rate": 1.9765977236614894e-05, "loss": 1.0546, "step": 6730 }, { "epoch": 0.09709437169569414, "grad_norm": 0.6800231337547302, "learning_rate": 1.9764972705397517e-05, "loss": 1.033, "step": 6740 }, { "epoch": 0.09723842862699339, "grad_norm": 0.6132718324661255, "learning_rate": 1.9763966048480293e-05, "loss": 1.0655, "step": 6750 }, { "epoch": 0.09738248555829264, "grad_norm": 0.7320756912231445, "learning_rate": 1.9762957266082355e-05, "loss": 1.0537, "step": 6760 }, { "epoch": 0.09752654248959189, "grad_norm": 0.7121520638465881, "learning_rate": 1.9761946358423297e-05, "loss": 1.0559, "step": 6770 }, { "epoch": 0.09767059942089114, "grad_norm": 0.7753499150276184, "learning_rate": 1.9760933325723185e-05, "loss": 1.0666, "step": 6780 }, { "epoch": 0.09781465635219039, "grad_norm": 0.7425855994224548, "learning_rate": 1.975991816820254e-05, "loss": 1.0108, "step": 6790 }, { "epoch": 0.09795871328348964, "grad_norm": 0.7980513572692871, "learning_rate": 1.9758900886082343e-05, "loss": 1.0617, "step": 6800 }, { "epoch": 0.09810277021478889, "grad_norm": 0.7671613097190857, "learning_rate": 1.9757881479584055e-05, "loss": 1.0852, "step": 6810 }, { "epoch": 0.09824682714608814, "grad_norm": 0.7799088358879089, "learning_rate": 1.9756859948929573e-05, "loss": 1.0588, "step": 6820 }, { "epoch": 0.09839088407738739, "grad_norm": 0.7418830394744873, "learning_rate": 1.975583629434128e-05, "loss": 1.0643, "step": 6830 }, { "epoch": 0.09853494100868664, "grad_norm": 0.711216151714325, "learning_rate": 1.9754810516042006e-05, "loss": 1.0411, "step": 6840 }, { "epoch": 0.09867899793998589, "grad_norm": 0.8446986675262451, "learning_rate": 1.9753782614255047e-05, "loss": 1.0508, "step": 6850 }, { "epoch": 0.09882305487128513, "grad_norm": 0.6371778845787048, "learning_rate": 1.9752752589204172e-05, "loss": 1.0711, "step": 6860 }, { "epoch": 0.09896711180258438, "grad_norm": 0.8106171488761902, "learning_rate": 1.9751720441113594e-05, "loss": 1.0396, "step": 6870 }, { "epoch": 0.09911116873388363, "grad_norm": 0.7657573223114014, "learning_rate": 1.9750686170208003e-05, "loss": 1.0928, "step": 6880 }, { "epoch": 0.09925522566518288, "grad_norm": 0.6844088435173035, "learning_rate": 1.9749649776712542e-05, "loss": 1.0425, "step": 6890 }, { "epoch": 0.09939928259648213, "grad_norm": 0.8555119633674622, "learning_rate": 1.974861126085282e-05, "loss": 1.0587, "step": 6900 }, { "epoch": 0.09954333952778138, "grad_norm": 0.5692336559295654, "learning_rate": 1.9747570622854912e-05, "loss": 1.0452, "step": 6910 }, { "epoch": 0.09968739645908063, "grad_norm": 0.7701729536056519, "learning_rate": 1.9746527862945347e-05, "loss": 1.0363, "step": 6920 }, { "epoch": 0.09983145339037988, "grad_norm": 0.7081140279769897, "learning_rate": 1.974548298135112e-05, "loss": 1.0609, "step": 6930 }, { "epoch": 0.09997551032167913, "grad_norm": 0.7233031988143921, "learning_rate": 1.9744435978299682e-05, "loss": 1.0412, "step": 6940 }, { "epoch": 0.10011956725297838, "grad_norm": 0.6908624768257141, "learning_rate": 1.9743386854018963e-05, "loss": 1.0434, "step": 6950 }, { "epoch": 0.10026362418427763, "grad_norm": 0.6765705943107605, "learning_rate": 1.974233560873733e-05, "loss": 1.0525, "step": 6960 }, { "epoch": 0.10040768111557688, "grad_norm": 0.670443058013916, "learning_rate": 1.974128224268363e-05, "loss": 1.0455, "step": 6970 }, { "epoch": 0.10055173804687613, "grad_norm": 0.5861614346504211, "learning_rate": 1.9740226756087174e-05, "loss": 1.0495, "step": 6980 }, { "epoch": 0.10069579497817538, "grad_norm": 0.6091853976249695, "learning_rate": 1.9739169149177716e-05, "loss": 1.0728, "step": 6990 }, { "epoch": 0.10083985190947463, "grad_norm": 0.6173252463340759, "learning_rate": 1.9738109422185485e-05, "loss": 1.0552, "step": 7000 }, { "epoch": 0.10098390884077388, "grad_norm": 0.741486132144928, "learning_rate": 1.973704757534117e-05, "loss": 1.0369, "step": 7010 }, { "epoch": 0.10112796577207313, "grad_norm": 0.7461585402488708, "learning_rate": 1.9735983608875918e-05, "loss": 1.0615, "step": 7020 }, { "epoch": 0.10127202270337238, "grad_norm": 0.637152910232544, "learning_rate": 1.973491752302135e-05, "loss": 1.0546, "step": 7030 }, { "epoch": 0.10141607963467163, "grad_norm": 0.5802614092826843, "learning_rate": 1.9733849318009523e-05, "loss": 1.0381, "step": 7040 }, { "epoch": 0.10156013656597088, "grad_norm": 0.7531036138534546, "learning_rate": 1.973277899407298e-05, "loss": 1.0521, "step": 7050 }, { "epoch": 0.10170419349727013, "grad_norm": 0.8710116744041443, "learning_rate": 1.973170655144472e-05, "loss": 1.0503, "step": 7060 }, { "epoch": 0.10184825042856938, "grad_norm": 0.8190781474113464, "learning_rate": 1.9730631990358185e-05, "loss": 1.0351, "step": 7070 }, { "epoch": 0.10199230735986863, "grad_norm": 0.8092232942581177, "learning_rate": 1.9729555311047305e-05, "loss": 1.0359, "step": 7080 }, { "epoch": 0.10213636429116787, "grad_norm": 0.8566160798072815, "learning_rate": 1.972847651374645e-05, "loss": 1.0538, "step": 7090 }, { "epoch": 0.10228042122246712, "grad_norm": 0.7003466486930847, "learning_rate": 1.972739559869046e-05, "loss": 1.0398, "step": 7100 }, { "epoch": 0.10242447815376637, "grad_norm": 0.6902493834495544, "learning_rate": 1.972631256611464e-05, "loss": 1.0338, "step": 7110 }, { "epoch": 0.10256853508506562, "grad_norm": 0.5801345109939575, "learning_rate": 1.972522741625475e-05, "loss": 1.0658, "step": 7120 }, { "epoch": 0.10271259201636487, "grad_norm": 0.6949053406715393, "learning_rate": 1.972414014934701e-05, "loss": 1.0514, "step": 7130 }, { "epoch": 0.10285664894766412, "grad_norm": 0.6205598711967468, "learning_rate": 1.9723050765628105e-05, "loss": 1.0365, "step": 7140 }, { "epoch": 0.10300070587896337, "grad_norm": 0.5499454140663147, "learning_rate": 1.9721959265335178e-05, "loss": 1.0578, "step": 7150 }, { "epoch": 0.10314476281026262, "grad_norm": 0.7337759137153625, "learning_rate": 1.972086564870583e-05, "loss": 1.0707, "step": 7160 }, { "epoch": 0.10328881974156187, "grad_norm": 0.8025751113891602, "learning_rate": 1.9719769915978135e-05, "loss": 1.0573, "step": 7170 }, { "epoch": 0.10343287667286112, "grad_norm": 0.6096850037574768, "learning_rate": 1.971867206739061e-05, "loss": 1.063, "step": 7180 }, { "epoch": 0.10357693360416037, "grad_norm": 0.7098557353019714, "learning_rate": 1.9717572103182246e-05, "loss": 1.0648, "step": 7190 }, { "epoch": 0.10372099053545962, "grad_norm": 0.567997932434082, "learning_rate": 1.9716470023592488e-05, "loss": 1.065, "step": 7200 }, { "epoch": 0.10386504746675887, "grad_norm": 0.6367721557617188, "learning_rate": 1.971536582886125e-05, "loss": 1.0476, "step": 7210 }, { "epoch": 0.10400910439805812, "grad_norm": 0.7719098925590515, "learning_rate": 1.9714259519228886e-05, "loss": 1.0614, "step": 7220 }, { "epoch": 0.10415316132935737, "grad_norm": 0.7801731824874878, "learning_rate": 1.9713151094936238e-05, "loss": 1.0761, "step": 7230 }, { "epoch": 0.10429721826065662, "grad_norm": 0.6191163063049316, "learning_rate": 1.971204055622459e-05, "loss": 1.0425, "step": 7240 }, { "epoch": 0.10444127519195585, "grad_norm": 0.6448741555213928, "learning_rate": 1.971092790333569e-05, "loss": 1.0406, "step": 7250 }, { "epoch": 0.1045853321232551, "grad_norm": 0.7942621111869812, "learning_rate": 1.9709813136511748e-05, "loss": 1.0693, "step": 7260 }, { "epoch": 0.10472938905455435, "grad_norm": 0.6539813280105591, "learning_rate": 1.9708696255995433e-05, "loss": 1.0699, "step": 7270 }, { "epoch": 0.1048734459858536, "grad_norm": 0.7940925359725952, "learning_rate": 1.9707577262029874e-05, "loss": 1.0881, "step": 7280 }, { "epoch": 0.10501750291715285, "grad_norm": 0.7164614200592041, "learning_rate": 1.970645615485866e-05, "loss": 1.0444, "step": 7290 }, { "epoch": 0.1051615598484521, "grad_norm": 0.6418722867965698, "learning_rate": 1.970533293472584e-05, "loss": 1.0592, "step": 7300 }, { "epoch": 0.10530561677975135, "grad_norm": 0.6691604256629944, "learning_rate": 1.9704207601875933e-05, "loss": 1.0534, "step": 7310 }, { "epoch": 0.1054496737110506, "grad_norm": 0.6756848096847534, "learning_rate": 1.9703080156553894e-05, "loss": 1.0439, "step": 7320 }, { "epoch": 0.10559373064234985, "grad_norm": 0.7866693735122681, "learning_rate": 1.970195059900516e-05, "loss": 1.045, "step": 7330 }, { "epoch": 0.1057377875736491, "grad_norm": 0.6200616955757141, "learning_rate": 1.970081892947562e-05, "loss": 1.0357, "step": 7340 }, { "epoch": 0.10588184450494835, "grad_norm": 0.733914315700531, "learning_rate": 1.9699685148211626e-05, "loss": 1.0638, "step": 7350 }, { "epoch": 0.1060259014362476, "grad_norm": 0.6396364569664001, "learning_rate": 1.9698549255459975e-05, "loss": 1.0608, "step": 7360 }, { "epoch": 0.10616995836754685, "grad_norm": 0.6390619874000549, "learning_rate": 1.9697411251467947e-05, "loss": 1.0624, "step": 7370 }, { "epoch": 0.1063140152988461, "grad_norm": 0.6570200324058533, "learning_rate": 1.9696271136483266e-05, "loss": 1.0468, "step": 7380 }, { "epoch": 0.10645807223014535, "grad_norm": 0.5907531380653381, "learning_rate": 1.9695128910754115e-05, "loss": 1.0317, "step": 7390 }, { "epoch": 0.1066021291614446, "grad_norm": 0.8321852087974548, "learning_rate": 1.9693984574529147e-05, "loss": 1.0775, "step": 7400 }, { "epoch": 0.10674618609274385, "grad_norm": 0.7287123203277588, "learning_rate": 1.9692838128057466e-05, "loss": 1.0651, "step": 7410 }, { "epoch": 0.1068902430240431, "grad_norm": 0.7644389271736145, "learning_rate": 1.9691689571588637e-05, "loss": 1.0807, "step": 7420 }, { "epoch": 0.10703429995534235, "grad_norm": 0.6191002130508423, "learning_rate": 1.9690538905372685e-05, "loss": 1.055, "step": 7430 }, { "epoch": 0.1071783568866416, "grad_norm": 0.6928269863128662, "learning_rate": 1.9689386129660093e-05, "loss": 1.0293, "step": 7440 }, { "epoch": 0.10732241381794085, "grad_norm": 0.6972824931144714, "learning_rate": 1.968823124470181e-05, "loss": 1.0602, "step": 7450 }, { "epoch": 0.1074664707492401, "grad_norm": 0.6372748017311096, "learning_rate": 1.968707425074923e-05, "loss": 1.0496, "step": 7460 }, { "epoch": 0.10761052768053934, "grad_norm": 0.7519180774688721, "learning_rate": 1.968591514805422e-05, "loss": 1.0782, "step": 7470 }, { "epoch": 0.1077545846118386, "grad_norm": 0.7396631240844727, "learning_rate": 1.96847539368691e-05, "loss": 1.0467, "step": 7480 }, { "epoch": 0.10789864154313784, "grad_norm": 0.8233817219734192, "learning_rate": 1.9683590617446647e-05, "loss": 1.033, "step": 7490 }, { "epoch": 0.1080426984744371, "grad_norm": 0.6281062364578247, "learning_rate": 1.9682425190040104e-05, "loss": 1.0628, "step": 7500 }, { "epoch": 0.10818675540573634, "grad_norm": 0.8033367991447449, "learning_rate": 1.9681257654903164e-05, "loss": 1.0604, "step": 7510 }, { "epoch": 0.10833081233703559, "grad_norm": 0.6199148893356323, "learning_rate": 1.968008801228999e-05, "loss": 1.039, "step": 7520 }, { "epoch": 0.10847486926833484, "grad_norm": 0.8492701053619385, "learning_rate": 1.967891626245519e-05, "loss": 1.0424, "step": 7530 }, { "epoch": 0.10861892619963409, "grad_norm": 0.82408207654953, "learning_rate": 1.9677742405653837e-05, "loss": 1.0436, "step": 7540 }, { "epoch": 0.10876298313093334, "grad_norm": 0.7059163451194763, "learning_rate": 1.9676566442141472e-05, "loss": 1.0577, "step": 7550 }, { "epoch": 0.10890704006223259, "grad_norm": 0.8421022295951843, "learning_rate": 1.967538837217408e-05, "loss": 1.0604, "step": 7560 }, { "epoch": 0.10905109699353184, "grad_norm": 0.6940760612487793, "learning_rate": 1.967420819600811e-05, "loss": 1.0553, "step": 7570 }, { "epoch": 0.10919515392483109, "grad_norm": 0.7205594182014465, "learning_rate": 1.967302591390047e-05, "loss": 1.0596, "step": 7580 }, { "epoch": 0.10933921085613034, "grad_norm": 0.652677595615387, "learning_rate": 1.9671841526108534e-05, "loss": 1.0559, "step": 7590 }, { "epoch": 0.10948326778742959, "grad_norm": 0.6200570464134216, "learning_rate": 1.9670655032890115e-05, "loss": 1.0615, "step": 7600 }, { "epoch": 0.10962732471872884, "grad_norm": 0.6742100119590759, "learning_rate": 1.9669466434503506e-05, "loss": 1.051, "step": 7610 }, { "epoch": 0.10977138165002809, "grad_norm": 0.6377570033073425, "learning_rate": 1.9668275731207444e-05, "loss": 1.0206, "step": 7620 }, { "epoch": 0.10991543858132734, "grad_norm": 0.8838081359863281, "learning_rate": 1.9667082923261126e-05, "loss": 1.0426, "step": 7630 }, { "epoch": 0.11005949551262659, "grad_norm": 0.849931001663208, "learning_rate": 1.9665888010924212e-05, "loss": 1.052, "step": 7640 }, { "epoch": 0.11020355244392584, "grad_norm": 0.663451611995697, "learning_rate": 1.9664690994456824e-05, "loss": 1.0351, "step": 7650 }, { "epoch": 0.11034760937522509, "grad_norm": 0.7051265239715576, "learning_rate": 1.966349187411953e-05, "loss": 1.0693, "step": 7660 }, { "epoch": 0.11049166630652434, "grad_norm": 0.7019264698028564, "learning_rate": 1.966229065017336e-05, "loss": 1.0526, "step": 7670 }, { "epoch": 0.11063572323782359, "grad_norm": 0.526744544506073, "learning_rate": 1.966108732287981e-05, "loss": 1.041, "step": 7680 }, { "epoch": 0.11077978016912284, "grad_norm": 0.7118310928344727, "learning_rate": 1.965988189250082e-05, "loss": 1.058, "step": 7690 }, { "epoch": 0.11092383710042208, "grad_norm": 0.7060966491699219, "learning_rate": 1.9658674359298807e-05, "loss": 1.0537, "step": 7700 }, { "epoch": 0.11106789403172133, "grad_norm": 0.7887639999389648, "learning_rate": 1.9657464723536622e-05, "loss": 1.0521, "step": 7710 }, { "epoch": 0.11121195096302058, "grad_norm": 0.7852970361709595, "learning_rate": 1.9656252985477595e-05, "loss": 1.0575, "step": 7720 }, { "epoch": 0.11135600789431983, "grad_norm": 0.6167747378349304, "learning_rate": 1.9655039145385498e-05, "loss": 1.0586, "step": 7730 }, { "epoch": 0.11150006482561908, "grad_norm": 0.6489522457122803, "learning_rate": 1.9653823203524573e-05, "loss": 1.0552, "step": 7740 }, { "epoch": 0.11164412175691833, "grad_norm": 0.7230646014213562, "learning_rate": 1.965260516015951e-05, "loss": 1.0477, "step": 7750 }, { "epoch": 0.11178817868821758, "grad_norm": 0.9800652265548706, "learning_rate": 1.965138501555546e-05, "loss": 1.0679, "step": 7760 }, { "epoch": 0.11193223561951683, "grad_norm": 0.7883552312850952, "learning_rate": 1.9650162769978034e-05, "loss": 1.0583, "step": 7770 }, { "epoch": 0.11207629255081608, "grad_norm": 0.7069298028945923, "learning_rate": 1.9648938423693302e-05, "loss": 1.0376, "step": 7780 }, { "epoch": 0.11222034948211533, "grad_norm": 0.6600155234336853, "learning_rate": 1.9647711976967776e-05, "loss": 1.0452, "step": 7790 }, { "epoch": 0.11236440641341458, "grad_norm": 0.7556878924369812, "learning_rate": 1.9646483430068442e-05, "loss": 1.0588, "step": 7800 }, { "epoch": 0.11250846334471383, "grad_norm": 0.7326832413673401, "learning_rate": 1.9645252783262742e-05, "loss": 1.0325, "step": 7810 }, { "epoch": 0.11265252027601308, "grad_norm": 0.7369070053100586, "learning_rate": 1.964402003681857e-05, "loss": 1.0459, "step": 7820 }, { "epoch": 0.11279657720731233, "grad_norm": 0.554426372051239, "learning_rate": 1.964278519100427e-05, "loss": 1.0645, "step": 7830 }, { "epoch": 0.11294063413861158, "grad_norm": 0.5718262791633606, "learning_rate": 1.9641548246088658e-05, "loss": 1.0426, "step": 7840 }, { "epoch": 0.11308469106991083, "grad_norm": 0.5710174441337585, "learning_rate": 1.9640309202341e-05, "loss": 1.0486, "step": 7850 }, { "epoch": 0.11322874800121008, "grad_norm": 0.7513896226882935, "learning_rate": 1.9639068060031013e-05, "loss": 1.0544, "step": 7860 }, { "epoch": 0.11337280493250933, "grad_norm": 0.7050769925117493, "learning_rate": 1.963782481942888e-05, "loss": 1.0801, "step": 7870 }, { "epoch": 0.11351686186380858, "grad_norm": 0.6037879586219788, "learning_rate": 1.9636579480805242e-05, "loss": 1.0359, "step": 7880 }, { "epoch": 0.11366091879510783, "grad_norm": 0.5690057277679443, "learning_rate": 1.9635332044431187e-05, "loss": 1.0455, "step": 7890 }, { "epoch": 0.11380497572640708, "grad_norm": 0.7461000084877014, "learning_rate": 1.9634082510578264e-05, "loss": 1.0555, "step": 7900 }, { "epoch": 0.11394903265770633, "grad_norm": 0.7836570143699646, "learning_rate": 1.9632830879518482e-05, "loss": 1.0686, "step": 7910 }, { "epoch": 0.11409308958900558, "grad_norm": 0.62332683801651, "learning_rate": 1.9631577151524298e-05, "loss": 1.059, "step": 7920 }, { "epoch": 0.11423714652030482, "grad_norm": 0.803758978843689, "learning_rate": 1.963032132686864e-05, "loss": 1.0624, "step": 7930 }, { "epoch": 0.11438120345160407, "grad_norm": 0.65031498670578, "learning_rate": 1.962906340582488e-05, "loss": 1.0323, "step": 7940 }, { "epoch": 0.11452526038290332, "grad_norm": 0.6738755106925964, "learning_rate": 1.9627803388666845e-05, "loss": 1.077, "step": 7950 }, { "epoch": 0.11466931731420257, "grad_norm": 0.6606619954109192, "learning_rate": 1.9626541275668832e-05, "loss": 1.0569, "step": 7960 }, { "epoch": 0.11481337424550182, "grad_norm": 0.6373894810676575, "learning_rate": 1.9625277067105583e-05, "loss": 1.0637, "step": 7970 }, { "epoch": 0.11495743117680107, "grad_norm": 0.6798399686813354, "learning_rate": 1.9624010763252296e-05, "loss": 1.0714, "step": 7980 }, { "epoch": 0.11510148810810032, "grad_norm": 0.6052120923995972, "learning_rate": 1.9622742364384625e-05, "loss": 1.0532, "step": 7990 }, { "epoch": 0.11524554503939957, "grad_norm": 0.8813684582710266, "learning_rate": 1.9621471870778692e-05, "loss": 1.0527, "step": 8000 }, { "epoch": 0.11538960197069882, "grad_norm": 0.6334227919578552, "learning_rate": 1.962019928271106e-05, "loss": 1.0443, "step": 8010 }, { "epoch": 0.11553365890199807, "grad_norm": 0.6585490107536316, "learning_rate": 1.9618924600458755e-05, "loss": 1.038, "step": 8020 }, { "epoch": 0.11567771583329732, "grad_norm": 0.8443551659584045, "learning_rate": 1.9617647824299256e-05, "loss": 1.0418, "step": 8030 }, { "epoch": 0.11582177276459657, "grad_norm": 0.6266554594039917, "learning_rate": 1.9616368954510498e-05, "loss": 1.0462, "step": 8040 }, { "epoch": 0.11596582969589582, "grad_norm": 0.6462236046791077, "learning_rate": 1.9615087991370884e-05, "loss": 1.0703, "step": 8050 }, { "epoch": 0.11610988662719507, "grad_norm": 0.8236831426620483, "learning_rate": 1.9613804935159247e-05, "loss": 1.055, "step": 8060 }, { "epoch": 0.11625394355849432, "grad_norm": 0.7455435395240784, "learning_rate": 1.9612519786154905e-05, "loss": 1.059, "step": 8070 }, { "epoch": 0.11639800048979357, "grad_norm": 0.8345641493797302, "learning_rate": 1.9611232544637605e-05, "loss": 1.0405, "step": 8080 }, { "epoch": 0.11654205742109282, "grad_norm": 0.6024748682975769, "learning_rate": 1.960994321088757e-05, "loss": 1.034, "step": 8090 }, { "epoch": 0.11668611435239207, "grad_norm": 0.752213180065155, "learning_rate": 1.9608651785185468e-05, "loss": 1.0471, "step": 8100 }, { "epoch": 0.11683017128369132, "grad_norm": 0.6021220088005066, "learning_rate": 1.960735826781242e-05, "loss": 1.0413, "step": 8110 }, { "epoch": 0.11697422821499057, "grad_norm": 0.8410196304321289, "learning_rate": 1.9606062659050015e-05, "loss": 1.0536, "step": 8120 }, { "epoch": 0.11711828514628982, "grad_norm": 0.749474823474884, "learning_rate": 1.9604764959180283e-05, "loss": 1.062, "step": 8130 }, { "epoch": 0.11726234207758907, "grad_norm": 0.5890676975250244, "learning_rate": 1.9603465168485716e-05, "loss": 1.0469, "step": 8140 }, { "epoch": 0.11740639900888832, "grad_norm": 0.8925262093544006, "learning_rate": 1.9602163287249264e-05, "loss": 1.0546, "step": 8150 }, { "epoch": 0.11755045594018756, "grad_norm": 0.7735393643379211, "learning_rate": 1.9600859315754325e-05, "loss": 1.0219, "step": 8160 }, { "epoch": 0.11769451287148681, "grad_norm": 0.6977030038833618, "learning_rate": 1.9599553254284755e-05, "loss": 1.052, "step": 8170 }, { "epoch": 0.11783856980278606, "grad_norm": 0.7124899625778198, "learning_rate": 1.959824510312487e-05, "loss": 1.0424, "step": 8180 }, { "epoch": 0.11798262673408531, "grad_norm": 0.8691676259040833, "learning_rate": 1.9596934862559432e-05, "loss": 1.055, "step": 8190 }, { "epoch": 0.11812668366538456, "grad_norm": 0.7811499834060669, "learning_rate": 1.9595622532873665e-05, "loss": 1.0518, "step": 8200 }, { "epoch": 0.11827074059668381, "grad_norm": 0.7809152007102966, "learning_rate": 1.9594308114353248e-05, "loss": 1.0351, "step": 8210 }, { "epoch": 0.11841479752798306, "grad_norm": 0.7715036869049072, "learning_rate": 1.9592991607284302e-05, "loss": 1.0373, "step": 8220 }, { "epoch": 0.11855885445928231, "grad_norm": 0.9572784304618835, "learning_rate": 1.959167301195342e-05, "loss": 1.0555, "step": 8230 }, { "epoch": 0.11870291139058156, "grad_norm": 0.7498046159744263, "learning_rate": 1.9590352328647645e-05, "loss": 1.0364, "step": 8240 }, { "epoch": 0.11884696832188081, "grad_norm": 0.7903684377670288, "learning_rate": 1.9589029557654462e-05, "loss": 1.0493, "step": 8250 }, { "epoch": 0.11899102525318006, "grad_norm": 0.7427484393119812, "learning_rate": 1.9587704699261825e-05, "loss": 1.0607, "step": 8260 }, { "epoch": 0.11913508218447931, "grad_norm": 0.7210958003997803, "learning_rate": 1.958637775375814e-05, "loss": 1.0551, "step": 8270 }, { "epoch": 0.11927913911577856, "grad_norm": 0.6811267733573914, "learning_rate": 1.9585048721432263e-05, "loss": 1.0474, "step": 8280 }, { "epoch": 0.11942319604707781, "grad_norm": 0.6749264001846313, "learning_rate": 1.9583717602573503e-05, "loss": 1.0475, "step": 8290 }, { "epoch": 0.11956725297837706, "grad_norm": 0.5424134135246277, "learning_rate": 1.958238439747163e-05, "loss": 1.0468, "step": 8300 }, { "epoch": 0.11971130990967631, "grad_norm": 0.643915593624115, "learning_rate": 1.958104910641686e-05, "loss": 1.0617, "step": 8310 }, { "epoch": 0.11985536684097556, "grad_norm": 0.8453126549720764, "learning_rate": 1.957971172969987e-05, "loss": 1.0658, "step": 8320 }, { "epoch": 0.11999942377227481, "grad_norm": 0.8805552124977112, "learning_rate": 1.9578372267611795e-05, "loss": 1.0582, "step": 8330 }, { "epoch": 0.12014348070357406, "grad_norm": 0.6731013655662537, "learning_rate": 1.9577030720444203e-05, "loss": 1.0604, "step": 8340 }, { "epoch": 0.1202875376348733, "grad_norm": 0.7662180066108704, "learning_rate": 1.9575687088489143e-05, "loss": 1.0298, "step": 8350 }, { "epoch": 0.12043159456617256, "grad_norm": 0.6646352410316467, "learning_rate": 1.95743413720391e-05, "loss": 1.0513, "step": 8360 }, { "epoch": 0.1205756514974718, "grad_norm": 0.7564797401428223, "learning_rate": 1.9572993571387018e-05, "loss": 1.0308, "step": 8370 }, { "epoch": 0.12071970842877106, "grad_norm": 0.7111014723777771, "learning_rate": 1.9571643686826296e-05, "loss": 1.0268, "step": 8380 }, { "epoch": 0.1208637653600703, "grad_norm": 0.6569526791572571, "learning_rate": 1.9570291718650782e-05, "loss": 1.0386, "step": 8390 }, { "epoch": 0.12100782229136955, "grad_norm": 0.6299819946289062, "learning_rate": 1.9568937667154784e-05, "loss": 1.0638, "step": 8400 }, { "epoch": 0.1211518792226688, "grad_norm": 0.6129927039146423, "learning_rate": 1.956758153263306e-05, "loss": 1.0417, "step": 8410 }, { "epoch": 0.12129593615396805, "grad_norm": 0.7334028482437134, "learning_rate": 1.956622331538082e-05, "loss": 1.0578, "step": 8420 }, { "epoch": 0.1214399930852673, "grad_norm": 0.624396026134491, "learning_rate": 1.956486301569373e-05, "loss": 1.0387, "step": 8430 }, { "epoch": 0.12158405001656655, "grad_norm": 0.9984021782875061, "learning_rate": 1.956350063386791e-05, "loss": 1.0378, "step": 8440 }, { "epoch": 0.1217281069478658, "grad_norm": 0.8487315773963928, "learning_rate": 1.9562136170199933e-05, "loss": 1.0597, "step": 8450 }, { "epoch": 0.12187216387916505, "grad_norm": 0.7279149889945984, "learning_rate": 1.9560769624986817e-05, "loss": 1.049, "step": 8460 }, { "epoch": 0.1220162208104643, "grad_norm": 0.6539647579193115, "learning_rate": 1.9559400998526048e-05, "loss": 1.0724, "step": 8470 }, { "epoch": 0.12216027774176355, "grad_norm": 0.6667382121086121, "learning_rate": 1.955803029111555e-05, "loss": 1.017, "step": 8480 }, { "epoch": 0.1223043346730628, "grad_norm": 0.7969838976860046, "learning_rate": 1.9556657503053715e-05, "loss": 1.0552, "step": 8490 }, { "epoch": 0.12244839160436205, "grad_norm": 0.6321619153022766, "learning_rate": 1.9555282634639374e-05, "loss": 1.0535, "step": 8500 }, { "epoch": 0.1225924485356613, "grad_norm": 0.6364926695823669, "learning_rate": 1.955390568617182e-05, "loss": 1.0483, "step": 8510 }, { "epoch": 0.12273650546696055, "grad_norm": 0.697006344795227, "learning_rate": 1.955252665795079e-05, "loss": 1.0741, "step": 8520 }, { "epoch": 0.1228805623982598, "grad_norm": 0.5230076909065247, "learning_rate": 1.955114555027649e-05, "loss": 1.044, "step": 8530 }, { "epoch": 0.12302461932955905, "grad_norm": 0.6966224312782288, "learning_rate": 1.9549762363449555e-05, "loss": 1.0734, "step": 8540 }, { "epoch": 0.1231686762608583, "grad_norm": 0.6040503978729248, "learning_rate": 1.9548377097771097e-05, "loss": 1.0711, "step": 8550 }, { "epoch": 0.12331273319215755, "grad_norm": 0.660383939743042, "learning_rate": 1.9546989753542666e-05, "loss": 1.0213, "step": 8560 }, { "epoch": 0.12345679012345678, "grad_norm": 0.6777083873748779, "learning_rate": 1.954560033106626e-05, "loss": 1.0408, "step": 8570 }, { "epoch": 0.12360084705475603, "grad_norm": 0.6483912467956543, "learning_rate": 1.9544208830644347e-05, "loss": 1.0244, "step": 8580 }, { "epoch": 0.12374490398605528, "grad_norm": 0.6874343156814575, "learning_rate": 1.9542815252579834e-05, "loss": 1.0437, "step": 8590 }, { "epoch": 0.12388896091735453, "grad_norm": 0.7024880647659302, "learning_rate": 1.9541419597176084e-05, "loss": 1.0362, "step": 8600 }, { "epoch": 0.12403301784865378, "grad_norm": 0.8031400442123413, "learning_rate": 1.954002186473691e-05, "loss": 1.0571, "step": 8610 }, { "epoch": 0.12417707477995303, "grad_norm": 0.7505627870559692, "learning_rate": 1.9538622055566584e-05, "loss": 1.0514, "step": 8620 }, { "epoch": 0.12432113171125228, "grad_norm": 0.7040508985519409, "learning_rate": 1.953722016996982e-05, "loss": 1.0483, "step": 8630 }, { "epoch": 0.12446518864255153, "grad_norm": 0.5585609078407288, "learning_rate": 1.9535816208251796e-05, "loss": 1.0283, "step": 8640 }, { "epoch": 0.12460924557385078, "grad_norm": 0.6182272434234619, "learning_rate": 1.9534410170718123e-05, "loss": 1.0465, "step": 8650 }, { "epoch": 0.12475330250515003, "grad_norm": 0.7459903955459595, "learning_rate": 1.9533002057674886e-05, "loss": 1.0645, "step": 8660 }, { "epoch": 0.12489735943644928, "grad_norm": 0.8726353049278259, "learning_rate": 1.9531591869428607e-05, "loss": 1.0383, "step": 8670 }, { "epoch": 0.12504141636774854, "grad_norm": 0.7270867824554443, "learning_rate": 1.953017960628627e-05, "loss": 1.0642, "step": 8680 }, { "epoch": 0.12518547329904778, "grad_norm": 0.6112450361251831, "learning_rate": 1.95287652685553e-05, "loss": 1.0575, "step": 8690 }, { "epoch": 0.12532953023034704, "grad_norm": 0.7989335656166077, "learning_rate": 1.952734885654358e-05, "loss": 1.0528, "step": 8700 }, { "epoch": 0.12547358716164628, "grad_norm": 0.7024810910224915, "learning_rate": 1.9525930370559446e-05, "loss": 1.0279, "step": 8710 }, { "epoch": 0.12561764409294554, "grad_norm": 0.67440265417099, "learning_rate": 1.9524509810911675e-05, "loss": 1.0368, "step": 8720 }, { "epoch": 0.12576170102424478, "grad_norm": 0.7015420794487, "learning_rate": 1.9523087177909513e-05, "loss": 1.0496, "step": 8730 }, { "epoch": 0.12590575795554404, "grad_norm": 0.717402458190918, "learning_rate": 1.952166247186264e-05, "loss": 1.0604, "step": 8740 }, { "epoch": 0.12604981488684328, "grad_norm": 0.6328299045562744, "learning_rate": 1.9520235693081205e-05, "loss": 1.0441, "step": 8750 }, { "epoch": 0.12619387181814254, "grad_norm": 0.7053465843200684, "learning_rate": 1.9518806841875787e-05, "loss": 1.0623, "step": 8760 }, { "epoch": 0.12633792874944177, "grad_norm": 0.6671106219291687, "learning_rate": 1.951737591855743e-05, "loss": 1.0458, "step": 8770 }, { "epoch": 0.12648198568074104, "grad_norm": 0.5957756042480469, "learning_rate": 1.9515942923437632e-05, "loss": 1.0615, "step": 8780 }, { "epoch": 0.12662604261204027, "grad_norm": 0.6355538964271545, "learning_rate": 1.9514507856828328e-05, "loss": 1.0684, "step": 8790 }, { "epoch": 0.12677009954333954, "grad_norm": 0.6601867079734802, "learning_rate": 1.9513070719041918e-05, "loss": 1.04, "step": 8800 }, { "epoch": 0.12691415647463877, "grad_norm": 0.6541124582290649, "learning_rate": 1.9511631510391245e-05, "loss": 1.0608, "step": 8810 }, { "epoch": 0.12705821340593804, "grad_norm": 0.6592003703117371, "learning_rate": 1.9510190231189607e-05, "loss": 1.0398, "step": 8820 }, { "epoch": 0.12720227033723727, "grad_norm": 0.6421726942062378, "learning_rate": 1.9508746881750745e-05, "loss": 1.0588, "step": 8830 }, { "epoch": 0.12734632726853654, "grad_norm": 0.6557596921920776, "learning_rate": 1.9507301462388864e-05, "loss": 1.0732, "step": 8840 }, { "epoch": 0.12749038419983577, "grad_norm": 0.790360152721405, "learning_rate": 1.950585397341861e-05, "loss": 1.0473, "step": 8850 }, { "epoch": 0.12763444113113503, "grad_norm": 0.5429847240447998, "learning_rate": 1.9504404415155073e-05, "loss": 1.0565, "step": 8860 }, { "epoch": 0.12777849806243427, "grad_norm": 0.8884764313697815, "learning_rate": 1.950295278791381e-05, "loss": 1.0664, "step": 8870 }, { "epoch": 0.12792255499373353, "grad_norm": 0.6776615977287292, "learning_rate": 1.9501499092010818e-05, "loss": 1.048, "step": 8880 }, { "epoch": 0.12806661192503277, "grad_norm": 0.6573163270950317, "learning_rate": 1.950004332776255e-05, "loss": 1.0378, "step": 8890 }, { "epoch": 0.12821066885633203, "grad_norm": 0.6077698469161987, "learning_rate": 1.94985854954859e-05, "loss": 1.0513, "step": 8900 }, { "epoch": 0.12835472578763127, "grad_norm": 0.9807862043380737, "learning_rate": 1.949712559549822e-05, "loss": 1.0462, "step": 8910 }, { "epoch": 0.12849878271893053, "grad_norm": 0.6129643321037292, "learning_rate": 1.9495663628117315e-05, "loss": 1.0552, "step": 8920 }, { "epoch": 0.12864283965022977, "grad_norm": 0.7125595211982727, "learning_rate": 1.9494199593661426e-05, "loss": 1.0726, "step": 8930 }, { "epoch": 0.12878689658152903, "grad_norm": 0.5645942091941833, "learning_rate": 1.949273349244926e-05, "loss": 1.0338, "step": 8940 }, { "epoch": 0.12893095351282827, "grad_norm": 0.8663925528526306, "learning_rate": 1.9491265324799966e-05, "loss": 1.0529, "step": 8950 }, { "epoch": 0.12907501044412753, "grad_norm": 0.7779350876808167, "learning_rate": 1.9489795091033142e-05, "loss": 1.0262, "step": 8960 }, { "epoch": 0.12921906737542677, "grad_norm": 0.6418645977973938, "learning_rate": 1.9488322791468837e-05, "loss": 1.0375, "step": 8970 }, { "epoch": 0.12936312430672603, "grad_norm": 0.7466892600059509, "learning_rate": 1.9486848426427552e-05, "loss": 1.053, "step": 8980 }, { "epoch": 0.12950718123802527, "grad_norm": 0.5885640382766724, "learning_rate": 1.948537199623024e-05, "loss": 1.0315, "step": 8990 }, { "epoch": 0.12965123816932453, "grad_norm": 0.716010570526123, "learning_rate": 1.948389350119829e-05, "loss": 1.0322, "step": 9000 }, { "epoch": 0.12979529510062376, "grad_norm": 0.7401978969573975, "learning_rate": 1.948241294165356e-05, "loss": 1.0596, "step": 9010 }, { "epoch": 0.12993935203192303, "grad_norm": 0.6437592506408691, "learning_rate": 1.9480930317918337e-05, "loss": 1.0647, "step": 9020 }, { "epoch": 0.13008340896322226, "grad_norm": 0.7413766980171204, "learning_rate": 1.9479445630315378e-05, "loss": 1.0397, "step": 9030 }, { "epoch": 0.13022746589452153, "grad_norm": 0.5429770946502686, "learning_rate": 1.9477958879167874e-05, "loss": 1.0569, "step": 9040 }, { "epoch": 0.13037152282582076, "grad_norm": 0.7051211595535278, "learning_rate": 1.9476470064799467e-05, "loss": 1.0469, "step": 9050 }, { "epoch": 0.13051557975712003, "grad_norm": 0.7300934791564941, "learning_rate": 1.9474979187534258e-05, "loss": 1.038, "step": 9060 }, { "epoch": 0.13065963668841926, "grad_norm": 0.6431097984313965, "learning_rate": 1.9473486247696785e-05, "loss": 1.0461, "step": 9070 }, { "epoch": 0.13080369361971853, "grad_norm": 0.6957307457923889, "learning_rate": 1.9471991245612044e-05, "loss": 1.0518, "step": 9080 }, { "epoch": 0.13094775055101776, "grad_norm": 0.6514873504638672, "learning_rate": 1.9470494181605476e-05, "loss": 1.0388, "step": 9090 }, { "epoch": 0.13109180748231702, "grad_norm": 0.6488949656486511, "learning_rate": 1.946899505600297e-05, "loss": 1.0551, "step": 9100 }, { "epoch": 0.13123586441361626, "grad_norm": 0.6016454696655273, "learning_rate": 1.946749386913086e-05, "loss": 1.0441, "step": 9110 }, { "epoch": 0.13137992134491552, "grad_norm": 0.7101909518241882, "learning_rate": 1.9465990621315945e-05, "loss": 1.0375, "step": 9120 }, { "epoch": 0.13152397827621476, "grad_norm": 0.6185438632965088, "learning_rate": 1.9464485312885455e-05, "loss": 1.0401, "step": 9130 }, { "epoch": 0.13166803520751402, "grad_norm": 0.6171557307243347, "learning_rate": 1.9462977944167074e-05, "loss": 1.0416, "step": 9140 }, { "epoch": 0.13181209213881326, "grad_norm": 0.6617201566696167, "learning_rate": 1.9461468515488936e-05, "loss": 1.0503, "step": 9150 }, { "epoch": 0.13195614907011252, "grad_norm": 0.5675362348556519, "learning_rate": 1.9459957027179623e-05, "loss": 1.031, "step": 9160 }, { "epoch": 0.13210020600141176, "grad_norm": 0.6881184577941895, "learning_rate": 1.9458443479568165e-05, "loss": 1.0113, "step": 9170 }, { "epoch": 0.13224426293271102, "grad_norm": 0.5947275757789612, "learning_rate": 1.945692787298404e-05, "loss": 1.0473, "step": 9180 }, { "epoch": 0.13238831986401026, "grad_norm": 0.7350253462791443, "learning_rate": 1.9455410207757176e-05, "loss": 1.0461, "step": 9190 }, { "epoch": 0.13253237679530952, "grad_norm": 0.605141282081604, "learning_rate": 1.9453890484217947e-05, "loss": 1.0323, "step": 9200 }, { "epoch": 0.13267643372660876, "grad_norm": 0.661953330039978, "learning_rate": 1.9452368702697177e-05, "loss": 1.0632, "step": 9210 }, { "epoch": 0.13282049065790802, "grad_norm": 0.5995630621910095, "learning_rate": 1.9450844863526136e-05, "loss": 1.0591, "step": 9220 }, { "epoch": 0.13296454758920725, "grad_norm": 0.5376889705657959, "learning_rate": 1.9449318967036543e-05, "loss": 1.0434, "step": 9230 }, { "epoch": 0.1331086045205065, "grad_norm": 0.7438921332359314, "learning_rate": 1.9447791013560563e-05, "loss": 1.056, "step": 9240 }, { "epoch": 0.13325266145180575, "grad_norm": 0.5766037702560425, "learning_rate": 1.9446261003430816e-05, "loss": 1.0466, "step": 9250 }, { "epoch": 0.133396718383105, "grad_norm": 0.6660729050636292, "learning_rate": 1.9444728936980355e-05, "loss": 1.0486, "step": 9260 }, { "epoch": 0.13354077531440425, "grad_norm": 0.7104220986366272, "learning_rate": 1.9443194814542695e-05, "loss": 1.0581, "step": 9270 }, { "epoch": 0.1336848322457035, "grad_norm": 0.6871854662895203, "learning_rate": 1.9441658636451794e-05, "loss": 1.0393, "step": 9280 }, { "epoch": 0.13382888917700275, "grad_norm": 0.6358904242515564, "learning_rate": 1.9440120403042056e-05, "loss": 1.0224, "step": 9290 }, { "epoch": 0.133972946108302, "grad_norm": 0.6637552380561829, "learning_rate": 1.943858011464833e-05, "loss": 1.0585, "step": 9300 }, { "epoch": 0.13411700303960125, "grad_norm": 0.6891149282455444, "learning_rate": 1.9437037771605922e-05, "loss": 1.0488, "step": 9310 }, { "epoch": 0.1342610599709005, "grad_norm": 0.615485429763794, "learning_rate": 1.9435493374250572e-05, "loss": 1.0545, "step": 9320 }, { "epoch": 0.13440511690219975, "grad_norm": 0.6307300329208374, "learning_rate": 1.943394692291848e-05, "loss": 1.0512, "step": 9330 }, { "epoch": 0.134549173833499, "grad_norm": 0.6017307043075562, "learning_rate": 1.943239841794628e-05, "loss": 1.0623, "step": 9340 }, { "epoch": 0.13469323076479825, "grad_norm": 0.7184136509895325, "learning_rate": 1.943084785967107e-05, "loss": 1.0497, "step": 9350 }, { "epoch": 0.13483728769609749, "grad_norm": 0.5148404836654663, "learning_rate": 1.9429295248430376e-05, "loss": 1.0158, "step": 9360 }, { "epoch": 0.13498134462739675, "grad_norm": 0.6019318103790283, "learning_rate": 1.9427740584562184e-05, "loss": 1.0482, "step": 9370 }, { "epoch": 0.13512540155869598, "grad_norm": 0.6289660930633545, "learning_rate": 1.942618386840492e-05, "loss": 1.0388, "step": 9380 }, { "epoch": 0.13526945848999525, "grad_norm": 0.6551307439804077, "learning_rate": 1.9424625100297468e-05, "loss": 1.0366, "step": 9390 }, { "epoch": 0.13541351542129448, "grad_norm": 0.7314261794090271, "learning_rate": 1.942306428057914e-05, "loss": 1.0653, "step": 9400 }, { "epoch": 0.13555757235259375, "grad_norm": 0.6418008208274841, "learning_rate": 1.942150140958971e-05, "loss": 1.0536, "step": 9410 }, { "epoch": 0.13570162928389298, "grad_norm": 0.7704811096191406, "learning_rate": 1.9419936487669396e-05, "loss": 1.0447, "step": 9420 }, { "epoch": 0.13584568621519225, "grad_norm": 0.5979043841362, "learning_rate": 1.9418369515158854e-05, "loss": 1.0515, "step": 9430 }, { "epoch": 0.13598974314649148, "grad_norm": 0.6340309977531433, "learning_rate": 1.9416800492399195e-05, "loss": 1.0646, "step": 9440 }, { "epoch": 0.13613380007779075, "grad_norm": 0.5940436124801636, "learning_rate": 1.9415229419731974e-05, "loss": 1.0511, "step": 9450 }, { "epoch": 0.13627785700908998, "grad_norm": 0.6780868768692017, "learning_rate": 1.9413656297499194e-05, "loss": 1.0749, "step": 9460 }, { "epoch": 0.13642191394038924, "grad_norm": 0.6650206446647644, "learning_rate": 1.9412081126043295e-05, "loss": 1.0406, "step": 9470 }, { "epoch": 0.13656597087168848, "grad_norm": 0.7658565044403076, "learning_rate": 1.9410503905707175e-05, "loss": 1.0391, "step": 9480 }, { "epoch": 0.13671002780298774, "grad_norm": 0.6320905089378357, "learning_rate": 1.9408924636834175e-05, "loss": 1.0405, "step": 9490 }, { "epoch": 0.13685408473428698, "grad_norm": 0.6337910890579224, "learning_rate": 1.9407343319768077e-05, "loss": 1.0159, "step": 9500 }, { "epoch": 0.13699814166558624, "grad_norm": 0.7351735234260559, "learning_rate": 1.940575995485311e-05, "loss": 1.0511, "step": 9510 }, { "epoch": 0.13714219859688548, "grad_norm": 0.610489547252655, "learning_rate": 1.940417454243396e-05, "loss": 1.0386, "step": 9520 }, { "epoch": 0.13728625552818474, "grad_norm": 0.5717934370040894, "learning_rate": 1.940258708285574e-05, "loss": 1.0694, "step": 9530 }, { "epoch": 0.13743031245948398, "grad_norm": 0.6930076479911804, "learning_rate": 1.9400997576464018e-05, "loss": 1.0618, "step": 9540 }, { "epoch": 0.13757436939078324, "grad_norm": 0.6314212679862976, "learning_rate": 1.939940602360481e-05, "loss": 1.0526, "step": 9550 }, { "epoch": 0.13771842632208248, "grad_norm": 0.6929250955581665, "learning_rate": 1.9397812424624578e-05, "loss": 1.0472, "step": 9560 }, { "epoch": 0.13786248325338174, "grad_norm": 0.6031248569488525, "learning_rate": 1.9396216779870224e-05, "loss": 1.0235, "step": 9570 }, { "epoch": 0.13800654018468098, "grad_norm": 0.67605060338974, "learning_rate": 1.9394619089689098e-05, "loss": 1.0479, "step": 9580 }, { "epoch": 0.13815059711598024, "grad_norm": 0.6733813285827637, "learning_rate": 1.9393019354428994e-05, "loss": 1.0312, "step": 9590 }, { "epoch": 0.13829465404727947, "grad_norm": 0.5362619161605835, "learning_rate": 1.9391417574438157e-05, "loss": 1.0393, "step": 9600 }, { "epoch": 0.13843871097857874, "grad_norm": 0.724517822265625, "learning_rate": 1.9389813750065263e-05, "loss": 1.0562, "step": 9610 }, { "epoch": 0.13858276790987797, "grad_norm": 0.7083704471588135, "learning_rate": 1.9388207881659452e-05, "loss": 1.0551, "step": 9620 }, { "epoch": 0.13872682484117724, "grad_norm": 0.6152426600456238, "learning_rate": 1.9386599969570296e-05, "loss": 1.041, "step": 9630 }, { "epoch": 0.13887088177247647, "grad_norm": 0.6370306015014648, "learning_rate": 1.938499001414781e-05, "loss": 1.0503, "step": 9640 }, { "epoch": 0.13901493870377574, "grad_norm": 0.6503737568855286, "learning_rate": 1.9383378015742468e-05, "loss": 1.0491, "step": 9650 }, { "epoch": 0.13915899563507497, "grad_norm": 0.7164560556411743, "learning_rate": 1.9381763974705173e-05, "loss": 1.0313, "step": 9660 }, { "epoch": 0.13930305256637424, "grad_norm": 0.6730453968048096, "learning_rate": 1.9380147891387285e-05, "loss": 1.0454, "step": 9670 }, { "epoch": 0.13944710949767347, "grad_norm": 0.6627982258796692, "learning_rate": 1.93785297661406e-05, "loss": 1.0701, "step": 9680 }, { "epoch": 0.13959116642897273, "grad_norm": 0.5412722229957581, "learning_rate": 1.937690959931736e-05, "loss": 1.0595, "step": 9690 }, { "epoch": 0.13973522336027197, "grad_norm": 0.5805692076683044, "learning_rate": 1.9375287391270253e-05, "loss": 1.0541, "step": 9700 }, { "epoch": 0.13987928029157123, "grad_norm": 0.6356375813484192, "learning_rate": 1.9373663142352417e-05, "loss": 1.0406, "step": 9710 }, { "epoch": 0.14002333722287047, "grad_norm": 0.6632521748542786, "learning_rate": 1.9372036852917423e-05, "loss": 1.0405, "step": 9720 }, { "epoch": 0.14016739415416973, "grad_norm": 0.6239160895347595, "learning_rate": 1.9370408523319298e-05, "loss": 1.0385, "step": 9730 }, { "epoch": 0.14031145108546897, "grad_norm": 0.7359287142753601, "learning_rate": 1.9368778153912497e-05, "loss": 1.0455, "step": 9740 }, { "epoch": 0.14045550801676823, "grad_norm": 0.583785891532898, "learning_rate": 1.9367145745051935e-05, "loss": 1.0476, "step": 9750 }, { "epoch": 0.14059956494806747, "grad_norm": 0.5760996341705322, "learning_rate": 1.9365511297092964e-05, "loss": 1.0303, "step": 9760 }, { "epoch": 0.14074362187936673, "grad_norm": 0.5912659168243408, "learning_rate": 1.9363874810391384e-05, "loss": 1.0533, "step": 9770 }, { "epoch": 0.14088767881066597, "grad_norm": 0.816480815410614, "learning_rate": 1.936223628530343e-05, "loss": 1.059, "step": 9780 }, { "epoch": 0.14103173574196523, "grad_norm": 0.550395131111145, "learning_rate": 1.9360595722185785e-05, "loss": 1.0953, "step": 9790 }, { "epoch": 0.14117579267326447, "grad_norm": 0.6576113104820251, "learning_rate": 1.9358953121395587e-05, "loss": 1.0441, "step": 9800 }, { "epoch": 0.14131984960456373, "grad_norm": 0.651944637298584, "learning_rate": 1.93573084832904e-05, "loss": 1.0535, "step": 9810 }, { "epoch": 0.14146390653586297, "grad_norm": 0.6657174229621887, "learning_rate": 1.935566180822824e-05, "loss": 1.0358, "step": 9820 }, { "epoch": 0.14160796346716223, "grad_norm": 0.7346987724304199, "learning_rate": 1.9354013096567567e-05, "loss": 1.0442, "step": 9830 }, { "epoch": 0.14175202039846146, "grad_norm": 0.6800191402435303, "learning_rate": 1.9352362348667276e-05, "loss": 1.0471, "step": 9840 }, { "epoch": 0.14189607732976073, "grad_norm": 0.5743468403816223, "learning_rate": 1.9350709564886722e-05, "loss": 1.0337, "step": 9850 }, { "epoch": 0.14204013426105996, "grad_norm": 0.6555769443511963, "learning_rate": 1.9349054745585686e-05, "loss": 1.0422, "step": 9860 }, { "epoch": 0.14218419119235923, "grad_norm": 0.6481942534446716, "learning_rate": 1.9347397891124406e-05, "loss": 1.0607, "step": 9870 }, { "epoch": 0.14232824812365846, "grad_norm": 0.6060455441474915, "learning_rate": 1.934573900186355e-05, "loss": 1.0556, "step": 9880 }, { "epoch": 0.14247230505495773, "grad_norm": 0.5978740453720093, "learning_rate": 1.9344078078164237e-05, "loss": 1.034, "step": 9890 }, { "epoch": 0.14261636198625696, "grad_norm": 0.5864404439926147, "learning_rate": 1.934241512038803e-05, "loss": 1.0557, "step": 9900 }, { "epoch": 0.14276041891755623, "grad_norm": 0.6731454730033875, "learning_rate": 1.9340750128896933e-05, "loss": 1.0486, "step": 9910 }, { "epoch": 0.14290447584885546, "grad_norm": 0.6897068619728088, "learning_rate": 1.9339083104053386e-05, "loss": 1.0499, "step": 9920 }, { "epoch": 0.14304853278015472, "grad_norm": 0.6522696614265442, "learning_rate": 1.9337414046220278e-05, "loss": 1.0532, "step": 9930 }, { "epoch": 0.14319258971145396, "grad_norm": 0.6743817925453186, "learning_rate": 1.9335742955760943e-05, "loss": 1.0644, "step": 9940 }, { "epoch": 0.14333664664275322, "grad_norm": 0.6498114466667175, "learning_rate": 1.9334069833039154e-05, "loss": 1.0606, "step": 9950 }, { "epoch": 0.14348070357405246, "grad_norm": 0.5908329486846924, "learning_rate": 1.9332394678419125e-05, "loss": 1.0363, "step": 9960 }, { "epoch": 0.14362476050535172, "grad_norm": 0.7792730331420898, "learning_rate": 1.9330717492265514e-05, "loss": 1.0623, "step": 9970 }, { "epoch": 0.14376881743665096, "grad_norm": 0.656216561794281, "learning_rate": 1.9329038274943423e-05, "loss": 1.056, "step": 9980 }, { "epoch": 0.14391287436795022, "grad_norm": 0.6079681515693665, "learning_rate": 1.9327357026818394e-05, "loss": 1.0588, "step": 9990 }, { "epoch": 0.14405693129924946, "grad_norm": 0.7425257563591003, "learning_rate": 1.932567374825641e-05, "loss": 1.0762, "step": 10000 }, { "epoch": 0.14420098823054872, "grad_norm": 0.5706034898757935, "learning_rate": 1.9323988439623903e-05, "loss": 1.0358, "step": 10010 }, { "epoch": 0.14434504516184796, "grad_norm": 0.62879478931427, "learning_rate": 1.932230110128773e-05, "loss": 1.0365, "step": 10020 }, { "epoch": 0.14448910209314722, "grad_norm": 0.8347147703170776, "learning_rate": 1.9320611733615215e-05, "loss": 1.0526, "step": 10030 }, { "epoch": 0.14463315902444646, "grad_norm": 0.5717446804046631, "learning_rate": 1.93189203369741e-05, "loss": 1.0577, "step": 10040 }, { "epoch": 0.14477721595574572, "grad_norm": 0.633722722530365, "learning_rate": 1.9317226911732586e-05, "loss": 1.068, "step": 10050 }, { "epoch": 0.14492127288704496, "grad_norm": 0.73835688829422, "learning_rate": 1.9315531458259302e-05, "loss": 1.055, "step": 10060 }, { "epoch": 0.14506532981834422, "grad_norm": 0.6140055656433105, "learning_rate": 1.9313833976923327e-05, "loss": 1.0642, "step": 10070 }, { "epoch": 0.14520938674964345, "grad_norm": 0.7030134797096252, "learning_rate": 1.931213446809418e-05, "loss": 1.0421, "step": 10080 }, { "epoch": 0.14535344368094272, "grad_norm": 0.5750778913497925, "learning_rate": 1.931043293214182e-05, "loss": 1.0405, "step": 10090 }, { "epoch": 0.14549750061224195, "grad_norm": 0.6405826210975647, "learning_rate": 1.930872936943665e-05, "loss": 1.0544, "step": 10100 }, { "epoch": 0.14564155754354122, "grad_norm": 0.6836420893669128, "learning_rate": 1.930702378034951e-05, "loss": 1.0424, "step": 10110 }, { "epoch": 0.14578561447484045, "grad_norm": 0.6426882743835449, "learning_rate": 1.9305316165251676e-05, "loss": 1.027, "step": 10120 }, { "epoch": 0.14592967140613972, "grad_norm": 0.6070466041564941, "learning_rate": 1.930360652451489e-05, "loss": 1.0395, "step": 10130 }, { "epoch": 0.14607372833743895, "grad_norm": 0.7554711699485779, "learning_rate": 1.93018948585113e-05, "loss": 1.0445, "step": 10140 }, { "epoch": 0.14621778526873822, "grad_norm": 0.6213268041610718, "learning_rate": 1.9300181167613522e-05, "loss": 1.0542, "step": 10150 }, { "epoch": 0.14636184220003745, "grad_norm": 0.6937504410743713, "learning_rate": 1.92984654521946e-05, "loss": 1.0606, "step": 10160 }, { "epoch": 0.14650589913133671, "grad_norm": 0.5986953377723694, "learning_rate": 1.9296747712628022e-05, "loss": 1.0268, "step": 10170 }, { "epoch": 0.14664995606263595, "grad_norm": 0.6458836197853088, "learning_rate": 1.929502794928771e-05, "loss": 1.0373, "step": 10180 }, { "epoch": 0.1467940129939352, "grad_norm": 0.6366413831710815, "learning_rate": 1.9293306162548045e-05, "loss": 1.0525, "step": 10190 }, { "epoch": 0.14693806992523445, "grad_norm": 0.6416839957237244, "learning_rate": 1.929158235278383e-05, "loss": 1.0415, "step": 10200 }, { "epoch": 0.1470821268565337, "grad_norm": 0.6393887400627136, "learning_rate": 1.9289856520370313e-05, "loss": 1.0599, "step": 10210 }, { "epoch": 0.14722618378783295, "grad_norm": 0.5655863285064697, "learning_rate": 1.9288128665683182e-05, "loss": 1.0683, "step": 10220 }, { "epoch": 0.1473702407191322, "grad_norm": 0.5792210698127747, "learning_rate": 1.9286398789098574e-05, "loss": 1.0298, "step": 10230 }, { "epoch": 0.14751429765043145, "grad_norm": 0.6570830345153809, "learning_rate": 1.9284666890993055e-05, "loss": 1.0458, "step": 10240 }, { "epoch": 0.1476583545817307, "grad_norm": 0.5635051727294922, "learning_rate": 1.9282932971743633e-05, "loss": 1.0381, "step": 10250 }, { "epoch": 0.14780241151302995, "grad_norm": 0.6540777683258057, "learning_rate": 1.9281197031727763e-05, "loss": 1.0634, "step": 10260 }, { "epoch": 0.1479464684443292, "grad_norm": 0.7320727705955505, "learning_rate": 1.9279459071323334e-05, "loss": 1.0737, "step": 10270 }, { "epoch": 0.14809052537562845, "grad_norm": 0.6890342831611633, "learning_rate": 1.9277719090908674e-05, "loss": 1.0184, "step": 10280 }, { "epoch": 0.1482345823069277, "grad_norm": 0.6491656303405762, "learning_rate": 1.9275977090862556e-05, "loss": 1.0574, "step": 10290 }, { "epoch": 0.14837863923822694, "grad_norm": 0.8918922543525696, "learning_rate": 1.9274233071564187e-05, "loss": 1.0522, "step": 10300 }, { "epoch": 0.1485226961695262, "grad_norm": 0.7084387540817261, "learning_rate": 1.9272487033393217e-05, "loss": 1.0371, "step": 10310 }, { "epoch": 0.14866675310082544, "grad_norm": 0.7133810520172119, "learning_rate": 1.927073897672973e-05, "loss": 1.0353, "step": 10320 }, { "epoch": 0.1488108100321247, "grad_norm": 0.6229279041290283, "learning_rate": 1.926898890195426e-05, "loss": 1.0335, "step": 10330 }, { "epoch": 0.14895486696342394, "grad_norm": 0.5358096957206726, "learning_rate": 1.9267236809447774e-05, "loss": 1.0058, "step": 10340 }, { "epoch": 0.1490989238947232, "grad_norm": 0.751314103603363, "learning_rate": 1.9265482699591675e-05, "loss": 1.0572, "step": 10350 }, { "epoch": 0.14924298082602244, "grad_norm": 0.7541031837463379, "learning_rate": 1.926372657276781e-05, "loss": 1.0529, "step": 10360 }, { "epoch": 0.1493870377573217, "grad_norm": 0.5620636940002441, "learning_rate": 1.9261968429358462e-05, "loss": 1.0555, "step": 10370 }, { "epoch": 0.14953109468862094, "grad_norm": 0.7588586807250977, "learning_rate": 1.9260208269746354e-05, "loss": 1.0538, "step": 10380 }, { "epoch": 0.1496751516199202, "grad_norm": 0.608447790145874, "learning_rate": 1.9258446094314653e-05, "loss": 1.0565, "step": 10390 }, { "epoch": 0.14981920855121944, "grad_norm": 0.6301183700561523, "learning_rate": 1.9256681903446957e-05, "loss": 1.0649, "step": 10400 }, { "epoch": 0.1499632654825187, "grad_norm": 0.6962247490882874, "learning_rate": 1.9254915697527307e-05, "loss": 1.0667, "step": 10410 }, { "epoch": 0.15010732241381794, "grad_norm": 0.7227652072906494, "learning_rate": 1.9253147476940182e-05, "loss": 1.0578, "step": 10420 }, { "epoch": 0.1502513793451172, "grad_norm": 0.7597373723983765, "learning_rate": 1.9251377242070497e-05, "loss": 1.0496, "step": 10430 }, { "epoch": 0.15039543627641644, "grad_norm": 0.6414986848831177, "learning_rate": 1.924960499330361e-05, "loss": 1.0501, "step": 10440 }, { "epoch": 0.1505394932077157, "grad_norm": 0.7147359848022461, "learning_rate": 1.9247830731025315e-05, "loss": 1.0474, "step": 10450 }, { "epoch": 0.15068355013901494, "grad_norm": 0.6228944659233093, "learning_rate": 1.9246054455621844e-05, "loss": 1.0527, "step": 10460 }, { "epoch": 0.1508276070703142, "grad_norm": 0.5738548636436462, "learning_rate": 1.9244276167479863e-05, "loss": 1.0503, "step": 10470 }, { "epoch": 0.15097166400161344, "grad_norm": 0.6146776676177979, "learning_rate": 1.924249586698649e-05, "loss": 1.0461, "step": 10480 }, { "epoch": 0.1511157209329127, "grad_norm": 0.6455808281898499, "learning_rate": 1.9240713554529264e-05, "loss": 1.0357, "step": 10490 }, { "epoch": 0.15125977786421194, "grad_norm": 0.6520363688468933, "learning_rate": 1.9238929230496173e-05, "loss": 1.065, "step": 10500 }, { "epoch": 0.1514038347955112, "grad_norm": 0.7376220226287842, "learning_rate": 1.9237142895275642e-05, "loss": 1.036, "step": 10510 }, { "epoch": 0.15154789172681044, "grad_norm": 0.6889696717262268, "learning_rate": 1.9235354549256532e-05, "loss": 1.0455, "step": 10520 }, { "epoch": 0.1516919486581097, "grad_norm": 0.7327942848205566, "learning_rate": 1.923356419282813e-05, "loss": 1.0619, "step": 10530 }, { "epoch": 0.15183600558940893, "grad_norm": 0.6709638237953186, "learning_rate": 1.9231771826380185e-05, "loss": 1.0572, "step": 10540 }, { "epoch": 0.15198006252070817, "grad_norm": 0.6950246095657349, "learning_rate": 1.922997745030287e-05, "loss": 1.0418, "step": 10550 }, { "epoch": 0.15212411945200743, "grad_norm": 0.6457738876342773, "learning_rate": 1.9228181064986785e-05, "loss": 1.0289, "step": 10560 }, { "epoch": 0.15226817638330667, "grad_norm": 0.6225009560585022, "learning_rate": 1.9226382670822986e-05, "loss": 1.0484, "step": 10570 }, { "epoch": 0.15241223331460593, "grad_norm": 0.6474888324737549, "learning_rate": 1.922458226820296e-05, "loss": 1.067, "step": 10580 }, { "epoch": 0.15255629024590517, "grad_norm": 0.6236519813537598, "learning_rate": 1.9222779857518627e-05, "loss": 1.0439, "step": 10590 }, { "epoch": 0.15270034717720443, "grad_norm": 0.6439448595046997, "learning_rate": 1.9220975439162347e-05, "loss": 1.0399, "step": 10600 }, { "epoch": 0.15284440410850367, "grad_norm": 0.6349307298660278, "learning_rate": 1.921916901352692e-05, "loss": 1.0578, "step": 10610 }, { "epoch": 0.15298846103980293, "grad_norm": 0.642374575138092, "learning_rate": 1.921736058100557e-05, "loss": 1.0714, "step": 10620 }, { "epoch": 0.15313251797110217, "grad_norm": 0.8094242811203003, "learning_rate": 1.921555014199198e-05, "loss": 1.043, "step": 10630 }, { "epoch": 0.15327657490240143, "grad_norm": 0.6853436827659607, "learning_rate": 1.921373769688025e-05, "loss": 1.075, "step": 10640 }, { "epoch": 0.15342063183370067, "grad_norm": 0.5926113128662109, "learning_rate": 1.921192324606493e-05, "loss": 1.0214, "step": 10650 }, { "epoch": 0.15356468876499993, "grad_norm": 0.563201367855072, "learning_rate": 1.9210106789940996e-05, "loss": 1.0729, "step": 10660 }, { "epoch": 0.15370874569629916, "grad_norm": 0.5924573540687561, "learning_rate": 1.9208288328903867e-05, "loss": 1.0454, "step": 10670 }, { "epoch": 0.15385280262759843, "grad_norm": 0.6228936910629272, "learning_rate": 1.92064678633494e-05, "loss": 1.0638, "step": 10680 }, { "epoch": 0.15399685955889766, "grad_norm": 0.617900550365448, "learning_rate": 1.9204645393673882e-05, "loss": 1.0514, "step": 10690 }, { "epoch": 0.15414091649019693, "grad_norm": 0.5415571331977844, "learning_rate": 1.920282092027404e-05, "loss": 1.0561, "step": 10700 }, { "epoch": 0.15428497342149616, "grad_norm": 0.6749228835105896, "learning_rate": 1.9200994443547036e-05, "loss": 1.0568, "step": 10710 }, { "epoch": 0.15442903035279543, "grad_norm": 0.6251257061958313, "learning_rate": 1.9199165963890465e-05, "loss": 1.0803, "step": 10720 }, { "epoch": 0.15457308728409466, "grad_norm": 0.7738173604011536, "learning_rate": 1.9197335481702374e-05, "loss": 1.045, "step": 10730 }, { "epoch": 0.15471714421539393, "grad_norm": 0.8898658156394958, "learning_rate": 1.919550299738122e-05, "loss": 1.027, "step": 10740 }, { "epoch": 0.15486120114669316, "grad_norm": 0.7574092745780945, "learning_rate": 1.9193668511325917e-05, "loss": 1.0434, "step": 10750 }, { "epoch": 0.15500525807799242, "grad_norm": 0.7242676615715027, "learning_rate": 1.9191832023935805e-05, "loss": 1.0437, "step": 10760 }, { "epoch": 0.15514931500929166, "grad_norm": 0.653777539730072, "learning_rate": 1.9189993535610664e-05, "loss": 1.0647, "step": 10770 }, { "epoch": 0.15529337194059092, "grad_norm": 0.7875447273254395, "learning_rate": 1.9188153046750705e-05, "loss": 1.0201, "step": 10780 }, { "epoch": 0.15543742887189016, "grad_norm": 0.7534798979759216, "learning_rate": 1.9186310557756583e-05, "loss": 1.0434, "step": 10790 }, { "epoch": 0.15558148580318942, "grad_norm": 0.7386358380317688, "learning_rate": 1.9184466069029373e-05, "loss": 1.0459, "step": 10800 }, { "epoch": 0.15572554273448866, "grad_norm": 0.5817023515701294, "learning_rate": 1.9182619580970602e-05, "loss": 1.0513, "step": 10810 }, { "epoch": 0.15586959966578792, "grad_norm": 0.7862343788146973, "learning_rate": 1.918077109398222e-05, "loss": 1.0559, "step": 10820 }, { "epoch": 0.15601365659708716, "grad_norm": 0.7072425484657288, "learning_rate": 1.917892060846662e-05, "loss": 1.0439, "step": 10830 }, { "epoch": 0.15615771352838642, "grad_norm": 0.6007922291755676, "learning_rate": 1.917706812482663e-05, "loss": 1.046, "step": 10840 }, { "epoch": 0.15630177045968566, "grad_norm": 0.7150635719299316, "learning_rate": 1.9175213643465503e-05, "loss": 1.05, "step": 10850 }, { "epoch": 0.15644582739098492, "grad_norm": 0.6381576657295227, "learning_rate": 1.9173357164786944e-05, "loss": 1.0467, "step": 10860 }, { "epoch": 0.15658988432228416, "grad_norm": 0.5520989298820496, "learning_rate": 1.9171498689195068e-05, "loss": 1.0473, "step": 10870 }, { "epoch": 0.15673394125358342, "grad_norm": 0.5694143176078796, "learning_rate": 1.9169638217094454e-05, "loss": 1.0542, "step": 10880 }, { "epoch": 0.15687799818488266, "grad_norm": 0.7016416788101196, "learning_rate": 1.9167775748890097e-05, "loss": 1.027, "step": 10890 }, { "epoch": 0.15702205511618192, "grad_norm": 0.6645250916481018, "learning_rate": 1.9165911284987426e-05, "loss": 1.0427, "step": 10900 }, { "epoch": 0.15716611204748115, "grad_norm": 0.7823187708854675, "learning_rate": 1.916404482579231e-05, "loss": 1.0617, "step": 10910 }, { "epoch": 0.15731016897878042, "grad_norm": 0.6369243264198303, "learning_rate": 1.916217637171106e-05, "loss": 1.0344, "step": 10920 }, { "epoch": 0.15745422591007965, "grad_norm": 0.5803185105323792, "learning_rate": 1.9160305923150398e-05, "loss": 1.0341, "step": 10930 }, { "epoch": 0.15759828284137892, "grad_norm": 0.6197068095207214, "learning_rate": 1.9158433480517508e-05, "loss": 1.0616, "step": 10940 }, { "epoch": 0.15774233977267815, "grad_norm": 0.5247350931167603, "learning_rate": 1.915655904421999e-05, "loss": 1.0534, "step": 10950 }, { "epoch": 0.15788639670397742, "grad_norm": 0.6269147396087646, "learning_rate": 1.9154682614665883e-05, "loss": 1.0323, "step": 10960 }, { "epoch": 0.15803045363527665, "grad_norm": 0.640401303768158, "learning_rate": 1.915280419226366e-05, "loss": 1.056, "step": 10970 }, { "epoch": 0.15817451056657592, "grad_norm": 0.607360303401947, "learning_rate": 1.9150923777422224e-05, "loss": 1.0513, "step": 10980 }, { "epoch": 0.15831856749787515, "grad_norm": 0.6519664525985718, "learning_rate": 1.9149041370550922e-05, "loss": 1.053, "step": 10990 }, { "epoch": 0.15846262442917441, "grad_norm": 0.579047441482544, "learning_rate": 1.9147156972059518e-05, "loss": 1.0417, "step": 11000 }, { "epoch": 0.15860668136047365, "grad_norm": 0.9328927993774414, "learning_rate": 1.9145270582358234e-05, "loss": 1.0421, "step": 11010 }, { "epoch": 0.1587507382917729, "grad_norm": 0.689149022102356, "learning_rate": 1.91433822018577e-05, "loss": 1.0659, "step": 11020 }, { "epoch": 0.15889479522307215, "grad_norm": 0.6681456565856934, "learning_rate": 1.9141491830968996e-05, "loss": 1.0402, "step": 11030 }, { "epoch": 0.1590388521543714, "grad_norm": 0.7554347515106201, "learning_rate": 1.9139599470103624e-05, "loss": 1.0461, "step": 11040 }, { "epoch": 0.15918290908567065, "grad_norm": 0.5673980116844177, "learning_rate": 1.913770511967353e-05, "loss": 1.0614, "step": 11050 }, { "epoch": 0.1593269660169699, "grad_norm": 0.5546961426734924, "learning_rate": 1.913580878009109e-05, "loss": 1.052, "step": 11060 }, { "epoch": 0.15947102294826915, "grad_norm": 0.5435275435447693, "learning_rate": 1.9133910451769102e-05, "loss": 1.0521, "step": 11070 }, { "epoch": 0.1596150798795684, "grad_norm": 0.6310040950775146, "learning_rate": 1.9132010135120813e-05, "loss": 1.0281, "step": 11080 }, { "epoch": 0.15975913681086765, "grad_norm": 0.6460842490196228, "learning_rate": 1.9130107830559895e-05, "loss": 1.0518, "step": 11090 }, { "epoch": 0.1599031937421669, "grad_norm": 0.6667739748954773, "learning_rate": 1.912820353850045e-05, "loss": 1.05, "step": 11100 }, { "epoch": 0.16004725067346615, "grad_norm": 0.5846189856529236, "learning_rate": 1.9126297259357018e-05, "loss": 1.0537, "step": 11110 }, { "epoch": 0.1601913076047654, "grad_norm": 0.710723340511322, "learning_rate": 1.9124388993544568e-05, "loss": 1.0587, "step": 11120 }, { "epoch": 0.16033536453606465, "grad_norm": 0.7449038624763489, "learning_rate": 1.912247874147851e-05, "loss": 1.0503, "step": 11130 }, { "epoch": 0.1604794214673639, "grad_norm": 0.611016571521759, "learning_rate": 1.9120566503574675e-05, "loss": 1.0271, "step": 11140 }, { "epoch": 0.16062347839866314, "grad_norm": 0.6310858130455017, "learning_rate": 1.9118652280249327e-05, "loss": 1.0511, "step": 11150 }, { "epoch": 0.1607675353299624, "grad_norm": 0.6700077056884766, "learning_rate": 1.911673607191917e-05, "loss": 1.0441, "step": 11160 }, { "epoch": 0.16091159226126164, "grad_norm": 0.6764875054359436, "learning_rate": 1.9114817879001335e-05, "loss": 1.0239, "step": 11170 }, { "epoch": 0.1610556491925609, "grad_norm": 0.8168401718139648, "learning_rate": 1.9112897701913387e-05, "loss": 1.0313, "step": 11180 }, { "epoch": 0.16119970612386014, "grad_norm": 0.6589007377624512, "learning_rate": 1.9110975541073322e-05, "loss": 1.072, "step": 11190 }, { "epoch": 0.1613437630551594, "grad_norm": 0.577829122543335, "learning_rate": 1.9109051396899567e-05, "loss": 1.0613, "step": 11200 }, { "epoch": 0.16148781998645864, "grad_norm": 0.677116334438324, "learning_rate": 1.9107125269810985e-05, "loss": 1.0231, "step": 11210 }, { "epoch": 0.1616318769177579, "grad_norm": 0.622451663017273, "learning_rate": 1.9105197160226863e-05, "loss": 1.0336, "step": 11220 }, { "epoch": 0.16177593384905714, "grad_norm": 0.7007042169570923, "learning_rate": 1.9103267068566924e-05, "loss": 1.0192, "step": 11230 }, { "epoch": 0.1619199907803564, "grad_norm": 0.5768957138061523, "learning_rate": 1.9101334995251325e-05, "loss": 1.0286, "step": 11240 }, { "epoch": 0.16206404771165564, "grad_norm": 0.746347963809967, "learning_rate": 1.909940094070065e-05, "loss": 1.0645, "step": 11250 }, { "epoch": 0.1622081046429549, "grad_norm": 0.6460885405540466, "learning_rate": 1.909746490533592e-05, "loss": 1.0527, "step": 11260 }, { "epoch": 0.16235216157425414, "grad_norm": 0.7336540818214417, "learning_rate": 1.9095526889578577e-05, "loss": 1.0432, "step": 11270 }, { "epoch": 0.1624962185055534, "grad_norm": 0.6185027360916138, "learning_rate": 1.9093586893850503e-05, "loss": 1.025, "step": 11280 }, { "epoch": 0.16264027543685264, "grad_norm": 0.7867199182510376, "learning_rate": 1.909164491857401e-05, "loss": 1.046, "step": 11290 }, { "epoch": 0.1627843323681519, "grad_norm": 0.6278853416442871, "learning_rate": 1.9089700964171832e-05, "loss": 1.0176, "step": 11300 }, { "epoch": 0.16292838929945114, "grad_norm": 0.6531842350959778, "learning_rate": 1.9087755031067153e-05, "loss": 1.0257, "step": 11310 }, { "epoch": 0.1630724462307504, "grad_norm": 0.6189087629318237, "learning_rate": 1.9085807119683565e-05, "loss": 1.0491, "step": 11320 }, { "epoch": 0.16321650316204964, "grad_norm": 0.6218734383583069, "learning_rate": 1.908385723044511e-05, "loss": 1.0469, "step": 11330 }, { "epoch": 0.1633605600933489, "grad_norm": 0.6597093343734741, "learning_rate": 1.9081905363776244e-05, "loss": 1.0381, "step": 11340 }, { "epoch": 0.16350461702464814, "grad_norm": 0.5974107384681702, "learning_rate": 1.9079951520101868e-05, "loss": 1.0585, "step": 11350 }, { "epoch": 0.1636486739559474, "grad_norm": 0.5562683939933777, "learning_rate": 1.9077995699847304e-05, "loss": 1.0231, "step": 11360 }, { "epoch": 0.16379273088724663, "grad_norm": 0.6765367388725281, "learning_rate": 1.9076037903438304e-05, "loss": 1.0318, "step": 11370 }, { "epoch": 0.1639367878185459, "grad_norm": 0.6025965809822083, "learning_rate": 1.9074078131301058e-05, "loss": 1.0349, "step": 11380 }, { "epoch": 0.16408084474984513, "grad_norm": 0.7687731981277466, "learning_rate": 1.907211638386218e-05, "loss": 1.0488, "step": 11390 }, { "epoch": 0.1642249016811444, "grad_norm": 0.6119621992111206, "learning_rate": 1.9070152661548715e-05, "loss": 1.0253, "step": 11400 }, { "epoch": 0.16436895861244363, "grad_norm": 0.5947549939155579, "learning_rate": 1.906818696478814e-05, "loss": 1.0422, "step": 11410 }, { "epoch": 0.1645130155437429, "grad_norm": 0.6977937817573547, "learning_rate": 1.906621929400836e-05, "loss": 1.0276, "step": 11420 }, { "epoch": 0.16465707247504213, "grad_norm": 0.6394926309585571, "learning_rate": 1.9064249649637703e-05, "loss": 1.0176, "step": 11430 }, { "epoch": 0.1648011294063414, "grad_norm": 0.7498409748077393, "learning_rate": 1.9062278032104943e-05, "loss": 1.0383, "step": 11440 }, { "epoch": 0.16494518633764063, "grad_norm": 0.675934374332428, "learning_rate": 1.9060304441839265e-05, "loss": 1.031, "step": 11450 }, { "epoch": 0.1650892432689399, "grad_norm": 0.7206798791885376, "learning_rate": 1.90583288792703e-05, "loss": 1.0612, "step": 11460 }, { "epoch": 0.16523330020023913, "grad_norm": 0.6218888759613037, "learning_rate": 1.9056351344828097e-05, "loss": 1.0413, "step": 11470 }, { "epoch": 0.1653773571315384, "grad_norm": 0.6903321146965027, "learning_rate": 1.905437183894314e-05, "loss": 1.0694, "step": 11480 }, { "epoch": 0.16552141406283763, "grad_norm": 0.6396926045417786, "learning_rate": 1.905239036204634e-05, "loss": 1.0397, "step": 11490 }, { "epoch": 0.1656654709941369, "grad_norm": 0.6658194065093994, "learning_rate": 1.9050406914569032e-05, "loss": 1.0303, "step": 11500 }, { "epoch": 0.16580952792543613, "grad_norm": 0.537991464138031, "learning_rate": 1.904842149694299e-05, "loss": 1.0419, "step": 11510 }, { "epoch": 0.1659535848567354, "grad_norm": 0.7544538378715515, "learning_rate": 1.9046434109600413e-05, "loss": 1.0099, "step": 11520 }, { "epoch": 0.16609764178803463, "grad_norm": 0.6946489810943604, "learning_rate": 1.9044444752973927e-05, "loss": 1.0324, "step": 11530 }, { "epoch": 0.1662416987193339, "grad_norm": 0.6735936999320984, "learning_rate": 1.9042453427496584e-05, "loss": 1.0402, "step": 11540 }, { "epoch": 0.16638575565063313, "grad_norm": 0.6542741656303406, "learning_rate": 1.904046013360187e-05, "loss": 1.0545, "step": 11550 }, { "epoch": 0.1665298125819324, "grad_norm": 0.5523136258125305, "learning_rate": 1.90384648717237e-05, "loss": 1.0371, "step": 11560 }, { "epoch": 0.16667386951323163, "grad_norm": 0.7047058343887329, "learning_rate": 1.9036467642296413e-05, "loss": 1.0344, "step": 11570 }, { "epoch": 0.1668179264445309, "grad_norm": 0.5968069434165955, "learning_rate": 1.9034468445754776e-05, "loss": 1.0649, "step": 11580 }, { "epoch": 0.16696198337583013, "grad_norm": 0.648099422454834, "learning_rate": 1.9032467282533993e-05, "loss": 1.041, "step": 11590 }, { "epoch": 0.1671060403071294, "grad_norm": 0.8043709397315979, "learning_rate": 1.9030464153069684e-05, "loss": 1.0628, "step": 11600 }, { "epoch": 0.16725009723842862, "grad_norm": 0.678976833820343, "learning_rate": 1.90284590577979e-05, "loss": 1.0556, "step": 11610 }, { "epoch": 0.1673941541697279, "grad_norm": 0.6578510999679565, "learning_rate": 1.902645199715513e-05, "loss": 1.0441, "step": 11620 }, { "epoch": 0.16753821110102712, "grad_norm": 0.711756706237793, "learning_rate": 1.9024442971578282e-05, "loss": 1.0566, "step": 11630 }, { "epoch": 0.1676822680323264, "grad_norm": 0.6507493853569031, "learning_rate": 1.902243198150469e-05, "loss": 1.0528, "step": 11640 }, { "epoch": 0.16782632496362562, "grad_norm": 0.6045960783958435, "learning_rate": 1.9020419027372117e-05, "loss": 1.058, "step": 11650 }, { "epoch": 0.1679703818949249, "grad_norm": 0.6027932167053223, "learning_rate": 1.9018404109618764e-05, "loss": 1.0447, "step": 11660 }, { "epoch": 0.16811443882622412, "grad_norm": 0.6618443131446838, "learning_rate": 1.9016387228683243e-05, "loss": 1.0531, "step": 11670 }, { "epoch": 0.16825849575752339, "grad_norm": 0.6192331314086914, "learning_rate": 1.9014368385004604e-05, "loss": 1.0429, "step": 11680 }, { "epoch": 0.16840255268882262, "grad_norm": 0.6641239523887634, "learning_rate": 1.901234757902232e-05, "loss": 1.0478, "step": 11690 }, { "epoch": 0.16854660962012188, "grad_norm": 0.8388392329216003, "learning_rate": 1.9010324811176293e-05, "loss": 1.029, "step": 11700 }, { "epoch": 0.16869066655142112, "grad_norm": 0.6802681684494019, "learning_rate": 1.900830008190685e-05, "loss": 1.034, "step": 11710 }, { "epoch": 0.16883472348272038, "grad_norm": 0.7114946246147156, "learning_rate": 1.9006273391654754e-05, "loss": 1.0461, "step": 11720 }, { "epoch": 0.16897878041401962, "grad_norm": 0.6896271109580994, "learning_rate": 1.9004244740861176e-05, "loss": 1.0505, "step": 11730 }, { "epoch": 0.16912283734531888, "grad_norm": 0.6172512173652649, "learning_rate": 1.9002214129967737e-05, "loss": 1.0593, "step": 11740 }, { "epoch": 0.16926689427661812, "grad_norm": 0.5373944640159607, "learning_rate": 1.9000181559416465e-05, "loss": 1.0364, "step": 11750 }, { "epoch": 0.16941095120791738, "grad_norm": 0.6389153003692627, "learning_rate": 1.8998147029649828e-05, "loss": 1.0255, "step": 11760 }, { "epoch": 0.16955500813921662, "grad_norm": 0.5937640070915222, "learning_rate": 1.8996110541110708e-05, "loss": 1.0388, "step": 11770 }, { "epoch": 0.16969906507051588, "grad_norm": 0.6417317986488342, "learning_rate": 1.8994072094242426e-05, "loss": 1.0438, "step": 11780 }, { "epoch": 0.16984312200181512, "grad_norm": 0.751429557800293, "learning_rate": 1.8992031689488724e-05, "loss": 1.0434, "step": 11790 }, { "epoch": 0.16998717893311438, "grad_norm": 0.6613959670066833, "learning_rate": 1.8989989327293767e-05, "loss": 1.0347, "step": 11800 }, { "epoch": 0.17013123586441362, "grad_norm": 0.6063255667686462, "learning_rate": 1.898794500810215e-05, "loss": 1.0476, "step": 11810 }, { "epoch": 0.17027529279571288, "grad_norm": 0.6742021441459656, "learning_rate": 1.8985898732358894e-05, "loss": 1.0263, "step": 11820 }, { "epoch": 0.17041934972701211, "grad_norm": 0.6540886163711548, "learning_rate": 1.8983850500509447e-05, "loss": 1.0431, "step": 11830 }, { "epoch": 0.17056340665831138, "grad_norm": 0.6701706647872925, "learning_rate": 1.8981800312999675e-05, "loss": 1.0553, "step": 11840 }, { "epoch": 0.17070746358961061, "grad_norm": 0.5419948101043701, "learning_rate": 1.897974817027588e-05, "loss": 1.0466, "step": 11850 }, { "epoch": 0.17085152052090985, "grad_norm": 0.6257351636886597, "learning_rate": 1.8977694072784782e-05, "loss": 1.0502, "step": 11860 }, { "epoch": 0.1709955774522091, "grad_norm": 0.6071768999099731, "learning_rate": 1.8975638020973534e-05, "loss": 1.0512, "step": 11870 }, { "epoch": 0.17113963438350835, "grad_norm": 0.7279354333877563, "learning_rate": 1.8973580015289706e-05, "loss": 1.054, "step": 11880 }, { "epoch": 0.1712836913148076, "grad_norm": 0.6097837090492249, "learning_rate": 1.8971520056181304e-05, "loss": 1.0842, "step": 11890 }, { "epoch": 0.17142774824610685, "grad_norm": 0.6748334169387817, "learning_rate": 1.896945814409674e-05, "loss": 1.0441, "step": 11900 }, { "epoch": 0.1715718051774061, "grad_norm": 0.595467746257782, "learning_rate": 1.8967394279484878e-05, "loss": 1.0301, "step": 11910 }, { "epoch": 0.17171586210870535, "grad_norm": 0.5484843254089355, "learning_rate": 1.8965328462794987e-05, "loss": 1.0407, "step": 11920 }, { "epoch": 0.1718599190400046, "grad_norm": 0.7679455280303955, "learning_rate": 1.8963260694476763e-05, "loss": 1.0348, "step": 11930 }, { "epoch": 0.17200397597130385, "grad_norm": 0.5871204137802124, "learning_rate": 1.8961190974980333e-05, "loss": 1.063, "step": 11940 }, { "epoch": 0.1721480329026031, "grad_norm": 0.6413636207580566, "learning_rate": 1.8959119304756252e-05, "loss": 1.0446, "step": 11950 }, { "epoch": 0.17229208983390235, "grad_norm": 0.5831906199455261, "learning_rate": 1.895704568425549e-05, "loss": 1.0469, "step": 11960 }, { "epoch": 0.1724361467652016, "grad_norm": 0.5874209403991699, "learning_rate": 1.895497011392944e-05, "loss": 1.0436, "step": 11970 }, { "epoch": 0.17258020369650084, "grad_norm": 0.7418249845504761, "learning_rate": 1.895289259422993e-05, "loss": 1.0553, "step": 11980 }, { "epoch": 0.1727242606278001, "grad_norm": 0.46058523654937744, "learning_rate": 1.895081312560921e-05, "loss": 1.0286, "step": 11990 }, { "epoch": 0.17286831755909934, "grad_norm": 0.6460348963737488, "learning_rate": 1.8948731708519952e-05, "loss": 1.0266, "step": 12000 }, { "epoch": 0.1730123744903986, "grad_norm": 0.6433600187301636, "learning_rate": 1.8946648343415245e-05, "loss": 1.0391, "step": 12010 }, { "epoch": 0.17315643142169784, "grad_norm": 0.693173348903656, "learning_rate": 1.8944563030748614e-05, "loss": 1.0244, "step": 12020 }, { "epoch": 0.1733004883529971, "grad_norm": 0.7197710275650024, "learning_rate": 1.8942475770974002e-05, "loss": 1.0269, "step": 12030 }, { "epoch": 0.17344454528429634, "grad_norm": 0.8151099681854248, "learning_rate": 1.8940386564545773e-05, "loss": 1.0449, "step": 12040 }, { "epoch": 0.1735886022155956, "grad_norm": 0.6173912882804871, "learning_rate": 1.8938295411918727e-05, "loss": 1.0361, "step": 12050 }, { "epoch": 0.17373265914689484, "grad_norm": 0.5067716240882874, "learning_rate": 1.893620231354807e-05, "loss": 1.0561, "step": 12060 }, { "epoch": 0.1738767160781941, "grad_norm": 0.5978410840034485, "learning_rate": 1.8934107269889442e-05, "loss": 1.0561, "step": 12070 }, { "epoch": 0.17402077300949334, "grad_norm": 0.567631721496582, "learning_rate": 1.8932010281398912e-05, "loss": 1.0361, "step": 12080 }, { "epoch": 0.1741648299407926, "grad_norm": 0.5313737392425537, "learning_rate": 1.892991134853296e-05, "loss": 1.0588, "step": 12090 }, { "epoch": 0.17430888687209184, "grad_norm": 0.580583930015564, "learning_rate": 1.8927810471748494e-05, "loss": 1.0368, "step": 12100 }, { "epoch": 0.1744529438033911, "grad_norm": 0.589990496635437, "learning_rate": 1.892570765150285e-05, "loss": 1.058, "step": 12110 }, { "epoch": 0.17459700073469034, "grad_norm": 0.6194111704826355, "learning_rate": 1.8923602888253777e-05, "loss": 1.0588, "step": 12120 }, { "epoch": 0.1747410576659896, "grad_norm": 0.5884047746658325, "learning_rate": 1.892149618245946e-05, "loss": 1.047, "step": 12130 }, { "epoch": 0.17488511459728884, "grad_norm": 0.7635648846626282, "learning_rate": 1.8919387534578493e-05, "loss": 1.0481, "step": 12140 }, { "epoch": 0.1750291715285881, "grad_norm": 0.6717743277549744, "learning_rate": 1.8917276945069903e-05, "loss": 1.0641, "step": 12150 }, { "epoch": 0.17517322845988734, "grad_norm": 0.6033238768577576, "learning_rate": 1.891516441439314e-05, "loss": 1.0463, "step": 12160 }, { "epoch": 0.1753172853911866, "grad_norm": 0.633753776550293, "learning_rate": 1.8913049943008063e-05, "loss": 1.0293, "step": 12170 }, { "epoch": 0.17546134232248584, "grad_norm": 0.5848496556282043, "learning_rate": 1.891093353137497e-05, "loss": 1.0271, "step": 12180 }, { "epoch": 0.1756053992537851, "grad_norm": 0.6577739119529724, "learning_rate": 1.8908815179954578e-05, "loss": 1.0413, "step": 12190 }, { "epoch": 0.17574945618508434, "grad_norm": 0.6133542060852051, "learning_rate": 1.890669488920802e-05, "loss": 1.0549, "step": 12200 }, { "epoch": 0.1758935131163836, "grad_norm": 0.7499854564666748, "learning_rate": 1.8904572659596843e-05, "loss": 1.0407, "step": 12210 }, { "epoch": 0.17603757004768283, "grad_norm": 0.5730758309364319, "learning_rate": 1.8902448491583044e-05, "loss": 1.023, "step": 12220 }, { "epoch": 0.1761816269789821, "grad_norm": 0.6072816252708435, "learning_rate": 1.8900322385629015e-05, "loss": 1.0161, "step": 12230 }, { "epoch": 0.17632568391028133, "grad_norm": 0.5822769999504089, "learning_rate": 1.8898194342197582e-05, "loss": 1.0215, "step": 12240 }, { "epoch": 0.1764697408415806, "grad_norm": 0.6300902962684631, "learning_rate": 1.8896064361751995e-05, "loss": 1.0528, "step": 12250 }, { "epoch": 0.17661379777287983, "grad_norm": 0.5611024498939514, "learning_rate": 1.8893932444755916e-05, "loss": 1.0572, "step": 12260 }, { "epoch": 0.1767578547041791, "grad_norm": 0.6063017249107361, "learning_rate": 1.8891798591673434e-05, "loss": 1.0583, "step": 12270 }, { "epoch": 0.17690191163547833, "grad_norm": 0.6732243895530701, "learning_rate": 1.8889662802969063e-05, "loss": 1.0525, "step": 12280 }, { "epoch": 0.1770459685667776, "grad_norm": 0.7215601801872253, "learning_rate": 1.888752507910773e-05, "loss": 1.0349, "step": 12290 }, { "epoch": 0.17719002549807683, "grad_norm": 0.6400814652442932, "learning_rate": 1.8885385420554795e-05, "loss": 1.0388, "step": 12300 }, { "epoch": 0.1773340824293761, "grad_norm": 0.5283488631248474, "learning_rate": 1.8883243827776026e-05, "loss": 1.0505, "step": 12310 }, { "epoch": 0.17747813936067533, "grad_norm": 0.5196974873542786, "learning_rate": 1.888110030123762e-05, "loss": 1.0404, "step": 12320 }, { "epoch": 0.1776221962919746, "grad_norm": 0.6163458824157715, "learning_rate": 1.8878954841406193e-05, "loss": 1.0391, "step": 12330 }, { "epoch": 0.17776625322327383, "grad_norm": 0.6230759024620056, "learning_rate": 1.887680744874878e-05, "loss": 1.0292, "step": 12340 }, { "epoch": 0.1779103101545731, "grad_norm": 0.6491036415100098, "learning_rate": 1.8874658123732844e-05, "loss": 0.9997, "step": 12350 }, { "epoch": 0.17805436708587233, "grad_norm": 0.5875597596168518, "learning_rate": 1.887250686682626e-05, "loss": 1.0458, "step": 12360 }, { "epoch": 0.1781984240171716, "grad_norm": 0.6479558348655701, "learning_rate": 1.8870353678497327e-05, "loss": 1.0612, "step": 12370 }, { "epoch": 0.17834248094847083, "grad_norm": 0.6008232235908508, "learning_rate": 1.8868198559214765e-05, "loss": 1.0379, "step": 12380 }, { "epoch": 0.1784865378797701, "grad_norm": 0.660557746887207, "learning_rate": 1.8866041509447713e-05, "loss": 1.059, "step": 12390 }, { "epoch": 0.17863059481106933, "grad_norm": 0.7118351459503174, "learning_rate": 1.8863882529665733e-05, "loss": 1.0586, "step": 12400 }, { "epoch": 0.1787746517423686, "grad_norm": 0.5953468084335327, "learning_rate": 1.88617216203388e-05, "loss": 1.0099, "step": 12410 }, { "epoch": 0.17891870867366783, "grad_norm": 0.7735770344734192, "learning_rate": 1.885955878193732e-05, "loss": 1.0581, "step": 12420 }, { "epoch": 0.1790627656049671, "grad_norm": 0.5638709664344788, "learning_rate": 1.8857394014932114e-05, "loss": 1.0236, "step": 12430 }, { "epoch": 0.17920682253626632, "grad_norm": 0.6094455122947693, "learning_rate": 1.8855227319794415e-05, "loss": 1.049, "step": 12440 }, { "epoch": 0.1793508794675656, "grad_norm": 0.6717058420181274, "learning_rate": 1.8853058696995886e-05, "loss": 1.031, "step": 12450 }, { "epoch": 0.17949493639886482, "grad_norm": 0.6776756644248962, "learning_rate": 1.885088814700861e-05, "loss": 1.0461, "step": 12460 }, { "epoch": 0.1796389933301641, "grad_norm": 0.5989999175071716, "learning_rate": 1.884871567030508e-05, "loss": 1.057, "step": 12470 }, { "epoch": 0.17978305026146332, "grad_norm": 0.502468466758728, "learning_rate": 1.8846541267358217e-05, "loss": 1.0272, "step": 12480 }, { "epoch": 0.1799271071927626, "grad_norm": 0.5758592486381531, "learning_rate": 1.884436493864136e-05, "loss": 1.05, "step": 12490 }, { "epoch": 0.18007116412406182, "grad_norm": 0.5701432824134827, "learning_rate": 1.884218668462826e-05, "loss": 1.0405, "step": 12500 }, { "epoch": 0.18021522105536109, "grad_norm": 0.6102204322814941, "learning_rate": 1.88400065057931e-05, "loss": 1.0431, "step": 12510 }, { "epoch": 0.18035927798666032, "grad_norm": 0.6020178198814392, "learning_rate": 1.883782440261047e-05, "loss": 1.0329, "step": 12520 }, { "epoch": 0.18050333491795958, "grad_norm": 0.7687045931816101, "learning_rate": 1.8835640375555387e-05, "loss": 1.0373, "step": 12530 }, { "epoch": 0.18064739184925882, "grad_norm": 0.5445989370346069, "learning_rate": 1.883345442510328e-05, "loss": 1.0317, "step": 12540 }, { "epoch": 0.18079144878055808, "grad_norm": 0.5568065047264099, "learning_rate": 1.8831266551730002e-05, "loss": 1.0345, "step": 12550 }, { "epoch": 0.18093550571185732, "grad_norm": 0.7024680376052856, "learning_rate": 1.8829076755911826e-05, "loss": 1.0545, "step": 12560 }, { "epoch": 0.18107956264315658, "grad_norm": 0.608268678188324, "learning_rate": 1.8826885038125437e-05, "loss": 1.027, "step": 12570 }, { "epoch": 0.18122361957445582, "grad_norm": 0.6229660511016846, "learning_rate": 1.882469139884794e-05, "loss": 1.0678, "step": 12580 }, { "epoch": 0.18136767650575508, "grad_norm": 0.623124361038208, "learning_rate": 1.8822495838556866e-05, "loss": 1.0501, "step": 12590 }, { "epoch": 0.18151173343705432, "grad_norm": 0.6334506869316101, "learning_rate": 1.882029835773015e-05, "loss": 1.0186, "step": 12600 }, { "epoch": 0.18165579036835358, "grad_norm": 0.6877279877662659, "learning_rate": 1.8818098956846157e-05, "loss": 1.05, "step": 12610 }, { "epoch": 0.18179984729965282, "grad_norm": 0.6072930693626404, "learning_rate": 1.881589763638367e-05, "loss": 1.0413, "step": 12620 }, { "epoch": 0.18194390423095208, "grad_norm": 0.5373785495758057, "learning_rate": 1.881369439682188e-05, "loss": 1.034, "step": 12630 }, { "epoch": 0.18208796116225132, "grad_norm": 0.5830142498016357, "learning_rate": 1.8811489238640407e-05, "loss": 1.0197, "step": 12640 }, { "epoch": 0.18223201809355058, "grad_norm": 0.5567898154258728, "learning_rate": 1.8809282162319282e-05, "loss": 1.0145, "step": 12650 }, { "epoch": 0.18237607502484982, "grad_norm": 0.6073896288871765, "learning_rate": 1.8807073168338953e-05, "loss": 1.012, "step": 12660 }, { "epoch": 0.18252013195614908, "grad_norm": 0.5818758606910706, "learning_rate": 1.8804862257180287e-05, "loss": 1.0529, "step": 12670 }, { "epoch": 0.18266418888744831, "grad_norm": 0.580693781375885, "learning_rate": 1.8802649429324574e-05, "loss": 1.0249, "step": 12680 }, { "epoch": 0.18280824581874758, "grad_norm": 0.8039982318878174, "learning_rate": 1.8800434685253514e-05, "loss": 1.044, "step": 12690 }, { "epoch": 0.1829523027500468, "grad_norm": 0.6613562703132629, "learning_rate": 1.8798218025449222e-05, "loss": 1.0332, "step": 12700 }, { "epoch": 0.18309635968134608, "grad_norm": 0.5530600547790527, "learning_rate": 1.8795999450394237e-05, "loss": 1.0442, "step": 12710 }, { "epoch": 0.1832404166126453, "grad_norm": 0.7691448926925659, "learning_rate": 1.8793778960571516e-05, "loss": 1.0533, "step": 12720 }, { "epoch": 0.18338447354394458, "grad_norm": 0.6959360241889954, "learning_rate": 1.879155655646442e-05, "loss": 1.0444, "step": 12730 }, { "epoch": 0.1835285304752438, "grad_norm": 0.7252940535545349, "learning_rate": 1.8789332238556747e-05, "loss": 1.0378, "step": 12740 }, { "epoch": 0.18367258740654308, "grad_norm": 0.6496034264564514, "learning_rate": 1.878710600733269e-05, "loss": 1.0511, "step": 12750 }, { "epoch": 0.1838166443378423, "grad_norm": 0.6047234535217285, "learning_rate": 1.878487786327688e-05, "loss": 1.0383, "step": 12760 }, { "epoch": 0.18396070126914157, "grad_norm": 0.6182611584663391, "learning_rate": 1.878264780687434e-05, "loss": 1.0238, "step": 12770 }, { "epoch": 0.1841047582004408, "grad_norm": 0.6061338186264038, "learning_rate": 1.878041583861053e-05, "loss": 1.0426, "step": 12780 }, { "epoch": 0.18424881513174007, "grad_norm": 0.6683133840560913, "learning_rate": 1.877818195897132e-05, "loss": 1.0447, "step": 12790 }, { "epoch": 0.1843928720630393, "grad_norm": 0.5210486054420471, "learning_rate": 1.877594616844299e-05, "loss": 1.0325, "step": 12800 }, { "epoch": 0.18453692899433857, "grad_norm": 0.5701860785484314, "learning_rate": 1.8773708467512247e-05, "loss": 1.0367, "step": 12810 }, { "epoch": 0.1846809859256378, "grad_norm": 0.6415544748306274, "learning_rate": 1.8771468856666203e-05, "loss": 1.0507, "step": 12820 }, { "epoch": 0.18482504285693707, "grad_norm": 0.5568451881408691, "learning_rate": 1.876922733639239e-05, "loss": 1.0534, "step": 12830 }, { "epoch": 0.1849690997882363, "grad_norm": 0.6413741707801819, "learning_rate": 1.8766983907178756e-05, "loss": 1.0538, "step": 12840 }, { "epoch": 0.18511315671953557, "grad_norm": 0.6150505542755127, "learning_rate": 1.876473856951367e-05, "loss": 1.0401, "step": 12850 }, { "epoch": 0.1852572136508348, "grad_norm": 1.147980809211731, "learning_rate": 1.8762491323885904e-05, "loss": 1.0432, "step": 12860 }, { "epoch": 0.18540127058213407, "grad_norm": 0.6297221779823303, "learning_rate": 1.8760242170784656e-05, "loss": 1.0491, "step": 12870 }, { "epoch": 0.1855453275134333, "grad_norm": 0.6736859083175659, "learning_rate": 1.8757991110699537e-05, "loss": 1.0353, "step": 12880 }, { "epoch": 0.18568938444473257, "grad_norm": 0.6270202994346619, "learning_rate": 1.8755738144120567e-05, "loss": 1.0411, "step": 12890 }, { "epoch": 0.1858334413760318, "grad_norm": 0.5883143544197083, "learning_rate": 1.875348327153819e-05, "loss": 1.047, "step": 12900 }, { "epoch": 0.18597749830733107, "grad_norm": 0.9572540521621704, "learning_rate": 1.8751226493443257e-05, "loss": 1.0395, "step": 12910 }, { "epoch": 0.1861215552386303, "grad_norm": 0.6460018157958984, "learning_rate": 1.874896781032704e-05, "loss": 1.0468, "step": 12920 }, { "epoch": 0.18626561216992957, "grad_norm": 0.6704892516136169, "learning_rate": 1.8746707222681225e-05, "loss": 1.0566, "step": 12930 }, { "epoch": 0.1864096691012288, "grad_norm": 0.6742843389511108, "learning_rate": 1.8744444730997907e-05, "loss": 1.0416, "step": 12940 }, { "epoch": 0.18655372603252807, "grad_norm": 0.6100138425827026, "learning_rate": 1.8742180335769603e-05, "loss": 1.0472, "step": 12950 }, { "epoch": 0.1866977829638273, "grad_norm": 0.7344133853912354, "learning_rate": 1.8739914037489236e-05, "loss": 1.0576, "step": 12960 }, { "epoch": 0.18684183989512657, "grad_norm": 0.8023427724838257, "learning_rate": 1.873764583665015e-05, "loss": 1.0604, "step": 12970 }, { "epoch": 0.1869858968264258, "grad_norm": 0.5891733765602112, "learning_rate": 1.8735375733746104e-05, "loss": 1.0359, "step": 12980 }, { "epoch": 0.18712995375772506, "grad_norm": 0.6603121161460876, "learning_rate": 1.873310372927126e-05, "loss": 1.0515, "step": 12990 }, { "epoch": 0.1872740106890243, "grad_norm": 0.5632944703102112, "learning_rate": 1.8730829823720215e-05, "loss": 1.0353, "step": 13000 }, { "epoch": 0.18741806762032356, "grad_norm": 0.548271894454956, "learning_rate": 1.8728554017587954e-05, "loss": 1.0416, "step": 13010 }, { "epoch": 0.1875621245516228, "grad_norm": 0.6604188680648804, "learning_rate": 1.8726276311369896e-05, "loss": 1.0312, "step": 13020 }, { "epoch": 0.18770618148292206, "grad_norm": 0.6293408870697021, "learning_rate": 1.8723996705561865e-05, "loss": 1.047, "step": 13030 }, { "epoch": 0.1878502384142213, "grad_norm": 0.6757556796073914, "learning_rate": 1.8721715200660094e-05, "loss": 1.0323, "step": 13040 }, { "epoch": 0.18799429534552056, "grad_norm": 0.5530831813812256, "learning_rate": 1.8719431797161244e-05, "loss": 1.0652, "step": 13050 }, { "epoch": 0.1881383522768198, "grad_norm": 0.6663712859153748, "learning_rate": 1.8717146495562372e-05, "loss": 1.0395, "step": 13060 }, { "epoch": 0.18828240920811906, "grad_norm": 0.5699810981750488, "learning_rate": 1.8714859296360966e-05, "loss": 1.0316, "step": 13070 }, { "epoch": 0.1884264661394183, "grad_norm": 0.5460770726203918, "learning_rate": 1.871257020005491e-05, "loss": 1.0713, "step": 13080 }, { "epoch": 0.18857052307071756, "grad_norm": 0.6627286076545715, "learning_rate": 1.8710279207142516e-05, "loss": 1.0467, "step": 13090 }, { "epoch": 0.1887145800020168, "grad_norm": 0.760555624961853, "learning_rate": 1.870798631812249e-05, "loss": 1.0621, "step": 13100 }, { "epoch": 0.18885863693331606, "grad_norm": 0.6630510687828064, "learning_rate": 1.8705691533493976e-05, "loss": 1.0214, "step": 13110 }, { "epoch": 0.1890026938646153, "grad_norm": 0.6597281694412231, "learning_rate": 1.8703394853756503e-05, "loss": 1.0326, "step": 13120 }, { "epoch": 0.18914675079591456, "grad_norm": 0.6269526481628418, "learning_rate": 1.8701096279410042e-05, "loss": 1.0398, "step": 13130 }, { "epoch": 0.1892908077272138, "grad_norm": 0.6193029880523682, "learning_rate": 1.8698795810954945e-05, "loss": 1.0229, "step": 13140 }, { "epoch": 0.18943486465851306, "grad_norm": 0.7493313550949097, "learning_rate": 1.8696493448892002e-05, "loss": 1.0617, "step": 13150 }, { "epoch": 0.1895789215898123, "grad_norm": 0.6989824771881104, "learning_rate": 1.8694189193722406e-05, "loss": 1.0333, "step": 13160 }, { "epoch": 0.18972297852111156, "grad_norm": 0.7208826541900635, "learning_rate": 1.8691883045947756e-05, "loss": 1.0427, "step": 13170 }, { "epoch": 0.1898670354524108, "grad_norm": 0.5444157123565674, "learning_rate": 1.8689575006070075e-05, "loss": 1.036, "step": 13180 }, { "epoch": 0.19001109238371003, "grad_norm": 0.6155458688735962, "learning_rate": 1.8687265074591785e-05, "loss": 1.0511, "step": 13190 }, { "epoch": 0.1901551493150093, "grad_norm": 0.65130215883255, "learning_rate": 1.868495325201573e-05, "loss": 1.0507, "step": 13200 }, { "epoch": 0.19029920624630853, "grad_norm": 0.5150203108787537, "learning_rate": 1.8682639538845162e-05, "loss": 1.0292, "step": 13210 }, { "epoch": 0.1904432631776078, "grad_norm": 0.6285935044288635, "learning_rate": 1.868032393558374e-05, "loss": 1.052, "step": 13220 }, { "epoch": 0.19058732010890703, "grad_norm": 0.6595324873924255, "learning_rate": 1.8678006442735546e-05, "loss": 1.0423, "step": 13230 }, { "epoch": 0.1907313770402063, "grad_norm": 0.6310179233551025, "learning_rate": 1.8675687060805062e-05, "loss": 1.0604, "step": 13240 }, { "epoch": 0.19087543397150553, "grad_norm": 0.5544841885566711, "learning_rate": 1.8673365790297184e-05, "loss": 1.0275, "step": 13250 }, { "epoch": 0.1910194909028048, "grad_norm": 0.6470851898193359, "learning_rate": 1.8671042631717226e-05, "loss": 1.0454, "step": 13260 }, { "epoch": 0.19116354783410403, "grad_norm": 0.6718747019767761, "learning_rate": 1.86687175855709e-05, "loss": 1.0478, "step": 13270 }, { "epoch": 0.1913076047654033, "grad_norm": 0.8221065998077393, "learning_rate": 1.8666390652364343e-05, "loss": 1.0537, "step": 13280 }, { "epoch": 0.19145166169670252, "grad_norm": 0.7532829642295837, "learning_rate": 1.8664061832604093e-05, "loss": 1.0496, "step": 13290 }, { "epoch": 0.1915957186280018, "grad_norm": 0.5681278109550476, "learning_rate": 1.8661731126797105e-05, "loss": 1.0237, "step": 13300 }, { "epoch": 0.19173977555930102, "grad_norm": 0.586743175983429, "learning_rate": 1.865939853545074e-05, "loss": 1.041, "step": 13310 }, { "epoch": 0.1918838324906003, "grad_norm": 0.6237199902534485, "learning_rate": 1.865706405907277e-05, "loss": 1.0306, "step": 13320 }, { "epoch": 0.19202788942189952, "grad_norm": 1.0356930494308472, "learning_rate": 1.8654727698171372e-05, "loss": 1.042, "step": 13330 }, { "epoch": 0.19217194635319879, "grad_norm": 0.683358371257782, "learning_rate": 1.8652389453255154e-05, "loss": 1.0573, "step": 13340 }, { "epoch": 0.19231600328449802, "grad_norm": 0.614015519618988, "learning_rate": 1.8650049324833107e-05, "loss": 1.0321, "step": 13350 }, { "epoch": 0.19246006021579729, "grad_norm": 0.8009682893753052, "learning_rate": 1.8647707313414652e-05, "loss": 1.0486, "step": 13360 }, { "epoch": 0.19260411714709652, "grad_norm": 0.832682192325592, "learning_rate": 1.8645363419509606e-05, "loss": 1.0345, "step": 13370 }, { "epoch": 0.19274817407839578, "grad_norm": 0.625429630279541, "learning_rate": 1.8643017643628214e-05, "loss": 1.0202, "step": 13380 }, { "epoch": 0.19289223100969502, "grad_norm": 0.6398469805717468, "learning_rate": 1.8640669986281106e-05, "loss": 1.0328, "step": 13390 }, { "epoch": 0.19303628794099428, "grad_norm": 0.6227033734321594, "learning_rate": 1.863832044797934e-05, "loss": 1.0634, "step": 13400 }, { "epoch": 0.19318034487229352, "grad_norm": 0.6822697520256042, "learning_rate": 1.863596902923438e-05, "loss": 1.0212, "step": 13410 }, { "epoch": 0.19332440180359278, "grad_norm": 0.5786993503570557, "learning_rate": 1.8633615730558095e-05, "loss": 1.0208, "step": 13420 }, { "epoch": 0.19346845873489202, "grad_norm": 0.6412922739982605, "learning_rate": 1.863126055246277e-05, "loss": 1.0315, "step": 13430 }, { "epoch": 0.19361251566619128, "grad_norm": 0.660926342010498, "learning_rate": 1.8628903495461083e-05, "loss": 1.0576, "step": 13440 }, { "epoch": 0.19375657259749052, "grad_norm": 0.6085519194602966, "learning_rate": 1.8626544560066146e-05, "loss": 1.0401, "step": 13450 }, { "epoch": 0.19390062952878978, "grad_norm": 0.73714679479599, "learning_rate": 1.862418374679146e-05, "loss": 1.0363, "step": 13460 }, { "epoch": 0.19404468646008902, "grad_norm": 0.635399341583252, "learning_rate": 1.8621821056150945e-05, "loss": 1.0373, "step": 13470 }, { "epoch": 0.19418874339138828, "grad_norm": 0.5904781818389893, "learning_rate": 1.861945648865892e-05, "loss": 1.0189, "step": 13480 }, { "epoch": 0.19433280032268752, "grad_norm": 0.589888334274292, "learning_rate": 1.8617090044830128e-05, "loss": 1.0447, "step": 13490 }, { "epoch": 0.19447685725398678, "grad_norm": 0.6887579560279846, "learning_rate": 1.8614721725179706e-05, "loss": 1.0389, "step": 13500 }, { "epoch": 0.19462091418528601, "grad_norm": 0.5638279318809509, "learning_rate": 1.86123515302232e-05, "loss": 1.0291, "step": 13510 }, { "epoch": 0.19476497111658528, "grad_norm": 0.5188870429992676, "learning_rate": 1.8609979460476576e-05, "loss": 1.0323, "step": 13520 }, { "epoch": 0.1949090280478845, "grad_norm": 0.6359505653381348, "learning_rate": 1.8607605516456196e-05, "loss": 1.0305, "step": 13530 }, { "epoch": 0.19505308497918378, "grad_norm": 0.5990992784500122, "learning_rate": 1.8605229698678842e-05, "loss": 1.0319, "step": 13540 }, { "epoch": 0.195197141910483, "grad_norm": 0.4979810118675232, "learning_rate": 1.860285200766169e-05, "loss": 1.0318, "step": 13550 }, { "epoch": 0.19534119884178228, "grad_norm": 0.7600204944610596, "learning_rate": 1.860047244392233e-05, "loss": 1.0494, "step": 13560 }, { "epoch": 0.1954852557730815, "grad_norm": 0.5677869319915771, "learning_rate": 1.8598091007978762e-05, "loss": 1.0409, "step": 13570 }, { "epoch": 0.19562931270438078, "grad_norm": 0.7052412033081055, "learning_rate": 1.8595707700349394e-05, "loss": 1.0759, "step": 13580 }, { "epoch": 0.19577336963568, "grad_norm": 0.5724908113479614, "learning_rate": 1.8593322521553036e-05, "loss": 1.0516, "step": 13590 }, { "epoch": 0.19591742656697927, "grad_norm": 0.6373711824417114, "learning_rate": 1.859093547210891e-05, "loss": 1.0468, "step": 13600 }, { "epoch": 0.1960614834982785, "grad_norm": 0.6027516722679138, "learning_rate": 1.858854655253665e-05, "loss": 1.0475, "step": 13610 }, { "epoch": 0.19620554042957777, "grad_norm": 0.5821101069450378, "learning_rate": 1.858615576335628e-05, "loss": 1.0271, "step": 13620 }, { "epoch": 0.196349597360877, "grad_norm": 0.6327353119850159, "learning_rate": 1.8583763105088242e-05, "loss": 1.0509, "step": 13630 }, { "epoch": 0.19649365429217627, "grad_norm": 0.6976615786552429, "learning_rate": 1.8581368578253395e-05, "loss": 1.0328, "step": 13640 }, { "epoch": 0.1966377112234755, "grad_norm": 0.6341861486434937, "learning_rate": 1.8578972183372987e-05, "loss": 1.054, "step": 13650 }, { "epoch": 0.19678176815477477, "grad_norm": 0.626846194267273, "learning_rate": 1.857657392096868e-05, "loss": 1.0232, "step": 13660 }, { "epoch": 0.196925825086074, "grad_norm": 0.7061647176742554, "learning_rate": 1.8574173791562547e-05, "loss": 1.0262, "step": 13670 }, { "epoch": 0.19706988201737327, "grad_norm": 0.593008816242218, "learning_rate": 1.857177179567706e-05, "loss": 1.0289, "step": 13680 }, { "epoch": 0.1972139389486725, "grad_norm": 0.7061359882354736, "learning_rate": 1.85693679338351e-05, "loss": 1.02, "step": 13690 }, { "epoch": 0.19735799587997177, "grad_norm": 0.8743509650230408, "learning_rate": 1.8566962206559955e-05, "loss": 1.028, "step": 13700 }, { "epoch": 0.197502052811271, "grad_norm": 0.5671470165252686, "learning_rate": 1.8564554614375317e-05, "loss": 1.0318, "step": 13710 }, { "epoch": 0.19764610974257027, "grad_norm": 0.6084232926368713, "learning_rate": 1.856214515780529e-05, "loss": 1.0327, "step": 13720 }, { "epoch": 0.1977901666738695, "grad_norm": 0.6257429718971252, "learning_rate": 1.8559733837374378e-05, "loss": 1.0492, "step": 13730 }, { "epoch": 0.19793422360516877, "grad_norm": 0.5424414277076721, "learning_rate": 1.8557320653607492e-05, "loss": 1.0671, "step": 13740 }, { "epoch": 0.198078280536468, "grad_norm": 0.5478965044021606, "learning_rate": 1.8554905607029944e-05, "loss": 1.0498, "step": 13750 }, { "epoch": 0.19822233746776727, "grad_norm": 0.6122015714645386, "learning_rate": 1.8552488698167462e-05, "loss": 1.0428, "step": 13760 }, { "epoch": 0.1983663943990665, "grad_norm": 0.580527126789093, "learning_rate": 1.8550069927546172e-05, "loss": 1.0351, "step": 13770 }, { "epoch": 0.19851045133036577, "grad_norm": 0.5698811411857605, "learning_rate": 1.8547649295692608e-05, "loss": 1.0522, "step": 13780 }, { "epoch": 0.198654508261665, "grad_norm": 0.6567976474761963, "learning_rate": 1.8545226803133704e-05, "loss": 1.018, "step": 13790 }, { "epoch": 0.19879856519296427, "grad_norm": 0.6164844036102295, "learning_rate": 1.854280245039681e-05, "loss": 1.0182, "step": 13800 }, { "epoch": 0.1989426221242635, "grad_norm": 0.7950931191444397, "learning_rate": 1.854037623800967e-05, "loss": 1.0535, "step": 13810 }, { "epoch": 0.19908667905556277, "grad_norm": 0.5991912484169006, "learning_rate": 1.853794816650044e-05, "loss": 1.0357, "step": 13820 }, { "epoch": 0.199230735986862, "grad_norm": 0.7581918835639954, "learning_rate": 1.8535518236397668e-05, "loss": 1.0462, "step": 13830 }, { "epoch": 0.19937479291816126, "grad_norm": 0.6731258630752563, "learning_rate": 1.853308644823033e-05, "loss": 1.0167, "step": 13840 }, { "epoch": 0.1995188498494605, "grad_norm": 0.6933732628822327, "learning_rate": 1.8530652802527784e-05, "loss": 1.0496, "step": 13850 }, { "epoch": 0.19966290678075976, "grad_norm": 0.5585038065910339, "learning_rate": 1.8528217299819803e-05, "loss": 1.0173, "step": 13860 }, { "epoch": 0.199806963712059, "grad_norm": 0.5706941485404968, "learning_rate": 1.852577994063656e-05, "loss": 1.0693, "step": 13870 }, { "epoch": 0.19995102064335826, "grad_norm": 0.641236424446106, "learning_rate": 1.852334072550864e-05, "loss": 1.0327, "step": 13880 }, { "epoch": 0.2000950775746575, "grad_norm": 0.5940988659858704, "learning_rate": 1.8520899654967025e-05, "loss": 1.0509, "step": 13890 }, { "epoch": 0.20023913450595676, "grad_norm": 0.5936237573623657, "learning_rate": 1.8518456729543103e-05, "loss": 1.0459, "step": 13900 }, { "epoch": 0.200383191437256, "grad_norm": 0.604388415813446, "learning_rate": 1.851601194976866e-05, "loss": 1.0371, "step": 13910 }, { "epoch": 0.20052724836855526, "grad_norm": 0.49210256338119507, "learning_rate": 1.851356531617589e-05, "loss": 1.03, "step": 13920 }, { "epoch": 0.2006713052998545, "grad_norm": 0.5752059817314148, "learning_rate": 1.85111168292974e-05, "loss": 1.0049, "step": 13930 }, { "epoch": 0.20081536223115376, "grad_norm": 0.6246433854103088, "learning_rate": 1.8508666489666187e-05, "loss": 1.0154, "step": 13940 }, { "epoch": 0.200959419162453, "grad_norm": 0.5601815581321716, "learning_rate": 1.8506214297815653e-05, "loss": 1.0429, "step": 13950 }, { "epoch": 0.20110347609375226, "grad_norm": 0.6816391944885254, "learning_rate": 1.8503760254279614e-05, "loss": 1.033, "step": 13960 }, { "epoch": 0.2012475330250515, "grad_norm": 0.6566990613937378, "learning_rate": 1.8501304359592276e-05, "loss": 1.0417, "step": 13970 }, { "epoch": 0.20139158995635076, "grad_norm": 0.5980787873268127, "learning_rate": 1.8498846614288252e-05, "loss": 1.0308, "step": 13980 }, { "epoch": 0.20153564688765, "grad_norm": 0.677169680595398, "learning_rate": 1.8496387018902562e-05, "loss": 1.0378, "step": 13990 }, { "epoch": 0.20167970381894926, "grad_norm": 0.61399245262146, "learning_rate": 1.8493925573970627e-05, "loss": 1.0486, "step": 14000 }, { "epoch": 0.2018237607502485, "grad_norm": 0.6262736320495605, "learning_rate": 1.8491462280028273e-05, "loss": 1.0625, "step": 14010 }, { "epoch": 0.20196781768154776, "grad_norm": 0.5216875076293945, "learning_rate": 1.8488997137611714e-05, "loss": 1.0375, "step": 14020 }, { "epoch": 0.202111874612847, "grad_norm": 0.5989294648170471, "learning_rate": 1.8486530147257588e-05, "loss": 1.05, "step": 14030 }, { "epoch": 0.20225593154414626, "grad_norm": 0.6899014711380005, "learning_rate": 1.8484061309502916e-05, "loss": 1.0441, "step": 14040 }, { "epoch": 0.2023999884754455, "grad_norm": 0.5948405861854553, "learning_rate": 1.848159062488514e-05, "loss": 1.0231, "step": 14050 }, { "epoch": 0.20254404540674475, "grad_norm": 0.6017546653747559, "learning_rate": 1.8479118093942083e-05, "loss": 1.0351, "step": 14060 }, { "epoch": 0.202688102338044, "grad_norm": 0.5726602673530579, "learning_rate": 1.8476643717211988e-05, "loss": 1.04, "step": 14070 }, { "epoch": 0.20283215926934325, "grad_norm": 0.6256138682365417, "learning_rate": 1.8474167495233493e-05, "loss": 1.0376, "step": 14080 }, { "epoch": 0.2029762162006425, "grad_norm": 0.5992451906204224, "learning_rate": 1.8471689428545635e-05, "loss": 1.0501, "step": 14090 }, { "epoch": 0.20312027313194175, "grad_norm": 0.5503131747245789, "learning_rate": 1.8469209517687856e-05, "loss": 1.0287, "step": 14100 }, { "epoch": 0.203264330063241, "grad_norm": 0.5980885028839111, "learning_rate": 1.8466727763199997e-05, "loss": 1.0316, "step": 14110 }, { "epoch": 0.20340838699454025, "grad_norm": 0.5954563617706299, "learning_rate": 1.8464244165622303e-05, "loss": 1.0386, "step": 14120 }, { "epoch": 0.2035524439258395, "grad_norm": 0.5852318406105042, "learning_rate": 1.8461758725495417e-05, "loss": 1.0321, "step": 14130 }, { "epoch": 0.20369650085713875, "grad_norm": 0.7828177809715271, "learning_rate": 1.845927144336039e-05, "loss": 1.0409, "step": 14140 }, { "epoch": 0.203840557788438, "grad_norm": 0.49479296803474426, "learning_rate": 1.845678231975866e-05, "loss": 1.0388, "step": 14150 }, { "epoch": 0.20398461471973725, "grad_norm": 0.6297884583473206, "learning_rate": 1.8454291355232085e-05, "loss": 1.0491, "step": 14160 }, { "epoch": 0.2041286716510365, "grad_norm": 0.5248891711235046, "learning_rate": 1.8451798550322906e-05, "loss": 1.0474, "step": 14170 }, { "epoch": 0.20427272858233575, "grad_norm": 0.6096994280815125, "learning_rate": 1.8449303905573777e-05, "loss": 1.0565, "step": 14180 }, { "epoch": 0.20441678551363499, "grad_norm": 0.71397864818573, "learning_rate": 1.8446807421527745e-05, "loss": 1.0376, "step": 14190 }, { "epoch": 0.20456084244493425, "grad_norm": 0.623956024646759, "learning_rate": 1.844430909872826e-05, "loss": 1.0212, "step": 14200 }, { "epoch": 0.20470489937623348, "grad_norm": 0.682212769985199, "learning_rate": 1.8441808937719177e-05, "loss": 1.0341, "step": 14210 }, { "epoch": 0.20484895630753275, "grad_norm": 0.6393522620201111, "learning_rate": 1.843930693904474e-05, "loss": 1.0308, "step": 14220 }, { "epoch": 0.20499301323883198, "grad_norm": 0.705729603767395, "learning_rate": 1.8436803103249604e-05, "loss": 1.0362, "step": 14230 }, { "epoch": 0.20513707017013125, "grad_norm": 0.6077045798301697, "learning_rate": 1.843429743087882e-05, "loss": 1.0389, "step": 14240 }, { "epoch": 0.20528112710143048, "grad_norm": 0.6043947339057922, "learning_rate": 1.8431789922477835e-05, "loss": 1.0303, "step": 14250 }, { "epoch": 0.20542518403272975, "grad_norm": 0.6769972443580627, "learning_rate": 1.8429280578592497e-05, "loss": 1.0263, "step": 14260 }, { "epoch": 0.20556924096402898, "grad_norm": 0.5661643147468567, "learning_rate": 1.8426769399769064e-05, "loss": 1.0458, "step": 14270 }, { "epoch": 0.20571329789532825, "grad_norm": 0.742868185043335, "learning_rate": 1.8424256386554177e-05, "loss": 1.0554, "step": 14280 }, { "epoch": 0.20585735482662748, "grad_norm": 0.6280497312545776, "learning_rate": 1.8421741539494887e-05, "loss": 1.0331, "step": 14290 }, { "epoch": 0.20600141175792674, "grad_norm": 0.5431658625602722, "learning_rate": 1.8419224859138648e-05, "loss": 1.016, "step": 14300 }, { "epoch": 0.20614546868922598, "grad_norm": 0.5135045051574707, "learning_rate": 1.8416706346033294e-05, "loss": 1.0596, "step": 14310 }, { "epoch": 0.20628952562052524, "grad_norm": 0.589107096195221, "learning_rate": 1.8414186000727075e-05, "loss": 1.0417, "step": 14320 }, { "epoch": 0.20643358255182448, "grad_norm": 0.5661746859550476, "learning_rate": 1.8411663823768643e-05, "loss": 1.0341, "step": 14330 }, { "epoch": 0.20657763948312374, "grad_norm": 0.7416191101074219, "learning_rate": 1.8409139815707036e-05, "loss": 1.0356, "step": 14340 }, { "epoch": 0.20672169641442298, "grad_norm": 0.6631795167922974, "learning_rate": 1.840661397709169e-05, "loss": 1.0494, "step": 14350 }, { "epoch": 0.20686575334572224, "grad_norm": 0.6519991755485535, "learning_rate": 1.8404086308472456e-05, "loss": 1.018, "step": 14360 }, { "epoch": 0.20700981027702148, "grad_norm": 0.6510144472122192, "learning_rate": 1.8401556810399565e-05, "loss": 1.0382, "step": 14370 }, { "epoch": 0.20715386720832074, "grad_norm": 0.6138314604759216, "learning_rate": 1.8399025483423654e-05, "loss": 1.012, "step": 14380 }, { "epoch": 0.20729792413961998, "grad_norm": 0.6701445579528809, "learning_rate": 1.839649232809576e-05, "loss": 1.043, "step": 14390 }, { "epoch": 0.20744198107091924, "grad_norm": 0.6574751734733582, "learning_rate": 1.8393957344967317e-05, "loss": 1.0224, "step": 14400 }, { "epoch": 0.20758603800221848, "grad_norm": 0.5700885057449341, "learning_rate": 1.8391420534590153e-05, "loss": 1.0383, "step": 14410 }, { "epoch": 0.20773009493351774, "grad_norm": 0.590834379196167, "learning_rate": 1.83888818975165e-05, "loss": 1.046, "step": 14420 }, { "epoch": 0.20787415186481698, "grad_norm": 0.5771095752716064, "learning_rate": 1.8386341434298985e-05, "loss": 1.0215, "step": 14430 }, { "epoch": 0.20801820879611624, "grad_norm": 0.6535927653312683, "learning_rate": 1.8383799145490627e-05, "loss": 1.037, "step": 14440 }, { "epoch": 0.20816226572741547, "grad_norm": 0.5353608131408691, "learning_rate": 1.8381255031644853e-05, "loss": 1.0457, "step": 14450 }, { "epoch": 0.20830632265871474, "grad_norm": 0.5100358724594116, "learning_rate": 1.8378709093315474e-05, "loss": 1.0272, "step": 14460 }, { "epoch": 0.20845037959001397, "grad_norm": 0.6513608694076538, "learning_rate": 1.8376161331056707e-05, "loss": 1.0404, "step": 14470 }, { "epoch": 0.20859443652131324, "grad_norm": 0.8126131296157837, "learning_rate": 1.8373611745423173e-05, "loss": 1.0584, "step": 14480 }, { "epoch": 0.20873849345261247, "grad_norm": 0.6187640428543091, "learning_rate": 1.8371060336969874e-05, "loss": 1.0506, "step": 14490 }, { "epoch": 0.2088825503839117, "grad_norm": 0.6069515943527222, "learning_rate": 1.8368507106252217e-05, "loss": 1.0352, "step": 14500 }, { "epoch": 0.20902660731521097, "grad_norm": 0.5389923453330994, "learning_rate": 1.836595205382601e-05, "loss": 1.039, "step": 14510 }, { "epoch": 0.2091706642465102, "grad_norm": 0.6446439027786255, "learning_rate": 1.8363395180247446e-05, "loss": 1.0139, "step": 14520 }, { "epoch": 0.20931472117780947, "grad_norm": 0.609357476234436, "learning_rate": 1.8360836486073127e-05, "loss": 1.0354, "step": 14530 }, { "epoch": 0.2094587781091087, "grad_norm": 0.7137535810470581, "learning_rate": 1.835827597186004e-05, "loss": 1.0388, "step": 14540 }, { "epoch": 0.20960283504040797, "grad_norm": 0.5982151627540588, "learning_rate": 1.8355713638165576e-05, "loss": 1.0352, "step": 14550 }, { "epoch": 0.2097468919717072, "grad_norm": 0.5672206878662109, "learning_rate": 1.8353149485547525e-05, "loss": 1.0297, "step": 14560 }, { "epoch": 0.20989094890300647, "grad_norm": 0.5580592751502991, "learning_rate": 1.835058351456406e-05, "loss": 1.0468, "step": 14570 }, { "epoch": 0.2100350058343057, "grad_norm": 0.5752267241477966, "learning_rate": 1.834801572577376e-05, "loss": 1.032, "step": 14580 }, { "epoch": 0.21017906276560497, "grad_norm": 0.5035666227340698, "learning_rate": 1.8345446119735596e-05, "loss": 1.048, "step": 14590 }, { "epoch": 0.2103231196969042, "grad_norm": 0.5395452380180359, "learning_rate": 1.8342874697008937e-05, "loss": 1.0421, "step": 14600 }, { "epoch": 0.21046717662820347, "grad_norm": 0.5929610729217529, "learning_rate": 1.834030145815355e-05, "loss": 1.031, "step": 14610 }, { "epoch": 0.2106112335595027, "grad_norm": 0.5652207136154175, "learning_rate": 1.8337726403729588e-05, "loss": 1.0367, "step": 14620 }, { "epoch": 0.21075529049080197, "grad_norm": 0.7196754217147827, "learning_rate": 1.8335149534297606e-05, "loss": 1.0086, "step": 14630 }, { "epoch": 0.2108993474221012, "grad_norm": 0.6522864103317261, "learning_rate": 1.8332570850418557e-05, "loss": 1.0353, "step": 14640 }, { "epoch": 0.21104340435340047, "grad_norm": 0.6994026303291321, "learning_rate": 1.832999035265378e-05, "loss": 1.0311, "step": 14650 }, { "epoch": 0.2111874612846997, "grad_norm": 0.6242994070053101, "learning_rate": 1.8327408041565013e-05, "loss": 1.0671, "step": 14660 }, { "epoch": 0.21133151821599896, "grad_norm": 0.7104344367980957, "learning_rate": 1.8324823917714395e-05, "loss": 1.049, "step": 14670 }, { "epoch": 0.2114755751472982, "grad_norm": 0.55716472864151, "learning_rate": 1.832223798166445e-05, "loss": 1.0338, "step": 14680 }, { "epoch": 0.21161963207859746, "grad_norm": 0.9544410109519958, "learning_rate": 1.8319650233978103e-05, "loss": 1.0276, "step": 14690 }, { "epoch": 0.2117636890098967, "grad_norm": 0.6191002726554871, "learning_rate": 1.831706067521867e-05, "loss": 1.0287, "step": 14700 }, { "epoch": 0.21190774594119596, "grad_norm": 0.5778596997261047, "learning_rate": 1.8314469305949856e-05, "loss": 1.0143, "step": 14710 }, { "epoch": 0.2120518028724952, "grad_norm": 0.5484819412231445, "learning_rate": 1.8311876126735778e-05, "loss": 1.0174, "step": 14720 }, { "epoch": 0.21219585980379446, "grad_norm": 0.6016744375228882, "learning_rate": 1.8309281138140932e-05, "loss": 1.0573, "step": 14730 }, { "epoch": 0.2123399167350937, "grad_norm": 0.6980452537536621, "learning_rate": 1.8306684340730202e-05, "loss": 1.0425, "step": 14740 }, { "epoch": 0.21248397366639296, "grad_norm": 0.5923468470573425, "learning_rate": 1.8304085735068885e-05, "loss": 1.0598, "step": 14750 }, { "epoch": 0.2126280305976922, "grad_norm": 0.655491292476654, "learning_rate": 1.830148532172266e-05, "loss": 1.0563, "step": 14760 }, { "epoch": 0.21277208752899146, "grad_norm": 0.6702054142951965, "learning_rate": 1.8298883101257597e-05, "loss": 1.0397, "step": 14770 }, { "epoch": 0.2129161444602907, "grad_norm": 0.6654492616653442, "learning_rate": 1.829627907424017e-05, "loss": 1.0177, "step": 14780 }, { "epoch": 0.21306020139158996, "grad_norm": 0.5811224579811096, "learning_rate": 1.829367324123723e-05, "loss": 1.0396, "step": 14790 }, { "epoch": 0.2132042583228892, "grad_norm": 0.5703511238098145, "learning_rate": 1.8291065602816038e-05, "loss": 1.0348, "step": 14800 }, { "epoch": 0.21334831525418846, "grad_norm": 0.5730449557304382, "learning_rate": 1.8288456159544242e-05, "loss": 1.0289, "step": 14810 }, { "epoch": 0.2134923721854877, "grad_norm": 0.5400986075401306, "learning_rate": 1.8285844911989882e-05, "loss": 1.0476, "step": 14820 }, { "epoch": 0.21363642911678696, "grad_norm": 0.5935134291648865, "learning_rate": 1.828323186072138e-05, "loss": 1.0435, "step": 14830 }, { "epoch": 0.2137804860480862, "grad_norm": 0.5888200998306274, "learning_rate": 1.8280617006307575e-05, "loss": 1.0365, "step": 14840 }, { "epoch": 0.21392454297938546, "grad_norm": 0.6139171123504639, "learning_rate": 1.827800034931768e-05, "loss": 1.0399, "step": 14850 }, { "epoch": 0.2140685999106847, "grad_norm": 0.5612664818763733, "learning_rate": 1.8275381890321297e-05, "loss": 1.0416, "step": 14860 }, { "epoch": 0.21421265684198396, "grad_norm": 0.5276797413825989, "learning_rate": 1.8272761629888444e-05, "loss": 1.0504, "step": 14870 }, { "epoch": 0.2143567137732832, "grad_norm": 0.5620393753051758, "learning_rate": 1.82701395685895e-05, "loss": 1.0465, "step": 14880 }, { "epoch": 0.21450077070458246, "grad_norm": 0.5414578318595886, "learning_rate": 1.8267515706995262e-05, "loss": 1.0447, "step": 14890 }, { "epoch": 0.2146448276358817, "grad_norm": 0.636154055595398, "learning_rate": 1.8264890045676903e-05, "loss": 1.0263, "step": 14900 }, { "epoch": 0.21478888456718095, "grad_norm": 0.5891751646995544, "learning_rate": 1.8262262585206e-05, "loss": 1.03, "step": 14910 }, { "epoch": 0.2149329414984802, "grad_norm": 0.6565343737602234, "learning_rate": 1.8259633326154506e-05, "loss": 1.038, "step": 14920 }, { "epoch": 0.21507699842977945, "grad_norm": 0.6619620323181152, "learning_rate": 1.8257002269094778e-05, "loss": 1.0412, "step": 14930 }, { "epoch": 0.2152210553610787, "grad_norm": 0.5966458320617676, "learning_rate": 1.8254369414599562e-05, "loss": 1.0184, "step": 14940 }, { "epoch": 0.21536511229237795, "grad_norm": 0.6068892478942871, "learning_rate": 1.8251734763241998e-05, "loss": 1.0259, "step": 14950 }, { "epoch": 0.2155091692236772, "grad_norm": 0.6060580611228943, "learning_rate": 1.8249098315595605e-05, "loss": 1.0255, "step": 14960 }, { "epoch": 0.21565322615497645, "grad_norm": 0.7341133952140808, "learning_rate": 1.8246460072234305e-05, "loss": 1.0336, "step": 14970 }, { "epoch": 0.2157972830862757, "grad_norm": 0.6035784482955933, "learning_rate": 1.8243820033732407e-05, "loss": 1.0283, "step": 14980 }, { "epoch": 0.21594134001757495, "grad_norm": 0.5042765736579895, "learning_rate": 1.824117820066461e-05, "loss": 1.042, "step": 14990 }, { "epoch": 0.2160853969488742, "grad_norm": 0.6901978254318237, "learning_rate": 1.8238534573606007e-05, "loss": 1.0523, "step": 15000 }, { "epoch": 0.21622945388017345, "grad_norm": 0.5616651773452759, "learning_rate": 1.823588915313208e-05, "loss": 1.0401, "step": 15010 }, { "epoch": 0.21637351081147269, "grad_norm": 0.613223671913147, "learning_rate": 1.8233241939818697e-05, "loss": 1.0434, "step": 15020 }, { "epoch": 0.21651756774277195, "grad_norm": 0.5958622097969055, "learning_rate": 1.823059293424212e-05, "loss": 1.0412, "step": 15030 }, { "epoch": 0.21666162467407118, "grad_norm": 0.5168639421463013, "learning_rate": 1.8227942136979004e-05, "loss": 1.023, "step": 15040 }, { "epoch": 0.21680568160537045, "grad_norm": 0.7793817520141602, "learning_rate": 1.8225289548606387e-05, "loss": 1.025, "step": 15050 }, { "epoch": 0.21694973853666968, "grad_norm": 0.5913326740264893, "learning_rate": 1.822263516970171e-05, "loss": 1.0333, "step": 15060 }, { "epoch": 0.21709379546796895, "grad_norm": 0.6637604236602783, "learning_rate": 1.8219979000842782e-05, "loss": 1.0613, "step": 15070 }, { "epoch": 0.21723785239926818, "grad_norm": 0.5894302129745483, "learning_rate": 1.821732104260782e-05, "loss": 1.0533, "step": 15080 }, { "epoch": 0.21738190933056745, "grad_norm": 0.6399484872817993, "learning_rate": 1.8214661295575427e-05, "loss": 1.0251, "step": 15090 }, { "epoch": 0.21752596626186668, "grad_norm": 0.6171691417694092, "learning_rate": 1.8211999760324592e-05, "loss": 1.027, "step": 15100 }, { "epoch": 0.21767002319316595, "grad_norm": 0.5771392583847046, "learning_rate": 1.820933643743469e-05, "loss": 1.0433, "step": 15110 }, { "epoch": 0.21781408012446518, "grad_norm": 0.6488246917724609, "learning_rate": 1.8206671327485496e-05, "loss": 1.0251, "step": 15120 }, { "epoch": 0.21795813705576444, "grad_norm": 0.6200474500656128, "learning_rate": 1.8204004431057166e-05, "loss": 1.0381, "step": 15130 }, { "epoch": 0.21810219398706368, "grad_norm": 0.5430780053138733, "learning_rate": 1.8201335748730244e-05, "loss": 1.0392, "step": 15140 }, { "epoch": 0.21824625091836294, "grad_norm": 0.6101915836334229, "learning_rate": 1.8198665281085668e-05, "loss": 1.0672, "step": 15150 }, { "epoch": 0.21839030784966218, "grad_norm": 0.5767624378204346, "learning_rate": 1.819599302870476e-05, "loss": 1.0095, "step": 15160 }, { "epoch": 0.21853436478096144, "grad_norm": 0.5709320306777954, "learning_rate": 1.8193318992169234e-05, "loss": 1.0373, "step": 15170 }, { "epoch": 0.21867842171226068, "grad_norm": 0.5829881429672241, "learning_rate": 1.819064317206119e-05, "loss": 1.0247, "step": 15180 }, { "epoch": 0.21882247864355994, "grad_norm": 0.5822725296020508, "learning_rate": 1.8187965568963117e-05, "loss": 1.0547, "step": 15190 }, { "epoch": 0.21896653557485918, "grad_norm": 0.5555112361907959, "learning_rate": 1.8185286183457896e-05, "loss": 1.0442, "step": 15200 }, { "epoch": 0.21911059250615844, "grad_norm": 0.5043044090270996, "learning_rate": 1.8182605016128786e-05, "loss": 1.0107, "step": 15210 }, { "epoch": 0.21925464943745768, "grad_norm": 0.5960538387298584, "learning_rate": 1.8179922067559445e-05, "loss": 1.0812, "step": 15220 }, { "epoch": 0.21939870636875694, "grad_norm": 0.5488318204879761, "learning_rate": 1.8177237338333908e-05, "loss": 1.041, "step": 15230 }, { "epoch": 0.21954276330005618, "grad_norm": 0.5550768971443176, "learning_rate": 1.8174550829036608e-05, "loss": 1.0605, "step": 15240 }, { "epoch": 0.21968682023135544, "grad_norm": 0.6654821634292603, "learning_rate": 1.8171862540252363e-05, "loss": 1.0322, "step": 15250 }, { "epoch": 0.21983087716265468, "grad_norm": 0.67624831199646, "learning_rate": 1.816917247256637e-05, "loss": 1.0566, "step": 15260 }, { "epoch": 0.21997493409395394, "grad_norm": 0.7128103375434875, "learning_rate": 1.8166480626564232e-05, "loss": 1.0366, "step": 15270 }, { "epoch": 0.22011899102525317, "grad_norm": 0.5526544451713562, "learning_rate": 1.816378700283191e-05, "loss": 1.0476, "step": 15280 }, { "epoch": 0.22026304795655244, "grad_norm": 0.5415549874305725, "learning_rate": 1.8161091601955782e-05, "loss": 1.028, "step": 15290 }, { "epoch": 0.22040710488785167, "grad_norm": 0.5962819457054138, "learning_rate": 1.8158394424522592e-05, "loss": 1.0619, "step": 15300 }, { "epoch": 0.22055116181915094, "grad_norm": 0.9033251404762268, "learning_rate": 1.8155695471119484e-05, "loss": 1.0442, "step": 15310 }, { "epoch": 0.22069521875045017, "grad_norm": 0.6221054196357727, "learning_rate": 1.8152994742333977e-05, "loss": 1.0386, "step": 15320 }, { "epoch": 0.22083927568174944, "grad_norm": 0.5393169522285461, "learning_rate": 1.8150292238753983e-05, "loss": 1.0347, "step": 15330 }, { "epoch": 0.22098333261304867, "grad_norm": 0.5820224285125732, "learning_rate": 1.8147587960967806e-05, "loss": 1.0396, "step": 15340 }, { "epoch": 0.22112738954434794, "grad_norm": 0.5936693549156189, "learning_rate": 1.8144881909564122e-05, "loss": 1.0574, "step": 15350 }, { "epoch": 0.22127144647564717, "grad_norm": 0.7099258303642273, "learning_rate": 1.8142174085132008e-05, "loss": 1.0312, "step": 15360 }, { "epoch": 0.22141550340694643, "grad_norm": 0.6385607719421387, "learning_rate": 1.8139464488260917e-05, "loss": 1.0564, "step": 15370 }, { "epoch": 0.22155956033824567, "grad_norm": 0.5421634316444397, "learning_rate": 1.813675311954069e-05, "loss": 1.0252, "step": 15380 }, { "epoch": 0.22170361726954493, "grad_norm": 0.7158612608909607, "learning_rate": 1.8134039979561558e-05, "loss": 1.0494, "step": 15390 }, { "epoch": 0.22184767420084417, "grad_norm": 0.5323343873023987, "learning_rate": 1.8131325068914125e-05, "loss": 1.0184, "step": 15400 }, { "epoch": 0.22199173113214343, "grad_norm": 0.6327012777328491, "learning_rate": 1.8128608388189402e-05, "loss": 1.0257, "step": 15410 }, { "epoch": 0.22213578806344267, "grad_norm": 0.5406925082206726, "learning_rate": 1.8125889937978765e-05, "loss": 1.0336, "step": 15420 }, { "epoch": 0.22227984499474193, "grad_norm": 0.504689633846283, "learning_rate": 1.8123169718873987e-05, "loss": 1.0346, "step": 15430 }, { "epoch": 0.22242390192604117, "grad_norm": 0.5919815897941589, "learning_rate": 1.812044773146722e-05, "loss": 1.0203, "step": 15440 }, { "epoch": 0.22256795885734043, "grad_norm": 0.6278406381607056, "learning_rate": 1.8117723976351e-05, "loss": 1.0313, "step": 15450 }, { "epoch": 0.22271201578863967, "grad_norm": 0.6674849390983582, "learning_rate": 1.811499845411826e-05, "loss": 1.0413, "step": 15460 }, { "epoch": 0.22285607271993893, "grad_norm": 0.7212050557136536, "learning_rate": 1.81122711653623e-05, "loss": 1.0494, "step": 15470 }, { "epoch": 0.22300012965123817, "grad_norm": 0.5566467046737671, "learning_rate": 1.8109542110676813e-05, "loss": 1.0483, "step": 15480 }, { "epoch": 0.22314418658253743, "grad_norm": 0.5412294268608093, "learning_rate": 1.8106811290655884e-05, "loss": 1.0288, "step": 15490 }, { "epoch": 0.22328824351383667, "grad_norm": 0.6451317667961121, "learning_rate": 1.810407870589397e-05, "loss": 1.0463, "step": 15500 }, { "epoch": 0.22343230044513593, "grad_norm": 0.5637590885162354, "learning_rate": 1.8101344356985918e-05, "loss": 1.0202, "step": 15510 }, { "epoch": 0.22357635737643516, "grad_norm": 0.6563864350318909, "learning_rate": 1.8098608244526955e-05, "loss": 1.068, "step": 15520 }, { "epoch": 0.22372041430773443, "grad_norm": 0.5868936777114868, "learning_rate": 1.80958703691127e-05, "loss": 1.0519, "step": 15530 }, { "epoch": 0.22386447123903366, "grad_norm": 0.6614019870758057, "learning_rate": 1.8093130731339146e-05, "loss": 1.0186, "step": 15540 }, { "epoch": 0.22400852817033293, "grad_norm": 0.5744838714599609, "learning_rate": 1.809038933180268e-05, "loss": 1.0282, "step": 15550 }, { "epoch": 0.22415258510163216, "grad_norm": 0.6549461483955383, "learning_rate": 1.808764617110006e-05, "loss": 1.0591, "step": 15560 }, { "epoch": 0.22429664203293143, "grad_norm": 0.629159152507782, "learning_rate": 1.808490124982844e-05, "loss": 1.0353, "step": 15570 }, { "epoch": 0.22444069896423066, "grad_norm": 0.5964781045913696, "learning_rate": 1.808215456858535e-05, "loss": 1.0396, "step": 15580 }, { "epoch": 0.22458475589552993, "grad_norm": 0.601401686668396, "learning_rate": 1.8079406127968702e-05, "loss": 1.0264, "step": 15590 }, { "epoch": 0.22472881282682916, "grad_norm": 0.5822338461875916, "learning_rate": 1.8076655928576795e-05, "loss": 1.0278, "step": 15600 }, { "epoch": 0.22487286975812842, "grad_norm": 0.6674925088882446, "learning_rate": 1.807390397100831e-05, "loss": 1.0466, "step": 15610 }, { "epoch": 0.22501692668942766, "grad_norm": 0.5742388963699341, "learning_rate": 1.807115025586231e-05, "loss": 1.0326, "step": 15620 }, { "epoch": 0.22516098362072692, "grad_norm": 0.657478928565979, "learning_rate": 1.8068394783738242e-05, "loss": 1.0251, "step": 15630 }, { "epoch": 0.22530504055202616, "grad_norm": 0.6310794353485107, "learning_rate": 1.8065637555235935e-05, "loss": 1.0334, "step": 15640 }, { "epoch": 0.22544909748332542, "grad_norm": 0.7108659744262695, "learning_rate": 1.806287857095559e-05, "loss": 1.03, "step": 15650 }, { "epoch": 0.22559315441462466, "grad_norm": 0.6125839948654175, "learning_rate": 1.8060117831497814e-05, "loss": 1.0516, "step": 15660 }, { "epoch": 0.22573721134592392, "grad_norm": 0.5679513216018677, "learning_rate": 1.8057355337463572e-05, "loss": 1.0308, "step": 15670 }, { "epoch": 0.22588126827722316, "grad_norm": 0.566606342792511, "learning_rate": 1.8054591089454228e-05, "loss": 1.0594, "step": 15680 }, { "epoch": 0.22602532520852242, "grad_norm": 0.5276409387588501, "learning_rate": 1.805182508807152e-05, "loss": 1.0178, "step": 15690 }, { "epoch": 0.22616938213982166, "grad_norm": 0.5563755035400391, "learning_rate": 1.8049057333917558e-05, "loss": 1.0329, "step": 15700 }, { "epoch": 0.22631343907112092, "grad_norm": 0.6629067063331604, "learning_rate": 1.8046287827594854e-05, "loss": 1.0243, "step": 15710 }, { "epoch": 0.22645749600242016, "grad_norm": 0.5855606198310852, "learning_rate": 1.8043516569706288e-05, "loss": 1.0274, "step": 15720 }, { "epoch": 0.22660155293371942, "grad_norm": 0.572472333908081, "learning_rate": 1.8040743560855127e-05, "loss": 1.0352, "step": 15730 }, { "epoch": 0.22674560986501865, "grad_norm": 0.6293073892593384, "learning_rate": 1.803796880164501e-05, "loss": 1.0395, "step": 15740 }, { "epoch": 0.22688966679631792, "grad_norm": 0.5595530867576599, "learning_rate": 1.8035192292679974e-05, "loss": 1.0303, "step": 15750 }, { "epoch": 0.22703372372761715, "grad_norm": 0.6038975119590759, "learning_rate": 1.803241403456442e-05, "loss": 1.0436, "step": 15760 }, { "epoch": 0.22717778065891642, "grad_norm": 0.6200108528137207, "learning_rate": 1.802963402790314e-05, "loss": 1.0428, "step": 15770 }, { "epoch": 0.22732183759021565, "grad_norm": 0.5227588415145874, "learning_rate": 1.80268522733013e-05, "loss": 1.0411, "step": 15780 }, { "epoch": 0.22746589452151492, "grad_norm": 0.6519966125488281, "learning_rate": 1.8024068771364446e-05, "loss": 1.0235, "step": 15790 }, { "epoch": 0.22760995145281415, "grad_norm": 0.5424467325210571, "learning_rate": 1.802128352269852e-05, "loss": 1.0239, "step": 15800 }, { "epoch": 0.2277540083841134, "grad_norm": 0.6248921751976013, "learning_rate": 1.8018496527909818e-05, "loss": 1.0474, "step": 15810 }, { "epoch": 0.22789806531541265, "grad_norm": 0.5277923941612244, "learning_rate": 1.8015707787605038e-05, "loss": 1.0323, "step": 15820 }, { "epoch": 0.2280421222467119, "grad_norm": 0.7009173035621643, "learning_rate": 1.801291730239125e-05, "loss": 1.0411, "step": 15830 }, { "epoch": 0.22818617917801115, "grad_norm": 0.5484472513198853, "learning_rate": 1.8010125072875907e-05, "loss": 1.0214, "step": 15840 }, { "epoch": 0.22833023610931039, "grad_norm": 0.5944812893867493, "learning_rate": 1.800733109966683e-05, "loss": 1.0365, "step": 15850 }, { "epoch": 0.22847429304060965, "grad_norm": 0.7652137875556946, "learning_rate": 1.800453538337224e-05, "loss": 1.0434, "step": 15860 }, { "epoch": 0.22861834997190889, "grad_norm": 0.5834174752235413, "learning_rate": 1.8001737924600716e-05, "loss": 1.0277, "step": 15870 }, { "epoch": 0.22876240690320815, "grad_norm": 0.5894598960876465, "learning_rate": 1.799893872396123e-05, "loss": 1.0364, "step": 15880 }, { "epoch": 0.22890646383450738, "grad_norm": 0.4981635808944702, "learning_rate": 1.799613778206313e-05, "loss": 1.0536, "step": 15890 }, { "epoch": 0.22905052076580665, "grad_norm": 0.5469146370887756, "learning_rate": 1.799333509951614e-05, "loss": 1.0337, "step": 15900 }, { "epoch": 0.22919457769710588, "grad_norm": 0.6103417277336121, "learning_rate": 1.799053067693037e-05, "loss": 1.0538, "step": 15910 }, { "epoch": 0.22933863462840515, "grad_norm": 0.5183495879173279, "learning_rate": 1.79877245149163e-05, "loss": 1.0317, "step": 15920 }, { "epoch": 0.22948269155970438, "grad_norm": 0.6987003087997437, "learning_rate": 1.798491661408479e-05, "loss": 1.0589, "step": 15930 }, { "epoch": 0.22962674849100365, "grad_norm": 0.6228061318397522, "learning_rate": 1.7982106975047088e-05, "loss": 1.0446, "step": 15940 }, { "epoch": 0.22977080542230288, "grad_norm": 0.5950088500976562, "learning_rate": 1.7979295598414814e-05, "loss": 1.0346, "step": 15950 }, { "epoch": 0.22991486235360215, "grad_norm": 0.5522655844688416, "learning_rate": 1.797648248479996e-05, "loss": 1.0191, "step": 15960 }, { "epoch": 0.23005891928490138, "grad_norm": 0.6294156312942505, "learning_rate": 1.79736676348149e-05, "loss": 1.041, "step": 15970 }, { "epoch": 0.23020297621620064, "grad_norm": 0.9068011045455933, "learning_rate": 1.7970851049072397e-05, "loss": 1.0469, "step": 15980 }, { "epoch": 0.23034703314749988, "grad_norm": 0.49483510851860046, "learning_rate": 1.7968032728185577e-05, "loss": 1.0358, "step": 15990 }, { "epoch": 0.23049109007879914, "grad_norm": 0.5636140704154968, "learning_rate": 1.7965212672767955e-05, "loss": 1.0401, "step": 16000 }, { "epoch": 0.23063514701009838, "grad_norm": 0.5015645027160645, "learning_rate": 1.796239088343341e-05, "loss": 1.0322, "step": 16010 }, { "epoch": 0.23077920394139764, "grad_norm": 0.8636760711669922, "learning_rate": 1.7959567360796215e-05, "loss": 1.0413, "step": 16020 }, { "epoch": 0.23092326087269688, "grad_norm": 0.6280728578567505, "learning_rate": 1.7956742105471006e-05, "loss": 1.0342, "step": 16030 }, { "epoch": 0.23106731780399614, "grad_norm": 0.5852565169334412, "learning_rate": 1.7953915118072803e-05, "loss": 1.0593, "step": 16040 }, { "epoch": 0.23121137473529538, "grad_norm": 0.5820422768592834, "learning_rate": 1.7951086399217002e-05, "loss": 1.054, "step": 16050 }, { "epoch": 0.23135543166659464, "grad_norm": 0.5244117379188538, "learning_rate": 1.794825594951938e-05, "loss": 1.0408, "step": 16060 }, { "epoch": 0.23149948859789388, "grad_norm": 0.6109441518783569, "learning_rate": 1.7945423769596083e-05, "loss": 1.046, "step": 16070 }, { "epoch": 0.23164354552919314, "grad_norm": 0.7189362049102783, "learning_rate": 1.794258986006364e-05, "loss": 1.0278, "step": 16080 }, { "epoch": 0.23178760246049238, "grad_norm": 0.6301892399787903, "learning_rate": 1.793975422153895e-05, "loss": 1.0443, "step": 16090 }, { "epoch": 0.23193165939179164, "grad_norm": 0.615687906742096, "learning_rate": 1.7936916854639298e-05, "loss": 1.0381, "step": 16100 }, { "epoch": 0.23207571632309087, "grad_norm": 0.5806190371513367, "learning_rate": 1.793407775998233e-05, "loss": 1.0284, "step": 16110 }, { "epoch": 0.23221977325439014, "grad_norm": 0.5128661394119263, "learning_rate": 1.7931236938186093e-05, "loss": 1.0288, "step": 16120 }, { "epoch": 0.23236383018568937, "grad_norm": 0.5688104033470154, "learning_rate": 1.792839438986898e-05, "loss": 1.0123, "step": 16130 }, { "epoch": 0.23250788711698864, "grad_norm": 0.5432714819908142, "learning_rate": 1.7925550115649782e-05, "loss": 1.0502, "step": 16140 }, { "epoch": 0.23265194404828787, "grad_norm": 0.5542018413543701, "learning_rate": 1.792270411614766e-05, "loss": 1.0323, "step": 16150 }, { "epoch": 0.23279600097958714, "grad_norm": 0.6536816358566284, "learning_rate": 1.791985639198214e-05, "loss": 1.0353, "step": 16160 }, { "epoch": 0.23294005791088637, "grad_norm": 0.6699745059013367, "learning_rate": 1.791700694377314e-05, "loss": 1.0264, "step": 16170 }, { "epoch": 0.23308411484218564, "grad_norm": 0.582292914390564, "learning_rate": 1.7914155772140946e-05, "loss": 1.0236, "step": 16180 }, { "epoch": 0.23322817177348487, "grad_norm": 0.5736205577850342, "learning_rate": 1.7911302877706217e-05, "loss": 1.0388, "step": 16190 }, { "epoch": 0.23337222870478413, "grad_norm": 0.5483204126358032, "learning_rate": 1.7908448261089985e-05, "loss": 1.0476, "step": 16200 }, { "epoch": 0.23351628563608337, "grad_norm": 0.5308883786201477, "learning_rate": 1.7905591922913664e-05, "loss": 1.047, "step": 16210 }, { "epoch": 0.23366034256738263, "grad_norm": 0.7244343757629395, "learning_rate": 1.7902733863799037e-05, "loss": 1.0416, "step": 16220 }, { "epoch": 0.23380439949868187, "grad_norm": 0.5258388519287109, "learning_rate": 1.7899874084368273e-05, "loss": 1.0434, "step": 16230 }, { "epoch": 0.23394845642998113, "grad_norm": 0.6775047779083252, "learning_rate": 1.789701258524389e-05, "loss": 1.0229, "step": 16240 }, { "epoch": 0.23409251336128037, "grad_norm": 0.5259122252464294, "learning_rate": 1.7894149367048815e-05, "loss": 1.0225, "step": 16250 }, { "epoch": 0.23423657029257963, "grad_norm": 0.6943677663803101, "learning_rate": 1.7891284430406316e-05, "loss": 1.0332, "step": 16260 }, { "epoch": 0.23438062722387887, "grad_norm": 0.6831935048103333, "learning_rate": 1.7888417775940062e-05, "loss": 1.0553, "step": 16270 }, { "epoch": 0.23452468415517813, "grad_norm": 0.5620949864387512, "learning_rate": 1.7885549404274075e-05, "loss": 1.0259, "step": 16280 }, { "epoch": 0.23466874108647737, "grad_norm": 0.5302318334579468, "learning_rate": 1.7882679316032766e-05, "loss": 1.0284, "step": 16290 }, { "epoch": 0.23481279801777663, "grad_norm": 0.6939184069633484, "learning_rate": 1.7879807511840916e-05, "loss": 1.065, "step": 16300 }, { "epoch": 0.23495685494907587, "grad_norm": 0.5826215744018555, "learning_rate": 1.7876933992323667e-05, "loss": 1.0345, "step": 16310 }, { "epoch": 0.23510091188037513, "grad_norm": 0.5437131524085999, "learning_rate": 1.787405875810655e-05, "loss": 1.0167, "step": 16320 }, { "epoch": 0.23524496881167437, "grad_norm": 0.6369717121124268, "learning_rate": 1.787118180981547e-05, "loss": 1.0243, "step": 16330 }, { "epoch": 0.23538902574297363, "grad_norm": 0.5772448182106018, "learning_rate": 1.786830314807669e-05, "loss": 1.0265, "step": 16340 }, { "epoch": 0.23553308267427286, "grad_norm": 0.6192107796669006, "learning_rate": 1.786542277351685e-05, "loss": 1.0324, "step": 16350 }, { "epoch": 0.23567713960557213, "grad_norm": 0.5261062383651733, "learning_rate": 1.7862540686762987e-05, "loss": 1.0226, "step": 16360 }, { "epoch": 0.23582119653687136, "grad_norm": 0.6214985251426697, "learning_rate": 1.785965688844247e-05, "loss": 1.0436, "step": 16370 }, { "epoch": 0.23596525346817063, "grad_norm": 0.5913611650466919, "learning_rate": 1.7856771379183083e-05, "loss": 1.026, "step": 16380 }, { "epoch": 0.23610931039946986, "grad_norm": 0.6042903661727905, "learning_rate": 1.7853884159612945e-05, "loss": 1.0267, "step": 16390 }, { "epoch": 0.23625336733076913, "grad_norm": 0.5531982779502869, "learning_rate": 1.785099523036057e-05, "loss": 1.0376, "step": 16400 }, { "epoch": 0.23639742426206836, "grad_norm": 0.6330571174621582, "learning_rate": 1.7848104592054838e-05, "loss": 1.0433, "step": 16410 }, { "epoch": 0.23654148119336763, "grad_norm": 0.6041118502616882, "learning_rate": 1.7845212245325e-05, "loss": 1.0391, "step": 16420 }, { "epoch": 0.23668553812466686, "grad_norm": 0.6083371639251709, "learning_rate": 1.7842318190800686e-05, "loss": 1.0476, "step": 16430 }, { "epoch": 0.23682959505596612, "grad_norm": 0.5628354549407959, "learning_rate": 1.7839422429111883e-05, "loss": 1.0445, "step": 16440 }, { "epoch": 0.23697365198726536, "grad_norm": 0.5915509462356567, "learning_rate": 1.783652496088896e-05, "loss": 1.0371, "step": 16450 }, { "epoch": 0.23711770891856462, "grad_norm": 0.6667011976242065, "learning_rate": 1.783362578676266e-05, "loss": 1.0386, "step": 16460 }, { "epoch": 0.23726176584986386, "grad_norm": 0.6184618473052979, "learning_rate": 1.7830724907364093e-05, "loss": 1.0266, "step": 16470 }, { "epoch": 0.23740582278116312, "grad_norm": 0.5585595965385437, "learning_rate": 1.7827822323324737e-05, "loss": 1.0399, "step": 16480 }, { "epoch": 0.23754987971246236, "grad_norm": 0.6253140568733215, "learning_rate": 1.7824918035276448e-05, "loss": 1.0329, "step": 16490 }, { "epoch": 0.23769393664376162, "grad_norm": 0.5357626676559448, "learning_rate": 1.7822012043851444e-05, "loss": 1.0336, "step": 16500 }, { "epoch": 0.23783799357506086, "grad_norm": 0.5444202423095703, "learning_rate": 1.7819104349682327e-05, "loss": 1.0453, "step": 16510 }, { "epoch": 0.23798205050636012, "grad_norm": 0.6564856767654419, "learning_rate": 1.781619495340205e-05, "loss": 1.0308, "step": 16520 }, { "epoch": 0.23812610743765936, "grad_norm": 0.615155041217804, "learning_rate": 1.7813283855643963e-05, "loss": 1.0289, "step": 16530 }, { "epoch": 0.23827016436895862, "grad_norm": 0.5427000522613525, "learning_rate": 1.7810371057041766e-05, "loss": 1.0366, "step": 16540 }, { "epoch": 0.23841422130025786, "grad_norm": 0.6768283843994141, "learning_rate": 1.780745655822953e-05, "loss": 1.0366, "step": 16550 }, { "epoch": 0.23855827823155712, "grad_norm": 0.6584059596061707, "learning_rate": 1.7804540359841705e-05, "loss": 1.0277, "step": 16560 }, { "epoch": 0.23870233516285635, "grad_norm": 0.5058512091636658, "learning_rate": 1.7801622462513107e-05, "loss": 1.0355, "step": 16570 }, { "epoch": 0.23884639209415562, "grad_norm": 0.5551502704620361, "learning_rate": 1.7798702866878924e-05, "loss": 1.039, "step": 16580 }, { "epoch": 0.23899044902545485, "grad_norm": 0.6150371432304382, "learning_rate": 1.779578157357471e-05, "loss": 1.0492, "step": 16590 }, { "epoch": 0.23913450595675412, "grad_norm": 0.658364474773407, "learning_rate": 1.7792858583236393e-05, "loss": 1.048, "step": 16600 }, { "epoch": 0.23927856288805335, "grad_norm": 0.5722893476486206, "learning_rate": 1.7789933896500262e-05, "loss": 1.0213, "step": 16610 }, { "epoch": 0.23942261981935262, "grad_norm": 0.6082572937011719, "learning_rate": 1.7787007514002984e-05, "loss": 1.0405, "step": 16620 }, { "epoch": 0.23956667675065185, "grad_norm": 0.5616563558578491, "learning_rate": 1.7784079436381593e-05, "loss": 1.0263, "step": 16630 }, { "epoch": 0.23971073368195112, "grad_norm": 0.5879483222961426, "learning_rate": 1.778114966427349e-05, "loss": 1.0273, "step": 16640 }, { "epoch": 0.23985479061325035, "grad_norm": 0.5526086091995239, "learning_rate": 1.7778218198316445e-05, "loss": 1.0605, "step": 16650 }, { "epoch": 0.23999884754454961, "grad_norm": 0.5538169741630554, "learning_rate": 1.77752850391486e-05, "loss": 1.0334, "step": 16660 }, { "epoch": 0.24014290447584885, "grad_norm": 0.6564728617668152, "learning_rate": 1.777235018740846e-05, "loss": 1.0355, "step": 16670 }, { "epoch": 0.24028696140714811, "grad_norm": 0.7152305245399475, "learning_rate": 1.7769413643734905e-05, "loss": 1.0464, "step": 16680 }, { "epoch": 0.24043101833844735, "grad_norm": 0.6362254619598389, "learning_rate": 1.776647540876718e-05, "loss": 1.0392, "step": 16690 }, { "epoch": 0.2405750752697466, "grad_norm": 0.6082077026367188, "learning_rate": 1.7763535483144895e-05, "loss": 1.0574, "step": 16700 }, { "epoch": 0.24071913220104585, "grad_norm": 0.7910304069519043, "learning_rate": 1.7760593867508036e-05, "loss": 1.0353, "step": 16710 }, { "epoch": 0.2408631891323451, "grad_norm": 0.7080479860305786, "learning_rate": 1.775765056249695e-05, "loss": 1.0257, "step": 16720 }, { "epoch": 0.24100724606364435, "grad_norm": 0.7581346035003662, "learning_rate": 1.775470556875235e-05, "loss": 1.0268, "step": 16730 }, { "epoch": 0.2411513029949436, "grad_norm": 0.6228922605514526, "learning_rate": 1.7751758886915324e-05, "loss": 1.0651, "step": 16740 }, { "epoch": 0.24129535992624285, "grad_norm": 0.6909641027450562, "learning_rate": 1.7748810517627325e-05, "loss": 1.0552, "step": 16750 }, { "epoch": 0.2414394168575421, "grad_norm": 0.5827440023422241, "learning_rate": 1.7745860461530173e-05, "loss": 1.0317, "step": 16760 }, { "epoch": 0.24158347378884135, "grad_norm": 0.6382594108581543, "learning_rate": 1.774290871926605e-05, "loss": 1.018, "step": 16770 }, { "epoch": 0.2417275307201406, "grad_norm": 0.6372701525688171, "learning_rate": 1.773995529147751e-05, "loss": 1.0662, "step": 16780 }, { "epoch": 0.24187158765143985, "grad_norm": 0.6797804832458496, "learning_rate": 1.7737000178807483e-05, "loss": 1.0404, "step": 16790 }, { "epoch": 0.2420156445827391, "grad_norm": 0.5957540273666382, "learning_rate": 1.7734043381899245e-05, "loss": 1.0415, "step": 16800 }, { "epoch": 0.24215970151403834, "grad_norm": 0.6584307551383972, "learning_rate": 1.7731084901396457e-05, "loss": 1.0577, "step": 16810 }, { "epoch": 0.2423037584453376, "grad_norm": 0.6933635473251343, "learning_rate": 1.7728124737943133e-05, "loss": 1.0501, "step": 16820 }, { "epoch": 0.24244781537663684, "grad_norm": 0.5589372515678406, "learning_rate": 1.7725162892183663e-05, "loss": 1.0187, "step": 16830 }, { "epoch": 0.2425918723079361, "grad_norm": 0.607906699180603, "learning_rate": 1.77221993647628e-05, "loss": 1.0586, "step": 16840 }, { "epoch": 0.24273592923923534, "grad_norm": 0.6130890250205994, "learning_rate": 1.7719234156325664e-05, "loss": 1.0521, "step": 16850 }, { "epoch": 0.2428799861705346, "grad_norm": 0.6340318918228149, "learning_rate": 1.771626726751774e-05, "loss": 1.0362, "step": 16860 }, { "epoch": 0.24302404310183384, "grad_norm": 0.6948375105857849, "learning_rate": 1.7713298698984876e-05, "loss": 1.0274, "step": 16870 }, { "epoch": 0.2431681000331331, "grad_norm": 0.6357523798942566, "learning_rate": 1.7710328451373292e-05, "loss": 1.0578, "step": 16880 }, { "epoch": 0.24331215696443234, "grad_norm": 0.6093547940254211, "learning_rate": 1.7707356525329565e-05, "loss": 1.0507, "step": 16890 }, { "epoch": 0.2434562138957316, "grad_norm": 0.5905465483665466, "learning_rate": 1.770438292150065e-05, "loss": 1.044, "step": 16900 }, { "epoch": 0.24360027082703084, "grad_norm": 0.47149771451950073, "learning_rate": 1.7701407640533852e-05, "loss": 1.0196, "step": 16910 }, { "epoch": 0.2437443277583301, "grad_norm": 0.5732711553573608, "learning_rate": 1.7698430683076854e-05, "loss": 1.0316, "step": 16920 }, { "epoch": 0.24388838468962934, "grad_norm": 0.5275248289108276, "learning_rate": 1.769545204977769e-05, "loss": 1.0371, "step": 16930 }, { "epoch": 0.2440324416209286, "grad_norm": 0.559514045715332, "learning_rate": 1.769247174128478e-05, "loss": 1.018, "step": 16940 }, { "epoch": 0.24417649855222784, "grad_norm": 0.6358952522277832, "learning_rate": 1.7689489758246887e-05, "loss": 1.0141, "step": 16950 }, { "epoch": 0.2443205554835271, "grad_norm": 0.6493760943412781, "learning_rate": 1.768650610131315e-05, "loss": 1.0541, "step": 16960 }, { "epoch": 0.24446461241482634, "grad_norm": 0.5447071194648743, "learning_rate": 1.768352077113307e-05, "loss": 1.0204, "step": 16970 }, { "epoch": 0.2446086693461256, "grad_norm": 0.6011931896209717, "learning_rate": 1.7680533768356517e-05, "loss": 1.0661, "step": 16980 }, { "epoch": 0.24475272627742484, "grad_norm": 0.558472752571106, "learning_rate": 1.7677545093633713e-05, "loss": 1.0344, "step": 16990 }, { "epoch": 0.2448967832087241, "grad_norm": 0.601445734500885, "learning_rate": 1.767455474761525e-05, "loss": 1.0485, "step": 17000 }, { "epoch": 0.24504084014002334, "grad_norm": 0.5469062328338623, "learning_rate": 1.767156273095209e-05, "loss": 1.0157, "step": 17010 }, { "epoch": 0.2451848970713226, "grad_norm": 0.6900678873062134, "learning_rate": 1.7668569044295558e-05, "loss": 1.0375, "step": 17020 }, { "epoch": 0.24532895400262184, "grad_norm": 0.5829229950904846, "learning_rate": 1.7665573688297328e-05, "loss": 1.018, "step": 17030 }, { "epoch": 0.2454730109339211, "grad_norm": 0.5610002279281616, "learning_rate": 1.7662576663609457e-05, "loss": 1.022, "step": 17040 }, { "epoch": 0.24561706786522033, "grad_norm": 0.564368724822998, "learning_rate": 1.7659577970884346e-05, "loss": 1.0282, "step": 17050 }, { "epoch": 0.2457611247965196, "grad_norm": 0.7293027639389038, "learning_rate": 1.765657761077478e-05, "loss": 1.0301, "step": 17060 }, { "epoch": 0.24590518172781883, "grad_norm": 0.7138099074363708, "learning_rate": 1.765357558393389e-05, "loss": 1.0271, "step": 17070 }, { "epoch": 0.2460492386591181, "grad_norm": 0.5354404449462891, "learning_rate": 1.7650571891015174e-05, "loss": 1.0383, "step": 17080 }, { "epoch": 0.24619329559041733, "grad_norm": 0.6015536785125732, "learning_rate": 1.7647566532672495e-05, "loss": 1.0432, "step": 17090 }, { "epoch": 0.2463373525217166, "grad_norm": 0.6709904670715332, "learning_rate": 1.7644559509560084e-05, "loss": 1.0375, "step": 17100 }, { "epoch": 0.24648140945301583, "grad_norm": 0.7079071998596191, "learning_rate": 1.7641550822332525e-05, "loss": 1.0468, "step": 17110 }, { "epoch": 0.2466254663843151, "grad_norm": 0.6117041707038879, "learning_rate": 1.7638540471644764e-05, "loss": 1.0446, "step": 17120 }, { "epoch": 0.24676952331561433, "grad_norm": 0.610614001750946, "learning_rate": 1.763552845815212e-05, "loss": 1.0107, "step": 17130 }, { "epoch": 0.24691358024691357, "grad_norm": 0.612248957157135, "learning_rate": 1.763251478251026e-05, "loss": 1.0223, "step": 17140 }, { "epoch": 0.24705763717821283, "grad_norm": 0.5730803608894348, "learning_rate": 1.7629499445375225e-05, "loss": 1.0188, "step": 17150 }, { "epoch": 0.24720169410951207, "grad_norm": 0.6203054189682007, "learning_rate": 1.7626482447403405e-05, "loss": 1.0188, "step": 17160 }, { "epoch": 0.24734575104081133, "grad_norm": 0.5931071043014526, "learning_rate": 1.762346378925157e-05, "loss": 1.0436, "step": 17170 }, { "epoch": 0.24748980797211056, "grad_norm": 0.535603404045105, "learning_rate": 1.7620443471576827e-05, "loss": 1.0411, "step": 17180 }, { "epoch": 0.24763386490340983, "grad_norm": 0.6503838896751404, "learning_rate": 1.761742149503667e-05, "loss": 1.0407, "step": 17190 }, { "epoch": 0.24777792183470906, "grad_norm": 0.762205183506012, "learning_rate": 1.761439786028893e-05, "loss": 1.0111, "step": 17200 }, { "epoch": 0.24792197876600833, "grad_norm": 0.7181898355484009, "learning_rate": 1.7611372567991823e-05, "loss": 1.0465, "step": 17210 }, { "epoch": 0.24806603569730756, "grad_norm": 0.4939252436161041, "learning_rate": 1.7608345618803903e-05, "loss": 1.0378, "step": 17220 }, { "epoch": 0.24821009262860683, "grad_norm": 0.4960174560546875, "learning_rate": 1.76053170133841e-05, "loss": 1.0362, "step": 17230 }, { "epoch": 0.24835414955990606, "grad_norm": 0.6288329362869263, "learning_rate": 1.7602286752391697e-05, "loss": 1.0444, "step": 17240 }, { "epoch": 0.24849820649120533, "grad_norm": 0.5479284524917603, "learning_rate": 1.759925483648634e-05, "loss": 1.0437, "step": 17250 }, { "epoch": 0.24864226342250456, "grad_norm": 0.5709022283554077, "learning_rate": 1.7596221266328043e-05, "loss": 1.0353, "step": 17260 }, { "epoch": 0.24878632035380382, "grad_norm": 0.6153503060340881, "learning_rate": 1.7593186042577163e-05, "loss": 1.0319, "step": 17270 }, { "epoch": 0.24893037728510306, "grad_norm": 0.6913089752197266, "learning_rate": 1.759014916589443e-05, "loss": 1.0377, "step": 17280 }, { "epoch": 0.24907443421640232, "grad_norm": 0.6153222322463989, "learning_rate": 1.758711063694093e-05, "loss": 1.0466, "step": 17290 }, { "epoch": 0.24921849114770156, "grad_norm": 0.51612389087677, "learning_rate": 1.7584070456378107e-05, "loss": 1.0568, "step": 17300 }, { "epoch": 0.24936254807900082, "grad_norm": 0.5424662828445435, "learning_rate": 1.758102862486777e-05, "loss": 1.0328, "step": 17310 }, { "epoch": 0.24950660501030006, "grad_norm": 0.5843436121940613, "learning_rate": 1.7577985143072077e-05, "loss": 1.0266, "step": 17320 }, { "epoch": 0.24965066194159932, "grad_norm": 0.5523232221603394, "learning_rate": 1.7574940011653563e-05, "loss": 1.0263, "step": 17330 }, { "epoch": 0.24979471887289856, "grad_norm": 0.6400233507156372, "learning_rate": 1.75718932312751e-05, "loss": 1.0569, "step": 17340 }, { "epoch": 0.24993877580419782, "grad_norm": 0.5132960677146912, "learning_rate": 1.756884480259994e-05, "loss": 1.0341, "step": 17350 }, { "epoch": 0.2500828327354971, "grad_norm": 0.763143002986908, "learning_rate": 1.7565794726291674e-05, "loss": 1.0489, "step": 17360 }, { "epoch": 0.2502268896667963, "grad_norm": 0.5453976392745972, "learning_rate": 1.756274300301427e-05, "loss": 1.0297, "step": 17370 }, { "epoch": 0.25037094659809556, "grad_norm": 0.5535269379615784, "learning_rate": 1.7559689633432038e-05, "loss": 1.0351, "step": 17380 }, { "epoch": 0.2505150035293948, "grad_norm": 0.6392086744308472, "learning_rate": 1.7556634618209663e-05, "loss": 1.0582, "step": 17390 }, { "epoch": 0.2506590604606941, "grad_norm": 0.675186276435852, "learning_rate": 1.7553577958012174e-05, "loss": 1.0405, "step": 17400 }, { "epoch": 0.2508031173919933, "grad_norm": 0.6513963937759399, "learning_rate": 1.7550519653504966e-05, "loss": 1.0504, "step": 17410 }, { "epoch": 0.25094717432329255, "grad_norm": 0.537735104560852, "learning_rate": 1.7547459705353787e-05, "loss": 1.0369, "step": 17420 }, { "epoch": 0.2510912312545918, "grad_norm": 0.6523123383522034, "learning_rate": 1.7544398114224746e-05, "loss": 1.0448, "step": 17430 }, { "epoch": 0.2512352881858911, "grad_norm": 0.5637397766113281, "learning_rate": 1.7541334880784314e-05, "loss": 1.0542, "step": 17440 }, { "epoch": 0.2513793451171903, "grad_norm": 0.5768483281135559, "learning_rate": 1.753827000569931e-05, "loss": 1.0274, "step": 17450 }, { "epoch": 0.25152340204848955, "grad_norm": 0.717302680015564, "learning_rate": 1.7535203489636915e-05, "loss": 1.0417, "step": 17460 }, { "epoch": 0.2516674589797888, "grad_norm": 0.6151435375213623, "learning_rate": 1.753213533326467e-05, "loss": 1.0323, "step": 17470 }, { "epoch": 0.2518115159110881, "grad_norm": 0.4801254868507385, "learning_rate": 1.752906553725047e-05, "loss": 1.0328, "step": 17480 }, { "epoch": 0.2519555728423873, "grad_norm": 0.5034201145172119, "learning_rate": 1.7525994102262564e-05, "loss": 1.0201, "step": 17490 }, { "epoch": 0.25209962977368655, "grad_norm": 0.6454114317893982, "learning_rate": 1.7522921028969562e-05, "loss": 1.056, "step": 17500 }, { "epoch": 0.2522436867049858, "grad_norm": 0.6408239006996155, "learning_rate": 1.7519846318040435e-05, "loss": 1.019, "step": 17510 }, { "epoch": 0.2523877436362851, "grad_norm": 0.5316129326820374, "learning_rate": 1.7516769970144497e-05, "loss": 1.0436, "step": 17520 }, { "epoch": 0.2525318005675843, "grad_norm": 0.575065016746521, "learning_rate": 1.751369198595143e-05, "loss": 1.0362, "step": 17530 }, { "epoch": 0.25267585749888355, "grad_norm": 0.7316829562187195, "learning_rate": 1.7510612366131273e-05, "loss": 1.0051, "step": 17540 }, { "epoch": 0.2528199144301828, "grad_norm": 0.7314066886901855, "learning_rate": 1.750753111135441e-05, "loss": 1.0318, "step": 17550 }, { "epoch": 0.2529639713614821, "grad_norm": 0.6269857287406921, "learning_rate": 1.750444822229159e-05, "loss": 1.0308, "step": 17560 }, { "epoch": 0.2531080282927813, "grad_norm": 0.7021530866622925, "learning_rate": 1.7501363699613917e-05, "loss": 1.0622, "step": 17570 }, { "epoch": 0.25325208522408055, "grad_norm": 0.5551626086235046, "learning_rate": 1.7498277543992848e-05, "loss": 1.0363, "step": 17580 }, { "epoch": 0.2533961421553798, "grad_norm": 0.5241049528121948, "learning_rate": 1.7495189756100198e-05, "loss": 1.0498, "step": 17590 }, { "epoch": 0.2535401990866791, "grad_norm": 0.6174381971359253, "learning_rate": 1.7492100336608133e-05, "loss": 1.0316, "step": 17600 }, { "epoch": 0.2536842560179783, "grad_norm": 0.49905723333358765, "learning_rate": 1.748900928618918e-05, "loss": 1.0334, "step": 17610 }, { "epoch": 0.25382831294927755, "grad_norm": 0.5309996008872986, "learning_rate": 1.7485916605516212e-05, "loss": 1.0198, "step": 17620 }, { "epoch": 0.2539723698805768, "grad_norm": 0.574708878993988, "learning_rate": 1.748282229526247e-05, "loss": 1.0464, "step": 17630 }, { "epoch": 0.2541164268118761, "grad_norm": 0.7914347648620605, "learning_rate": 1.7479726356101537e-05, "loss": 1.0485, "step": 17640 }, { "epoch": 0.2542604837431753, "grad_norm": 0.6053186655044556, "learning_rate": 1.7476628788707364e-05, "loss": 1.0203, "step": 17650 }, { "epoch": 0.25440454067447454, "grad_norm": 0.6396844387054443, "learning_rate": 1.747352959375424e-05, "loss": 1.0334, "step": 17660 }, { "epoch": 0.2545485976057738, "grad_norm": 0.5476585030555725, "learning_rate": 1.7470428771916827e-05, "loss": 1.0289, "step": 17670 }, { "epoch": 0.25469265453707307, "grad_norm": 0.5465442538261414, "learning_rate": 1.7467326323870117e-05, "loss": 1.02, "step": 17680 }, { "epoch": 0.2548367114683723, "grad_norm": 0.6411909461021423, "learning_rate": 1.7464222250289482e-05, "loss": 1.0334, "step": 17690 }, { "epoch": 0.25498076839967154, "grad_norm": 0.5101737380027771, "learning_rate": 1.7461116551850634e-05, "loss": 1.0424, "step": 17700 }, { "epoch": 0.2551248253309708, "grad_norm": 0.5736578106880188, "learning_rate": 1.7458009229229636e-05, "loss": 1.0282, "step": 17710 }, { "epoch": 0.25526888226227007, "grad_norm": 0.6061928868293762, "learning_rate": 1.745490028310291e-05, "loss": 1.059, "step": 17720 }, { "epoch": 0.2554129391935693, "grad_norm": 0.5113356113433838, "learning_rate": 1.7451789714147235e-05, "loss": 1.0301, "step": 17730 }, { "epoch": 0.25555699612486854, "grad_norm": 0.6834507584571838, "learning_rate": 1.7448677523039736e-05, "loss": 1.054, "step": 17740 }, { "epoch": 0.2557010530561678, "grad_norm": 0.5508944392204285, "learning_rate": 1.7445563710457897e-05, "loss": 1.0499, "step": 17750 }, { "epoch": 0.25584510998746707, "grad_norm": 0.5912610292434692, "learning_rate": 1.744244827707955e-05, "loss": 1.0546, "step": 17760 }, { "epoch": 0.2559891669187663, "grad_norm": 0.633089542388916, "learning_rate": 1.743933122358288e-05, "loss": 1.0337, "step": 17770 }, { "epoch": 0.25613322385006554, "grad_norm": 0.8170694708824158, "learning_rate": 1.743621255064643e-05, "loss": 1.0327, "step": 17780 }, { "epoch": 0.2562772807813648, "grad_norm": 0.5366178750991821, "learning_rate": 1.7433092258949087e-05, "loss": 1.0146, "step": 17790 }, { "epoch": 0.25642133771266407, "grad_norm": 0.5773735642433167, "learning_rate": 1.7429970349170104e-05, "loss": 1.036, "step": 17800 }, { "epoch": 0.2565653946439633, "grad_norm": 0.6646609902381897, "learning_rate": 1.7426846821989066e-05, "loss": 1.0362, "step": 17810 }, { "epoch": 0.25670945157526254, "grad_norm": 0.5626872181892395, "learning_rate": 1.7423721678085932e-05, "loss": 1.0139, "step": 17820 }, { "epoch": 0.2568535085065618, "grad_norm": 0.6005491614341736, "learning_rate": 1.7420594918140998e-05, "loss": 1.0261, "step": 17830 }, { "epoch": 0.25699756543786106, "grad_norm": 0.61122727394104, "learning_rate": 1.741746654283492e-05, "loss": 1.0213, "step": 17840 }, { "epoch": 0.25714162236916027, "grad_norm": 0.5532557964324951, "learning_rate": 1.7414336552848696e-05, "loss": 1.0215, "step": 17850 }, { "epoch": 0.25728567930045954, "grad_norm": 0.5664517879486084, "learning_rate": 1.7411204948863686e-05, "loss": 1.019, "step": 17860 }, { "epoch": 0.2574297362317588, "grad_norm": 0.6716731190681458, "learning_rate": 1.74080717315616e-05, "loss": 1.0369, "step": 17870 }, { "epoch": 0.25757379316305806, "grad_norm": 0.5846714377403259, "learning_rate": 1.7404936901624487e-05, "loss": 1.0284, "step": 17880 }, { "epoch": 0.25771785009435727, "grad_norm": 0.5675822496414185, "learning_rate": 1.7401800459734762e-05, "loss": 1.0316, "step": 17890 }, { "epoch": 0.25786190702565653, "grad_norm": 0.5803831815719604, "learning_rate": 1.739866240657519e-05, "loss": 1.0162, "step": 17900 }, { "epoch": 0.2580059639569558, "grad_norm": 0.5902814865112305, "learning_rate": 1.7395522742828874e-05, "loss": 1.0371, "step": 17910 }, { "epoch": 0.25815002088825506, "grad_norm": 0.5528362989425659, "learning_rate": 1.739238146917928e-05, "loss": 1.0391, "step": 17920 }, { "epoch": 0.25829407781955427, "grad_norm": 0.7447479963302612, "learning_rate": 1.7389238586310213e-05, "loss": 1.0342, "step": 17930 }, { "epoch": 0.25843813475085353, "grad_norm": 0.5475665330886841, "learning_rate": 1.7386094094905847e-05, "loss": 1.0363, "step": 17940 }, { "epoch": 0.2585821916821528, "grad_norm": 0.4762347638607025, "learning_rate": 1.738294799565068e-05, "loss": 1.0518, "step": 17950 }, { "epoch": 0.25872624861345206, "grad_norm": 0.5582030415534973, "learning_rate": 1.7379800289229587e-05, "loss": 1.0203, "step": 17960 }, { "epoch": 0.25887030554475127, "grad_norm": 0.6422776579856873, "learning_rate": 1.7376650976327772e-05, "loss": 1.0498, "step": 17970 }, { "epoch": 0.25901436247605053, "grad_norm": 0.575137197971344, "learning_rate": 1.7373500057630806e-05, "loss": 1.023, "step": 17980 }, { "epoch": 0.2591584194073498, "grad_norm": 0.5468251705169678, "learning_rate": 1.7370347533824592e-05, "loss": 1.0318, "step": 17990 }, { "epoch": 0.25930247633864906, "grad_norm": 0.569378137588501, "learning_rate": 1.7367193405595394e-05, "loss": 1.0156, "step": 18000 }, { "epoch": 0.25944653326994827, "grad_norm": 0.7267173528671265, "learning_rate": 1.7364037673629822e-05, "loss": 1.0593, "step": 18010 }, { "epoch": 0.25959059020124753, "grad_norm": 0.6949495673179626, "learning_rate": 1.7360880338614835e-05, "loss": 1.0443, "step": 18020 }, { "epoch": 0.2597346471325468, "grad_norm": 0.5617415904998779, "learning_rate": 1.7357721401237744e-05, "loss": 1.0242, "step": 18030 }, { "epoch": 0.25987870406384606, "grad_norm": 0.5180186033248901, "learning_rate": 1.73545608621862e-05, "loss": 1.0444, "step": 18040 }, { "epoch": 0.26002276099514526, "grad_norm": 0.5655116438865662, "learning_rate": 1.7351398722148214e-05, "loss": 1.0339, "step": 18050 }, { "epoch": 0.2601668179264445, "grad_norm": 0.5050772428512573, "learning_rate": 1.7348234981812143e-05, "loss": 1.0264, "step": 18060 }, { "epoch": 0.2603108748577438, "grad_norm": 0.6562170386314392, "learning_rate": 1.734506964186668e-05, "loss": 1.0326, "step": 18070 }, { "epoch": 0.26045493178904305, "grad_norm": 0.610063374042511, "learning_rate": 1.7341902703000883e-05, "loss": 1.0114, "step": 18080 }, { "epoch": 0.26059898872034226, "grad_norm": 0.6089850664138794, "learning_rate": 1.733873416590415e-05, "loss": 1.061, "step": 18090 }, { "epoch": 0.2607430456516415, "grad_norm": 0.6482865214347839, "learning_rate": 1.7335564031266224e-05, "loss": 1.0238, "step": 18100 }, { "epoch": 0.2608871025829408, "grad_norm": 0.5729652047157288, "learning_rate": 1.7332392299777206e-05, "loss": 1.0317, "step": 18110 }, { "epoch": 0.26103115951424005, "grad_norm": 0.5370912551879883, "learning_rate": 1.7329218972127532e-05, "loss": 1.0246, "step": 18120 }, { "epoch": 0.26117521644553926, "grad_norm": 0.5290127992630005, "learning_rate": 1.7326044049007995e-05, "loss": 1.0284, "step": 18130 }, { "epoch": 0.2613192733768385, "grad_norm": 0.653229296207428, "learning_rate": 1.7322867531109732e-05, "loss": 1.034, "step": 18140 }, { "epoch": 0.2614633303081378, "grad_norm": 0.5976172685623169, "learning_rate": 1.7319689419124228e-05, "loss": 1.0448, "step": 18150 }, { "epoch": 0.26160738723943705, "grad_norm": 0.5717659592628479, "learning_rate": 1.731650971374331e-05, "loss": 1.0596, "step": 18160 }, { "epoch": 0.26175144417073626, "grad_norm": 0.49149689078330994, "learning_rate": 1.731332841565916e-05, "loss": 1.0159, "step": 18170 }, { "epoch": 0.2618955011020355, "grad_norm": 0.6619843244552612, "learning_rate": 1.73101455255643e-05, "loss": 1.0159, "step": 18180 }, { "epoch": 0.2620395580333348, "grad_norm": 0.6329603791236877, "learning_rate": 1.7306961044151608e-05, "loss": 1.0431, "step": 18190 }, { "epoch": 0.26218361496463405, "grad_norm": 0.5910669565200806, "learning_rate": 1.730377497211429e-05, "loss": 1.0214, "step": 18200 }, { "epoch": 0.26232767189593326, "grad_norm": 0.5871611833572388, "learning_rate": 1.7300587310145917e-05, "loss": 1.0356, "step": 18210 }, { "epoch": 0.2624717288272325, "grad_norm": 0.5438082218170166, "learning_rate": 1.72973980589404e-05, "loss": 1.0273, "step": 18220 }, { "epoch": 0.2626157857585318, "grad_norm": 0.6762313842773438, "learning_rate": 1.7294207219191995e-05, "loss": 1.0485, "step": 18230 }, { "epoch": 0.26275984268983105, "grad_norm": 0.48789089918136597, "learning_rate": 1.72910147915953e-05, "loss": 1.0238, "step": 18240 }, { "epoch": 0.26290389962113025, "grad_norm": 0.5649069547653198, "learning_rate": 1.728782077684526e-05, "loss": 1.0525, "step": 18250 }, { "epoch": 0.2630479565524295, "grad_norm": 0.5777900815010071, "learning_rate": 1.7284625175637176e-05, "loss": 1.0468, "step": 18260 }, { "epoch": 0.2631920134837288, "grad_norm": 0.5941057801246643, "learning_rate": 1.7281427988666686e-05, "loss": 1.0515, "step": 18270 }, { "epoch": 0.26333607041502805, "grad_norm": 0.525132954120636, "learning_rate": 1.7278229216629767e-05, "loss": 1.0359, "step": 18280 }, { "epoch": 0.26348012734632725, "grad_norm": 0.6041716933250427, "learning_rate": 1.727502886022275e-05, "loss": 1.0122, "step": 18290 }, { "epoch": 0.2636241842776265, "grad_norm": 0.8156771063804626, "learning_rate": 1.7271826920142313e-05, "loss": 1.0439, "step": 18300 }, { "epoch": 0.2637682412089258, "grad_norm": 0.5602260231971741, "learning_rate": 1.7268623397085467e-05, "loss": 1.0141, "step": 18310 }, { "epoch": 0.26391229814022504, "grad_norm": 0.6049859523773193, "learning_rate": 1.7265418291749577e-05, "loss": 1.0439, "step": 18320 }, { "epoch": 0.26405635507152425, "grad_norm": 0.5453393459320068, "learning_rate": 1.7262211604832355e-05, "loss": 1.039, "step": 18330 }, { "epoch": 0.2642004120028235, "grad_norm": 0.5879477262496948, "learning_rate": 1.725900333703185e-05, "loss": 1.0509, "step": 18340 }, { "epoch": 0.2643444689341228, "grad_norm": 0.5193531513214111, "learning_rate": 1.7255793489046452e-05, "loss": 1.0346, "step": 18350 }, { "epoch": 0.26448852586542204, "grad_norm": 0.5647846460342407, "learning_rate": 1.7252582061574906e-05, "loss": 1.0114, "step": 18360 }, { "epoch": 0.26463258279672125, "grad_norm": 0.6013086438179016, "learning_rate": 1.72493690553163e-05, "loss": 1.0118, "step": 18370 }, { "epoch": 0.2647766397280205, "grad_norm": 0.5349829792976379, "learning_rate": 1.724615447097005e-05, "loss": 1.0332, "step": 18380 }, { "epoch": 0.2649206966593198, "grad_norm": 0.5422055125236511, "learning_rate": 1.724293830923593e-05, "loss": 1.0543, "step": 18390 }, { "epoch": 0.26506475359061904, "grad_norm": 0.4812210500240326, "learning_rate": 1.723972057081406e-05, "loss": 1.0321, "step": 18400 }, { "epoch": 0.26520881052191825, "grad_norm": 0.5967666506767273, "learning_rate": 1.72365012564049e-05, "loss": 1.0282, "step": 18410 }, { "epoch": 0.2653528674532175, "grad_norm": 0.652559757232666, "learning_rate": 1.723328036670923e-05, "loss": 1.0318, "step": 18420 }, { "epoch": 0.2654969243845168, "grad_norm": 0.5375730395317078, "learning_rate": 1.723005790242822e-05, "loss": 1.0408, "step": 18430 }, { "epoch": 0.26564098131581604, "grad_norm": 0.5464618802070618, "learning_rate": 1.7226833864263338e-05, "loss": 1.0216, "step": 18440 }, { "epoch": 0.26578503824711525, "grad_norm": 0.5174851417541504, "learning_rate": 1.722360825291642e-05, "loss": 1.0357, "step": 18450 }, { "epoch": 0.2659290951784145, "grad_norm": 0.5950974225997925, "learning_rate": 1.7220381069089635e-05, "loss": 1.0263, "step": 18460 }, { "epoch": 0.2660731521097138, "grad_norm": 0.6485792994499207, "learning_rate": 1.7217152313485495e-05, "loss": 1.0332, "step": 18470 }, { "epoch": 0.266217209041013, "grad_norm": 0.6197564005851746, "learning_rate": 1.7213921986806857e-05, "loss": 1.0375, "step": 18480 }, { "epoch": 0.26636126597231224, "grad_norm": 0.48383066058158875, "learning_rate": 1.7210690089756917e-05, "loss": 1.0362, "step": 18490 }, { "epoch": 0.2665053229036115, "grad_norm": 0.641230046749115, "learning_rate": 1.720745662303922e-05, "loss": 1.0518, "step": 18500 }, { "epoch": 0.26664937983491077, "grad_norm": 0.5075659155845642, "learning_rate": 1.7204221587357638e-05, "loss": 1.0454, "step": 18510 }, { "epoch": 0.26679343676621, "grad_norm": 0.5523113012313843, "learning_rate": 1.7200984983416402e-05, "loss": 1.027, "step": 18520 }, { "epoch": 0.26693749369750924, "grad_norm": 0.6356770992279053, "learning_rate": 1.7197746811920073e-05, "loss": 1.0468, "step": 18530 }, { "epoch": 0.2670815506288085, "grad_norm": 0.5757063627243042, "learning_rate": 1.7194507073573555e-05, "loss": 1.0389, "step": 18540 }, { "epoch": 0.26722560756010777, "grad_norm": 0.568065345287323, "learning_rate": 1.7191265769082098e-05, "loss": 1.0348, "step": 18550 }, { "epoch": 0.267369664491407, "grad_norm": 0.5348308682441711, "learning_rate": 1.7188022899151283e-05, "loss": 1.0255, "step": 18560 }, { "epoch": 0.26751372142270624, "grad_norm": 0.5122363567352295, "learning_rate": 1.7184778464487046e-05, "loss": 1.0179, "step": 18570 }, { "epoch": 0.2676577783540055, "grad_norm": 0.6201374530792236, "learning_rate": 1.7181532465795644e-05, "loss": 1.0208, "step": 18580 }, { "epoch": 0.26780183528530477, "grad_norm": 0.5712049603462219, "learning_rate": 1.7178284903783698e-05, "loss": 1.0393, "step": 18590 }, { "epoch": 0.267945892216604, "grad_norm": 0.7647679448127747, "learning_rate": 1.7175035779158157e-05, "loss": 1.0241, "step": 18600 }, { "epoch": 0.26808994914790324, "grad_norm": 0.7282312512397766, "learning_rate": 1.71717850926263e-05, "loss": 1.055, "step": 18610 }, { "epoch": 0.2682340060792025, "grad_norm": 0.677687406539917, "learning_rate": 1.7168532844895765e-05, "loss": 1.0393, "step": 18620 }, { "epoch": 0.26837806301050177, "grad_norm": 0.5776512622833252, "learning_rate": 1.7165279036674523e-05, "loss": 1.0328, "step": 18630 }, { "epoch": 0.268522119941801, "grad_norm": 0.6276368498802185, "learning_rate": 1.7162023668670874e-05, "loss": 1.0437, "step": 18640 }, { "epoch": 0.26866617687310024, "grad_norm": 0.547092616558075, "learning_rate": 1.715876674159348e-05, "loss": 1.0537, "step": 18650 }, { "epoch": 0.2688102338043995, "grad_norm": 0.5741291642189026, "learning_rate": 1.7155508256151315e-05, "loss": 1.0172, "step": 18660 }, { "epoch": 0.26895429073569876, "grad_norm": 0.5554395318031311, "learning_rate": 1.7152248213053716e-05, "loss": 1.0191, "step": 18670 }, { "epoch": 0.269098347666998, "grad_norm": 0.5335158705711365, "learning_rate": 1.7148986613010344e-05, "loss": 1.0162, "step": 18680 }, { "epoch": 0.26924240459829724, "grad_norm": 0.5677670240402222, "learning_rate": 1.7145723456731208e-05, "loss": 1.0108, "step": 18690 }, { "epoch": 0.2693864615295965, "grad_norm": 0.7178367376327515, "learning_rate": 1.714245874492665e-05, "loss": 1.0671, "step": 18700 }, { "epoch": 0.26953051846089576, "grad_norm": 0.5835092663764954, "learning_rate": 1.7139192478307354e-05, "loss": 1.0472, "step": 18710 }, { "epoch": 0.26967457539219497, "grad_norm": 0.47994673252105713, "learning_rate": 1.713592465758434e-05, "loss": 1.0238, "step": 18720 }, { "epoch": 0.26981863232349423, "grad_norm": 0.5740872621536255, "learning_rate": 1.713265528346897e-05, "loss": 1.0633, "step": 18730 }, { "epoch": 0.2699626892547935, "grad_norm": 0.601265549659729, "learning_rate": 1.712938435667294e-05, "loss": 1.0476, "step": 18740 }, { "epoch": 0.27010674618609276, "grad_norm": 0.6448707580566406, "learning_rate": 1.7126111877908283e-05, "loss": 1.0264, "step": 18750 }, { "epoch": 0.27025080311739197, "grad_norm": 0.6133461594581604, "learning_rate": 1.712283784788738e-05, "loss": 1.0142, "step": 18760 }, { "epoch": 0.27039486004869123, "grad_norm": 0.5708298087120056, "learning_rate": 1.711956226732293e-05, "loss": 1.0376, "step": 18770 }, { "epoch": 0.2705389169799905, "grad_norm": 0.5778443813323975, "learning_rate": 1.7116285136927992e-05, "loss": 1.0372, "step": 18780 }, { "epoch": 0.27068297391128976, "grad_norm": 0.495801717042923, "learning_rate": 1.7113006457415952e-05, "loss": 1.0356, "step": 18790 }, { "epoch": 0.27082703084258897, "grad_norm": 0.5203620791435242, "learning_rate": 1.710972622950053e-05, "loss": 1.0115, "step": 18800 }, { "epoch": 0.27097108777388823, "grad_norm": 0.5737788081169128, "learning_rate": 1.710644445389578e-05, "loss": 1.0738, "step": 18810 }, { "epoch": 0.2711151447051875, "grad_norm": 0.5559195280075073, "learning_rate": 1.710316113131611e-05, "loss": 1.0377, "step": 18820 }, { "epoch": 0.27125920163648676, "grad_norm": 0.4836956858634949, "learning_rate": 1.709987626247625e-05, "loss": 1.0487, "step": 18830 }, { "epoch": 0.27140325856778597, "grad_norm": 0.5687073469161987, "learning_rate": 1.709658984809127e-05, "loss": 1.034, "step": 18840 }, { "epoch": 0.27154731549908523, "grad_norm": 0.5611147880554199, "learning_rate": 1.709330188887658e-05, "loss": 1.026, "step": 18850 }, { "epoch": 0.2716913724303845, "grad_norm": 0.6767673492431641, "learning_rate": 1.7090012385547917e-05, "loss": 1.0136, "step": 18860 }, { "epoch": 0.27183542936168376, "grad_norm": 0.6080965399742126, "learning_rate": 1.7086721338821366e-05, "loss": 1.018, "step": 18870 }, { "epoch": 0.27197948629298296, "grad_norm": 0.6415413618087769, "learning_rate": 1.708342874941334e-05, "loss": 1.0508, "step": 18880 }, { "epoch": 0.2721235432242822, "grad_norm": 0.6209141612052917, "learning_rate": 1.7080134618040594e-05, "loss": 1.0544, "step": 18890 }, { "epoch": 0.2722676001555815, "grad_norm": 0.7484108209609985, "learning_rate": 1.7076838945420208e-05, "loss": 1.0484, "step": 18900 }, { "epoch": 0.27241165708688075, "grad_norm": 0.6004678010940552, "learning_rate": 1.7073541732269613e-05, "loss": 1.0143, "step": 18910 }, { "epoch": 0.27255571401817996, "grad_norm": 0.5813729763031006, "learning_rate": 1.707024297930656e-05, "loss": 1.0467, "step": 18920 }, { "epoch": 0.2726997709494792, "grad_norm": 0.5733342170715332, "learning_rate": 1.7066942687249146e-05, "loss": 1.0419, "step": 18930 }, { "epoch": 0.2728438278807785, "grad_norm": 0.5739596486091614, "learning_rate": 1.7063640856815795e-05, "loss": 1.0229, "step": 18940 }, { "epoch": 0.27298788481207775, "grad_norm": 0.5423083901405334, "learning_rate": 1.7060337488725276e-05, "loss": 1.0268, "step": 18950 }, { "epoch": 0.27313194174337696, "grad_norm": 0.6381399035453796, "learning_rate": 1.705703258369668e-05, "loss": 1.0172, "step": 18960 }, { "epoch": 0.2732759986746762, "grad_norm": 0.654782772064209, "learning_rate": 1.7053726142449444e-05, "loss": 1.0382, "step": 18970 }, { "epoch": 0.2734200556059755, "grad_norm": 0.5004866719245911, "learning_rate": 1.7050418165703332e-05, "loss": 1.0208, "step": 18980 }, { "epoch": 0.27356411253727475, "grad_norm": 0.6272166967391968, "learning_rate": 1.7047108654178446e-05, "loss": 1.0338, "step": 18990 }, { "epoch": 0.27370816946857396, "grad_norm": 0.5967186093330383, "learning_rate": 1.7043797608595222e-05, "loss": 1.0332, "step": 19000 }, { "epoch": 0.2738522263998732, "grad_norm": 0.5652865767478943, "learning_rate": 1.7040485029674427e-05, "loss": 1.0387, "step": 19010 }, { "epoch": 0.2739962833311725, "grad_norm": 0.49113377928733826, "learning_rate": 1.7037170918137163e-05, "loss": 1.0373, "step": 19020 }, { "epoch": 0.27414034026247175, "grad_norm": 0.6672168374061584, "learning_rate": 1.703385527470487e-05, "loss": 1.0214, "step": 19030 }, { "epoch": 0.27428439719377096, "grad_norm": 0.5516164302825928, "learning_rate": 1.7030538100099318e-05, "loss": 1.0452, "step": 19040 }, { "epoch": 0.2744284541250702, "grad_norm": 0.5690533518791199, "learning_rate": 1.7027219395042604e-05, "loss": 1.0477, "step": 19050 }, { "epoch": 0.2745725110563695, "grad_norm": 0.6833709478378296, "learning_rate": 1.702389916025717e-05, "loss": 1.0593, "step": 19060 }, { "epoch": 0.27471656798766875, "grad_norm": 0.5207808017730713, "learning_rate": 1.7020577396465783e-05, "loss": 1.0411, "step": 19070 }, { "epoch": 0.27486062491896796, "grad_norm": 0.5609144568443298, "learning_rate": 1.7017254104391544e-05, "loss": 1.0375, "step": 19080 }, { "epoch": 0.2750046818502672, "grad_norm": 0.5942318439483643, "learning_rate": 1.7013929284757894e-05, "loss": 1.0185, "step": 19090 }, { "epoch": 0.2751487387815665, "grad_norm": 0.5625672340393066, "learning_rate": 1.7010602938288592e-05, "loss": 1.0308, "step": 19100 }, { "epoch": 0.27529279571286575, "grad_norm": 0.503961443901062, "learning_rate": 1.7007275065707745e-05, "loss": 1.0325, "step": 19110 }, { "epoch": 0.27543685264416495, "grad_norm": 0.7923728227615356, "learning_rate": 1.7003945667739777e-05, "loss": 1.0231, "step": 19120 }, { "epoch": 0.2755809095754642, "grad_norm": 0.6111789345741272, "learning_rate": 1.7000614745109462e-05, "loss": 1.0258, "step": 19130 }, { "epoch": 0.2757249665067635, "grad_norm": 0.562232494354248, "learning_rate": 1.6997282298541887e-05, "loss": 1.0291, "step": 19140 }, { "epoch": 0.27586902343806274, "grad_norm": 0.7612364888191223, "learning_rate": 1.6993948328762484e-05, "loss": 1.0471, "step": 19150 }, { "epoch": 0.27601308036936195, "grad_norm": 0.5305697917938232, "learning_rate": 1.6990612836497012e-05, "loss": 1.0221, "step": 19160 }, { "epoch": 0.2761571373006612, "grad_norm": 0.5058618187904358, "learning_rate": 1.6987275822471557e-05, "loss": 1.0512, "step": 19170 }, { "epoch": 0.2763011942319605, "grad_norm": 0.5611950755119324, "learning_rate": 1.698393728741255e-05, "loss": 1.0357, "step": 19180 }, { "epoch": 0.27644525116325974, "grad_norm": 0.5557824373245239, "learning_rate": 1.698059723204674e-05, "loss": 1.056, "step": 19190 }, { "epoch": 0.27658930809455895, "grad_norm": 0.6539870500564575, "learning_rate": 1.6977255657101202e-05, "loss": 1.0623, "step": 19200 }, { "epoch": 0.2767333650258582, "grad_norm": 0.6483203172683716, "learning_rate": 1.6973912563303364e-05, "loss": 1.051, "step": 19210 }, { "epoch": 0.2768774219571575, "grad_norm": 0.6784367561340332, "learning_rate": 1.697056795138097e-05, "loss": 1.0378, "step": 19220 }, { "epoch": 0.27702147888845674, "grad_norm": 0.5995579957962036, "learning_rate": 1.6967221822062084e-05, "loss": 1.0218, "step": 19230 }, { "epoch": 0.27716553581975595, "grad_norm": 0.6947809457778931, "learning_rate": 1.6963874176075125e-05, "loss": 1.035, "step": 19240 }, { "epoch": 0.2773095927510552, "grad_norm": 0.5625107288360596, "learning_rate": 1.6960525014148825e-05, "loss": 1.0215, "step": 19250 }, { "epoch": 0.2774536496823545, "grad_norm": 0.6118045449256897, "learning_rate": 1.695717433701225e-05, "loss": 1.0293, "step": 19260 }, { "epoch": 0.27759770661365374, "grad_norm": 0.5792883634567261, "learning_rate": 1.6953822145394795e-05, "loss": 1.0274, "step": 19270 }, { "epoch": 0.27774176354495295, "grad_norm": 0.7543878555297852, "learning_rate": 1.6950468440026188e-05, "loss": 1.0373, "step": 19280 }, { "epoch": 0.2778858204762522, "grad_norm": 0.5332752466201782, "learning_rate": 1.694711322163648e-05, "loss": 1.0083, "step": 19290 }, { "epoch": 0.2780298774075515, "grad_norm": 0.5911184549331665, "learning_rate": 1.6943756490956063e-05, "loss": 1.0533, "step": 19300 }, { "epoch": 0.27817393433885074, "grad_norm": 0.49870455265045166, "learning_rate": 1.6940398248715642e-05, "loss": 1.0624, "step": 19310 }, { "epoch": 0.27831799127014994, "grad_norm": 0.5611423254013062, "learning_rate": 1.693703849564627e-05, "loss": 1.0509, "step": 19320 }, { "epoch": 0.2784620482014492, "grad_norm": 0.5123780965805054, "learning_rate": 1.6933677232479313e-05, "loss": 1.017, "step": 19330 }, { "epoch": 0.27860610513274847, "grad_norm": 0.5750264525413513, "learning_rate": 1.6930314459946474e-05, "loss": 1.0338, "step": 19340 }, { "epoch": 0.27875016206404774, "grad_norm": 0.6050014495849609, "learning_rate": 1.6926950178779777e-05, "loss": 1.0426, "step": 19350 }, { "epoch": 0.27889421899534694, "grad_norm": 0.5525252819061279, "learning_rate": 1.6923584389711587e-05, "loss": 1.061, "step": 19360 }, { "epoch": 0.2790382759266462, "grad_norm": 0.5604902505874634, "learning_rate": 1.6920217093474583e-05, "loss": 1.0249, "step": 19370 }, { "epoch": 0.27918233285794547, "grad_norm": 0.5230039954185486, "learning_rate": 1.6916848290801784e-05, "loss": 1.0, "step": 19380 }, { "epoch": 0.27932638978924473, "grad_norm": 0.612128734588623, "learning_rate": 1.691347798242653e-05, "loss": 1.0477, "step": 19390 }, { "epoch": 0.27947044672054394, "grad_norm": 0.6256338953971863, "learning_rate": 1.691010616908249e-05, "loss": 1.0532, "step": 19400 }, { "epoch": 0.2796145036518432, "grad_norm": 0.6843680739402771, "learning_rate": 1.6906732851503665e-05, "loss": 1.0241, "step": 19410 }, { "epoch": 0.27975856058314247, "grad_norm": 0.5366109609603882, "learning_rate": 1.6903358030424373e-05, "loss": 1.0332, "step": 19420 }, { "epoch": 0.27990261751444173, "grad_norm": 0.5890610814094543, "learning_rate": 1.689998170657927e-05, "loss": 1.0333, "step": 19430 }, { "epoch": 0.28004667444574094, "grad_norm": 0.6060905456542969, "learning_rate": 1.689660388070334e-05, "loss": 1.017, "step": 19440 }, { "epoch": 0.2801907313770402, "grad_norm": 0.6831796765327454, "learning_rate": 1.6893224553531876e-05, "loss": 1.0284, "step": 19450 }, { "epoch": 0.28033478830833947, "grad_norm": 0.5462735295295715, "learning_rate": 1.6889843725800523e-05, "loss": 1.0192, "step": 19460 }, { "epoch": 0.28047884523963873, "grad_norm": 0.5883505940437317, "learning_rate": 1.6886461398245234e-05, "loss": 1.0397, "step": 19470 }, { "epoch": 0.28062290217093794, "grad_norm": 0.5593507885932922, "learning_rate": 1.6883077571602303e-05, "loss": 1.0486, "step": 19480 }, { "epoch": 0.2807669591022372, "grad_norm": 0.5353315472602844, "learning_rate": 1.6879692246608332e-05, "loss": 1.0466, "step": 19490 }, { "epoch": 0.28091101603353646, "grad_norm": 0.5512276291847229, "learning_rate": 1.687630542400026e-05, "loss": 1.0276, "step": 19500 }, { "epoch": 0.28105507296483573, "grad_norm": 0.5370855927467346, "learning_rate": 1.6872917104515364e-05, "loss": 1.0362, "step": 19510 }, { "epoch": 0.28119912989613494, "grad_norm": 0.6030184030532837, "learning_rate": 1.6869527288891224e-05, "loss": 1.0432, "step": 19520 }, { "epoch": 0.2813431868274342, "grad_norm": 0.5397059917449951, "learning_rate": 1.6866135977865758e-05, "loss": 1.0351, "step": 19530 }, { "epoch": 0.28148724375873346, "grad_norm": 0.641499400138855, "learning_rate": 1.686274317217721e-05, "loss": 1.0503, "step": 19540 }, { "epoch": 0.2816313006900327, "grad_norm": 0.5896559953689575, "learning_rate": 1.6859348872564137e-05, "loss": 1.0373, "step": 19550 }, { "epoch": 0.28177535762133193, "grad_norm": 0.5953365564346313, "learning_rate": 1.685595307976545e-05, "loss": 1.0426, "step": 19560 }, { "epoch": 0.2819194145526312, "grad_norm": 0.5723481178283691, "learning_rate": 1.6852555794520346e-05, "loss": 1.0368, "step": 19570 }, { "epoch": 0.28206347148393046, "grad_norm": 0.5377451181411743, "learning_rate": 1.6849157017568382e-05, "loss": 1.0357, "step": 19580 }, { "epoch": 0.2822075284152297, "grad_norm": 0.5293382406234741, "learning_rate": 1.684575674964942e-05, "loss": 1.0455, "step": 19590 }, { "epoch": 0.28235158534652893, "grad_norm": 0.7441731095314026, "learning_rate": 1.684235499150365e-05, "loss": 1.0389, "step": 19600 }, { "epoch": 0.2824956422778282, "grad_norm": 0.49708226323127747, "learning_rate": 1.6838951743871587e-05, "loss": 1.0322, "step": 19610 }, { "epoch": 0.28263969920912746, "grad_norm": 0.4691644608974457, "learning_rate": 1.6835547007494073e-05, "loss": 1.0408, "step": 19620 }, { "epoch": 0.2827837561404267, "grad_norm": 0.6220194697380066, "learning_rate": 1.6832140783112274e-05, "loss": 1.0341, "step": 19630 }, { "epoch": 0.28292781307172593, "grad_norm": 0.6367720365524292, "learning_rate": 1.6828733071467674e-05, "loss": 1.019, "step": 19640 }, { "epoch": 0.2830718700030252, "grad_norm": 0.5318676233291626, "learning_rate": 1.6825323873302088e-05, "loss": 1.0198, "step": 19650 }, { "epoch": 0.28321592693432446, "grad_norm": 0.5065328478813171, "learning_rate": 1.682191318935765e-05, "loss": 1.0715, "step": 19660 }, { "epoch": 0.2833599838656237, "grad_norm": 0.5749115347862244, "learning_rate": 1.681850102037682e-05, "loss": 1.0481, "step": 19670 }, { "epoch": 0.28350404079692293, "grad_norm": 0.5503406524658203, "learning_rate": 1.6815087367102374e-05, "loss": 1.0602, "step": 19680 }, { "epoch": 0.2836480977282222, "grad_norm": 0.604962944984436, "learning_rate": 1.6811672230277423e-05, "loss": 1.0402, "step": 19690 }, { "epoch": 0.28379215465952146, "grad_norm": 0.5853504538536072, "learning_rate": 1.6808255610645405e-05, "loss": 1.0404, "step": 19700 }, { "epoch": 0.2839362115908207, "grad_norm": 0.5229265093803406, "learning_rate": 1.680483750895005e-05, "loss": 1.0282, "step": 19710 }, { "epoch": 0.2840802685221199, "grad_norm": 0.5563735365867615, "learning_rate": 1.6801417925935443e-05, "loss": 1.03, "step": 19720 }, { "epoch": 0.2842243254534192, "grad_norm": 0.5629031658172607, "learning_rate": 1.679799686234598e-05, "loss": 1.0443, "step": 19730 }, { "epoch": 0.28436838238471845, "grad_norm": 0.5461305379867554, "learning_rate": 1.6794574318926386e-05, "loss": 1.0291, "step": 19740 }, { "epoch": 0.2845124393160177, "grad_norm": 0.5271806120872498, "learning_rate": 1.6791150296421683e-05, "loss": 1.0134, "step": 19750 }, { "epoch": 0.2846564962473169, "grad_norm": 0.49659666419029236, "learning_rate": 1.6787724795577252e-05, "loss": 1.0211, "step": 19760 }, { "epoch": 0.2848005531786162, "grad_norm": 0.5652765035629272, "learning_rate": 1.6784297817138766e-05, "loss": 1.03, "step": 19770 }, { "epoch": 0.28494461010991545, "grad_norm": 0.541502058506012, "learning_rate": 1.6780869361852234e-05, "loss": 1.0104, "step": 19780 }, { "epoch": 0.28508866704121466, "grad_norm": 0.5626928806304932, "learning_rate": 1.6777439430463987e-05, "loss": 1.018, "step": 19790 }, { "epoch": 0.2852327239725139, "grad_norm": 0.5848968625068665, "learning_rate": 1.6774008023720667e-05, "loss": 1.0196, "step": 19800 }, { "epoch": 0.2853767809038132, "grad_norm": 0.5680802464485168, "learning_rate": 1.6770575142369254e-05, "loss": 1.0435, "step": 19810 }, { "epoch": 0.28552083783511245, "grad_norm": 0.4836186468601227, "learning_rate": 1.6767140787157025e-05, "loss": 1.0315, "step": 19820 }, { "epoch": 0.28566489476641166, "grad_norm": 0.5267698764801025, "learning_rate": 1.6763704958831604e-05, "loss": 1.0446, "step": 19830 }, { "epoch": 0.2858089516977109, "grad_norm": 0.6138490438461304, "learning_rate": 1.676026765814092e-05, "loss": 1.0096, "step": 19840 }, { "epoch": 0.2859530086290102, "grad_norm": 0.6735447645187378, "learning_rate": 1.6756828885833224e-05, "loss": 1.0333, "step": 19850 }, { "epoch": 0.28609706556030945, "grad_norm": 0.5271909236907959, "learning_rate": 1.6753388642657085e-05, "loss": 1.0198, "step": 19860 }, { "epoch": 0.28624112249160866, "grad_norm": 0.6136609315872192, "learning_rate": 1.6749946929361405e-05, "loss": 1.0168, "step": 19870 }, { "epoch": 0.2863851794229079, "grad_norm": 0.5632956027984619, "learning_rate": 1.6746503746695397e-05, "loss": 1.0367, "step": 19880 }, { "epoch": 0.2865292363542072, "grad_norm": 0.6717610359191895, "learning_rate": 1.674305909540859e-05, "loss": 1.0523, "step": 19890 }, { "epoch": 0.28667329328550645, "grad_norm": 0.6363637447357178, "learning_rate": 1.6739612976250836e-05, "loss": 1.0364, "step": 19900 }, { "epoch": 0.28681735021680566, "grad_norm": 0.5844237804412842, "learning_rate": 1.673616538997231e-05, "loss": 1.0168, "step": 19910 }, { "epoch": 0.2869614071481049, "grad_norm": 0.6943037509918213, "learning_rate": 1.6732716337323506e-05, "loss": 1.0192, "step": 19920 }, { "epoch": 0.2871054640794042, "grad_norm": 0.5689255595207214, "learning_rate": 1.6729265819055232e-05, "loss": 1.0391, "step": 19930 }, { "epoch": 0.28724952101070345, "grad_norm": 0.5841550827026367, "learning_rate": 1.6725813835918622e-05, "loss": 1.0263, "step": 19940 }, { "epoch": 0.28739357794200265, "grad_norm": 0.5294755101203918, "learning_rate": 1.6722360388665118e-05, "loss": 1.025, "step": 19950 }, { "epoch": 0.2875376348733019, "grad_norm": 0.5748166441917419, "learning_rate": 1.6718905478046497e-05, "loss": 1.0463, "step": 19960 }, { "epoch": 0.2876816918046012, "grad_norm": 0.5007639527320862, "learning_rate": 1.6715449104814842e-05, "loss": 1.0509, "step": 19970 }, { "epoch": 0.28782574873590044, "grad_norm": 0.6602321267127991, "learning_rate": 1.6711991269722552e-05, "loss": 1.0537, "step": 19980 }, { "epoch": 0.28796980566719965, "grad_norm": 0.5422115921974182, "learning_rate": 1.6708531973522355e-05, "loss": 1.0147, "step": 19990 }, { "epoch": 0.2881138625984989, "grad_norm": 0.6622305512428284, "learning_rate": 1.6705071216967294e-05, "loss": 1.0313, "step": 20000 }, { "epoch": 0.2882579195297982, "grad_norm": 0.658025324344635, "learning_rate": 1.670160900081072e-05, "loss": 1.0425, "step": 20010 }, { "epoch": 0.28840197646109744, "grad_norm": 0.5682501196861267, "learning_rate": 1.6698145325806322e-05, "loss": 1.0084, "step": 20020 }, { "epoch": 0.28854603339239665, "grad_norm": 0.5901638269424438, "learning_rate": 1.6694680192708083e-05, "loss": 1.0426, "step": 20030 }, { "epoch": 0.2886900903236959, "grad_norm": 0.49289217591285706, "learning_rate": 1.6691213602270325e-05, "loss": 1.0386, "step": 20040 }, { "epoch": 0.2888341472549952, "grad_norm": 0.6020851135253906, "learning_rate": 1.6687745555247664e-05, "loss": 1.0558, "step": 20050 }, { "epoch": 0.28897820418629444, "grad_norm": 0.5392109155654907, "learning_rate": 1.6684276052395056e-05, "loss": 1.001, "step": 20060 }, { "epoch": 0.28912226111759365, "grad_norm": 0.6716406941413879, "learning_rate": 1.6680805094467763e-05, "loss": 1.037, "step": 20070 }, { "epoch": 0.2892663180488929, "grad_norm": 0.6591319441795349, "learning_rate": 1.667733268222136e-05, "loss": 1.0455, "step": 20080 }, { "epoch": 0.2894103749801922, "grad_norm": 0.5621931552886963, "learning_rate": 1.667385881641175e-05, "loss": 1.0574, "step": 20090 }, { "epoch": 0.28955443191149144, "grad_norm": 0.5402848720550537, "learning_rate": 1.6670383497795136e-05, "loss": 1.0321, "step": 20100 }, { "epoch": 0.28969848884279065, "grad_norm": 0.6561526656150818, "learning_rate": 1.6666906727128055e-05, "loss": 1.0498, "step": 20110 }, { "epoch": 0.2898425457740899, "grad_norm": 0.5861624479293823, "learning_rate": 1.6663428505167353e-05, "loss": 1.057, "step": 20120 }, { "epoch": 0.2899866027053892, "grad_norm": 0.5896426439285278, "learning_rate": 1.6659948832670183e-05, "loss": 1.0493, "step": 20130 }, { "epoch": 0.29013065963668844, "grad_norm": 0.5974220633506775, "learning_rate": 1.665646771039403e-05, "loss": 1.0337, "step": 20140 }, { "epoch": 0.29027471656798765, "grad_norm": 0.6230769157409668, "learning_rate": 1.6652985139096678e-05, "loss": 1.0166, "step": 20150 }, { "epoch": 0.2904187734992869, "grad_norm": 0.6564093828201294, "learning_rate": 1.664950111953624e-05, "loss": 1.0296, "step": 20160 }, { "epoch": 0.29056283043058617, "grad_norm": 0.46283620595932007, "learning_rate": 1.664601565247114e-05, "loss": 1.0271, "step": 20170 }, { "epoch": 0.29070688736188544, "grad_norm": 0.521889328956604, "learning_rate": 1.664252873866011e-05, "loss": 1.0379, "step": 20180 }, { "epoch": 0.29085094429318464, "grad_norm": 0.6000293493270874, "learning_rate": 1.6639040378862215e-05, "loss": 1.0343, "step": 20190 }, { "epoch": 0.2909950012244839, "grad_norm": 0.5207245945930481, "learning_rate": 1.6635550573836806e-05, "loss": 1.033, "step": 20200 }, { "epoch": 0.29113905815578317, "grad_norm": 0.5225106477737427, "learning_rate": 1.6632059324343583e-05, "loss": 1.0286, "step": 20210 }, { "epoch": 0.29128311508708243, "grad_norm": 0.5262543559074402, "learning_rate": 1.662856663114253e-05, "loss": 1.0267, "step": 20220 }, { "epoch": 0.29142717201838164, "grad_norm": 0.5357270836830139, "learning_rate": 1.6625072494993967e-05, "loss": 1.0265, "step": 20230 }, { "epoch": 0.2915712289496809, "grad_norm": 0.5790686011314392, "learning_rate": 1.6621576916658508e-05, "loss": 1.0194, "step": 20240 }, { "epoch": 0.29171528588098017, "grad_norm": 0.5545904636383057, "learning_rate": 1.6618079896897105e-05, "loss": 1.0377, "step": 20250 }, { "epoch": 0.29185934281227943, "grad_norm": 0.520649254322052, "learning_rate": 1.6614581436471e-05, "loss": 1.0359, "step": 20260 }, { "epoch": 0.29200339974357864, "grad_norm": 0.5825948119163513, "learning_rate": 1.6611081536141768e-05, "loss": 1.0492, "step": 20270 }, { "epoch": 0.2921474566748779, "grad_norm": 0.597335696220398, "learning_rate": 1.660758019667128e-05, "loss": 1.0203, "step": 20280 }, { "epoch": 0.29229151360617717, "grad_norm": 0.6152435541152954, "learning_rate": 1.6604077418821742e-05, "loss": 1.0335, "step": 20290 }, { "epoch": 0.29243557053747643, "grad_norm": 0.5295214653015137, "learning_rate": 1.660057320335565e-05, "loss": 1.0415, "step": 20300 }, { "epoch": 0.29257962746877564, "grad_norm": 0.5754656195640564, "learning_rate": 1.6597067551035827e-05, "loss": 1.0281, "step": 20310 }, { "epoch": 0.2927236844000749, "grad_norm": 0.6352607607841492, "learning_rate": 1.6593560462625405e-05, "loss": 1.0496, "step": 20320 }, { "epoch": 0.29286774133137417, "grad_norm": 0.6430121064186096, "learning_rate": 1.6590051938887824e-05, "loss": 1.045, "step": 20330 }, { "epoch": 0.29301179826267343, "grad_norm": 0.7143761515617371, "learning_rate": 1.658654198058685e-05, "loss": 1.0392, "step": 20340 }, { "epoch": 0.29315585519397264, "grad_norm": 0.5576317310333252, "learning_rate": 1.6583030588486547e-05, "loss": 1.0267, "step": 20350 }, { "epoch": 0.2932999121252719, "grad_norm": 0.7117175459861755, "learning_rate": 1.6579517763351302e-05, "loss": 1.035, "step": 20360 }, { "epoch": 0.29344396905657116, "grad_norm": 0.5505491495132446, "learning_rate": 1.65760035059458e-05, "loss": 1.0217, "step": 20370 }, { "epoch": 0.2935880259878704, "grad_norm": 0.5846567153930664, "learning_rate": 1.6572487817035054e-05, "loss": 1.0179, "step": 20380 }, { "epoch": 0.29373208291916963, "grad_norm": 0.6030494570732117, "learning_rate": 1.656897069738437e-05, "loss": 1.05, "step": 20390 }, { "epoch": 0.2938761398504689, "grad_norm": 0.5789952874183655, "learning_rate": 1.6565452147759393e-05, "loss": 1.0117, "step": 20400 }, { "epoch": 0.29402019678176816, "grad_norm": 0.5466416478157043, "learning_rate": 1.6561932168926053e-05, "loss": 1.032, "step": 20410 }, { "epoch": 0.2941642537130674, "grad_norm": 0.635989248752594, "learning_rate": 1.6558410761650598e-05, "loss": 1.0556, "step": 20420 }, { "epoch": 0.29430831064436663, "grad_norm": 0.5572335720062256, "learning_rate": 1.6554887926699595e-05, "loss": 1.0115, "step": 20430 }, { "epoch": 0.2944523675756659, "grad_norm": 0.5357279777526855, "learning_rate": 1.6551363664839915e-05, "loss": 1.019, "step": 20440 }, { "epoch": 0.29459642450696516, "grad_norm": 0.5445215702056885, "learning_rate": 1.654783797683874e-05, "loss": 0.9884, "step": 20450 }, { "epoch": 0.2947404814382644, "grad_norm": 0.5228425860404968, "learning_rate": 1.6544310863463568e-05, "loss": 1.0215, "step": 20460 }, { "epoch": 0.29488453836956363, "grad_norm": 0.5866361260414124, "learning_rate": 1.6540782325482195e-05, "loss": 1.0442, "step": 20470 }, { "epoch": 0.2950285953008629, "grad_norm": 0.5385952591896057, "learning_rate": 1.6537252363662744e-05, "loss": 1.0343, "step": 20480 }, { "epoch": 0.29517265223216216, "grad_norm": 0.5235530138015747, "learning_rate": 1.6533720978773634e-05, "loss": 1.0344, "step": 20490 }, { "epoch": 0.2953167091634614, "grad_norm": 0.6012077927589417, "learning_rate": 1.65301881715836e-05, "loss": 1.0438, "step": 20500 }, { "epoch": 0.29546076609476063, "grad_norm": 0.58733731508255, "learning_rate": 1.6526653942861685e-05, "loss": 1.0227, "step": 20510 }, { "epoch": 0.2956048230260599, "grad_norm": 0.662129282951355, "learning_rate": 1.652311829337724e-05, "loss": 1.0133, "step": 20520 }, { "epoch": 0.29574887995735916, "grad_norm": 0.6669366955757141, "learning_rate": 1.651958122389993e-05, "loss": 1.0378, "step": 20530 }, { "epoch": 0.2958929368886584, "grad_norm": 0.638819694519043, "learning_rate": 1.651604273519973e-05, "loss": 1.0532, "step": 20540 }, { "epoch": 0.29603699381995763, "grad_norm": 0.5322790145874023, "learning_rate": 1.651250282804691e-05, "loss": 1.0466, "step": 20550 }, { "epoch": 0.2961810507512569, "grad_norm": 0.569996178150177, "learning_rate": 1.6508961503212063e-05, "loss": 1.0276, "step": 20560 }, { "epoch": 0.29632510768255615, "grad_norm": 0.6113317608833313, "learning_rate": 1.650541876146609e-05, "loss": 1.019, "step": 20570 }, { "epoch": 0.2964691646138554, "grad_norm": 0.5505111217498779, "learning_rate": 1.6501874603580195e-05, "loss": 1.0411, "step": 20580 }, { "epoch": 0.2966132215451546, "grad_norm": 0.5778427124023438, "learning_rate": 1.6498329030325894e-05, "loss": 1.0259, "step": 20590 }, { "epoch": 0.2967572784764539, "grad_norm": 0.5522878170013428, "learning_rate": 1.6494782042475007e-05, "loss": 1.0457, "step": 20600 }, { "epoch": 0.29690133540775315, "grad_norm": 0.6135400533676147, "learning_rate": 1.649123364079966e-05, "loss": 1.039, "step": 20610 }, { "epoch": 0.2970453923390524, "grad_norm": 0.623113214969635, "learning_rate": 1.6487683826072302e-05, "loss": 1.0324, "step": 20620 }, { "epoch": 0.2971894492703516, "grad_norm": 0.7084492444992065, "learning_rate": 1.6484132599065673e-05, "loss": 1.0225, "step": 20630 }, { "epoch": 0.2973335062016509, "grad_norm": 0.6530845165252686, "learning_rate": 1.648057996055282e-05, "loss": 1.02, "step": 20640 }, { "epoch": 0.29747756313295015, "grad_norm": 0.6086758375167847, "learning_rate": 1.6477025911307115e-05, "loss": 1.0203, "step": 20650 }, { "epoch": 0.2976216200642494, "grad_norm": 0.6114258766174316, "learning_rate": 1.6473470452102218e-05, "loss": 1.0293, "step": 20660 }, { "epoch": 0.2977656769955486, "grad_norm": 0.5102829933166504, "learning_rate": 1.6469913583712103e-05, "loss": 1.0324, "step": 20670 }, { "epoch": 0.2979097339268479, "grad_norm": 0.5642375349998474, "learning_rate": 1.6466355306911054e-05, "loss": 1.0168, "step": 20680 }, { "epoch": 0.29805379085814715, "grad_norm": 0.5559593439102173, "learning_rate": 1.6462795622473658e-05, "loss": 1.0528, "step": 20690 }, { "epoch": 0.2981978477894464, "grad_norm": 0.5291323065757751, "learning_rate": 1.645923453117481e-05, "loss": 1.0136, "step": 20700 }, { "epoch": 0.2983419047207456, "grad_norm": 0.5465613007545471, "learning_rate": 1.6455672033789712e-05, "loss": 1.0422, "step": 20710 }, { "epoch": 0.2984859616520449, "grad_norm": 0.5516693592071533, "learning_rate": 1.6452108131093866e-05, "loss": 1.0241, "step": 20720 }, { "epoch": 0.29863001858334415, "grad_norm": 0.652644157409668, "learning_rate": 1.6448542823863088e-05, "loss": 1.041, "step": 20730 }, { "epoch": 0.2987740755146434, "grad_norm": 0.7366124987602234, "learning_rate": 1.6444976112873493e-05, "loss": 1.0127, "step": 20740 }, { "epoch": 0.2989181324459426, "grad_norm": 0.49661996960639954, "learning_rate": 1.644140799890151e-05, "loss": 1.0279, "step": 20750 }, { "epoch": 0.2990621893772419, "grad_norm": 0.5000258088111877, "learning_rate": 1.643783848272386e-05, "loss": 1.0221, "step": 20760 }, { "epoch": 0.29920624630854115, "grad_norm": 0.5386760234832764, "learning_rate": 1.6434267565117586e-05, "loss": 1.0341, "step": 20770 }, { "epoch": 0.2993503032398404, "grad_norm": 0.5642584562301636, "learning_rate": 1.6430695246860026e-05, "loss": 1.0624, "step": 20780 }, { "epoch": 0.2994943601711396, "grad_norm": 0.5452033877372742, "learning_rate": 1.6427121528728816e-05, "loss": 1.0381, "step": 20790 }, { "epoch": 0.2996384171024389, "grad_norm": 0.5538674592971802, "learning_rate": 1.6423546411501916e-05, "loss": 1.0195, "step": 20800 }, { "epoch": 0.29978247403373814, "grad_norm": 0.6314508318901062, "learning_rate": 1.6419969895957574e-05, "loss": 1.0404, "step": 20810 }, { "epoch": 0.2999265309650374, "grad_norm": 0.5990069508552551, "learning_rate": 1.6416391982874348e-05, "loss": 1.0206, "step": 20820 }, { "epoch": 0.3000705878963366, "grad_norm": 0.5225420594215393, "learning_rate": 1.6412812673031102e-05, "loss": 1.0394, "step": 20830 }, { "epoch": 0.3002146448276359, "grad_norm": 0.5708862543106079, "learning_rate": 1.6409231967207004e-05, "loss": 1.0171, "step": 20840 }, { "epoch": 0.30035870175893514, "grad_norm": 0.6064662337303162, "learning_rate": 1.6405649866181518e-05, "loss": 1.0127, "step": 20850 }, { "epoch": 0.3005027586902344, "grad_norm": 0.5953640341758728, "learning_rate": 1.6402066370734426e-05, "loss": 1.0358, "step": 20860 }, { "epoch": 0.3006468156215336, "grad_norm": 0.603715181350708, "learning_rate": 1.63984814816458e-05, "loss": 1.0274, "step": 20870 }, { "epoch": 0.3007908725528329, "grad_norm": 0.615307092666626, "learning_rate": 1.639489519969602e-05, "loss": 1.0424, "step": 20880 }, { "epoch": 0.30093492948413214, "grad_norm": 0.5497210025787354, "learning_rate": 1.6391307525665777e-05, "loss": 1.0445, "step": 20890 }, { "epoch": 0.3010789864154314, "grad_norm": 0.5842812657356262, "learning_rate": 1.6387718460336052e-05, "loss": 1.0537, "step": 20900 }, { "epoch": 0.3012230433467306, "grad_norm": 0.4811582565307617, "learning_rate": 1.6384128004488137e-05, "loss": 1.0189, "step": 20910 }, { "epoch": 0.3013671002780299, "grad_norm": 0.6424331665039062, "learning_rate": 1.6380536158903627e-05, "loss": 1.0306, "step": 20920 }, { "epoch": 0.30151115720932914, "grad_norm": 0.6306112408638, "learning_rate": 1.637694292436441e-05, "loss": 1.0369, "step": 20930 }, { "epoch": 0.3016552141406284, "grad_norm": 0.5689916610717773, "learning_rate": 1.637334830165269e-05, "loss": 1.0384, "step": 20940 }, { "epoch": 0.3017992710719276, "grad_norm": 0.5546683669090271, "learning_rate": 1.6369752291550965e-05, "loss": 1.0277, "step": 20950 }, { "epoch": 0.3019433280032269, "grad_norm": 0.559262216091156, "learning_rate": 1.6366154894842038e-05, "loss": 1.0368, "step": 20960 }, { "epoch": 0.30208738493452614, "grad_norm": 0.5267927050590515, "learning_rate": 1.6362556112309008e-05, "loss": 1.0213, "step": 20970 }, { "epoch": 0.3022314418658254, "grad_norm": 0.560387909412384, "learning_rate": 1.635895594473529e-05, "loss": 1.0386, "step": 20980 }, { "epoch": 0.3023754987971246, "grad_norm": 0.5496255159378052, "learning_rate": 1.6355354392904584e-05, "loss": 1.0391, "step": 20990 }, { "epoch": 0.3025195557284239, "grad_norm": 0.5311712622642517, "learning_rate": 1.6351751457600894e-05, "loss": 1.0272, "step": 21000 }, { "epoch": 0.30266361265972314, "grad_norm": 0.5476694703102112, "learning_rate": 1.634814713960854e-05, "loss": 1.0277, "step": 21010 }, { "epoch": 0.3028076695910224, "grad_norm": 0.5251296162605286, "learning_rate": 1.634454143971212e-05, "loss": 1.0362, "step": 21020 }, { "epoch": 0.3029517265223216, "grad_norm": 0.46365106105804443, "learning_rate": 1.6340934358696553e-05, "loss": 1.0149, "step": 21030 }, { "epoch": 0.30309578345362087, "grad_norm": 0.7171838879585266, "learning_rate": 1.6337325897347052e-05, "loss": 1.0523, "step": 21040 }, { "epoch": 0.30323984038492013, "grad_norm": 0.6140922904014587, "learning_rate": 1.6333716056449125e-05, "loss": 1.0429, "step": 21050 }, { "epoch": 0.3033838973162194, "grad_norm": 0.5282112956047058, "learning_rate": 1.6330104836788588e-05, "loss": 1.033, "step": 21060 }, { "epoch": 0.3035279542475186, "grad_norm": 0.5564023852348328, "learning_rate": 1.632649223915155e-05, "loss": 1.052, "step": 21070 }, { "epoch": 0.30367201117881787, "grad_norm": 0.5264879465103149, "learning_rate": 1.6322878264324423e-05, "loss": 1.0567, "step": 21080 }, { "epoch": 0.30381606811011713, "grad_norm": 0.5021429061889648, "learning_rate": 1.631926291309393e-05, "loss": 1.0304, "step": 21090 }, { "epoch": 0.30396012504141634, "grad_norm": 0.5093965530395508, "learning_rate": 1.6315646186247067e-05, "loss": 1.0304, "step": 21100 }, { "epoch": 0.3041041819727156, "grad_norm": 0.556792140007019, "learning_rate": 1.6312028084571157e-05, "loss": 1.0294, "step": 21110 }, { "epoch": 0.30424823890401487, "grad_norm": 0.5673565864562988, "learning_rate": 1.630840860885381e-05, "loss": 1.0155, "step": 21120 }, { "epoch": 0.30439229583531413, "grad_norm": 0.9615490436553955, "learning_rate": 1.6304787759882928e-05, "loss": 1.0346, "step": 21130 }, { "epoch": 0.30453635276661334, "grad_norm": 0.5927960872650146, "learning_rate": 1.6301165538446732e-05, "loss": 1.0463, "step": 21140 }, { "epoch": 0.3046804096979126, "grad_norm": 0.5788955092430115, "learning_rate": 1.6297541945333716e-05, "loss": 1.0343, "step": 21150 }, { "epoch": 0.30482446662921187, "grad_norm": 0.5964028835296631, "learning_rate": 1.6293916981332695e-05, "loss": 1.0474, "step": 21160 }, { "epoch": 0.30496852356051113, "grad_norm": 0.5674517750740051, "learning_rate": 1.6290290647232774e-05, "loss": 1.0463, "step": 21170 }, { "epoch": 0.30511258049181034, "grad_norm": 0.5272372961044312, "learning_rate": 1.6286662943823352e-05, "loss": 1.0276, "step": 21180 }, { "epoch": 0.3052566374231096, "grad_norm": 0.46439695358276367, "learning_rate": 1.628303387189413e-05, "loss": 1.0119, "step": 21190 }, { "epoch": 0.30540069435440886, "grad_norm": 0.5587084889411926, "learning_rate": 1.6279403432235113e-05, "loss": 1.0111, "step": 21200 }, { "epoch": 0.3055447512857081, "grad_norm": 0.6520376801490784, "learning_rate": 1.6275771625636592e-05, "loss": 1.0378, "step": 21210 }, { "epoch": 0.30568880821700734, "grad_norm": 0.534944474697113, "learning_rate": 1.627213845288916e-05, "loss": 1.022, "step": 21220 }, { "epoch": 0.3058328651483066, "grad_norm": 0.642658531665802, "learning_rate": 1.6268503914783714e-05, "loss": 1.0178, "step": 21230 }, { "epoch": 0.30597692207960586, "grad_norm": 0.5053707361221313, "learning_rate": 1.6264868012111436e-05, "loss": 1.0179, "step": 21240 }, { "epoch": 0.3061209790109051, "grad_norm": 0.6561596393585205, "learning_rate": 1.626123074566382e-05, "loss": 1.0395, "step": 21250 }, { "epoch": 0.30626503594220433, "grad_norm": 0.5093657374382019, "learning_rate": 1.6257592116232642e-05, "loss": 1.0322, "step": 21260 }, { "epoch": 0.3064090928735036, "grad_norm": 0.5733696818351746, "learning_rate": 1.6253952124609984e-05, "loss": 1.0254, "step": 21270 }, { "epoch": 0.30655314980480286, "grad_norm": 0.6285299062728882, "learning_rate": 1.625031077158822e-05, "loss": 1.0453, "step": 21280 }, { "epoch": 0.3066972067361021, "grad_norm": 0.7546630501747131, "learning_rate": 1.6246668057960026e-05, "loss": 1.028, "step": 21290 }, { "epoch": 0.30684126366740133, "grad_norm": 0.5427360534667969, "learning_rate": 1.6243023984518368e-05, "loss": 1.0203, "step": 21300 }, { "epoch": 0.3069853205987006, "grad_norm": 0.594230592250824, "learning_rate": 1.6239378552056514e-05, "loss": 1.0062, "step": 21310 }, { "epoch": 0.30712937752999986, "grad_norm": 0.521277666091919, "learning_rate": 1.6235731761368022e-05, "loss": 1.0194, "step": 21320 }, { "epoch": 0.3072734344612991, "grad_norm": 0.5680876970291138, "learning_rate": 1.6232083613246745e-05, "loss": 1.0194, "step": 21330 }, { "epoch": 0.30741749139259833, "grad_norm": 0.5643933415412903, "learning_rate": 1.6228434108486833e-05, "loss": 1.0392, "step": 21340 }, { "epoch": 0.3075615483238976, "grad_norm": 0.6359453201293945, "learning_rate": 1.6224783247882743e-05, "loss": 1.0295, "step": 21350 }, { "epoch": 0.30770560525519686, "grad_norm": 0.4792833924293518, "learning_rate": 1.6221131032229208e-05, "loss": 1.0182, "step": 21360 }, { "epoch": 0.3078496621864961, "grad_norm": 0.5217666625976562, "learning_rate": 1.6217477462321264e-05, "loss": 1.0262, "step": 21370 }, { "epoch": 0.30799371911779533, "grad_norm": 0.5734424591064453, "learning_rate": 1.6213822538954255e-05, "loss": 1.0242, "step": 21380 }, { "epoch": 0.3081377760490946, "grad_norm": 0.4927959442138672, "learning_rate": 1.6210166262923794e-05, "loss": 1.0383, "step": 21390 }, { "epoch": 0.30828183298039386, "grad_norm": 0.5450374484062195, "learning_rate": 1.6206508635025808e-05, "loss": 1.0074, "step": 21400 }, { "epoch": 0.3084258899116931, "grad_norm": 0.49887827038764954, "learning_rate": 1.620284965605651e-05, "loss": 1.0352, "step": 21410 }, { "epoch": 0.3085699468429923, "grad_norm": 0.5779523849487305, "learning_rate": 1.619918932681241e-05, "loss": 1.0316, "step": 21420 }, { "epoch": 0.3087140037742916, "grad_norm": 0.5187239050865173, "learning_rate": 1.6195527648090314e-05, "loss": 1.0374, "step": 21430 }, { "epoch": 0.30885806070559085, "grad_norm": 0.543380081653595, "learning_rate": 1.6191864620687314e-05, "loss": 1.0418, "step": 21440 }, { "epoch": 0.3090021176368901, "grad_norm": 0.5300890207290649, "learning_rate": 1.6188200245400802e-05, "loss": 1.0279, "step": 21450 }, { "epoch": 0.3091461745681893, "grad_norm": 0.530333936214447, "learning_rate": 1.618453452302847e-05, "loss": 1.0377, "step": 21460 }, { "epoch": 0.3092902314994886, "grad_norm": 0.6680883765220642, "learning_rate": 1.618086745436828e-05, "loss": 1.03, "step": 21470 }, { "epoch": 0.30943428843078785, "grad_norm": 0.4743339419364929, "learning_rate": 1.6177199040218512e-05, "loss": 1.0197, "step": 21480 }, { "epoch": 0.3095783453620871, "grad_norm": 0.5322113037109375, "learning_rate": 1.6173529281377728e-05, "loss": 1.0161, "step": 21490 }, { "epoch": 0.3097224022933863, "grad_norm": 0.5997210144996643, "learning_rate": 1.6169858178644786e-05, "loss": 1.0276, "step": 21500 }, { "epoch": 0.3098664592246856, "grad_norm": 0.5533350706100464, "learning_rate": 1.6166185732818828e-05, "loss": 1.0364, "step": 21510 }, { "epoch": 0.31001051615598485, "grad_norm": 0.7028584480285645, "learning_rate": 1.6162511944699302e-05, "loss": 1.0329, "step": 21520 }, { "epoch": 0.3101545730872841, "grad_norm": 0.49295574426651, "learning_rate": 1.6158836815085934e-05, "loss": 1.0283, "step": 21530 }, { "epoch": 0.3102986300185833, "grad_norm": 0.5209812521934509, "learning_rate": 1.6155160344778752e-05, "loss": 1.0188, "step": 21540 }, { "epoch": 0.3104426869498826, "grad_norm": 0.6073362827301025, "learning_rate": 1.615148253457808e-05, "loss": 1.0439, "step": 21550 }, { "epoch": 0.31058674388118185, "grad_norm": 0.48292115330696106, "learning_rate": 1.6147803385284513e-05, "loss": 1.0354, "step": 21560 }, { "epoch": 0.3107308008124811, "grad_norm": 0.6610682010650635, "learning_rate": 1.614412289769896e-05, "loss": 1.0681, "step": 21570 }, { "epoch": 0.3108748577437803, "grad_norm": 0.5379059910774231, "learning_rate": 1.6140441072622614e-05, "loss": 1.0363, "step": 21580 }, { "epoch": 0.3110189146750796, "grad_norm": 0.6262497305870056, "learning_rate": 1.6136757910856948e-05, "loss": 1.0287, "step": 21590 }, { "epoch": 0.31116297160637885, "grad_norm": 0.6049202680587769, "learning_rate": 1.6133073413203745e-05, "loss": 1.0502, "step": 21600 }, { "epoch": 0.3113070285376781, "grad_norm": 0.5770962834358215, "learning_rate": 1.6129387580465065e-05, "loss": 1.0425, "step": 21610 }, { "epoch": 0.3114510854689773, "grad_norm": 0.620063841342926, "learning_rate": 1.612570041344326e-05, "loss": 1.0446, "step": 21620 }, { "epoch": 0.3115951424002766, "grad_norm": 0.5885114669799805, "learning_rate": 1.6122011912940983e-05, "loss": 1.0377, "step": 21630 }, { "epoch": 0.31173919933157584, "grad_norm": 0.48093509674072266, "learning_rate": 1.6118322079761167e-05, "loss": 1.0221, "step": 21640 }, { "epoch": 0.3118832562628751, "grad_norm": 0.6427713632583618, "learning_rate": 1.6114630914707036e-05, "loss": 1.0282, "step": 21650 }, { "epoch": 0.3120273131941743, "grad_norm": 0.5798922777175903, "learning_rate": 1.6110938418582106e-05, "loss": 1.0243, "step": 21660 }, { "epoch": 0.3121713701254736, "grad_norm": 0.6313838362693787, "learning_rate": 1.6107244592190177e-05, "loss": 1.0081, "step": 21670 }, { "epoch": 0.31231542705677284, "grad_norm": 0.6102400422096252, "learning_rate": 1.610354943633536e-05, "loss": 1.0384, "step": 21680 }, { "epoch": 0.3124594839880721, "grad_norm": 0.6214983463287354, "learning_rate": 1.6099852951822023e-05, "loss": 1.0444, "step": 21690 }, { "epoch": 0.3126035409193713, "grad_norm": 0.5477856397628784, "learning_rate": 1.6096155139454844e-05, "loss": 1.0341, "step": 21700 }, { "epoch": 0.3127475978506706, "grad_norm": 0.5215377807617188, "learning_rate": 1.6092456000038793e-05, "loss": 1.0133, "step": 21710 }, { "epoch": 0.31289165478196984, "grad_norm": 0.5937027335166931, "learning_rate": 1.6088755534379115e-05, "loss": 1.0138, "step": 21720 }, { "epoch": 0.3130357117132691, "grad_norm": 0.596572756767273, "learning_rate": 1.608505374328135e-05, "loss": 1.0309, "step": 21730 }, { "epoch": 0.3131797686445683, "grad_norm": 0.5597870945930481, "learning_rate": 1.6081350627551327e-05, "loss": 1.0358, "step": 21740 }, { "epoch": 0.3133238255758676, "grad_norm": 0.50466388463974, "learning_rate": 1.6077646187995167e-05, "loss": 1.0299, "step": 21750 }, { "epoch": 0.31346788250716684, "grad_norm": 0.7967897653579712, "learning_rate": 1.6073940425419274e-05, "loss": 1.0408, "step": 21760 }, { "epoch": 0.3136119394384661, "grad_norm": 0.4919719696044922, "learning_rate": 1.6070233340630337e-05, "loss": 1.0274, "step": 21770 }, { "epoch": 0.3137559963697653, "grad_norm": 0.5507171750068665, "learning_rate": 1.606652493443534e-05, "loss": 1.0207, "step": 21780 }, { "epoch": 0.3139000533010646, "grad_norm": 0.6266515254974365, "learning_rate": 1.606281520764156e-05, "loss": 1.0497, "step": 21790 }, { "epoch": 0.31404411023236384, "grad_norm": 0.558059811592102, "learning_rate": 1.6059104161056536e-05, "loss": 1.0151, "step": 21800 }, { "epoch": 0.3141881671636631, "grad_norm": 0.5087937712669373, "learning_rate": 1.605539179548813e-05, "loss": 1.0369, "step": 21810 }, { "epoch": 0.3143322240949623, "grad_norm": 0.49334022402763367, "learning_rate": 1.605167811174446e-05, "loss": 1.0222, "step": 21820 }, { "epoch": 0.3144762810262616, "grad_norm": 0.599881649017334, "learning_rate": 1.604796311063395e-05, "loss": 1.0373, "step": 21830 }, { "epoch": 0.31462033795756084, "grad_norm": 0.6097472906112671, "learning_rate": 1.60442467929653e-05, "loss": 1.0463, "step": 21840 }, { "epoch": 0.3147643948888601, "grad_norm": 0.5266076922416687, "learning_rate": 1.6040529159547505e-05, "loss": 1.0116, "step": 21850 }, { "epoch": 0.3149084518201593, "grad_norm": 0.624330461025238, "learning_rate": 1.6036810211189843e-05, "loss": 1.0508, "step": 21860 }, { "epoch": 0.31505250875145857, "grad_norm": 0.561284065246582, "learning_rate": 1.6033089948701872e-05, "loss": 1.0226, "step": 21870 }, { "epoch": 0.31519656568275783, "grad_norm": 0.5418603420257568, "learning_rate": 1.6029368372893446e-05, "loss": 1.0101, "step": 21880 }, { "epoch": 0.3153406226140571, "grad_norm": 0.48067712783813477, "learning_rate": 1.6025645484574702e-05, "loss": 1.0448, "step": 21890 }, { "epoch": 0.3154846795453563, "grad_norm": 0.5670645833015442, "learning_rate": 1.602192128455606e-05, "loss": 1.0227, "step": 21900 }, { "epoch": 0.31562873647665557, "grad_norm": 0.6337267756462097, "learning_rate": 1.601819577364823e-05, "loss": 1.0366, "step": 21910 }, { "epoch": 0.31577279340795483, "grad_norm": 0.5901769995689392, "learning_rate": 1.6014468952662197e-05, "loss": 1.0099, "step": 21920 }, { "epoch": 0.3159168503392541, "grad_norm": 0.5751497149467468, "learning_rate": 1.601074082240924e-05, "loss": 1.0581, "step": 21930 }, { "epoch": 0.3160609072705533, "grad_norm": 0.6279475092887878, "learning_rate": 1.600701138370093e-05, "loss": 1.0286, "step": 21940 }, { "epoch": 0.31620496420185257, "grad_norm": 0.5632752776145935, "learning_rate": 1.6003280637349106e-05, "loss": 1.0393, "step": 21950 }, { "epoch": 0.31634902113315183, "grad_norm": 0.6024361848831177, "learning_rate": 1.5999548584165904e-05, "loss": 1.0155, "step": 21960 }, { "epoch": 0.3164930780644511, "grad_norm": 0.5428387522697449, "learning_rate": 1.599581522496374e-05, "loss": 1.0058, "step": 21970 }, { "epoch": 0.3166371349957503, "grad_norm": 0.5390846729278564, "learning_rate": 1.599208056055531e-05, "loss": 1.0435, "step": 21980 }, { "epoch": 0.31678119192704957, "grad_norm": 0.5568037033081055, "learning_rate": 1.5988344591753603e-05, "loss": 1.0291, "step": 21990 }, { "epoch": 0.31692524885834883, "grad_norm": 0.5895788073539734, "learning_rate": 1.5984607319371887e-05, "loss": 1.0252, "step": 22000 }, { "epoch": 0.3170693057896481, "grad_norm": 0.5308651328086853, "learning_rate": 1.5980868744223717e-05, "loss": 1.0297, "step": 22010 }, { "epoch": 0.3172133627209473, "grad_norm": 0.5814491510391235, "learning_rate": 1.5977128867122932e-05, "loss": 1.0617, "step": 22020 }, { "epoch": 0.31735741965224656, "grad_norm": 0.5710555911064148, "learning_rate": 1.597338768888364e-05, "loss": 1.0447, "step": 22030 }, { "epoch": 0.3175014765835458, "grad_norm": 0.5677223801612854, "learning_rate": 1.5969645210320256e-05, "loss": 1.0438, "step": 22040 }, { "epoch": 0.3176455335148451, "grad_norm": 0.6458613276481628, "learning_rate": 1.596590143224746e-05, "loss": 1.0231, "step": 22050 }, { "epoch": 0.3177895904461443, "grad_norm": 0.5588922500610352, "learning_rate": 1.596215635548022e-05, "loss": 1.0349, "step": 22060 }, { "epoch": 0.31793364737744356, "grad_norm": 0.6394858360290527, "learning_rate": 1.5958409980833786e-05, "loss": 1.0245, "step": 22070 }, { "epoch": 0.3180777043087428, "grad_norm": 0.6036264300346375, "learning_rate": 1.5954662309123703e-05, "loss": 1.0366, "step": 22080 }, { "epoch": 0.3182217612400421, "grad_norm": 0.48761194944381714, "learning_rate": 1.5950913341165778e-05, "loss": 1.0286, "step": 22090 }, { "epoch": 0.3183658181713413, "grad_norm": 0.5573323965072632, "learning_rate": 1.5947163077776108e-05, "loss": 1.0256, "step": 22100 }, { "epoch": 0.31850987510264056, "grad_norm": 0.6502055525779724, "learning_rate": 1.594341151977108e-05, "loss": 1.0313, "step": 22110 }, { "epoch": 0.3186539320339398, "grad_norm": 0.5464458465576172, "learning_rate": 1.5939658667967354e-05, "loss": 1.0487, "step": 22120 }, { "epoch": 0.3187979889652391, "grad_norm": 0.5588178634643555, "learning_rate": 1.5935904523181872e-05, "loss": 1.0364, "step": 22130 }, { "epoch": 0.3189420458965383, "grad_norm": 0.5737643241882324, "learning_rate": 1.5932149086231857e-05, "loss": 1.03, "step": 22140 }, { "epoch": 0.31908610282783756, "grad_norm": 0.544349193572998, "learning_rate": 1.592839235793483e-05, "loss": 1.0531, "step": 22150 }, { "epoch": 0.3192301597591368, "grad_norm": 0.42877405881881714, "learning_rate": 1.5924634339108563e-05, "loss": 1.0279, "step": 22160 }, { "epoch": 0.3193742166904361, "grad_norm": 0.5407014489173889, "learning_rate": 1.5920875030571127e-05, "loss": 1.0114, "step": 22170 }, { "epoch": 0.3195182736217353, "grad_norm": 0.5757893919944763, "learning_rate": 1.591711443314088e-05, "loss": 1.0254, "step": 22180 }, { "epoch": 0.31966233055303456, "grad_norm": 0.5823472142219543, "learning_rate": 1.5913352547636448e-05, "loss": 1.0425, "step": 22190 }, { "epoch": 0.3198063874843338, "grad_norm": 0.6235815286636353, "learning_rate": 1.5909589374876743e-05, "loss": 1.043, "step": 22200 }, { "epoch": 0.3199504444156331, "grad_norm": 0.6385778784751892, "learning_rate": 1.590582491568095e-05, "loss": 1.0288, "step": 22210 }, { "epoch": 0.3200945013469323, "grad_norm": 0.5611007809638977, "learning_rate": 1.5902059170868544e-05, "loss": 1.0391, "step": 22220 }, { "epoch": 0.32023855827823156, "grad_norm": 0.5358127951622009, "learning_rate": 1.5898292141259276e-05, "loss": 1.0232, "step": 22230 }, { "epoch": 0.3203826152095308, "grad_norm": 0.5716277360916138, "learning_rate": 1.5894523827673177e-05, "loss": 1.0329, "step": 22240 }, { "epoch": 0.3205266721408301, "grad_norm": 0.5133422613143921, "learning_rate": 1.5890754230930554e-05, "loss": 1.0256, "step": 22250 }, { "epoch": 0.3206707290721293, "grad_norm": 0.571601390838623, "learning_rate": 1.5886983351851997e-05, "loss": 1.0386, "step": 22260 }, { "epoch": 0.32081478600342855, "grad_norm": 0.6445148587226868, "learning_rate": 1.588321119125838e-05, "loss": 1.0282, "step": 22270 }, { "epoch": 0.3209588429347278, "grad_norm": 0.6041953563690186, "learning_rate": 1.587943774997084e-05, "loss": 1.0441, "step": 22280 }, { "epoch": 0.3211028998660271, "grad_norm": 0.5699586272239685, "learning_rate": 1.5875663028810813e-05, "loss": 1.0164, "step": 22290 }, { "epoch": 0.3212469567973263, "grad_norm": 0.5821050405502319, "learning_rate": 1.58718870286e-05, "loss": 1.043, "step": 22300 }, { "epoch": 0.32139101372862555, "grad_norm": 0.6183272004127502, "learning_rate": 1.586810975016038e-05, "loss": 1.0416, "step": 22310 }, { "epoch": 0.3215350706599248, "grad_norm": 0.5741239786148071, "learning_rate": 1.5864331194314218e-05, "loss": 1.0158, "step": 22320 }, { "epoch": 0.3216791275912241, "grad_norm": 0.5490394830703735, "learning_rate": 1.5860551361884057e-05, "loss": 1.0356, "step": 22330 }, { "epoch": 0.3218231845225233, "grad_norm": 0.5496690273284912, "learning_rate": 1.585677025369271e-05, "loss": 1.0209, "step": 22340 }, { "epoch": 0.32196724145382255, "grad_norm": 0.6001868844032288, "learning_rate": 1.585298787056327e-05, "loss": 1.0258, "step": 22350 }, { "epoch": 0.3221112983851218, "grad_norm": 0.46774235367774963, "learning_rate": 1.5849204213319113e-05, "loss": 1.0504, "step": 22360 }, { "epoch": 0.3222553553164211, "grad_norm": 0.5711603164672852, "learning_rate": 1.5845419282783892e-05, "loss": 1.0252, "step": 22370 }, { "epoch": 0.3223994122477203, "grad_norm": 0.5627217292785645, "learning_rate": 1.5841633079781526e-05, "loss": 1.0316, "step": 22380 }, { "epoch": 0.32254346917901955, "grad_norm": 0.6105978488922119, "learning_rate": 1.5837845605136227e-05, "loss": 1.0253, "step": 22390 }, { "epoch": 0.3226875261103188, "grad_norm": 0.7439790964126587, "learning_rate": 1.5834056859672472e-05, "loss": 1.0493, "step": 22400 }, { "epoch": 0.322831583041618, "grad_norm": 0.5626582503318787, "learning_rate": 1.583026684421502e-05, "loss": 1.0007, "step": 22410 }, { "epoch": 0.3229756399729173, "grad_norm": 0.5579158067703247, "learning_rate": 1.5826475559588908e-05, "loss": 1.0087, "step": 22420 }, { "epoch": 0.32311969690421655, "grad_norm": 0.6444723010063171, "learning_rate": 1.582268300661944e-05, "loss": 1.0474, "step": 22430 }, { "epoch": 0.3232637538355158, "grad_norm": 0.6335017085075378, "learning_rate": 1.5818889186132202e-05, "loss": 1.0355, "step": 22440 }, { "epoch": 0.323407810766815, "grad_norm": 0.6375849843025208, "learning_rate": 1.5815094098953063e-05, "loss": 1.0245, "step": 22450 }, { "epoch": 0.3235518676981143, "grad_norm": 0.59910649061203, "learning_rate": 1.5811297745908158e-05, "loss": 1.0304, "step": 22460 }, { "epoch": 0.32369592462941355, "grad_norm": 0.5544905066490173, "learning_rate": 1.58075001278239e-05, "loss": 1.0252, "step": 22470 }, { "epoch": 0.3238399815607128, "grad_norm": 0.5109099745750427, "learning_rate": 1.580370124552698e-05, "loss": 1.0265, "step": 22480 }, { "epoch": 0.323984038492012, "grad_norm": 0.5076267123222351, "learning_rate": 1.5799901099844358e-05, "loss": 1.0116, "step": 22490 }, { "epoch": 0.3241280954233113, "grad_norm": 0.5694175362586975, "learning_rate": 1.579609969160328e-05, "loss": 1.0207, "step": 22500 }, { "epoch": 0.32427215235461054, "grad_norm": 0.5941329002380371, "learning_rate": 1.5792297021631253e-05, "loss": 1.0262, "step": 22510 }, { "epoch": 0.3244162092859098, "grad_norm": 0.5924682021141052, "learning_rate": 1.5788493090756074e-05, "loss": 1.0336, "step": 22520 }, { "epoch": 0.324560266217209, "grad_norm": 0.5476395487785339, "learning_rate": 1.5784687899805796e-05, "loss": 1.0153, "step": 22530 }, { "epoch": 0.3247043231485083, "grad_norm": 0.6003282070159912, "learning_rate": 1.5780881449608766e-05, "loss": 1.0312, "step": 22540 }, { "epoch": 0.32484838007980754, "grad_norm": 0.546464741230011, "learning_rate": 1.577707374099359e-05, "loss": 1.0356, "step": 22550 }, { "epoch": 0.3249924370111068, "grad_norm": 0.5743100643157959, "learning_rate": 1.5773264774789153e-05, "loss": 1.0091, "step": 22560 }, { "epoch": 0.325136493942406, "grad_norm": 0.5611959099769592, "learning_rate": 1.5769454551824617e-05, "loss": 1.0337, "step": 22570 }, { "epoch": 0.3252805508737053, "grad_norm": 0.5726581811904907, "learning_rate": 1.5765643072929416e-05, "loss": 1.0267, "step": 22580 }, { "epoch": 0.32542460780500454, "grad_norm": 0.6655681729316711, "learning_rate": 1.576183033893326e-05, "loss": 1.0579, "step": 22590 }, { "epoch": 0.3255686647363038, "grad_norm": 0.5404876470565796, "learning_rate": 1.575801635066612e-05, "loss": 1.0413, "step": 22600 }, { "epoch": 0.325712721667603, "grad_norm": 0.6277989745140076, "learning_rate": 1.5754201108958248e-05, "loss": 1.0108, "step": 22610 }, { "epoch": 0.3258567785989023, "grad_norm": 0.5897507071495056, "learning_rate": 1.575038461464018e-05, "loss": 1.0362, "step": 22620 }, { "epoch": 0.32600083553020154, "grad_norm": 0.5520694851875305, "learning_rate": 1.5746566868542705e-05, "loss": 1.0468, "step": 22630 }, { "epoch": 0.3261448924615008, "grad_norm": 0.5689501762390137, "learning_rate": 1.5742747871496898e-05, "loss": 1.0434, "step": 22640 }, { "epoch": 0.3262889493928, "grad_norm": 0.641187846660614, "learning_rate": 1.57389276243341e-05, "loss": 1.0428, "step": 22650 }, { "epoch": 0.3264330063240993, "grad_norm": 0.5682580471038818, "learning_rate": 1.5735106127885926e-05, "loss": 1.0129, "step": 22660 }, { "epoch": 0.32657706325539854, "grad_norm": 0.6321516036987305, "learning_rate": 1.5731283382984266e-05, "loss": 1.0351, "step": 22670 }, { "epoch": 0.3267211201866978, "grad_norm": 0.6668967604637146, "learning_rate": 1.5727459390461276e-05, "loss": 1.0218, "step": 22680 }, { "epoch": 0.326865177117997, "grad_norm": 0.5244312882423401, "learning_rate": 1.5723634151149387e-05, "loss": 1.0442, "step": 22690 }, { "epoch": 0.32700923404929627, "grad_norm": 0.5505217909812927, "learning_rate": 1.5719807665881298e-05, "loss": 1.0164, "step": 22700 }, { "epoch": 0.32715329098059553, "grad_norm": 0.5882081389427185, "learning_rate": 1.5715979935489986e-05, "loss": 1.0201, "step": 22710 }, { "epoch": 0.3272973479118948, "grad_norm": 0.5344705581665039, "learning_rate": 1.5712150960808698e-05, "loss": 1.0266, "step": 22720 }, { "epoch": 0.327441404843194, "grad_norm": 0.6028308272361755, "learning_rate": 1.570832074267094e-05, "loss": 1.0538, "step": 22730 }, { "epoch": 0.32758546177449327, "grad_norm": 0.5094046592712402, "learning_rate": 1.5704489281910507e-05, "loss": 1.031, "step": 22740 }, { "epoch": 0.32772951870579253, "grad_norm": 0.5897226333618164, "learning_rate": 1.5700656579361445e-05, "loss": 1.0227, "step": 22750 }, { "epoch": 0.3278735756370918, "grad_norm": 0.5317791104316711, "learning_rate": 1.569682263585809e-05, "loss": 1.038, "step": 22760 }, { "epoch": 0.328017632568391, "grad_norm": 0.5674440860748291, "learning_rate": 1.5692987452235032e-05, "loss": 1.0218, "step": 22770 }, { "epoch": 0.32816168949969027, "grad_norm": 0.5383510589599609, "learning_rate": 1.5689151029327145e-05, "loss": 1.0221, "step": 22780 }, { "epoch": 0.32830574643098953, "grad_norm": 0.5321672558784485, "learning_rate": 1.568531336796956e-05, "loss": 1.0299, "step": 22790 }, { "epoch": 0.3284498033622888, "grad_norm": 0.5883285999298096, "learning_rate": 1.5681474468997682e-05, "loss": 1.0352, "step": 22800 }, { "epoch": 0.328593860293588, "grad_norm": 0.5454659461975098, "learning_rate": 1.5677634333247187e-05, "loss": 1.0254, "step": 22810 }, { "epoch": 0.32873791722488727, "grad_norm": 0.5320073962211609, "learning_rate": 1.5673792961554025e-05, "loss": 1.0432, "step": 22820 }, { "epoch": 0.32888197415618653, "grad_norm": 0.5060919523239136, "learning_rate": 1.56699503547544e-05, "loss": 0.9997, "step": 22830 }, { "epoch": 0.3290260310874858, "grad_norm": 0.5718419551849365, "learning_rate": 1.5666106513684803e-05, "loss": 1.0314, "step": 22840 }, { "epoch": 0.329170088018785, "grad_norm": 0.6048396825790405, "learning_rate": 1.5662261439181983e-05, "loss": 1.0257, "step": 22850 }, { "epoch": 0.32931414495008426, "grad_norm": 0.5688875913619995, "learning_rate": 1.5658415132082957e-05, "loss": 1.0253, "step": 22860 }, { "epoch": 0.32945820188138353, "grad_norm": 0.6301843523979187, "learning_rate": 1.5654567593225014e-05, "loss": 1.036, "step": 22870 }, { "epoch": 0.3296022588126828, "grad_norm": 0.5989998579025269, "learning_rate": 1.565071882344571e-05, "loss": 1.0325, "step": 22880 }, { "epoch": 0.329746315743982, "grad_norm": 0.5171307921409607, "learning_rate": 1.5646868823582874e-05, "loss": 1.0229, "step": 22890 }, { "epoch": 0.32989037267528126, "grad_norm": 0.6181734800338745, "learning_rate": 1.564301759447459e-05, "loss": 1.0229, "step": 22900 }, { "epoch": 0.3300344296065805, "grad_norm": 0.5516754984855652, "learning_rate": 1.5639165136959222e-05, "loss": 1.0232, "step": 22910 }, { "epoch": 0.3301784865378798, "grad_norm": 0.5356869101524353, "learning_rate": 1.5635311451875393e-05, "loss": 1.0472, "step": 22920 }, { "epoch": 0.330322543469179, "grad_norm": 0.6694609522819519, "learning_rate": 1.5631456540062004e-05, "loss": 1.0524, "step": 22930 }, { "epoch": 0.33046660040047826, "grad_norm": 0.592635989189148, "learning_rate": 1.5627600402358213e-05, "loss": 1.0216, "step": 22940 }, { "epoch": 0.3306106573317775, "grad_norm": 0.5867111682891846, "learning_rate": 1.5623743039603444e-05, "loss": 1.0266, "step": 22950 }, { "epoch": 0.3307547142630768, "grad_norm": 0.5420083403587341, "learning_rate": 1.5619884452637397e-05, "loss": 1.0142, "step": 22960 }, { "epoch": 0.330898771194376, "grad_norm": 0.5669357180595398, "learning_rate": 1.5616024642300032e-05, "loss": 1.0425, "step": 22970 }, { "epoch": 0.33104282812567526, "grad_norm": 0.5408803224563599, "learning_rate": 1.561216360943157e-05, "loss": 1.0381, "step": 22980 }, { "epoch": 0.3311868850569745, "grad_norm": 0.5677891969680786, "learning_rate": 1.5608301354872517e-05, "loss": 1.0387, "step": 22990 }, { "epoch": 0.3313309419882738, "grad_norm": 0.5275797247886658, "learning_rate": 1.5604437879463623e-05, "loss": 1.0127, "step": 23000 }, { "epoch": 0.331474998919573, "grad_norm": 0.6289392709732056, "learning_rate": 1.5600573184045918e-05, "loss": 1.03, "step": 23010 }, { "epoch": 0.33161905585087226, "grad_norm": 0.6317474246025085, "learning_rate": 1.5596707269460684e-05, "loss": 1.024, "step": 23020 }, { "epoch": 0.3317631127821715, "grad_norm": 0.5193930268287659, "learning_rate": 1.559284013654949e-05, "loss": 1.0341, "step": 23030 }, { "epoch": 0.3319071697134708, "grad_norm": 0.5708998441696167, "learning_rate": 1.558897178615415e-05, "loss": 1.0387, "step": 23040 }, { "epoch": 0.33205122664477, "grad_norm": 0.6090611219406128, "learning_rate": 1.558510221911675e-05, "loss": 1.0426, "step": 23050 }, { "epoch": 0.33219528357606926, "grad_norm": 0.5591376423835754, "learning_rate": 1.5581231436279648e-05, "loss": 1.0045, "step": 23060 }, { "epoch": 0.3323393405073685, "grad_norm": 0.5508315563201904, "learning_rate": 1.5577359438485452e-05, "loss": 1.0523, "step": 23070 }, { "epoch": 0.3324833974386678, "grad_norm": 0.5775851607322693, "learning_rate": 1.557348622657705e-05, "loss": 1.0117, "step": 23080 }, { "epoch": 0.332627454369967, "grad_norm": 0.5561432242393494, "learning_rate": 1.5569611801397578e-05, "loss": 1.033, "step": 23090 }, { "epoch": 0.33277151130126625, "grad_norm": 0.5185987949371338, "learning_rate": 1.556573616379045e-05, "loss": 1.0314, "step": 23100 }, { "epoch": 0.3329155682325655, "grad_norm": 0.7127716541290283, "learning_rate": 1.5561859314599338e-05, "loss": 1.0239, "step": 23110 }, { "epoch": 0.3330596251638648, "grad_norm": 0.6326349973678589, "learning_rate": 1.555798125466818e-05, "loss": 1.0128, "step": 23120 }, { "epoch": 0.333203682095164, "grad_norm": 0.5786259174346924, "learning_rate": 1.5554101984841176e-05, "loss": 1.0421, "step": 23130 }, { "epoch": 0.33334773902646325, "grad_norm": 0.5460976958274841, "learning_rate": 1.5550221505962788e-05, "loss": 1.0201, "step": 23140 }, { "epoch": 0.3334917959577625, "grad_norm": 0.6431747078895569, "learning_rate": 1.5546339818877745e-05, "loss": 1.0124, "step": 23150 }, { "epoch": 0.3336358528890618, "grad_norm": 0.592847466468811, "learning_rate": 1.554245692443104e-05, "loss": 1.0144, "step": 23160 }, { "epoch": 0.333779909820361, "grad_norm": 0.5598251819610596, "learning_rate": 1.5538572823467912e-05, "loss": 1.0221, "step": 23170 }, { "epoch": 0.33392396675166025, "grad_norm": 0.5220053195953369, "learning_rate": 1.553468751683389e-05, "loss": 1.0275, "step": 23180 }, { "epoch": 0.3340680236829595, "grad_norm": 0.6287788152694702, "learning_rate": 1.5530801005374742e-05, "loss": 1.0321, "step": 23190 }, { "epoch": 0.3342120806142588, "grad_norm": 0.5721694827079773, "learning_rate": 1.5526913289936516e-05, "loss": 1.032, "step": 23200 }, { "epoch": 0.334356137545558, "grad_norm": 0.5819039344787598, "learning_rate": 1.5523024371365508e-05, "loss": 1.0313, "step": 23210 }, { "epoch": 0.33450019447685725, "grad_norm": 0.6232460737228394, "learning_rate": 1.551913425050829e-05, "loss": 1.0248, "step": 23220 }, { "epoch": 0.3346442514081565, "grad_norm": 0.6906763911247253, "learning_rate": 1.551524292821168e-05, "loss": 1.0398, "step": 23230 }, { "epoch": 0.3347883083394558, "grad_norm": 0.6997624039649963, "learning_rate": 1.5511350405322763e-05, "loss": 1.0349, "step": 23240 }, { "epoch": 0.334932365270755, "grad_norm": 0.5740306377410889, "learning_rate": 1.5507456682688897e-05, "loss": 1.0452, "step": 23250 }, { "epoch": 0.33507642220205425, "grad_norm": 0.5284046530723572, "learning_rate": 1.5503561761157683e-05, "loss": 1.0334, "step": 23260 }, { "epoch": 0.3352204791333535, "grad_norm": 0.5597389340400696, "learning_rate": 1.5499665641577e-05, "loss": 1.0208, "step": 23270 }, { "epoch": 0.3353645360646528, "grad_norm": 0.5695542097091675, "learning_rate": 1.5495768324794967e-05, "loss": 1.0143, "step": 23280 }, { "epoch": 0.335508592995952, "grad_norm": 0.6439692378044128, "learning_rate": 1.549186981165999e-05, "loss": 0.9941, "step": 23290 }, { "epoch": 0.33565264992725125, "grad_norm": 0.6697540283203125, "learning_rate": 1.5487970103020714e-05, "loss": 1.0187, "step": 23300 }, { "epoch": 0.3357967068585505, "grad_norm": 0.4710736870765686, "learning_rate": 1.5484069199726054e-05, "loss": 1.0441, "step": 23310 }, { "epoch": 0.3359407637898498, "grad_norm": 0.5582054853439331, "learning_rate": 1.5480167102625178e-05, "loss": 1.0332, "step": 23320 }, { "epoch": 0.336084820721149, "grad_norm": 1.0697526931762695, "learning_rate": 1.5476263812567527e-05, "loss": 1.0216, "step": 23330 }, { "epoch": 0.33622887765244824, "grad_norm": 0.672410249710083, "learning_rate": 1.5472359330402788e-05, "loss": 1.041, "step": 23340 }, { "epoch": 0.3363729345837475, "grad_norm": 0.7280809879302979, "learning_rate": 1.5468453656980913e-05, "loss": 1.0598, "step": 23350 }, { "epoch": 0.33651699151504677, "grad_norm": 0.5943734049797058, "learning_rate": 1.5464546793152118e-05, "loss": 1.0243, "step": 23360 }, { "epoch": 0.336661048446346, "grad_norm": 0.6002911329269409, "learning_rate": 1.5460638739766863e-05, "loss": 1.0226, "step": 23370 }, { "epoch": 0.33680510537764524, "grad_norm": 0.67616206407547, "learning_rate": 1.545672949767589e-05, "loss": 1.0399, "step": 23380 }, { "epoch": 0.3369491623089445, "grad_norm": 0.5300679206848145, "learning_rate": 1.545281906773018e-05, "loss": 1.0477, "step": 23390 }, { "epoch": 0.33709321924024377, "grad_norm": 0.5449818968772888, "learning_rate": 1.5448907450780976e-05, "loss": 1.0286, "step": 23400 }, { "epoch": 0.337237276171543, "grad_norm": 0.6804856061935425, "learning_rate": 1.544499464767979e-05, "loss": 1.0163, "step": 23410 }, { "epoch": 0.33738133310284224, "grad_norm": 0.6101489067077637, "learning_rate": 1.5441080659278383e-05, "loss": 1.0497, "step": 23420 }, { "epoch": 0.3375253900341415, "grad_norm": 0.6030386090278625, "learning_rate": 1.5437165486428778e-05, "loss": 1.0462, "step": 23430 }, { "epoch": 0.33766944696544077, "grad_norm": 0.5325640439987183, "learning_rate": 1.5433249129983254e-05, "loss": 1.0021, "step": 23440 }, { "epoch": 0.33781350389674, "grad_norm": 0.6616448760032654, "learning_rate": 1.5429331590794344e-05, "loss": 1.0719, "step": 23450 }, { "epoch": 0.33795756082803924, "grad_norm": 0.6268752217292786, "learning_rate": 1.5425412869714842e-05, "loss": 1.0308, "step": 23460 }, { "epoch": 0.3381016177593385, "grad_norm": 0.5952742099761963, "learning_rate": 1.5421492967597807e-05, "loss": 1.0377, "step": 23470 }, { "epoch": 0.33824567469063777, "grad_norm": 0.814234733581543, "learning_rate": 1.541757188529654e-05, "loss": 1.0336, "step": 23480 }, { "epoch": 0.338389731621937, "grad_norm": 0.6123931407928467, "learning_rate": 1.5413649623664608e-05, "loss": 0.9977, "step": 23490 }, { "epoch": 0.33853378855323624, "grad_norm": 0.5388018488883972, "learning_rate": 1.540972618355583e-05, "loss": 1.0105, "step": 23500 }, { "epoch": 0.3386778454845355, "grad_norm": 0.5089865922927856, "learning_rate": 1.5405801565824297e-05, "loss": 1.0377, "step": 23510 }, { "epoch": 0.33882190241583476, "grad_norm": 0.6453099846839905, "learning_rate": 1.540187577132433e-05, "loss": 1.0003, "step": 23520 }, { "epoch": 0.33896595934713397, "grad_norm": 0.7200059294700623, "learning_rate": 1.539794880091053e-05, "loss": 1.0176, "step": 23530 }, { "epoch": 0.33911001627843324, "grad_norm": 0.5649188756942749, "learning_rate": 1.539402065543774e-05, "loss": 1.0176, "step": 23540 }, { "epoch": 0.3392540732097325, "grad_norm": 0.5101891756057739, "learning_rate": 1.5390091335761064e-05, "loss": 1.0293, "step": 23550 }, { "epoch": 0.33939813014103176, "grad_norm": 0.5435965061187744, "learning_rate": 1.538616084273586e-05, "loss": 1.0549, "step": 23560 }, { "epoch": 0.33954218707233097, "grad_norm": 0.5960400700569153, "learning_rate": 1.5382229177217744e-05, "loss": 1.0303, "step": 23570 }, { "epoch": 0.33968624400363023, "grad_norm": 0.603877067565918, "learning_rate": 1.5378296340062584e-05, "loss": 1.0318, "step": 23580 }, { "epoch": 0.3398303009349295, "grad_norm": 0.6355979442596436, "learning_rate": 1.5374362332126503e-05, "loss": 1.0153, "step": 23590 }, { "epoch": 0.33997435786622876, "grad_norm": 0.5628081560134888, "learning_rate": 1.537042715426588e-05, "loss": 1.0471, "step": 23600 }, { "epoch": 0.34011841479752797, "grad_norm": 0.6306579113006592, "learning_rate": 1.5366490807337353e-05, "loss": 1.0363, "step": 23610 }, { "epoch": 0.34026247172882723, "grad_norm": 0.5563225746154785, "learning_rate": 1.5362553292197807e-05, "loss": 1.0396, "step": 23620 }, { "epoch": 0.3404065286601265, "grad_norm": 0.5619765520095825, "learning_rate": 1.535861460970439e-05, "loss": 1.0183, "step": 23630 }, { "epoch": 0.34055058559142576, "grad_norm": 0.5668855309486389, "learning_rate": 1.535467476071449e-05, "loss": 1.0252, "step": 23640 }, { "epoch": 0.34069464252272497, "grad_norm": 0.5778625011444092, "learning_rate": 1.5350733746085762e-05, "loss": 1.0362, "step": 23650 }, { "epoch": 0.34083869945402423, "grad_norm": 0.6049732565879822, "learning_rate": 1.5346791566676113e-05, "loss": 1.0441, "step": 23660 }, { "epoch": 0.3409827563853235, "grad_norm": 0.5564858317375183, "learning_rate": 1.53428482233437e-05, "loss": 1.0485, "step": 23670 }, { "epoch": 0.34112681331662276, "grad_norm": 0.5889008641242981, "learning_rate": 1.533890371694693e-05, "loss": 1.0462, "step": 23680 }, { "epoch": 0.34127087024792196, "grad_norm": 0.5665014386177063, "learning_rate": 1.5334958048344473e-05, "loss": 1.0071, "step": 23690 }, { "epoch": 0.34141492717922123, "grad_norm": 0.5539933443069458, "learning_rate": 1.5331011218395248e-05, "loss": 1.0423, "step": 23700 }, { "epoch": 0.3415589841105205, "grad_norm": 0.518696129322052, "learning_rate": 1.5327063227958413e-05, "loss": 1.0103, "step": 23710 }, { "epoch": 0.3417030410418197, "grad_norm": 0.5499048233032227, "learning_rate": 1.5323114077893405e-05, "loss": 1.0173, "step": 23720 }, { "epoch": 0.34184709797311896, "grad_norm": 0.6649884581565857, "learning_rate": 1.5319163769059898e-05, "loss": 1.0192, "step": 23730 }, { "epoch": 0.3419911549044182, "grad_norm": 0.6036750078201294, "learning_rate": 1.5315212302317814e-05, "loss": 1.0453, "step": 23740 }, { "epoch": 0.3421352118357175, "grad_norm": 0.5595616102218628, "learning_rate": 1.5311259678527335e-05, "loss": 1.0157, "step": 23750 }, { "epoch": 0.3422792687670167, "grad_norm": 0.47189611196517944, "learning_rate": 1.5307305898548893e-05, "loss": 1.0045, "step": 23760 }, { "epoch": 0.34242332569831596, "grad_norm": 0.602363109588623, "learning_rate": 1.530335096324317e-05, "loss": 1.0427, "step": 23770 }, { "epoch": 0.3425673826296152, "grad_norm": 0.5799539685249329, "learning_rate": 1.5299394873471104e-05, "loss": 1.0324, "step": 23780 }, { "epoch": 0.3427114395609145, "grad_norm": 0.5826029181480408, "learning_rate": 1.5295437630093878e-05, "loss": 1.035, "step": 23790 }, { "epoch": 0.3428554964922137, "grad_norm": 0.570226788520813, "learning_rate": 1.529147923397293e-05, "loss": 1.0427, "step": 23800 }, { "epoch": 0.34299955342351296, "grad_norm": 0.5779927372932434, "learning_rate": 1.528751968596995e-05, "loss": 1.0351, "step": 23810 }, { "epoch": 0.3431436103548122, "grad_norm": 0.6387103796005249, "learning_rate": 1.5283558986946875e-05, "loss": 1.0208, "step": 23820 }, { "epoch": 0.3432876672861115, "grad_norm": 0.6481809020042419, "learning_rate": 1.5279597137765894e-05, "loss": 1.0319, "step": 23830 }, { "epoch": 0.3434317242174107, "grad_norm": 0.5976871252059937, "learning_rate": 1.527563413928945e-05, "loss": 1.0415, "step": 23840 }, { "epoch": 0.34357578114870996, "grad_norm": 0.554764986038208, "learning_rate": 1.527166999238023e-05, "loss": 1.0219, "step": 23850 }, { "epoch": 0.3437198380800092, "grad_norm": 0.5971626043319702, "learning_rate": 1.526770469790118e-05, "loss": 1.0197, "step": 23860 }, { "epoch": 0.3438638950113085, "grad_norm": 0.6000232696533203, "learning_rate": 1.5263738256715478e-05, "loss": 1.0252, "step": 23870 }, { "epoch": 0.3440079519426077, "grad_norm": 0.5533708930015564, "learning_rate": 1.5259770669686576e-05, "loss": 1.0173, "step": 23880 }, { "epoch": 0.34415200887390696, "grad_norm": 0.6693439483642578, "learning_rate": 1.5255801937678158e-05, "loss": 1.0185, "step": 23890 }, { "epoch": 0.3442960658052062, "grad_norm": 0.692731499671936, "learning_rate": 1.5251832061554162e-05, "loss": 1.0394, "step": 23900 }, { "epoch": 0.3444401227365055, "grad_norm": 0.5547103881835938, "learning_rate": 1.5247861042178773e-05, "loss": 1.0183, "step": 23910 }, { "epoch": 0.3445841796678047, "grad_norm": 0.6336583495140076, "learning_rate": 1.5243888880416427e-05, "loss": 1.0462, "step": 23920 }, { "epoch": 0.34472823659910395, "grad_norm": 0.6708395481109619, "learning_rate": 1.5239915577131817e-05, "loss": 1.0276, "step": 23930 }, { "epoch": 0.3448722935304032, "grad_norm": 0.5331196188926697, "learning_rate": 1.5235941133189864e-05, "loss": 1.0367, "step": 23940 }, { "epoch": 0.3450163504617025, "grad_norm": 0.5452340245246887, "learning_rate": 1.5231965549455763e-05, "loss": 1.011, "step": 23950 }, { "epoch": 0.3451604073930017, "grad_norm": 0.5825306177139282, "learning_rate": 1.5227988826794932e-05, "loss": 1.0511, "step": 23960 }, { "epoch": 0.34530446432430095, "grad_norm": 0.594727098941803, "learning_rate": 1.5224010966073058e-05, "loss": 1.0342, "step": 23970 }, { "epoch": 0.3454485212556002, "grad_norm": 0.5588743090629578, "learning_rate": 1.5220031968156055e-05, "loss": 1.022, "step": 23980 }, { "epoch": 0.3455925781868995, "grad_norm": 0.5999211072921753, "learning_rate": 1.521605183391011e-05, "loss": 1.0344, "step": 23990 }, { "epoch": 0.3457366351181987, "grad_norm": 0.5761189460754395, "learning_rate": 1.5212070564201634e-05, "loss": 1.034, "step": 24000 }, { "epoch": 0.34588069204949795, "grad_norm": 0.5832364559173584, "learning_rate": 1.5208088159897296e-05, "loss": 1.0344, "step": 24010 }, { "epoch": 0.3460247489807972, "grad_norm": 0.5501433610916138, "learning_rate": 1.5204104621864014e-05, "loss": 1.034, "step": 24020 }, { "epoch": 0.3461688059120965, "grad_norm": 0.5442602634429932, "learning_rate": 1.520011995096894e-05, "loss": 1.052, "step": 24030 }, { "epoch": 0.3463128628433957, "grad_norm": 0.6079826951026917, "learning_rate": 1.5196134148079494e-05, "loss": 1.0486, "step": 24040 }, { "epoch": 0.34645691977469495, "grad_norm": 0.7023916244506836, "learning_rate": 1.519214721406332e-05, "loss": 1.028, "step": 24050 }, { "epoch": 0.3466009767059942, "grad_norm": 0.6172901391983032, "learning_rate": 1.5188159149788328e-05, "loss": 1.0546, "step": 24060 }, { "epoch": 0.3467450336372935, "grad_norm": 0.5976448059082031, "learning_rate": 1.5184169956122659e-05, "loss": 1.0409, "step": 24070 }, { "epoch": 0.3468890905685927, "grad_norm": 0.6255366206169128, "learning_rate": 1.5180179633934704e-05, "loss": 1.0403, "step": 24080 }, { "epoch": 0.34703314749989195, "grad_norm": 0.6168020963668823, "learning_rate": 1.5176188184093103e-05, "loss": 1.045, "step": 24090 }, { "epoch": 0.3471772044311912, "grad_norm": 0.550489604473114, "learning_rate": 1.5172195607466742e-05, "loss": 1.0155, "step": 24100 }, { "epoch": 0.3473212613624905, "grad_norm": 0.606447696685791, "learning_rate": 1.5168201904924748e-05, "loss": 1.0255, "step": 24110 }, { "epoch": 0.3474653182937897, "grad_norm": 0.5619000792503357, "learning_rate": 1.5164207077336492e-05, "loss": 1.0493, "step": 24120 }, { "epoch": 0.34760937522508895, "grad_norm": 0.5773958563804626, "learning_rate": 1.51602111255716e-05, "loss": 1.0324, "step": 24130 }, { "epoch": 0.3477534321563882, "grad_norm": 0.545673668384552, "learning_rate": 1.5156214050499927e-05, "loss": 1.0233, "step": 24140 }, { "epoch": 0.3478974890876875, "grad_norm": 0.5850799083709717, "learning_rate": 1.5152215852991585e-05, "loss": 1.0275, "step": 24150 }, { "epoch": 0.3480415460189867, "grad_norm": 0.5293511748313904, "learning_rate": 1.5148216533916929e-05, "loss": 1.02, "step": 24160 }, { "epoch": 0.34818560295028594, "grad_norm": 0.5992613434791565, "learning_rate": 1.5144216094146554e-05, "loss": 1.0212, "step": 24170 }, { "epoch": 0.3483296598815852, "grad_norm": 0.5557295680046082, "learning_rate": 1.5140214534551296e-05, "loss": 1.0324, "step": 24180 }, { "epoch": 0.34847371681288447, "grad_norm": 0.652137041091919, "learning_rate": 1.5136211856002244e-05, "loss": 1.0239, "step": 24190 }, { "epoch": 0.3486177737441837, "grad_norm": 0.5570051074028015, "learning_rate": 1.5132208059370727e-05, "loss": 1.0176, "step": 24200 }, { "epoch": 0.34876183067548294, "grad_norm": 0.49894794821739197, "learning_rate": 1.5128203145528316e-05, "loss": 1.0248, "step": 24210 }, { "epoch": 0.3489058876067822, "grad_norm": 0.731623113155365, "learning_rate": 1.512419711534682e-05, "loss": 1.0075, "step": 24220 }, { "epoch": 0.34904994453808147, "grad_norm": 0.9097506403923035, "learning_rate": 1.5120189969698302e-05, "loss": 1.0315, "step": 24230 }, { "epoch": 0.3491940014693807, "grad_norm": 0.6409125328063965, "learning_rate": 1.5116181709455062e-05, "loss": 1.0406, "step": 24240 }, { "epoch": 0.34933805840067994, "grad_norm": 0.5566344857215881, "learning_rate": 1.5112172335489641e-05, "loss": 1.0293, "step": 24250 }, { "epoch": 0.3494821153319792, "grad_norm": 0.5710030198097229, "learning_rate": 1.5108161848674821e-05, "loss": 1.0333, "step": 24260 }, { "epoch": 0.34962617226327847, "grad_norm": 0.5730279684066772, "learning_rate": 1.5104150249883637e-05, "loss": 1.0082, "step": 24270 }, { "epoch": 0.3497702291945777, "grad_norm": 0.6711406707763672, "learning_rate": 1.5100137539989356e-05, "loss": 1.0169, "step": 24280 }, { "epoch": 0.34991428612587694, "grad_norm": 0.5815849304199219, "learning_rate": 1.509612371986549e-05, "loss": 0.9991, "step": 24290 }, { "epoch": 0.3500583430571762, "grad_norm": 0.6280797123908997, "learning_rate": 1.5092108790385789e-05, "loss": 1.0372, "step": 24300 }, { "epoch": 0.35020239998847547, "grad_norm": 0.6219536662101746, "learning_rate": 1.5088092752424248e-05, "loss": 1.0266, "step": 24310 }, { "epoch": 0.3503464569197747, "grad_norm": 0.6277016401290894, "learning_rate": 1.508407560685511e-05, "loss": 1.0106, "step": 24320 }, { "epoch": 0.35049051385107394, "grad_norm": 0.6451740264892578, "learning_rate": 1.5080057354552842e-05, "loss": 1.0286, "step": 24330 }, { "epoch": 0.3506345707823732, "grad_norm": 0.5524730086326599, "learning_rate": 1.5076037996392172e-05, "loss": 1.021, "step": 24340 }, { "epoch": 0.35077862771367246, "grad_norm": 0.7687239050865173, "learning_rate": 1.5072017533248051e-05, "loss": 1.0272, "step": 24350 }, { "epoch": 0.35092268464497167, "grad_norm": 0.6192282438278198, "learning_rate": 1.5067995965995686e-05, "loss": 1.0272, "step": 24360 }, { "epoch": 0.35106674157627094, "grad_norm": 0.658404529094696, "learning_rate": 1.5063973295510508e-05, "loss": 1.0251, "step": 24370 }, { "epoch": 0.3512107985075702, "grad_norm": 0.6252663731575012, "learning_rate": 1.5059949522668201e-05, "loss": 1.0236, "step": 24380 }, { "epoch": 0.35135485543886946, "grad_norm": 0.5990842580795288, "learning_rate": 1.5055924648344688e-05, "loss": 1.0096, "step": 24390 }, { "epoch": 0.35149891237016867, "grad_norm": 0.5357429385185242, "learning_rate": 1.5051898673416121e-05, "loss": 1.0309, "step": 24400 }, { "epoch": 0.35164296930146793, "grad_norm": 0.5470159649848938, "learning_rate": 1.5047871598758908e-05, "loss": 1.0331, "step": 24410 }, { "epoch": 0.3517870262327672, "grad_norm": 0.6631460785865784, "learning_rate": 1.504384342524968e-05, "loss": 1.021, "step": 24420 }, { "epoch": 0.35193108316406646, "grad_norm": 0.5246572494506836, "learning_rate": 1.5039814153765324e-05, "loss": 1.0232, "step": 24430 }, { "epoch": 0.35207514009536567, "grad_norm": 0.5777121186256409, "learning_rate": 1.5035783785182945e-05, "loss": 1.0422, "step": 24440 }, { "epoch": 0.35221919702666493, "grad_norm": 0.6213065981864929, "learning_rate": 1.5031752320379907e-05, "loss": 1.0293, "step": 24450 }, { "epoch": 0.3523632539579642, "grad_norm": 0.5165483951568604, "learning_rate": 1.50277197602338e-05, "loss": 1.0293, "step": 24460 }, { "epoch": 0.35250731088926346, "grad_norm": 0.5039709806442261, "learning_rate": 1.5023686105622464e-05, "loss": 1.0158, "step": 24470 }, { "epoch": 0.35265136782056267, "grad_norm": 0.6067390441894531, "learning_rate": 1.5019651357423959e-05, "loss": 1.0223, "step": 24480 }, { "epoch": 0.35279542475186193, "grad_norm": 0.481794536113739, "learning_rate": 1.5015615516516602e-05, "loss": 1.0269, "step": 24490 }, { "epoch": 0.3529394816831612, "grad_norm": 0.6830320954322815, "learning_rate": 1.5011578583778936e-05, "loss": 1.0401, "step": 24500 }, { "epoch": 0.35308353861446046, "grad_norm": 0.608053982257843, "learning_rate": 1.5007540560089746e-05, "loss": 1.0263, "step": 24510 }, { "epoch": 0.35322759554575967, "grad_norm": 0.6035851836204529, "learning_rate": 1.500350144632806e-05, "loss": 1.0097, "step": 24520 }, { "epoch": 0.35337165247705893, "grad_norm": 0.5260176658630371, "learning_rate": 1.4999461243373128e-05, "loss": 1.0323, "step": 24530 }, { "epoch": 0.3535157094083582, "grad_norm": 0.6285433173179626, "learning_rate": 1.4995419952104454e-05, "loss": 1.0178, "step": 24540 }, { "epoch": 0.35365976633965746, "grad_norm": 0.744941234588623, "learning_rate": 1.4991377573401766e-05, "loss": 1.0183, "step": 24550 }, { "epoch": 0.35380382327095666, "grad_norm": 0.6387519240379333, "learning_rate": 1.4987334108145038e-05, "loss": 1.0425, "step": 24560 }, { "epoch": 0.3539478802022559, "grad_norm": 0.49635758996009827, "learning_rate": 1.4983289557214472e-05, "loss": 1.0217, "step": 24570 }, { "epoch": 0.3540919371335552, "grad_norm": 0.5358009934425354, "learning_rate": 1.4979243921490518e-05, "loss": 1.0219, "step": 24580 }, { "epoch": 0.35423599406485445, "grad_norm": 0.586286187171936, "learning_rate": 1.4975197201853849e-05, "loss": 1.0217, "step": 24590 }, { "epoch": 0.35438005099615366, "grad_norm": 0.4951419234275818, "learning_rate": 1.4971149399185381e-05, "loss": 1.0422, "step": 24600 }, { "epoch": 0.3545241079274529, "grad_norm": 0.5345513820648193, "learning_rate": 1.4967100514366269e-05, "loss": 1.0154, "step": 24610 }, { "epoch": 0.3546681648587522, "grad_norm": 0.660944402217865, "learning_rate": 1.4963050548277893e-05, "loss": 1.0319, "step": 24620 }, { "epoch": 0.35481222179005145, "grad_norm": 0.5842607021331787, "learning_rate": 1.4958999501801879e-05, "loss": 1.0256, "step": 24630 }, { "epoch": 0.35495627872135066, "grad_norm": 0.5948303937911987, "learning_rate": 1.495494737582008e-05, "loss": 1.0074, "step": 24640 }, { "epoch": 0.3551003356526499, "grad_norm": 0.5562019944190979, "learning_rate": 1.4950894171214596e-05, "loss": 1.0118, "step": 24650 }, { "epoch": 0.3552443925839492, "grad_norm": 0.6218870878219604, "learning_rate": 1.4946839888867748e-05, "loss": 1.025, "step": 24660 }, { "epoch": 0.35538844951524845, "grad_norm": 0.6354550123214722, "learning_rate": 1.4942784529662094e-05, "loss": 1.0077, "step": 24670 }, { "epoch": 0.35553250644654766, "grad_norm": 0.6161721348762512, "learning_rate": 1.493872809448044e-05, "loss": 1.0306, "step": 24680 }, { "epoch": 0.3556765633778469, "grad_norm": 0.5358957052230835, "learning_rate": 1.4934670584205804e-05, "loss": 1.0287, "step": 24690 }, { "epoch": 0.3558206203091462, "grad_norm": 0.6816520094871521, "learning_rate": 1.4930611999721457e-05, "loss": 1.0331, "step": 24700 }, { "epoch": 0.35596467724044545, "grad_norm": 0.5539494752883911, "learning_rate": 1.4926552341910894e-05, "loss": 1.0369, "step": 24710 }, { "epoch": 0.35610873417174466, "grad_norm": 0.5865591168403625, "learning_rate": 1.492249161165785e-05, "loss": 1.0138, "step": 24720 }, { "epoch": 0.3562527911030439, "grad_norm": 0.5584275722503662, "learning_rate": 1.4918429809846289e-05, "loss": 1.0406, "step": 24730 }, { "epoch": 0.3563968480343432, "grad_norm": 0.57304447889328, "learning_rate": 1.4914366937360405e-05, "loss": 1.0351, "step": 24740 }, { "epoch": 0.35654090496564245, "grad_norm": 0.5285123586654663, "learning_rate": 1.4910302995084634e-05, "loss": 1.0168, "step": 24750 }, { "epoch": 0.35668496189694165, "grad_norm": 0.568499743938446, "learning_rate": 1.490623798390364e-05, "loss": 1.0351, "step": 24760 }, { "epoch": 0.3568290188282409, "grad_norm": 0.5260944366455078, "learning_rate": 1.4902171904702316e-05, "loss": 1.0377, "step": 24770 }, { "epoch": 0.3569730757595402, "grad_norm": 0.5984960794448853, "learning_rate": 1.4898104758365794e-05, "loss": 1.0291, "step": 24780 }, { "epoch": 0.35711713269083944, "grad_norm": 0.6734877228736877, "learning_rate": 1.4894036545779437e-05, "loss": 1.0336, "step": 24790 }, { "epoch": 0.35726118962213865, "grad_norm": 0.6885083913803101, "learning_rate": 1.488996726782884e-05, "loss": 1.0298, "step": 24800 }, { "epoch": 0.3574052465534379, "grad_norm": 0.610966682434082, "learning_rate": 1.488589692539982e-05, "loss": 1.0235, "step": 24810 }, { "epoch": 0.3575493034847372, "grad_norm": 0.5949985980987549, "learning_rate": 1.4881825519378445e-05, "loss": 1.0344, "step": 24820 }, { "epoch": 0.35769336041603644, "grad_norm": 0.5474660396575928, "learning_rate": 1.4877753050650996e-05, "loss": 1.0556, "step": 24830 }, { "epoch": 0.35783741734733565, "grad_norm": 0.6285277605056763, "learning_rate": 1.4873679520103999e-05, "loss": 0.9982, "step": 24840 }, { "epoch": 0.3579814742786349, "grad_norm": 0.5509119033813477, "learning_rate": 1.4869604928624202e-05, "loss": 1.0232, "step": 24850 }, { "epoch": 0.3581255312099342, "grad_norm": 0.5181789398193359, "learning_rate": 1.486552927709859e-05, "loss": 1.0532, "step": 24860 }, { "epoch": 0.35826958814123344, "grad_norm": 0.5690596103668213, "learning_rate": 1.4861452566414371e-05, "loss": 1.0294, "step": 24870 }, { "epoch": 0.35841364507253265, "grad_norm": 0.57747882604599, "learning_rate": 1.4857374797458992e-05, "loss": 1.007, "step": 24880 }, { "epoch": 0.3585577020038319, "grad_norm": 0.5637708902359009, "learning_rate": 1.485329597112013e-05, "loss": 1.0343, "step": 24890 }, { "epoch": 0.3587017589351312, "grad_norm": 0.5401881337165833, "learning_rate": 1.4849216088285686e-05, "loss": 1.0213, "step": 24900 }, { "epoch": 0.35884581586643044, "grad_norm": 0.5868175029754639, "learning_rate": 1.4845135149843795e-05, "loss": 1.0397, "step": 24910 }, { "epoch": 0.35898987279772965, "grad_norm": 0.5615954399108887, "learning_rate": 1.4841053156682817e-05, "loss": 1.0211, "step": 24920 }, { "epoch": 0.3591339297290289, "grad_norm": 0.53164142370224, "learning_rate": 1.4836970109691351e-05, "loss": 1.026, "step": 24930 }, { "epoch": 0.3592779866603282, "grad_norm": 0.5693374872207642, "learning_rate": 1.4832886009758221e-05, "loss": 1.0114, "step": 24940 }, { "epoch": 0.35942204359162744, "grad_norm": 0.5792744159698486, "learning_rate": 1.4828800857772472e-05, "loss": 1.0355, "step": 24950 }, { "epoch": 0.35956610052292665, "grad_norm": 0.5707711577415466, "learning_rate": 1.4824714654623391e-05, "loss": 1.0196, "step": 24960 }, { "epoch": 0.3597101574542259, "grad_norm": 0.5481793880462646, "learning_rate": 1.4820627401200487e-05, "loss": 1.0078, "step": 24970 }, { "epoch": 0.3598542143855252, "grad_norm": 0.5169560313224792, "learning_rate": 1.4816539098393498e-05, "loss": 1.017, "step": 24980 }, { "epoch": 0.35999827131682444, "grad_norm": 0.5783670544624329, "learning_rate": 1.4812449747092392e-05, "loss": 1.0293, "step": 24990 }, { "epoch": 0.36014232824812364, "grad_norm": 0.577237069606781, "learning_rate": 1.4808359348187365e-05, "loss": 1.0348, "step": 25000 }, { "epoch": 0.3602863851794229, "grad_norm": 0.5775808691978455, "learning_rate": 1.4804267902568838e-05, "loss": 1.0284, "step": 25010 }, { "epoch": 0.36043044211072217, "grad_norm": 0.6013045310974121, "learning_rate": 1.4800175411127464e-05, "loss": 1.0579, "step": 25020 }, { "epoch": 0.3605744990420214, "grad_norm": 0.5317995548248291, "learning_rate": 1.479608187475412e-05, "loss": 1.0012, "step": 25030 }, { "epoch": 0.36071855597332064, "grad_norm": 0.684230625629425, "learning_rate": 1.4791987294339915e-05, "loss": 1.0258, "step": 25040 }, { "epoch": 0.3608626129046199, "grad_norm": 0.6365956664085388, "learning_rate": 1.4787891670776182e-05, "loss": 1.0191, "step": 25050 }, { "epoch": 0.36100666983591917, "grad_norm": 0.5982478857040405, "learning_rate": 1.478379500495448e-05, "loss": 1.026, "step": 25060 }, { "epoch": 0.3611507267672184, "grad_norm": 0.554524838924408, "learning_rate": 1.47796972977666e-05, "loss": 1.0095, "step": 25070 }, { "epoch": 0.36129478369851764, "grad_norm": 0.6101455688476562, "learning_rate": 1.4775598550104549e-05, "loss": 1.0378, "step": 25080 }, { "epoch": 0.3614388406298169, "grad_norm": 0.6337770223617554, "learning_rate": 1.4771498762860582e-05, "loss": 1.0378, "step": 25090 }, { "epoch": 0.36158289756111617, "grad_norm": 0.5765113234519958, "learning_rate": 1.4767397936927148e-05, "loss": 1.0114, "step": 25100 }, { "epoch": 0.3617269544924154, "grad_norm": 0.583540678024292, "learning_rate": 1.4763296073196956e-05, "loss": 1.0329, "step": 25110 }, { "epoch": 0.36187101142371464, "grad_norm": 0.5872902870178223, "learning_rate": 1.4759193172562916e-05, "loss": 1.0399, "step": 25120 }, { "epoch": 0.3620150683550139, "grad_norm": 0.6090887188911438, "learning_rate": 1.4755089235918175e-05, "loss": 1.0135, "step": 25130 }, { "epoch": 0.36215912528631317, "grad_norm": 0.6019402146339417, "learning_rate": 1.4750984264156103e-05, "loss": 1.0042, "step": 25140 }, { "epoch": 0.3623031822176124, "grad_norm": 0.6022039651870728, "learning_rate": 1.4746878258170297e-05, "loss": 1.0419, "step": 25150 }, { "epoch": 0.36244723914891164, "grad_norm": 0.5955145955085754, "learning_rate": 1.4742771218854582e-05, "loss": 1.0424, "step": 25160 }, { "epoch": 0.3625912960802109, "grad_norm": 0.5031068921089172, "learning_rate": 1.4738663147102994e-05, "loss": 1.0134, "step": 25170 }, { "epoch": 0.36273535301151016, "grad_norm": 0.6688017249107361, "learning_rate": 1.473455404380981e-05, "loss": 1.0289, "step": 25180 }, { "epoch": 0.36287940994280937, "grad_norm": 0.5259919762611389, "learning_rate": 1.4730443909869522e-05, "loss": 1.0243, "step": 25190 }, { "epoch": 0.36302346687410864, "grad_norm": 0.587336003780365, "learning_rate": 1.4726332746176855e-05, "loss": 1.0306, "step": 25200 }, { "epoch": 0.3631675238054079, "grad_norm": 0.6514984369277954, "learning_rate": 1.4722220553626747e-05, "loss": 1.0451, "step": 25210 }, { "epoch": 0.36331158073670716, "grad_norm": 0.6073031425476074, "learning_rate": 1.4718107333114366e-05, "loss": 1.0448, "step": 25220 }, { "epoch": 0.36345563766800637, "grad_norm": 0.5726466178894043, "learning_rate": 1.4713993085535109e-05, "loss": 1.0334, "step": 25230 }, { "epoch": 0.36359969459930563, "grad_norm": 0.5648417472839355, "learning_rate": 1.4709877811784586e-05, "loss": 1.0026, "step": 25240 }, { "epoch": 0.3637437515306049, "grad_norm": 0.6145786046981812, "learning_rate": 1.4705761512758634e-05, "loss": 1.038, "step": 25250 }, { "epoch": 0.36388780846190416, "grad_norm": 0.6085208654403687, "learning_rate": 1.4701644189353316e-05, "loss": 1.0099, "step": 25260 }, { "epoch": 0.36403186539320337, "grad_norm": 0.5955535173416138, "learning_rate": 1.4697525842464918e-05, "loss": 1.0114, "step": 25270 }, { "epoch": 0.36417592232450263, "grad_norm": 0.5815196633338928, "learning_rate": 1.4693406472989949e-05, "loss": 1.031, "step": 25280 }, { "epoch": 0.3643199792558019, "grad_norm": 0.5595164895057678, "learning_rate": 1.4689286081825135e-05, "loss": 1.019, "step": 25290 }, { "epoch": 0.36446403618710116, "grad_norm": 0.5178977847099304, "learning_rate": 1.4685164669867429e-05, "loss": 1.0332, "step": 25300 }, { "epoch": 0.36460809311840037, "grad_norm": 0.5986042022705078, "learning_rate": 1.4681042238014007e-05, "loss": 1.0385, "step": 25310 }, { "epoch": 0.36475215004969963, "grad_norm": 0.5248829126358032, "learning_rate": 1.4676918787162264e-05, "loss": 1.0254, "step": 25320 }, { "epoch": 0.3648962069809989, "grad_norm": 0.8148512244224548, "learning_rate": 1.467279431820982e-05, "loss": 1.0229, "step": 25330 }, { "epoch": 0.36504026391229816, "grad_norm": 0.6069955825805664, "learning_rate": 1.4668668832054513e-05, "loss": 1.0203, "step": 25340 }, { "epoch": 0.36518432084359737, "grad_norm": 0.5385414958000183, "learning_rate": 1.4664542329594406e-05, "loss": 1.0416, "step": 25350 }, { "epoch": 0.36532837777489663, "grad_norm": 0.5793408751487732, "learning_rate": 1.4660414811727782e-05, "loss": 1.0406, "step": 25360 }, { "epoch": 0.3654724347061959, "grad_norm": 0.6187202334403992, "learning_rate": 1.4656286279353142e-05, "loss": 1.0439, "step": 25370 }, { "epoch": 0.36561649163749516, "grad_norm": 0.5589693784713745, "learning_rate": 1.4652156733369216e-05, "loss": 1.0141, "step": 25380 }, { "epoch": 0.36576054856879436, "grad_norm": 0.5816443562507629, "learning_rate": 1.4648026174674945e-05, "loss": 1.0207, "step": 25390 }, { "epoch": 0.3659046055000936, "grad_norm": 0.5740160346031189, "learning_rate": 1.4643894604169493e-05, "loss": 1.0227, "step": 25400 }, { "epoch": 0.3660486624313929, "grad_norm": 0.6110251545906067, "learning_rate": 1.4639762022752248e-05, "loss": 1.0352, "step": 25410 }, { "epoch": 0.36619271936269215, "grad_norm": 0.6512191891670227, "learning_rate": 1.463562843132282e-05, "loss": 1.0159, "step": 25420 }, { "epoch": 0.36633677629399136, "grad_norm": 0.6277971267700195, "learning_rate": 1.4631493830781027e-05, "loss": 1.0473, "step": 25430 }, { "epoch": 0.3664808332252906, "grad_norm": 0.5402994751930237, "learning_rate": 1.4627358222026919e-05, "loss": 1.0298, "step": 25440 }, { "epoch": 0.3666248901565899, "grad_norm": 0.593584418296814, "learning_rate": 1.4623221605960762e-05, "loss": 1.0438, "step": 25450 }, { "epoch": 0.36676894708788915, "grad_norm": 0.5621562600135803, "learning_rate": 1.461908398348304e-05, "loss": 1.0243, "step": 25460 }, { "epoch": 0.36691300401918836, "grad_norm": 0.6253653168678284, "learning_rate": 1.4614945355494452e-05, "loss": 1.0258, "step": 25470 }, { "epoch": 0.3670570609504876, "grad_norm": 0.6140376925468445, "learning_rate": 1.4610805722895925e-05, "loss": 1.0136, "step": 25480 }, { "epoch": 0.3672011178817869, "grad_norm": 0.5688601732254028, "learning_rate": 1.4606665086588598e-05, "loss": 1.0476, "step": 25490 }, { "epoch": 0.36734517481308615, "grad_norm": 0.6936155557632446, "learning_rate": 1.460252344747383e-05, "loss": 1.019, "step": 25500 }, { "epoch": 0.36748923174438536, "grad_norm": 0.5645762085914612, "learning_rate": 1.45983808064532e-05, "loss": 1.0363, "step": 25510 }, { "epoch": 0.3676332886756846, "grad_norm": 0.7030591368675232, "learning_rate": 1.4594237164428503e-05, "loss": 1.0098, "step": 25520 }, { "epoch": 0.3677773456069839, "grad_norm": 0.6672507524490356, "learning_rate": 1.4590092522301755e-05, "loss": 0.9895, "step": 25530 }, { "epoch": 0.36792140253828315, "grad_norm": 0.6040781736373901, "learning_rate": 1.4585946880975182e-05, "loss": 1.0227, "step": 25540 }, { "epoch": 0.36806545946958236, "grad_norm": 0.617346465587616, "learning_rate": 1.4581800241351237e-05, "loss": 1.0323, "step": 25550 }, { "epoch": 0.3682095164008816, "grad_norm": 0.5223129987716675, "learning_rate": 1.4577652604332586e-05, "loss": 1.0218, "step": 25560 }, { "epoch": 0.3683535733321809, "grad_norm": 0.5572784543037415, "learning_rate": 1.4573503970822112e-05, "loss": 1.0052, "step": 25570 }, { "epoch": 0.36849763026348015, "grad_norm": 0.6087919473648071, "learning_rate": 1.4569354341722912e-05, "loss": 1.0346, "step": 25580 }, { "epoch": 0.36864168719477935, "grad_norm": 0.600691020488739, "learning_rate": 1.456520371793831e-05, "loss": 1.0277, "step": 25590 }, { "epoch": 0.3687857441260786, "grad_norm": 0.6251884698867798, "learning_rate": 1.4561052100371834e-05, "loss": 1.0323, "step": 25600 }, { "epoch": 0.3689298010573779, "grad_norm": 0.5772163271903992, "learning_rate": 1.4556899489927236e-05, "loss": 1.0405, "step": 25610 }, { "epoch": 0.36907385798867715, "grad_norm": 0.5718433856964111, "learning_rate": 1.4552745887508481e-05, "loss": 1.0451, "step": 25620 }, { "epoch": 0.36921791491997635, "grad_norm": 0.5430508255958557, "learning_rate": 1.4548591294019751e-05, "loss": 1.0465, "step": 25630 }, { "epoch": 0.3693619718512756, "grad_norm": 0.640229344367981, "learning_rate": 1.4544435710365448e-05, "loss": 1.0179, "step": 25640 }, { "epoch": 0.3695060287825749, "grad_norm": 0.46533769369125366, "learning_rate": 1.4540279137450176e-05, "loss": 1.007, "step": 25650 }, { "epoch": 0.36965008571387414, "grad_norm": 0.588024914264679, "learning_rate": 1.4536121576178772e-05, "loss": 1.0287, "step": 25660 }, { "epoch": 0.36979414264517335, "grad_norm": 0.5487402081489563, "learning_rate": 1.4531963027456274e-05, "loss": 1.0579, "step": 25670 }, { "epoch": 0.3699381995764726, "grad_norm": 0.6008890867233276, "learning_rate": 1.4527803492187947e-05, "loss": 1.0253, "step": 25680 }, { "epoch": 0.3700822565077719, "grad_norm": 0.4887695610523224, "learning_rate": 1.452364297127926e-05, "loss": 1.0325, "step": 25690 }, { "epoch": 0.37022631343907114, "grad_norm": 0.5441385507583618, "learning_rate": 1.4519481465635899e-05, "loss": 1.025, "step": 25700 }, { "epoch": 0.37037037037037035, "grad_norm": 0.5769540667533875, "learning_rate": 1.4515318976163773e-05, "loss": 1.0116, "step": 25710 }, { "epoch": 0.3705144273016696, "grad_norm": 0.6632396578788757, "learning_rate": 1.4511155503768993e-05, "loss": 1.0221, "step": 25720 }, { "epoch": 0.3706584842329689, "grad_norm": 0.5847335457801819, "learning_rate": 1.450699104935789e-05, "loss": 1.0506, "step": 25730 }, { "epoch": 0.37080254116426814, "grad_norm": 0.5339410305023193, "learning_rate": 1.450282561383701e-05, "loss": 1.0105, "step": 25740 }, { "epoch": 0.37094659809556735, "grad_norm": 0.540700376033783, "learning_rate": 1.4498659198113112e-05, "loss": 1.0405, "step": 25750 }, { "epoch": 0.3710906550268666, "grad_norm": 0.5502351522445679, "learning_rate": 1.4494491803093164e-05, "loss": 1.0286, "step": 25760 }, { "epoch": 0.3712347119581659, "grad_norm": 0.6045182943344116, "learning_rate": 1.4490323429684353e-05, "loss": 1.0259, "step": 25770 }, { "epoch": 0.37137876888946514, "grad_norm": 0.5650545358657837, "learning_rate": 1.4486154078794077e-05, "loss": 1.0389, "step": 25780 }, { "epoch": 0.37152282582076435, "grad_norm": 0.6249701976776123, "learning_rate": 1.4481983751329942e-05, "loss": 1.0472, "step": 25790 }, { "epoch": 0.3716668827520636, "grad_norm": 0.541960597038269, "learning_rate": 1.4477812448199773e-05, "loss": 1.0154, "step": 25800 }, { "epoch": 0.3718109396833629, "grad_norm": 0.5343725681304932, "learning_rate": 1.4473640170311604e-05, "loss": 1.0468, "step": 25810 }, { "epoch": 0.37195499661466214, "grad_norm": 0.6055474877357483, "learning_rate": 1.4469466918573688e-05, "loss": 1.0401, "step": 25820 }, { "epoch": 0.37209905354596134, "grad_norm": 0.5726013779640198, "learning_rate": 1.446529269389448e-05, "loss": 1.0388, "step": 25830 }, { "epoch": 0.3722431104772606, "grad_norm": 0.706449031829834, "learning_rate": 1.4461117497182649e-05, "loss": 1.0231, "step": 25840 }, { "epoch": 0.37238716740855987, "grad_norm": 0.599277675151825, "learning_rate": 1.445694132934708e-05, "loss": 1.0174, "step": 25850 }, { "epoch": 0.37253122433985913, "grad_norm": 0.6309727430343628, "learning_rate": 1.4452764191296868e-05, "loss": 1.0354, "step": 25860 }, { "epoch": 0.37267528127115834, "grad_norm": 0.5788888335227966, "learning_rate": 1.4448586083941318e-05, "loss": 1.0409, "step": 25870 }, { "epoch": 0.3728193382024576, "grad_norm": 0.5666493773460388, "learning_rate": 1.4444407008189944e-05, "loss": 1.0122, "step": 25880 }, { "epoch": 0.37296339513375687, "grad_norm": 0.5751312971115112, "learning_rate": 1.444022696495248e-05, "loss": 1.0414, "step": 25890 }, { "epoch": 0.37310745206505613, "grad_norm": 0.5846768021583557, "learning_rate": 1.4436045955138853e-05, "loss": 1.0244, "step": 25900 }, { "epoch": 0.37325150899635534, "grad_norm": 0.5709925293922424, "learning_rate": 1.443186397965922e-05, "loss": 1.0162, "step": 25910 }, { "epoch": 0.3733955659276546, "grad_norm": 0.639036238193512, "learning_rate": 1.4427681039423933e-05, "loss": 1.0493, "step": 25920 }, { "epoch": 0.37353962285895387, "grad_norm": 0.5406671762466431, "learning_rate": 1.4423497135343565e-05, "loss": 1.0402, "step": 25930 }, { "epoch": 0.37368367979025313, "grad_norm": 0.5004711747169495, "learning_rate": 1.4419312268328893e-05, "loss": 0.9946, "step": 25940 }, { "epoch": 0.37382773672155234, "grad_norm": 0.5443496108055115, "learning_rate": 1.4415126439290905e-05, "loss": 1.0169, "step": 25950 }, { "epoch": 0.3739717936528516, "grad_norm": 0.6194301843643188, "learning_rate": 1.4410939649140799e-05, "loss": 0.999, "step": 25960 }, { "epoch": 0.37411585058415087, "grad_norm": 0.6406512260437012, "learning_rate": 1.4406751898789979e-05, "loss": 1.0533, "step": 25970 }, { "epoch": 0.37425990751545013, "grad_norm": 0.7409844994544983, "learning_rate": 1.4402563189150062e-05, "loss": 1.0251, "step": 25980 }, { "epoch": 0.37440396444674934, "grad_norm": 0.5493165254592896, "learning_rate": 1.439837352113287e-05, "loss": 1.0166, "step": 25990 }, { "epoch": 0.3745480213780486, "grad_norm": 0.5840917825698853, "learning_rate": 1.4394182895650442e-05, "loss": 1.0359, "step": 26000 }, { "epoch": 0.37469207830934786, "grad_norm": 0.5663790702819824, "learning_rate": 1.4389991313615014e-05, "loss": 1.0124, "step": 26010 }, { "epoch": 0.37483613524064713, "grad_norm": 0.6118389964103699, "learning_rate": 1.4385798775939034e-05, "loss": 0.9995, "step": 26020 }, { "epoch": 0.37498019217194634, "grad_norm": 0.6556876301765442, "learning_rate": 1.4381605283535167e-05, "loss": 1.0479, "step": 26030 }, { "epoch": 0.3751242491032456, "grad_norm": 0.5584632754325867, "learning_rate": 1.4377410837316273e-05, "loss": 1.0507, "step": 26040 }, { "epoch": 0.37526830603454486, "grad_norm": 0.5240368843078613, "learning_rate": 1.4373215438195427e-05, "loss": 1.0178, "step": 26050 }, { "epoch": 0.3754123629658441, "grad_norm": 0.5103548169136047, "learning_rate": 1.4369019087085905e-05, "loss": 1.0404, "step": 26060 }, { "epoch": 0.37555641989714333, "grad_norm": 0.5511793494224548, "learning_rate": 1.43648217849012e-05, "loss": 1.0203, "step": 26070 }, { "epoch": 0.3757004768284426, "grad_norm": 0.5841732621192932, "learning_rate": 1.4360623532555007e-05, "loss": 1.0362, "step": 26080 }, { "epoch": 0.37584453375974186, "grad_norm": 0.6015898585319519, "learning_rate": 1.4356424330961223e-05, "loss": 1.0231, "step": 26090 }, { "epoch": 0.3759885906910411, "grad_norm": 0.5784769654273987, "learning_rate": 1.435222418103396e-05, "loss": 1.0402, "step": 26100 }, { "epoch": 0.37613264762234033, "grad_norm": 0.48664385080337524, "learning_rate": 1.4348023083687533e-05, "loss": 1.0176, "step": 26110 }, { "epoch": 0.3762767045536396, "grad_norm": 0.7868779897689819, "learning_rate": 1.4343821039836462e-05, "loss": 1.0208, "step": 26120 }, { "epoch": 0.37642076148493886, "grad_norm": 0.6292771100997925, "learning_rate": 1.433961805039547e-05, "loss": 1.0183, "step": 26130 }, { "epoch": 0.3765648184162381, "grad_norm": 0.5502007603645325, "learning_rate": 1.4335414116279495e-05, "loss": 1.0462, "step": 26140 }, { "epoch": 0.37670887534753733, "grad_norm": 0.5436489582061768, "learning_rate": 1.4331209238403676e-05, "loss": 1.0388, "step": 26150 }, { "epoch": 0.3768529322788366, "grad_norm": 0.603003740310669, "learning_rate": 1.4327003417683354e-05, "loss": 1.0225, "step": 26160 }, { "epoch": 0.37699698921013586, "grad_norm": 0.5382051467895508, "learning_rate": 1.4322796655034078e-05, "loss": 1.0256, "step": 26170 }, { "epoch": 0.3771410461414351, "grad_norm": 0.5649926066398621, "learning_rate": 1.4318588951371603e-05, "loss": 1.0253, "step": 26180 }, { "epoch": 0.37728510307273433, "grad_norm": 0.7416114807128906, "learning_rate": 1.4314380307611892e-05, "loss": 1.0275, "step": 26190 }, { "epoch": 0.3774291600040336, "grad_norm": 0.6471835374832153, "learning_rate": 1.4310170724671102e-05, "loss": 1.0522, "step": 26200 }, { "epoch": 0.37757321693533286, "grad_norm": 0.5750507712364197, "learning_rate": 1.4305960203465605e-05, "loss": 1.0319, "step": 26210 }, { "epoch": 0.3777172738666321, "grad_norm": 0.5253751873970032, "learning_rate": 1.4301748744911972e-05, "loss": 1.0278, "step": 26220 }, { "epoch": 0.3778613307979313, "grad_norm": 0.655426561832428, "learning_rate": 1.4297536349926976e-05, "loss": 1.0328, "step": 26230 }, { "epoch": 0.3780053877292306, "grad_norm": 0.5094215869903564, "learning_rate": 1.4293323019427607e-05, "loss": 1.0205, "step": 26240 }, { "epoch": 0.37814944466052985, "grad_norm": 0.6158928871154785, "learning_rate": 1.428910875433104e-05, "loss": 1.0379, "step": 26250 }, { "epoch": 0.3782935015918291, "grad_norm": 0.6462520360946655, "learning_rate": 1.4284893555554668e-05, "loss": 1.0215, "step": 26260 }, { "epoch": 0.3784375585231283, "grad_norm": 0.6016168594360352, "learning_rate": 1.4280677424016076e-05, "loss": 1.0268, "step": 26270 }, { "epoch": 0.3785816154544276, "grad_norm": 0.6505822539329529, "learning_rate": 1.4276460360633065e-05, "loss": 1.0283, "step": 26280 }, { "epoch": 0.37872567238572685, "grad_norm": 0.5918681025505066, "learning_rate": 1.4272242366323622e-05, "loss": 1.0257, "step": 26290 }, { "epoch": 0.3788697293170261, "grad_norm": 0.5184618830680847, "learning_rate": 1.4268023442005954e-05, "loss": 1.022, "step": 26300 }, { "epoch": 0.3790137862483253, "grad_norm": 0.442764014005661, "learning_rate": 1.4263803588598459e-05, "loss": 1.0165, "step": 26310 }, { "epoch": 0.3791578431796246, "grad_norm": 0.5952311754226685, "learning_rate": 1.4259582807019743e-05, "loss": 1.0376, "step": 26320 }, { "epoch": 0.37930190011092385, "grad_norm": 0.6148770451545715, "learning_rate": 1.4255361098188614e-05, "loss": 1.0247, "step": 26330 }, { "epoch": 0.3794459570422231, "grad_norm": 0.5614669919013977, "learning_rate": 1.4251138463024073e-05, "loss": 1.0097, "step": 26340 }, { "epoch": 0.3795900139735223, "grad_norm": 0.7027855515480042, "learning_rate": 1.4246914902445332e-05, "loss": 1.0456, "step": 26350 }, { "epoch": 0.3797340709048216, "grad_norm": 0.5557788014411926, "learning_rate": 1.4242690417371803e-05, "loss": 1.0262, "step": 26360 }, { "epoch": 0.37987812783612085, "grad_norm": 0.5281688570976257, "learning_rate": 1.4238465008723103e-05, "loss": 1.0073, "step": 26370 }, { "epoch": 0.38002218476742006, "grad_norm": 0.640178918838501, "learning_rate": 1.4234238677419037e-05, "loss": 1.0119, "step": 26380 }, { "epoch": 0.3801662416987193, "grad_norm": 0.6145185828208923, "learning_rate": 1.423001142437962e-05, "loss": 1.0468, "step": 26390 }, { "epoch": 0.3803102986300186, "grad_norm": 0.5753335356712341, "learning_rate": 1.4225783250525072e-05, "loss": 1.0185, "step": 26400 }, { "epoch": 0.38045435556131785, "grad_norm": 0.6809263825416565, "learning_rate": 1.4221554156775806e-05, "loss": 1.0168, "step": 26410 }, { "epoch": 0.38059841249261706, "grad_norm": 0.5877491235733032, "learning_rate": 1.4217324144052433e-05, "loss": 1.0215, "step": 26420 }, { "epoch": 0.3807424694239163, "grad_norm": 0.5531141757965088, "learning_rate": 1.4213093213275773e-05, "loss": 1.0352, "step": 26430 }, { "epoch": 0.3808865263552156, "grad_norm": 0.5586743950843811, "learning_rate": 1.4208861365366844e-05, "loss": 1.0197, "step": 26440 }, { "epoch": 0.38103058328651485, "grad_norm": 0.5900185108184814, "learning_rate": 1.420462860124685e-05, "loss": 1.029, "step": 26450 }, { "epoch": 0.38117464021781405, "grad_norm": 0.6241628527641296, "learning_rate": 1.420039492183722e-05, "loss": 1.0361, "step": 26460 }, { "epoch": 0.3813186971491133, "grad_norm": 0.6917476058006287, "learning_rate": 1.4196160328059554e-05, "loss": 1.0499, "step": 26470 }, { "epoch": 0.3814627540804126, "grad_norm": 0.676550030708313, "learning_rate": 1.4191924820835671e-05, "loss": 1.0091, "step": 26480 }, { "epoch": 0.38160681101171184, "grad_norm": 0.5810500383377075, "learning_rate": 1.4187688401087585e-05, "loss": 1.0176, "step": 26490 }, { "epoch": 0.38175086794301105, "grad_norm": 0.49956226348876953, "learning_rate": 1.4183451069737502e-05, "loss": 1.0275, "step": 26500 }, { "epoch": 0.3818949248743103, "grad_norm": 0.5411934852600098, "learning_rate": 1.417921282770783e-05, "loss": 1.0215, "step": 26510 }, { "epoch": 0.3820389818056096, "grad_norm": 0.5320689082145691, "learning_rate": 1.4174973675921182e-05, "loss": 1.041, "step": 26520 }, { "epoch": 0.38218303873690884, "grad_norm": 0.5580525398254395, "learning_rate": 1.4170733615300356e-05, "loss": 1.0327, "step": 26530 }, { "epoch": 0.38232709566820805, "grad_norm": 0.8316383957862854, "learning_rate": 1.4166492646768357e-05, "loss": 1.0412, "step": 26540 }, { "epoch": 0.3824711525995073, "grad_norm": 0.5757890343666077, "learning_rate": 1.4162250771248387e-05, "loss": 1.0321, "step": 26550 }, { "epoch": 0.3826152095308066, "grad_norm": 0.6110444664955139, "learning_rate": 1.4158007989663842e-05, "loss": 1.0081, "step": 26560 }, { "epoch": 0.38275926646210584, "grad_norm": 0.5812126398086548, "learning_rate": 1.4153764302938316e-05, "loss": 1.0547, "step": 26570 }, { "epoch": 0.38290332339340505, "grad_norm": 0.6257738471031189, "learning_rate": 1.4149519711995603e-05, "loss": 1.0112, "step": 26580 }, { "epoch": 0.3830473803247043, "grad_norm": 0.6122918128967285, "learning_rate": 1.4145274217759693e-05, "loss": 1.0463, "step": 26590 }, { "epoch": 0.3831914372560036, "grad_norm": 0.6165433526039124, "learning_rate": 1.4141027821154769e-05, "loss": 1.0202, "step": 26600 }, { "epoch": 0.38333549418730284, "grad_norm": 0.6220911741256714, "learning_rate": 1.4136780523105212e-05, "loss": 1.0134, "step": 26610 }, { "epoch": 0.38347955111860205, "grad_norm": 0.5039299130439758, "learning_rate": 1.4132532324535603e-05, "loss": 1.028, "step": 26620 }, { "epoch": 0.3836236080499013, "grad_norm": 0.5618889927864075, "learning_rate": 1.4128283226370716e-05, "loss": 1.0144, "step": 26630 }, { "epoch": 0.3837676649812006, "grad_norm": 0.5898512005805969, "learning_rate": 1.4124033229535518e-05, "loss": 1.0148, "step": 26640 }, { "epoch": 0.38391172191249984, "grad_norm": 0.5746092796325684, "learning_rate": 1.4119782334955182e-05, "loss": 1.0291, "step": 26650 }, { "epoch": 0.38405577884379904, "grad_norm": 0.6095765233039856, "learning_rate": 1.411553054355506e-05, "loss": 1.0245, "step": 26660 }, { "epoch": 0.3841998357750983, "grad_norm": 0.8533281683921814, "learning_rate": 1.4111277856260714e-05, "loss": 1.0252, "step": 26670 }, { "epoch": 0.38434389270639757, "grad_norm": 0.5089832544326782, "learning_rate": 1.4107024273997894e-05, "loss": 1.0247, "step": 26680 }, { "epoch": 0.38448794963769684, "grad_norm": 0.5801883339881897, "learning_rate": 1.4102769797692543e-05, "loss": 1.0141, "step": 26690 }, { "epoch": 0.38463200656899604, "grad_norm": 0.5481066107749939, "learning_rate": 1.4098514428270811e-05, "loss": 1.025, "step": 26700 }, { "epoch": 0.3847760635002953, "grad_norm": 0.5064467787742615, "learning_rate": 1.4094258166659021e-05, "loss": 1.0131, "step": 26710 }, { "epoch": 0.38492012043159457, "grad_norm": 0.6142391562461853, "learning_rate": 1.409000101378371e-05, "loss": 1.0192, "step": 26720 }, { "epoch": 0.38506417736289383, "grad_norm": 0.5667955875396729, "learning_rate": 1.4085742970571601e-05, "loss": 1.0255, "step": 26730 }, { "epoch": 0.38520823429419304, "grad_norm": 0.6403192281723022, "learning_rate": 1.408148403794961e-05, "loss": 1.0183, "step": 26740 }, { "epoch": 0.3853522912254923, "grad_norm": 0.7364640235900879, "learning_rate": 1.4077224216844846e-05, "loss": 1.0171, "step": 26750 }, { "epoch": 0.38549634815679157, "grad_norm": 0.6199523210525513, "learning_rate": 1.4072963508184618e-05, "loss": 1.0094, "step": 26760 }, { "epoch": 0.38564040508809083, "grad_norm": 0.5943057537078857, "learning_rate": 1.406870191289642e-05, "loss": 1.0512, "step": 26770 }, { "epoch": 0.38578446201939004, "grad_norm": 0.5860393047332764, "learning_rate": 1.4064439431907943e-05, "loss": 1.0144, "step": 26780 }, { "epoch": 0.3859285189506893, "grad_norm": 0.5969266295433044, "learning_rate": 1.406017606614707e-05, "loss": 1.0342, "step": 26790 }, { "epoch": 0.38607257588198857, "grad_norm": 0.5967586636543274, "learning_rate": 1.405591181654188e-05, "loss": 1.0262, "step": 26800 }, { "epoch": 0.38621663281328783, "grad_norm": 0.5471726655960083, "learning_rate": 1.4051646684020637e-05, "loss": 1.03, "step": 26810 }, { "epoch": 0.38636068974458704, "grad_norm": 0.584466278553009, "learning_rate": 1.4047380669511803e-05, "loss": 1.0332, "step": 26820 }, { "epoch": 0.3865047466758863, "grad_norm": 0.6537045240402222, "learning_rate": 1.4043113773944033e-05, "loss": 1.0442, "step": 26830 }, { "epoch": 0.38664880360718556, "grad_norm": 0.578791618347168, "learning_rate": 1.4038845998246165e-05, "loss": 1.0035, "step": 26840 }, { "epoch": 0.38679286053848483, "grad_norm": 0.6067990660667419, "learning_rate": 1.4034577343347243e-05, "loss": 1.0426, "step": 26850 }, { "epoch": 0.38693691746978404, "grad_norm": 0.6514458060264587, "learning_rate": 1.4030307810176488e-05, "loss": 1.0303, "step": 26860 }, { "epoch": 0.3870809744010833, "grad_norm": 0.5709391832351685, "learning_rate": 1.4026037399663323e-05, "loss": 1.0253, "step": 26870 }, { "epoch": 0.38722503133238256, "grad_norm": 0.6085813045501709, "learning_rate": 1.4021766112737357e-05, "loss": 1.0311, "step": 26880 }, { "epoch": 0.3873690882636818, "grad_norm": 0.5561932325363159, "learning_rate": 1.4017493950328383e-05, "loss": 1.0399, "step": 26890 }, { "epoch": 0.38751314519498103, "grad_norm": 0.5875961780548096, "learning_rate": 1.4013220913366401e-05, "loss": 1.0219, "step": 26900 }, { "epoch": 0.3876572021262803, "grad_norm": 0.5987101197242737, "learning_rate": 1.4008947002781588e-05, "loss": 1.0086, "step": 26910 }, { "epoch": 0.38780125905757956, "grad_norm": 0.7484641075134277, "learning_rate": 1.4004672219504318e-05, "loss": 1.0279, "step": 26920 }, { "epoch": 0.3879453159888788, "grad_norm": 0.7231214642524719, "learning_rate": 1.4000396564465146e-05, "loss": 1.0155, "step": 26930 }, { "epoch": 0.38808937292017803, "grad_norm": 0.5435040593147278, "learning_rate": 1.3996120038594829e-05, "loss": 1.0326, "step": 26940 }, { "epoch": 0.3882334298514773, "grad_norm": 0.5462003350257874, "learning_rate": 1.3991842642824307e-05, "loss": 1.042, "step": 26950 }, { "epoch": 0.38837748678277656, "grad_norm": 0.5954107046127319, "learning_rate": 1.3987564378084707e-05, "loss": 1.0248, "step": 26960 }, { "epoch": 0.3885215437140758, "grad_norm": 0.5133011341094971, "learning_rate": 1.3983285245307355e-05, "loss": 1.0416, "step": 26970 }, { "epoch": 0.38866560064537503, "grad_norm": 0.6856930255889893, "learning_rate": 1.397900524542375e-05, "loss": 1.0472, "step": 26980 }, { "epoch": 0.3888096575766743, "grad_norm": 0.5439833402633667, "learning_rate": 1.3974724379365597e-05, "loss": 0.997, "step": 26990 }, { "epoch": 0.38895371450797356, "grad_norm": 0.6151694655418396, "learning_rate": 1.3970442648064775e-05, "loss": 1.0289, "step": 27000 }, { "epoch": 0.3890977714392728, "grad_norm": 0.6477126479148865, "learning_rate": 1.3966160052453361e-05, "loss": 1.0496, "step": 27010 }, { "epoch": 0.38924182837057203, "grad_norm": 0.620611310005188, "learning_rate": 1.396187659346362e-05, "loss": 1.0345, "step": 27020 }, { "epoch": 0.3893858853018713, "grad_norm": 0.5189834833145142, "learning_rate": 1.3957592272027995e-05, "loss": 1.0183, "step": 27030 }, { "epoch": 0.38952994223317056, "grad_norm": 0.621438205242157, "learning_rate": 1.395330708907913e-05, "loss": 1.0469, "step": 27040 }, { "epoch": 0.3896739991644698, "grad_norm": 0.6799705624580383, "learning_rate": 1.3949021045549844e-05, "loss": 1.021, "step": 27050 }, { "epoch": 0.389818056095769, "grad_norm": 0.552381694316864, "learning_rate": 1.3944734142373157e-05, "loss": 1.0358, "step": 27060 }, { "epoch": 0.3899621130270683, "grad_norm": 0.5012443661689758, "learning_rate": 1.3940446380482264e-05, "loss": 1.0345, "step": 27070 }, { "epoch": 0.39010616995836755, "grad_norm": 0.569753110408783, "learning_rate": 1.3936157760810551e-05, "loss": 1.049, "step": 27080 }, { "epoch": 0.3902502268896668, "grad_norm": 0.5513450503349304, "learning_rate": 1.3931868284291591e-05, "loss": 1.0374, "step": 27090 }, { "epoch": 0.390394283820966, "grad_norm": 0.5258768200874329, "learning_rate": 1.392757795185915e-05, "loss": 1.0163, "step": 27100 }, { "epoch": 0.3905383407522653, "grad_norm": 0.6887435913085938, "learning_rate": 1.392328676444717e-05, "loss": 1.0386, "step": 27110 }, { "epoch": 0.39068239768356455, "grad_norm": 0.5797517895698547, "learning_rate": 1.391899472298978e-05, "loss": 1.0269, "step": 27120 }, { "epoch": 0.3908264546148638, "grad_norm": 0.573955774307251, "learning_rate": 1.3914701828421304e-05, "loss": 1.0304, "step": 27130 }, { "epoch": 0.390970511546163, "grad_norm": 0.5322799682617188, "learning_rate": 1.3910408081676241e-05, "loss": 1.0395, "step": 27140 }, { "epoch": 0.3911145684774623, "grad_norm": 0.6017953753471375, "learning_rate": 1.3906113483689285e-05, "loss": 1.0302, "step": 27150 }, { "epoch": 0.39125862540876155, "grad_norm": 0.5898041129112244, "learning_rate": 1.3901818035395307e-05, "loss": 1.0364, "step": 27160 }, { "epoch": 0.3914026823400608, "grad_norm": 0.5774397850036621, "learning_rate": 1.3897521737729367e-05, "loss": 1.0237, "step": 27170 }, { "epoch": 0.39154673927136, "grad_norm": 0.6190448999404907, "learning_rate": 1.3893224591626715e-05, "loss": 1.049, "step": 27180 }, { "epoch": 0.3916907962026593, "grad_norm": 0.6038142442703247, "learning_rate": 1.3888926598022772e-05, "loss": 1.026, "step": 27190 }, { "epoch": 0.39183485313395855, "grad_norm": 0.6462919116020203, "learning_rate": 1.3884627757853158e-05, "loss": 1.0331, "step": 27200 }, { "epoch": 0.3919789100652578, "grad_norm": 0.5720357298851013, "learning_rate": 1.388032807205367e-05, "loss": 1.0436, "step": 27210 }, { "epoch": 0.392122966996557, "grad_norm": 0.522705614566803, "learning_rate": 1.387602754156029e-05, "loss": 1.0061, "step": 27220 }, { "epoch": 0.3922670239278563, "grad_norm": 0.5933101177215576, "learning_rate": 1.3871726167309186e-05, "loss": 1.0235, "step": 27230 }, { "epoch": 0.39241108085915555, "grad_norm": 0.5866686701774597, "learning_rate": 1.3867423950236702e-05, "loss": 1.0324, "step": 27240 }, { "epoch": 0.3925551377904548, "grad_norm": 0.5963432192802429, "learning_rate": 1.3863120891279378e-05, "loss": 1.0311, "step": 27250 }, { "epoch": 0.392699194721754, "grad_norm": 0.5158278346061707, "learning_rate": 1.3858816991373925e-05, "loss": 1.0346, "step": 27260 }, { "epoch": 0.3928432516530533, "grad_norm": 0.5357363224029541, "learning_rate": 1.3854512251457248e-05, "loss": 1.036, "step": 27270 }, { "epoch": 0.39298730858435255, "grad_norm": 0.6555647850036621, "learning_rate": 1.3850206672466424e-05, "loss": 1.0511, "step": 27280 }, { "epoch": 0.3931313655156518, "grad_norm": 0.5619667768478394, "learning_rate": 1.3845900255338726e-05, "loss": 1.0176, "step": 27290 }, { "epoch": 0.393275422446951, "grad_norm": 0.6229352951049805, "learning_rate": 1.384159300101159e-05, "loss": 1.0252, "step": 27300 }, { "epoch": 0.3934194793782503, "grad_norm": 0.5728248357772827, "learning_rate": 1.3837284910422659e-05, "loss": 1.0265, "step": 27310 }, { "epoch": 0.39356353630954954, "grad_norm": 0.6256684064865112, "learning_rate": 1.3832975984509735e-05, "loss": 1.016, "step": 27320 }, { "epoch": 0.3937075932408488, "grad_norm": 0.5774276852607727, "learning_rate": 1.3828666224210814e-05, "loss": 1.0318, "step": 27330 }, { "epoch": 0.393851650172148, "grad_norm": 0.6430305242538452, "learning_rate": 1.3824355630464074e-05, "loss": 1.0287, "step": 27340 }, { "epoch": 0.3939957071034473, "grad_norm": 0.5507270693778992, "learning_rate": 1.382004420420787e-05, "loss": 1.0277, "step": 27350 }, { "epoch": 0.39413976403474654, "grad_norm": 0.5930743217468262, "learning_rate": 1.3815731946380744e-05, "loss": 1.0334, "step": 27360 }, { "epoch": 0.3942838209660458, "grad_norm": 0.5006609559059143, "learning_rate": 1.3811418857921406e-05, "loss": 1.0545, "step": 27370 }, { "epoch": 0.394427877897345, "grad_norm": 0.6899300217628479, "learning_rate": 1.3807104939768763e-05, "loss": 1.0328, "step": 27380 }, { "epoch": 0.3945719348286443, "grad_norm": 0.6164814233779907, "learning_rate": 1.3802790192861893e-05, "loss": 1.0314, "step": 27390 }, { "epoch": 0.39471599175994354, "grad_norm": 0.6175230145454407, "learning_rate": 1.3798474618140061e-05, "loss": 1.0266, "step": 27400 }, { "epoch": 0.3948600486912428, "grad_norm": 0.6572819352149963, "learning_rate": 1.3794158216542698e-05, "loss": 1.0425, "step": 27410 }, { "epoch": 0.395004105622542, "grad_norm": 0.6338945031166077, "learning_rate": 1.3789840989009438e-05, "loss": 1.041, "step": 27420 }, { "epoch": 0.3951481625538413, "grad_norm": 0.6209861636161804, "learning_rate": 1.3785522936480074e-05, "loss": 1.0361, "step": 27430 }, { "epoch": 0.39529221948514054, "grad_norm": 0.5528213977813721, "learning_rate": 1.3781204059894586e-05, "loss": 1.0104, "step": 27440 }, { "epoch": 0.3954362764164398, "grad_norm": 0.603303849697113, "learning_rate": 1.3776884360193137e-05, "loss": 1.0319, "step": 27450 }, { "epoch": 0.395580333347739, "grad_norm": 0.5575588345527649, "learning_rate": 1.3772563838316065e-05, "loss": 1.0025, "step": 27460 }, { "epoch": 0.3957243902790383, "grad_norm": 0.5700780749320984, "learning_rate": 1.3768242495203889e-05, "loss": 1.0279, "step": 27470 }, { "epoch": 0.39586844721033754, "grad_norm": 0.5762060880661011, "learning_rate": 1.3763920331797302e-05, "loss": 1.0486, "step": 27480 }, { "epoch": 0.3960125041416368, "grad_norm": 0.4911508858203888, "learning_rate": 1.3759597349037186e-05, "loss": 1.0127, "step": 27490 }, { "epoch": 0.396156561072936, "grad_norm": 0.6626416444778442, "learning_rate": 1.3755273547864584e-05, "loss": 1.0403, "step": 27500 }, { "epoch": 0.39630061800423527, "grad_norm": 0.6523149609565735, "learning_rate": 1.3750948929220743e-05, "loss": 1.0366, "step": 27510 }, { "epoch": 0.39644467493553454, "grad_norm": 0.522388756275177, "learning_rate": 1.374662349404706e-05, "loss": 1.0388, "step": 27520 }, { "epoch": 0.3965887318668338, "grad_norm": 0.5563465356826782, "learning_rate": 1.3742297243285126e-05, "loss": 1.0114, "step": 27530 }, { "epoch": 0.396732788798133, "grad_norm": 0.5447655320167542, "learning_rate": 1.3737970177876711e-05, "loss": 1.0205, "step": 27540 }, { "epoch": 0.39687684572943227, "grad_norm": 0.5525548458099365, "learning_rate": 1.3733642298763752e-05, "loss": 1.0128, "step": 27550 }, { "epoch": 0.39702090266073153, "grad_norm": 0.5254085063934326, "learning_rate": 1.3729313606888371e-05, "loss": 1.0114, "step": 27560 }, { "epoch": 0.3971649595920308, "grad_norm": 0.5455352067947388, "learning_rate": 1.372498410319286e-05, "loss": 1.0319, "step": 27570 }, { "epoch": 0.39730901652333, "grad_norm": 0.5383720993995667, "learning_rate": 1.3720653788619697e-05, "loss": 1.004, "step": 27580 }, { "epoch": 0.39745307345462927, "grad_norm": 0.5045921802520752, "learning_rate": 1.3716322664111535e-05, "loss": 1.016, "step": 27590 }, { "epoch": 0.39759713038592853, "grad_norm": 0.65843665599823, "learning_rate": 1.3711990730611189e-05, "loss": 1.02, "step": 27600 }, { "epoch": 0.3977411873172278, "grad_norm": 0.4848676025867462, "learning_rate": 1.3707657989061675e-05, "loss": 1.0226, "step": 27610 }, { "epoch": 0.397885244248527, "grad_norm": 0.5090790390968323, "learning_rate": 1.3703324440406159e-05, "loss": 1.0214, "step": 27620 }, { "epoch": 0.39802930117982627, "grad_norm": 0.5815185308456421, "learning_rate": 1.3698990085588002e-05, "loss": 0.9969, "step": 27630 }, { "epoch": 0.39817335811112553, "grad_norm": 0.5621809959411621, "learning_rate": 1.3694654925550728e-05, "loss": 1.0293, "step": 27640 }, { "epoch": 0.3983174150424248, "grad_norm": 0.5777491331100464, "learning_rate": 1.3690318961238046e-05, "loss": 1.0179, "step": 27650 }, { "epoch": 0.398461471973724, "grad_norm": 0.5728102326393127, "learning_rate": 1.3685982193593837e-05, "loss": 1.0347, "step": 27660 }, { "epoch": 0.39860552890502327, "grad_norm": 0.6662067770957947, "learning_rate": 1.3681644623562149e-05, "loss": 1.0244, "step": 27670 }, { "epoch": 0.39874958583632253, "grad_norm": 0.6177421808242798, "learning_rate": 1.3677306252087217e-05, "loss": 1.0102, "step": 27680 }, { "epoch": 0.39889364276762174, "grad_norm": 0.6331455707550049, "learning_rate": 1.3672967080113443e-05, "loss": 1.0363, "step": 27690 }, { "epoch": 0.399037699698921, "grad_norm": 0.5854665040969849, "learning_rate": 1.3668627108585404e-05, "loss": 1.0074, "step": 27700 }, { "epoch": 0.39918175663022026, "grad_norm": 0.5754724740982056, "learning_rate": 1.3664286338447847e-05, "loss": 1.0154, "step": 27710 }, { "epoch": 0.3993258135615195, "grad_norm": 0.6391647458076477, "learning_rate": 1.3659944770645708e-05, "loss": 1.0295, "step": 27720 }, { "epoch": 0.39946987049281873, "grad_norm": 0.5782113671302795, "learning_rate": 1.3655602406124082e-05, "loss": 1.0408, "step": 27730 }, { "epoch": 0.399613927424118, "grad_norm": 0.6727347373962402, "learning_rate": 1.3651259245828238e-05, "loss": 1.049, "step": 27740 }, { "epoch": 0.39975798435541726, "grad_norm": 0.5403684377670288, "learning_rate": 1.3646915290703628e-05, "loss": 1.0257, "step": 27750 }, { "epoch": 0.3999020412867165, "grad_norm": 0.5497075319290161, "learning_rate": 1.3642570541695867e-05, "loss": 1.0169, "step": 27760 }, { "epoch": 0.40004609821801573, "grad_norm": 0.570429801940918, "learning_rate": 1.3638224999750751e-05, "loss": 1.0356, "step": 27770 }, { "epoch": 0.400190155149315, "grad_norm": 0.5894126296043396, "learning_rate": 1.3633878665814238e-05, "loss": 0.9985, "step": 27780 }, { "epoch": 0.40033421208061426, "grad_norm": 0.5434573888778687, "learning_rate": 1.362953154083247e-05, "loss": 1.027, "step": 27790 }, { "epoch": 0.4004782690119135, "grad_norm": 0.5834015011787415, "learning_rate": 1.3625183625751756e-05, "loss": 1.0195, "step": 27800 }, { "epoch": 0.40062232594321273, "grad_norm": 0.5129935145378113, "learning_rate": 1.3620834921518575e-05, "loss": 1.0072, "step": 27810 }, { "epoch": 0.400766382874512, "grad_norm": 0.6349966526031494, "learning_rate": 1.3616485429079583e-05, "loss": 1.0328, "step": 27820 }, { "epoch": 0.40091043980581126, "grad_norm": 0.593727707862854, "learning_rate": 1.36121351493816e-05, "loss": 1.0206, "step": 27830 }, { "epoch": 0.4010544967371105, "grad_norm": 0.5819534659385681, "learning_rate": 1.3607784083371627e-05, "loss": 1.0195, "step": 27840 }, { "epoch": 0.40119855366840973, "grad_norm": 0.5432244539260864, "learning_rate": 1.3603432231996825e-05, "loss": 1.0278, "step": 27850 }, { "epoch": 0.401342610599709, "grad_norm": 0.5749140381813049, "learning_rate": 1.3599079596204539e-05, "loss": 1.0351, "step": 27860 }, { "epoch": 0.40148666753100826, "grad_norm": 0.7217517495155334, "learning_rate": 1.3594726176942272e-05, "loss": 1.0323, "step": 27870 }, { "epoch": 0.4016307244623075, "grad_norm": 0.5909184813499451, "learning_rate": 1.3590371975157707e-05, "loss": 1.0403, "step": 27880 }, { "epoch": 0.40177478139360673, "grad_norm": 0.6383644938468933, "learning_rate": 1.358601699179869e-05, "loss": 1.0043, "step": 27890 }, { "epoch": 0.401918838324906, "grad_norm": 0.573214590549469, "learning_rate": 1.3581661227813246e-05, "loss": 1.0463, "step": 27900 }, { "epoch": 0.40206289525620525, "grad_norm": 0.6027265787124634, "learning_rate": 1.3577304684149559e-05, "loss": 1.0263, "step": 27910 }, { "epoch": 0.4022069521875045, "grad_norm": 0.5588923692703247, "learning_rate": 1.3572947361755993e-05, "loss": 1.0298, "step": 27920 }, { "epoch": 0.4023510091188037, "grad_norm": 0.6305546164512634, "learning_rate": 1.3568589261581075e-05, "loss": 1.0306, "step": 27930 }, { "epoch": 0.402495066050103, "grad_norm": 0.7798808217048645, "learning_rate": 1.3564230384573502e-05, "loss": 1.029, "step": 27940 }, { "epoch": 0.40263912298140225, "grad_norm": 0.5395976901054382, "learning_rate": 1.3559870731682145e-05, "loss": 1.0239, "step": 27950 }, { "epoch": 0.4027831799127015, "grad_norm": 0.532769501209259, "learning_rate": 1.3555510303856036e-05, "loss": 1.0146, "step": 27960 }, { "epoch": 0.4029272368440007, "grad_norm": 0.9768617749214172, "learning_rate": 1.3551149102044386e-05, "loss": 1.0286, "step": 27970 }, { "epoch": 0.4030712937753, "grad_norm": 0.5485137104988098, "learning_rate": 1.354678712719656e-05, "loss": 1.0266, "step": 27980 }, { "epoch": 0.40321535070659925, "grad_norm": 0.5443882346153259, "learning_rate": 1.3542424380262108e-05, "loss": 1.0351, "step": 27990 }, { "epoch": 0.4033594076378985, "grad_norm": 0.5557625889778137, "learning_rate": 1.3538060862190735e-05, "loss": 1.0104, "step": 28000 }, { "epoch": 0.4035034645691977, "grad_norm": 0.5878540873527527, "learning_rate": 1.3533696573932316e-05, "loss": 1.0176, "step": 28010 }, { "epoch": 0.403647521500497, "grad_norm": 0.615067720413208, "learning_rate": 1.3529331516436905e-05, "loss": 1.0285, "step": 28020 }, { "epoch": 0.40379157843179625, "grad_norm": 0.5775823593139648, "learning_rate": 1.3524965690654708e-05, "loss": 1.0296, "step": 28030 }, { "epoch": 0.4039356353630955, "grad_norm": 0.5731359720230103, "learning_rate": 1.352059909753611e-05, "loss": 1.0473, "step": 28040 }, { "epoch": 0.4040796922943947, "grad_norm": 0.5543149709701538, "learning_rate": 1.3516231738031651e-05, "loss": 1.0276, "step": 28050 }, { "epoch": 0.404223749225694, "grad_norm": 0.5918812155723572, "learning_rate": 1.3511863613092052e-05, "loss": 1.0372, "step": 28060 }, { "epoch": 0.40436780615699325, "grad_norm": 0.5753766894340515, "learning_rate": 1.3507494723668188e-05, "loss": 1.0423, "step": 28070 }, { "epoch": 0.4045118630882925, "grad_norm": 0.5637984275817871, "learning_rate": 1.3503125070711108e-05, "loss": 1.035, "step": 28080 }, { "epoch": 0.4046559200195917, "grad_norm": 0.5202157497406006, "learning_rate": 1.349875465517203e-05, "loss": 1.0445, "step": 28090 }, { "epoch": 0.404799976950891, "grad_norm": 0.6230756640434265, "learning_rate": 1.3494383478002326e-05, "loss": 1.0049, "step": 28100 }, { "epoch": 0.40494403388219025, "grad_norm": 0.5658066272735596, "learning_rate": 1.3490011540153545e-05, "loss": 1.0238, "step": 28110 }, { "epoch": 0.4050880908134895, "grad_norm": 0.549156665802002, "learning_rate": 1.3485638842577395e-05, "loss": 1.0304, "step": 28120 }, { "epoch": 0.4052321477447887, "grad_norm": 0.5747542977333069, "learning_rate": 1.3481265386225755e-05, "loss": 1.0201, "step": 28130 }, { "epoch": 0.405376204676088, "grad_norm": 0.5837199687957764, "learning_rate": 1.3476891172050666e-05, "loss": 1.0208, "step": 28140 }, { "epoch": 0.40552026160738724, "grad_norm": 0.5938249826431274, "learning_rate": 1.347251620100433e-05, "loss": 1.0323, "step": 28150 }, { "epoch": 0.4056643185386865, "grad_norm": 0.5567238926887512, "learning_rate": 1.3468140474039123e-05, "loss": 1.0231, "step": 28160 }, { "epoch": 0.4058083754699857, "grad_norm": 0.6004190444946289, "learning_rate": 1.3463763992107572e-05, "loss": 1.0224, "step": 28170 }, { "epoch": 0.405952432401285, "grad_norm": 0.6211457848548889, "learning_rate": 1.3459386756162387e-05, "loss": 1.0114, "step": 28180 }, { "epoch": 0.40609648933258424, "grad_norm": 0.591342031955719, "learning_rate": 1.3455008767156425e-05, "loss": 1.0291, "step": 28190 }, { "epoch": 0.4062405462638835, "grad_norm": 0.5131479501724243, "learning_rate": 1.3450630026042719e-05, "loss": 1.0301, "step": 28200 }, { "epoch": 0.4063846031951827, "grad_norm": 0.5368046164512634, "learning_rate": 1.3446250533774458e-05, "loss": 1.0307, "step": 28210 }, { "epoch": 0.406528660126482, "grad_norm": 0.5792551636695862, "learning_rate": 1.3441870291304995e-05, "loss": 1.0332, "step": 28220 }, { "epoch": 0.40667271705778124, "grad_norm": 0.9220713376998901, "learning_rate": 1.3437489299587848e-05, "loss": 1.0206, "step": 28230 }, { "epoch": 0.4068167739890805, "grad_norm": 0.5201154947280884, "learning_rate": 1.3433107559576705e-05, "loss": 1.0212, "step": 28240 }, { "epoch": 0.4069608309203797, "grad_norm": 0.5643423795700073, "learning_rate": 1.3428725072225407e-05, "loss": 1.017, "step": 28250 }, { "epoch": 0.407104887851679, "grad_norm": 0.5780232548713684, "learning_rate": 1.3424341838487955e-05, "loss": 1.0211, "step": 28260 }, { "epoch": 0.40724894478297824, "grad_norm": 0.6155387163162231, "learning_rate": 1.341995785931853e-05, "loss": 1.0138, "step": 28270 }, { "epoch": 0.4073930017142775, "grad_norm": 0.5612841844558716, "learning_rate": 1.3415573135671451e-05, "loss": 1.0248, "step": 28280 }, { "epoch": 0.4075370586455767, "grad_norm": 0.6546777486801147, "learning_rate": 1.341118766850122e-05, "loss": 1.0305, "step": 28290 }, { "epoch": 0.407681115576876, "grad_norm": 0.5699030756950378, "learning_rate": 1.3406801458762493e-05, "loss": 1.0195, "step": 28300 }, { "epoch": 0.40782517250817524, "grad_norm": 0.7382329106330872, "learning_rate": 1.3402414507410083e-05, "loss": 1.0355, "step": 28310 }, { "epoch": 0.4079692294394745, "grad_norm": 0.5104536414146423, "learning_rate": 1.3398026815398974e-05, "loss": 1.0278, "step": 28320 }, { "epoch": 0.4081132863707737, "grad_norm": 0.5759681463241577, "learning_rate": 1.33936383836843e-05, "loss": 1.0253, "step": 28330 }, { "epoch": 0.408257343302073, "grad_norm": 0.5067317485809326, "learning_rate": 1.3389249213221367e-05, "loss": 1.0196, "step": 28340 }, { "epoch": 0.40840140023337224, "grad_norm": 0.6182513236999512, "learning_rate": 1.3384859304965635e-05, "loss": 1.0186, "step": 28350 }, { "epoch": 0.4085454571646715, "grad_norm": 0.5657869577407837, "learning_rate": 1.3380468659872726e-05, "loss": 1.0375, "step": 28360 }, { "epoch": 0.4086895140959707, "grad_norm": 0.607580840587616, "learning_rate": 1.3376077278898421e-05, "loss": 1.0309, "step": 28370 }, { "epoch": 0.40883357102726997, "grad_norm": 0.5067529082298279, "learning_rate": 1.3371685162998668e-05, "loss": 1.0355, "step": 28380 }, { "epoch": 0.40897762795856923, "grad_norm": 0.5086706280708313, "learning_rate": 1.336729231312957e-05, "loss": 1.022, "step": 28390 }, { "epoch": 0.4091216848898685, "grad_norm": 0.5726044178009033, "learning_rate": 1.3362898730247382e-05, "loss": 1.0168, "step": 28400 }, { "epoch": 0.4092657418211677, "grad_norm": 0.5389735102653503, "learning_rate": 1.3358504415308533e-05, "loss": 1.0243, "step": 28410 }, { "epoch": 0.40940979875246697, "grad_norm": 0.6076347231864929, "learning_rate": 1.3354109369269605e-05, "loss": 1.0241, "step": 28420 }, { "epoch": 0.40955385568376623, "grad_norm": 0.4856959283351898, "learning_rate": 1.3349713593087337e-05, "loss": 1.031, "step": 28430 }, { "epoch": 0.4096979126150655, "grad_norm": 0.6383373737335205, "learning_rate": 1.3345317087718628e-05, "loss": 1.0165, "step": 28440 }, { "epoch": 0.4098419695463647, "grad_norm": 0.5641618967056274, "learning_rate": 1.3340919854120538e-05, "loss": 1.0099, "step": 28450 }, { "epoch": 0.40998602647766397, "grad_norm": 0.5254115462303162, "learning_rate": 1.3336521893250285e-05, "loss": 1.0275, "step": 28460 }, { "epoch": 0.41013008340896323, "grad_norm": 0.5576873421669006, "learning_rate": 1.3332123206065242e-05, "loss": 1.0108, "step": 28470 }, { "epoch": 0.4102741403402625, "grad_norm": 0.5620102286338806, "learning_rate": 1.3327723793522944e-05, "loss": 1.023, "step": 28480 }, { "epoch": 0.4104181972715617, "grad_norm": 0.5105875730514526, "learning_rate": 1.3323323656581082e-05, "loss": 1.027, "step": 28490 }, { "epoch": 0.41056225420286097, "grad_norm": 0.5693491101264954, "learning_rate": 1.3318922796197507e-05, "loss": 1.0561, "step": 28500 }, { "epoch": 0.41070631113416023, "grad_norm": 0.555982768535614, "learning_rate": 1.331452121333022e-05, "loss": 1.0258, "step": 28510 }, { "epoch": 0.4108503680654595, "grad_norm": 0.5636388063430786, "learning_rate": 1.331011890893739e-05, "loss": 1.0255, "step": 28520 }, { "epoch": 0.4109944249967587, "grad_norm": 0.4831456243991852, "learning_rate": 1.330571588397734e-05, "loss": 1.0197, "step": 28530 }, { "epoch": 0.41113848192805796, "grad_norm": 0.5975767970085144, "learning_rate": 1.330131213940854e-05, "loss": 1.007, "step": 28540 }, { "epoch": 0.4112825388593572, "grad_norm": 0.5731145739555359, "learning_rate": 1.329690767618963e-05, "loss": 1.0209, "step": 28550 }, { "epoch": 0.4114265957906565, "grad_norm": 0.6682479381561279, "learning_rate": 1.32925024952794e-05, "loss": 1.025, "step": 28560 }, { "epoch": 0.4115706527219557, "grad_norm": 0.5810287594795227, "learning_rate": 1.32880965976368e-05, "loss": 1.0491, "step": 28570 }, { "epoch": 0.41171470965325496, "grad_norm": 0.5970315933227539, "learning_rate": 1.3283689984220927e-05, "loss": 1.0477, "step": 28580 }, { "epoch": 0.4118587665845542, "grad_norm": 0.6494952440261841, "learning_rate": 1.3279282655991043e-05, "loss": 0.9981, "step": 28590 }, { "epoch": 0.4120028235158535, "grad_norm": 0.6970714926719666, "learning_rate": 1.3274874613906564e-05, "loss": 1.0077, "step": 28600 }, { "epoch": 0.4121468804471527, "grad_norm": 0.6124892830848694, "learning_rate": 1.3270465858927055e-05, "loss": 1.0258, "step": 28610 }, { "epoch": 0.41229093737845196, "grad_norm": 0.563629150390625, "learning_rate": 1.3266056392012248e-05, "loss": 1.0306, "step": 28620 }, { "epoch": 0.4124349943097512, "grad_norm": 0.6846545934677124, "learning_rate": 1.3261646214122017e-05, "loss": 1.0161, "step": 28630 }, { "epoch": 0.4125790512410505, "grad_norm": 0.5320466756820679, "learning_rate": 1.3257235326216405e-05, "loss": 1.0005, "step": 28640 }, { "epoch": 0.4127231081723497, "grad_norm": 0.5725711584091187, "learning_rate": 1.325282372925559e-05, "loss": 1.0258, "step": 28650 }, { "epoch": 0.41286716510364896, "grad_norm": 0.6050660610198975, "learning_rate": 1.3248411424199924e-05, "loss": 1.0354, "step": 28660 }, { "epoch": 0.4130112220349482, "grad_norm": 0.6206074953079224, "learning_rate": 1.3243998412009904e-05, "loss": 1.0444, "step": 28670 }, { "epoch": 0.4131552789662475, "grad_norm": 0.6172865033149719, "learning_rate": 1.3239584693646179e-05, "loss": 1.0292, "step": 28680 }, { "epoch": 0.4132993358975467, "grad_norm": 0.4913505017757416, "learning_rate": 1.3235170270069557e-05, "loss": 1.0175, "step": 28690 }, { "epoch": 0.41344339282884596, "grad_norm": 0.5030386447906494, "learning_rate": 1.3230755142240997e-05, "loss": 1.0412, "step": 28700 }, { "epoch": 0.4135874497601452, "grad_norm": 0.5598655343055725, "learning_rate": 1.3226339311121613e-05, "loss": 1.0181, "step": 28710 }, { "epoch": 0.4137315066914445, "grad_norm": 0.6742128133773804, "learning_rate": 1.3221922777672665e-05, "loss": 1.034, "step": 28720 }, { "epoch": 0.4138755636227437, "grad_norm": 0.5599446296691895, "learning_rate": 1.3217505542855573e-05, "loss": 1.0297, "step": 28730 }, { "epoch": 0.41401962055404296, "grad_norm": 0.5743834972381592, "learning_rate": 1.3213087607631912e-05, "loss": 1.0293, "step": 28740 }, { "epoch": 0.4141636774853422, "grad_norm": 0.5799579620361328, "learning_rate": 1.3208668972963406e-05, "loss": 1.0457, "step": 28750 }, { "epoch": 0.4143077344166415, "grad_norm": 0.7092347741127014, "learning_rate": 1.3204249639811924e-05, "loss": 1.006, "step": 28760 }, { "epoch": 0.4144517913479407, "grad_norm": 0.5876005291938782, "learning_rate": 1.3199829609139498e-05, "loss": 1.0278, "step": 28770 }, { "epoch": 0.41459584827923995, "grad_norm": 0.6291282176971436, "learning_rate": 1.3195408881908312e-05, "loss": 1.0118, "step": 28780 }, { "epoch": 0.4147399052105392, "grad_norm": 0.5738189220428467, "learning_rate": 1.319098745908069e-05, "loss": 1.012, "step": 28790 }, { "epoch": 0.4148839621418385, "grad_norm": 0.6768100261688232, "learning_rate": 1.318656534161912e-05, "loss": 1.049, "step": 28800 }, { "epoch": 0.4150280190731377, "grad_norm": 0.5153951048851013, "learning_rate": 1.318214253048623e-05, "loss": 1.0227, "step": 28810 }, { "epoch": 0.41517207600443695, "grad_norm": 0.5960916876792908, "learning_rate": 1.3177719026644815e-05, "loss": 1.0149, "step": 28820 }, { "epoch": 0.4153161329357362, "grad_norm": 0.6023445129394531, "learning_rate": 1.3173294831057799e-05, "loss": 1.0334, "step": 28830 }, { "epoch": 0.4154601898670355, "grad_norm": 0.6096762418746948, "learning_rate": 1.3168869944688274e-05, "loss": 1.0387, "step": 28840 }, { "epoch": 0.4156042467983347, "grad_norm": 0.5115797519683838, "learning_rate": 1.3164444368499479e-05, "loss": 1.0066, "step": 28850 }, { "epoch": 0.41574830372963395, "grad_norm": 0.5491281151771545, "learning_rate": 1.3160018103454799e-05, "loss": 1.0308, "step": 28860 }, { "epoch": 0.4158923606609332, "grad_norm": 0.9223948121070862, "learning_rate": 1.315559115051777e-05, "loss": 1.025, "step": 28870 }, { "epoch": 0.4160364175922325, "grad_norm": 0.6295514106750488, "learning_rate": 1.3151163510652077e-05, "loss": 1.0147, "step": 28880 }, { "epoch": 0.4161804745235317, "grad_norm": 0.5565011501312256, "learning_rate": 1.3146735184821559e-05, "loss": 1.006, "step": 28890 }, { "epoch": 0.41632453145483095, "grad_norm": 0.5805292725563049, "learning_rate": 1.31423061739902e-05, "loss": 1.0511, "step": 28900 }, { "epoch": 0.4164685883861302, "grad_norm": 0.5794126987457275, "learning_rate": 1.3137876479122138e-05, "loss": 1.0367, "step": 28910 }, { "epoch": 0.4166126453174295, "grad_norm": 0.5777976512908936, "learning_rate": 1.3133446101181648e-05, "loss": 1.0072, "step": 28920 }, { "epoch": 0.4167567022487287, "grad_norm": 0.5612347722053528, "learning_rate": 1.3129015041133174e-05, "loss": 1.0263, "step": 28930 }, { "epoch": 0.41690075918002795, "grad_norm": 0.8037853240966797, "learning_rate": 1.3124583299941287e-05, "loss": 1.0244, "step": 28940 }, { "epoch": 0.4170448161113272, "grad_norm": 0.5883808732032776, "learning_rate": 1.3120150878570716e-05, "loss": 1.0409, "step": 28950 }, { "epoch": 0.4171888730426265, "grad_norm": 0.5531783699989319, "learning_rate": 1.3115717777986347e-05, "loss": 1.0347, "step": 28960 }, { "epoch": 0.4173329299739257, "grad_norm": 0.608101487159729, "learning_rate": 1.3111283999153196e-05, "loss": 1.0117, "step": 28970 }, { "epoch": 0.41747698690522494, "grad_norm": 0.6610711216926575, "learning_rate": 1.3106849543036441e-05, "loss": 1.0363, "step": 28980 }, { "epoch": 0.4176210438365242, "grad_norm": 0.5230353474617004, "learning_rate": 1.3102414410601396e-05, "loss": 1.0151, "step": 28990 }, { "epoch": 0.4177651007678234, "grad_norm": 0.6251484751701355, "learning_rate": 1.3097978602813535e-05, "loss": 1.0223, "step": 29000 }, { "epoch": 0.4179091576991227, "grad_norm": 0.6061869859695435, "learning_rate": 1.3093542120638467e-05, "loss": 1.0048, "step": 29010 }, { "epoch": 0.41805321463042194, "grad_norm": 0.6269460916519165, "learning_rate": 1.3089104965041954e-05, "loss": 1.0439, "step": 29020 }, { "epoch": 0.4181972715617212, "grad_norm": 0.6348804831504822, "learning_rate": 1.3084667136989907e-05, "loss": 1.0204, "step": 29030 }, { "epoch": 0.4183413284930204, "grad_norm": 0.6431804895401001, "learning_rate": 1.3080228637448375e-05, "loss": 1.0229, "step": 29040 }, { "epoch": 0.4184853854243197, "grad_norm": 0.6093942523002625, "learning_rate": 1.3075789467383562e-05, "loss": 1.0388, "step": 29050 }, { "epoch": 0.41862944235561894, "grad_norm": 0.5368409156799316, "learning_rate": 1.3071349627761812e-05, "loss": 1.0095, "step": 29060 }, { "epoch": 0.4187734992869182, "grad_norm": 0.5583363175392151, "learning_rate": 1.3066909119549615e-05, "loss": 1.037, "step": 29070 }, { "epoch": 0.4189175562182174, "grad_norm": 0.5977921485900879, "learning_rate": 1.3062467943713615e-05, "loss": 1.0186, "step": 29080 }, { "epoch": 0.4190616131495167, "grad_norm": 0.5360631346702576, "learning_rate": 1.3058026101220587e-05, "loss": 1.0257, "step": 29090 }, { "epoch": 0.41920567008081594, "grad_norm": 0.5536994934082031, "learning_rate": 1.3053583593037462e-05, "loss": 1.0247, "step": 29100 }, { "epoch": 0.4193497270121152, "grad_norm": 0.5214248895645142, "learning_rate": 1.3049140420131314e-05, "loss": 1.0255, "step": 29110 }, { "epoch": 0.4194937839434144, "grad_norm": 0.6232405304908752, "learning_rate": 1.3044696583469364e-05, "loss": 1.0303, "step": 29120 }, { "epoch": 0.4196378408747137, "grad_norm": 0.6169807314872742, "learning_rate": 1.3040252084018961e-05, "loss": 1.0357, "step": 29130 }, { "epoch": 0.41978189780601294, "grad_norm": 0.5029460787773132, "learning_rate": 1.3035806922747624e-05, "loss": 1.0225, "step": 29140 }, { "epoch": 0.4199259547373122, "grad_norm": 0.6531195044517517, "learning_rate": 1.3031361100623001e-05, "loss": 1.0172, "step": 29150 }, { "epoch": 0.4200700116686114, "grad_norm": 0.5460676550865173, "learning_rate": 1.3026914618612878e-05, "loss": 1.0082, "step": 29160 }, { "epoch": 0.4202140685999107, "grad_norm": 0.6486418843269348, "learning_rate": 1.3022467477685204e-05, "loss": 1.0097, "step": 29170 }, { "epoch": 0.42035812553120994, "grad_norm": 0.5953981876373291, "learning_rate": 1.3018019678808052e-05, "loss": 1.0222, "step": 29180 }, { "epoch": 0.4205021824625092, "grad_norm": 0.6229760050773621, "learning_rate": 1.3013571222949655e-05, "loss": 1.0505, "step": 29190 }, { "epoch": 0.4206462393938084, "grad_norm": 0.5768680572509766, "learning_rate": 1.300912211107837e-05, "loss": 1.0713, "step": 29200 }, { "epoch": 0.42079029632510767, "grad_norm": 0.576744019985199, "learning_rate": 1.3004672344162711e-05, "loss": 1.0369, "step": 29210 }, { "epoch": 0.42093435325640693, "grad_norm": 0.5998263359069824, "learning_rate": 1.3000221923171337e-05, "loss": 1.0399, "step": 29220 }, { "epoch": 0.4210784101877062, "grad_norm": 0.5798830986022949, "learning_rate": 1.2995770849073034e-05, "loss": 1.0456, "step": 29230 }, { "epoch": 0.4212224671190054, "grad_norm": 0.5114676356315613, "learning_rate": 1.2991319122836748e-05, "loss": 0.9902, "step": 29240 }, { "epoch": 0.42136652405030467, "grad_norm": 0.654665470123291, "learning_rate": 1.298686674543155e-05, "loss": 1.0353, "step": 29250 }, { "epoch": 0.42151058098160393, "grad_norm": 0.5200009942054749, "learning_rate": 1.298241371782667e-05, "loss": 1.0128, "step": 29260 }, { "epoch": 0.4216546379129032, "grad_norm": 0.55632483959198, "learning_rate": 1.2977960040991462e-05, "loss": 1.0357, "step": 29270 }, { "epoch": 0.4217986948442024, "grad_norm": 0.6114822626113892, "learning_rate": 1.2973505715895436e-05, "loss": 1.0351, "step": 29280 }, { "epoch": 0.42194275177550167, "grad_norm": 0.5504259467124939, "learning_rate": 1.2969050743508234e-05, "loss": 1.0245, "step": 29290 }, { "epoch": 0.42208680870680093, "grad_norm": 0.6451894044876099, "learning_rate": 1.2964595124799643e-05, "loss": 1.0103, "step": 29300 }, { "epoch": 0.4222308656381002, "grad_norm": 0.5632631182670593, "learning_rate": 1.2960138860739592e-05, "loss": 0.9943, "step": 29310 }, { "epoch": 0.4223749225693994, "grad_norm": 0.5638245940208435, "learning_rate": 1.2955681952298145e-05, "loss": 1.0094, "step": 29320 }, { "epoch": 0.42251897950069867, "grad_norm": 0.5850462913513184, "learning_rate": 1.295122440044551e-05, "loss": 1.0328, "step": 29330 }, { "epoch": 0.42266303643199793, "grad_norm": 0.6049823760986328, "learning_rate": 1.2946766206152037e-05, "loss": 1.0323, "step": 29340 }, { "epoch": 0.4228070933632972, "grad_norm": 0.638034462928772, "learning_rate": 1.294230737038821e-05, "loss": 1.0246, "step": 29350 }, { "epoch": 0.4229511502945964, "grad_norm": 0.5273005962371826, "learning_rate": 1.2937847894124657e-05, "loss": 1.0363, "step": 29360 }, { "epoch": 0.42309520722589566, "grad_norm": 0.6056172251701355, "learning_rate": 1.293338777833215e-05, "loss": 1.0167, "step": 29370 }, { "epoch": 0.4232392641571949, "grad_norm": 0.5097370743751526, "learning_rate": 1.2928927023981583e-05, "loss": 1.013, "step": 29380 }, { "epoch": 0.4233833210884942, "grad_norm": 0.5481967329978943, "learning_rate": 1.2924465632044012e-05, "loss": 1.0239, "step": 29390 }, { "epoch": 0.4235273780197934, "grad_norm": 0.5808329582214355, "learning_rate": 1.2920003603490612e-05, "loss": 1.0276, "step": 29400 }, { "epoch": 0.42367143495109266, "grad_norm": 0.5588293075561523, "learning_rate": 1.2915540939292712e-05, "loss": 1.0301, "step": 29410 }, { "epoch": 0.4238154918823919, "grad_norm": 0.5422973036766052, "learning_rate": 1.291107764042177e-05, "loss": 1.0499, "step": 29420 }, { "epoch": 0.4239595488136912, "grad_norm": 0.5828791856765747, "learning_rate": 1.2906613707849381e-05, "loss": 1.0357, "step": 29430 }, { "epoch": 0.4241036057449904, "grad_norm": 0.5760287642478943, "learning_rate": 1.2902149142547287e-05, "loss": 1.0273, "step": 29440 }, { "epoch": 0.42424766267628966, "grad_norm": 0.7837143540382385, "learning_rate": 1.289768394548736e-05, "loss": 1.0388, "step": 29450 }, { "epoch": 0.4243917196075889, "grad_norm": 0.5347105264663696, "learning_rate": 1.289321811764161e-05, "loss": 1.0087, "step": 29460 }, { "epoch": 0.4245357765388882, "grad_norm": 0.6786202788352966, "learning_rate": 1.2888751659982187e-05, "loss": 1.0355, "step": 29470 }, { "epoch": 0.4246798334701874, "grad_norm": 0.5785194039344788, "learning_rate": 1.2884284573481381e-05, "loss": 1.0353, "step": 29480 }, { "epoch": 0.42482389040148666, "grad_norm": 0.6395455598831177, "learning_rate": 1.2879816859111612e-05, "loss": 1.0215, "step": 29490 }, { "epoch": 0.4249679473327859, "grad_norm": 0.539808988571167, "learning_rate": 1.2875348517845437e-05, "loss": 1.0088, "step": 29500 }, { "epoch": 0.4251120042640852, "grad_norm": 0.5646077394485474, "learning_rate": 1.2870879550655557e-05, "loss": 1.036, "step": 29510 }, { "epoch": 0.4252560611953844, "grad_norm": 0.6132586598396301, "learning_rate": 1.2866409958514803e-05, "loss": 1.0476, "step": 29520 }, { "epoch": 0.42540011812668366, "grad_norm": 0.523238480091095, "learning_rate": 1.2861939742396141e-05, "loss": 1.0276, "step": 29530 }, { "epoch": 0.4255441750579829, "grad_norm": 0.53263258934021, "learning_rate": 1.2857468903272678e-05, "loss": 1.018, "step": 29540 }, { "epoch": 0.4256882319892822, "grad_norm": 0.5227144360542297, "learning_rate": 1.2852997442117655e-05, "loss": 1.021, "step": 29550 }, { "epoch": 0.4258322889205814, "grad_norm": 0.5284324288368225, "learning_rate": 1.2848525359904447e-05, "loss": 1.0247, "step": 29560 }, { "epoch": 0.42597634585188066, "grad_norm": 0.7121564745903015, "learning_rate": 1.2844052657606558e-05, "loss": 1.0468, "step": 29570 }, { "epoch": 0.4261204027831799, "grad_norm": 0.549347996711731, "learning_rate": 1.2839579336197645e-05, "loss": 1.0286, "step": 29580 }, { "epoch": 0.4262644597144792, "grad_norm": 0.5873895883560181, "learning_rate": 1.283510539665148e-05, "loss": 1.0255, "step": 29590 }, { "epoch": 0.4264085166457784, "grad_norm": 0.6166974902153015, "learning_rate": 1.2830630839941985e-05, "loss": 1.0211, "step": 29600 }, { "epoch": 0.42655257357707765, "grad_norm": 0.5977450609207153, "learning_rate": 1.2826155667043199e-05, "loss": 1.0338, "step": 29610 }, { "epoch": 0.4266966305083769, "grad_norm": 0.659603476524353, "learning_rate": 1.2821679878929312e-05, "loss": 1.0208, "step": 29620 }, { "epoch": 0.4268406874396762, "grad_norm": 0.5155239105224609, "learning_rate": 1.2817203476574643e-05, "loss": 1.0167, "step": 29630 }, { "epoch": 0.4269847443709754, "grad_norm": 0.5760207772254944, "learning_rate": 1.2812726460953637e-05, "loss": 1.0268, "step": 29640 }, { "epoch": 0.42712880130227465, "grad_norm": 0.6081256866455078, "learning_rate": 1.2808248833040885e-05, "loss": 1.0351, "step": 29650 }, { "epoch": 0.4272728582335739, "grad_norm": 0.5349990725517273, "learning_rate": 1.28037705938111e-05, "loss": 1.0107, "step": 29660 }, { "epoch": 0.4274169151648732, "grad_norm": 0.5976602435112, "learning_rate": 1.2799291744239138e-05, "loss": 1.0191, "step": 29670 }, { "epoch": 0.4275609720961724, "grad_norm": 0.5629780888557434, "learning_rate": 1.2794812285299976e-05, "loss": 1.0314, "step": 29680 }, { "epoch": 0.42770502902747165, "grad_norm": 0.6238521933555603, "learning_rate": 1.2790332217968734e-05, "loss": 1.0431, "step": 29690 }, { "epoch": 0.4278490859587709, "grad_norm": 0.5768886208534241, "learning_rate": 1.2785851543220664e-05, "loss": 1.0422, "step": 29700 }, { "epoch": 0.4279931428900702, "grad_norm": 0.6511243581771851, "learning_rate": 1.2781370262031139e-05, "loss": 1.0429, "step": 29710 }, { "epoch": 0.4281371998213694, "grad_norm": 0.5374072194099426, "learning_rate": 1.277688837537568e-05, "loss": 1.0032, "step": 29720 }, { "epoch": 0.42828125675266865, "grad_norm": 0.7526559829711914, "learning_rate": 1.2772405884229928e-05, "loss": 1.0355, "step": 29730 }, { "epoch": 0.4284253136839679, "grad_norm": 0.5825052261352539, "learning_rate": 1.2767922789569665e-05, "loss": 1.0183, "step": 29740 }, { "epoch": 0.4285693706152672, "grad_norm": 0.5553596615791321, "learning_rate": 1.2763439092370788e-05, "loss": 1.023, "step": 29750 }, { "epoch": 0.4287134275465664, "grad_norm": 0.5329517722129822, "learning_rate": 1.2758954793609344e-05, "loss": 1.012, "step": 29760 }, { "epoch": 0.42885748447786565, "grad_norm": 0.6524930596351624, "learning_rate": 1.27544698942615e-05, "loss": 1.0298, "step": 29770 }, { "epoch": 0.4290015414091649, "grad_norm": 0.6755332350730896, "learning_rate": 1.274998439530356e-05, "loss": 1.0237, "step": 29780 }, { "epoch": 0.4291455983404642, "grad_norm": 0.5230400562286377, "learning_rate": 1.2745498297711953e-05, "loss": 1.0342, "step": 29790 }, { "epoch": 0.4292896552717634, "grad_norm": 0.6378017663955688, "learning_rate": 1.274101160246324e-05, "loss": 1.0366, "step": 29800 }, { "epoch": 0.42943371220306265, "grad_norm": 0.5726189613342285, "learning_rate": 1.2736524310534118e-05, "loss": 1.0337, "step": 29810 }, { "epoch": 0.4295777691343619, "grad_norm": 0.6010988354682922, "learning_rate": 1.2732036422901397e-05, "loss": 1.0252, "step": 29820 }, { "epoch": 0.42972182606566117, "grad_norm": 0.571182370185852, "learning_rate": 1.272754794054204e-05, "loss": 1.0287, "step": 29830 }, { "epoch": 0.4298658829969604, "grad_norm": 0.6563334465026855, "learning_rate": 1.272305886443312e-05, "loss": 0.9941, "step": 29840 }, { "epoch": 0.43000993992825964, "grad_norm": 0.6256719827651978, "learning_rate": 1.2718569195551853e-05, "loss": 1.022, "step": 29850 }, { "epoch": 0.4301539968595589, "grad_norm": 0.5974683165550232, "learning_rate": 1.271407893487557e-05, "loss": 1.0317, "step": 29860 }, { "epoch": 0.43029805379085817, "grad_norm": 0.6402578949928284, "learning_rate": 1.2709588083381747e-05, "loss": 1.0149, "step": 29870 }, { "epoch": 0.4304421107221574, "grad_norm": 0.5927873849868774, "learning_rate": 1.2705096642047976e-05, "loss": 1.0182, "step": 29880 }, { "epoch": 0.43058616765345664, "grad_norm": 0.5713891386985779, "learning_rate": 1.2700604611851984e-05, "loss": 1.0227, "step": 29890 }, { "epoch": 0.4307302245847559, "grad_norm": 0.6035123467445374, "learning_rate": 1.2696111993771623e-05, "loss": 1.0242, "step": 29900 }, { "epoch": 0.43087428151605517, "grad_norm": 0.6411575078964233, "learning_rate": 1.269161878878487e-05, "loss": 1.0063, "step": 29910 }, { "epoch": 0.4310183384473544, "grad_norm": 0.6706348061561584, "learning_rate": 1.2687124997869844e-05, "loss": 1.0255, "step": 29920 }, { "epoch": 0.43116239537865364, "grad_norm": 0.5613524317741394, "learning_rate": 1.268263062200477e-05, "loss": 1.0292, "step": 29930 }, { "epoch": 0.4313064523099529, "grad_norm": 0.6087634563446045, "learning_rate": 1.2678135662168019e-05, "loss": 1.0366, "step": 29940 }, { "epoch": 0.43145050924125217, "grad_norm": 0.5440014004707336, "learning_rate": 1.2673640119338075e-05, "loss": 1.0036, "step": 29950 }, { "epoch": 0.4315945661725514, "grad_norm": 0.652259111404419, "learning_rate": 1.2669143994493565e-05, "loss": 1.0345, "step": 29960 }, { "epoch": 0.43173862310385064, "grad_norm": 0.5840180516242981, "learning_rate": 1.2664647288613225e-05, "loss": 1.0218, "step": 29970 }, { "epoch": 0.4318826800351499, "grad_norm": 0.5811200141906738, "learning_rate": 1.266015000267593e-05, "loss": 1.0154, "step": 29980 }, { "epoch": 0.43202673696644917, "grad_norm": 0.5535847544670105, "learning_rate": 1.2655652137660677e-05, "loss": 1.0318, "step": 29990 }, { "epoch": 0.4321707938977484, "grad_norm": 0.5859915614128113, "learning_rate": 1.2651153694546588e-05, "loss": 1.0538, "step": 30000 }, { "epoch": 0.43231485082904764, "grad_norm": 0.5436146259307861, "learning_rate": 1.2646654674312915e-05, "loss": 1.0287, "step": 30010 }, { "epoch": 0.4324589077603469, "grad_norm": 0.5253572463989258, "learning_rate": 1.2642155077939024e-05, "loss": 1.0233, "step": 30020 }, { "epoch": 0.43260296469164616, "grad_norm": 0.613762378692627, "learning_rate": 1.2637654906404424e-05, "loss": 1.0229, "step": 30030 }, { "epoch": 0.43274702162294537, "grad_norm": 0.5489577054977417, "learning_rate": 1.263315416068874e-05, "loss": 1.0234, "step": 30040 }, { "epoch": 0.43289107855424463, "grad_norm": 0.5223692059516907, "learning_rate": 1.2628652841771716e-05, "loss": 1.0226, "step": 30050 }, { "epoch": 0.4330351354855439, "grad_norm": 0.5891116857528687, "learning_rate": 1.2624150950633232e-05, "loss": 1.021, "step": 30060 }, { "epoch": 0.43317919241684316, "grad_norm": 0.597817599773407, "learning_rate": 1.2619648488253287e-05, "loss": 1.0257, "step": 30070 }, { "epoch": 0.43332324934814237, "grad_norm": 0.6240730881690979, "learning_rate": 1.2615145455612004e-05, "loss": 1.0283, "step": 30080 }, { "epoch": 0.43346730627944163, "grad_norm": 0.5432168245315552, "learning_rate": 1.261064185368963e-05, "loss": 1.0271, "step": 30090 }, { "epoch": 0.4336113632107409, "grad_norm": 0.5634761452674866, "learning_rate": 1.2606137683466539e-05, "loss": 1.0175, "step": 30100 }, { "epoch": 0.43375542014204016, "grad_norm": 0.5914860963821411, "learning_rate": 1.2601632945923226e-05, "loss": 1.0329, "step": 30110 }, { "epoch": 0.43389947707333937, "grad_norm": 0.5869556069374084, "learning_rate": 1.259712764204031e-05, "loss": 1.0356, "step": 30120 }, { "epoch": 0.43404353400463863, "grad_norm": 0.5884944200515747, "learning_rate": 1.2592621772798534e-05, "loss": 1.0151, "step": 30130 }, { "epoch": 0.4341875909359379, "grad_norm": 0.6159077882766724, "learning_rate": 1.2588115339178763e-05, "loss": 1.0218, "step": 30140 }, { "epoch": 0.43433164786723716, "grad_norm": 0.6196834444999695, "learning_rate": 1.2583608342161986e-05, "loss": 1.0351, "step": 30150 }, { "epoch": 0.43447570479853637, "grad_norm": 0.570512056350708, "learning_rate": 1.2579100782729312e-05, "loss": 1.003, "step": 30160 }, { "epoch": 0.43461976172983563, "grad_norm": 0.5434126853942871, "learning_rate": 1.257459266186198e-05, "loss": 1.0119, "step": 30170 }, { "epoch": 0.4347638186611349, "grad_norm": 0.7198238372802734, "learning_rate": 1.257008398054134e-05, "loss": 1.0183, "step": 30180 }, { "epoch": 0.43490787559243416, "grad_norm": 0.5934227705001831, "learning_rate": 1.2565574739748872e-05, "loss": 1.0364, "step": 30190 }, { "epoch": 0.43505193252373336, "grad_norm": 0.7064135670661926, "learning_rate": 1.2561064940466174e-05, "loss": 1.0267, "step": 30200 }, { "epoch": 0.43519598945503263, "grad_norm": 0.5537364482879639, "learning_rate": 1.255655458367497e-05, "loss": 1.0267, "step": 30210 }, { "epoch": 0.4353400463863319, "grad_norm": 0.5636672377586365, "learning_rate": 1.2552043670357101e-05, "loss": 1.0037, "step": 30220 }, { "epoch": 0.43548410331763115, "grad_norm": 0.5693824887275696, "learning_rate": 1.254753220149453e-05, "loss": 1.0178, "step": 30230 }, { "epoch": 0.43562816024893036, "grad_norm": 0.6389200091362, "learning_rate": 1.254302017806934e-05, "loss": 1.0218, "step": 30240 }, { "epoch": 0.4357722171802296, "grad_norm": 0.6115773916244507, "learning_rate": 1.2538507601063744e-05, "loss": 1.0276, "step": 30250 }, { "epoch": 0.4359162741115289, "grad_norm": 0.5919804573059082, "learning_rate": 1.2533994471460054e-05, "loss": 1.0168, "step": 30260 }, { "epoch": 0.43606033104282815, "grad_norm": 0.6357947587966919, "learning_rate": 1.252948079024073e-05, "loss": 1.0153, "step": 30270 }, { "epoch": 0.43620438797412736, "grad_norm": 0.6063383221626282, "learning_rate": 1.2524966558388327e-05, "loss": 1.0095, "step": 30280 }, { "epoch": 0.4363484449054266, "grad_norm": 0.5542093515396118, "learning_rate": 1.2520451776885544e-05, "loss": 1.0246, "step": 30290 }, { "epoch": 0.4364925018367259, "grad_norm": 0.6694737076759338, "learning_rate": 1.2515936446715171e-05, "loss": 1.0491, "step": 30300 }, { "epoch": 0.4366365587680251, "grad_norm": 0.5313624143600464, "learning_rate": 1.2511420568860144e-05, "loss": 1.0447, "step": 30310 }, { "epoch": 0.43678061569932436, "grad_norm": 0.5757784843444824, "learning_rate": 1.2506904144303502e-05, "loss": 1.0369, "step": 30320 }, { "epoch": 0.4369246726306236, "grad_norm": 0.9457381963729858, "learning_rate": 1.2502387174028416e-05, "loss": 1.0399, "step": 30330 }, { "epoch": 0.4370687295619229, "grad_norm": 0.5293096303939819, "learning_rate": 1.2497869659018158e-05, "loss": 1.0197, "step": 30340 }, { "epoch": 0.4372127864932221, "grad_norm": 0.6232894659042358, "learning_rate": 1.2493351600256136e-05, "loss": 1.0265, "step": 30350 }, { "epoch": 0.43735684342452136, "grad_norm": 0.6530959010124207, "learning_rate": 1.2488832998725866e-05, "loss": 1.0193, "step": 30360 }, { "epoch": 0.4375009003558206, "grad_norm": 0.5749871134757996, "learning_rate": 1.2484313855410983e-05, "loss": 1.0378, "step": 30370 }, { "epoch": 0.4376449572871199, "grad_norm": 0.560028612613678, "learning_rate": 1.2479794171295248e-05, "loss": 1.0262, "step": 30380 }, { "epoch": 0.4377890142184191, "grad_norm": 0.5265219211578369, "learning_rate": 1.2475273947362528e-05, "loss": 1.0136, "step": 30390 }, { "epoch": 0.43793307114971836, "grad_norm": 0.6045346856117249, "learning_rate": 1.247075318459682e-05, "loss": 1.027, "step": 30400 }, { "epoch": 0.4380771280810176, "grad_norm": 0.5903724431991577, "learning_rate": 1.2466231883982223e-05, "loss": 1.0223, "step": 30410 }, { "epoch": 0.4382211850123169, "grad_norm": 0.551878809928894, "learning_rate": 1.246171004650297e-05, "loss": 1.0125, "step": 30420 }, { "epoch": 0.4383652419436161, "grad_norm": 0.613456130027771, "learning_rate": 1.2457187673143396e-05, "loss": 1.0231, "step": 30430 }, { "epoch": 0.43850929887491535, "grad_norm": 0.6330251097679138, "learning_rate": 1.2452664764887965e-05, "loss": 1.0084, "step": 30440 }, { "epoch": 0.4386533558062146, "grad_norm": 0.5188472270965576, "learning_rate": 1.244814132272125e-05, "loss": 1.0383, "step": 30450 }, { "epoch": 0.4387974127375139, "grad_norm": 0.5829715728759766, "learning_rate": 1.244361734762794e-05, "loss": 1.0361, "step": 30460 }, { "epoch": 0.4389414696688131, "grad_norm": 0.7289334535598755, "learning_rate": 1.2439092840592843e-05, "loss": 1.0452, "step": 30470 }, { "epoch": 0.43908552660011235, "grad_norm": 0.6511844396591187, "learning_rate": 1.2434567802600881e-05, "loss": 1.0338, "step": 30480 }, { "epoch": 0.4392295835314116, "grad_norm": 0.7280377149581909, "learning_rate": 1.2430042234637094e-05, "loss": 1.0261, "step": 30490 }, { "epoch": 0.4393736404627109, "grad_norm": 0.5383123755455017, "learning_rate": 1.2425516137686636e-05, "loss": 1.0147, "step": 30500 }, { "epoch": 0.4395176973940101, "grad_norm": 0.5853512287139893, "learning_rate": 1.2420989512734774e-05, "loss": 1.0188, "step": 30510 }, { "epoch": 0.43966175432530935, "grad_norm": 0.6809625029563904, "learning_rate": 1.2416462360766892e-05, "loss": 1.0317, "step": 30520 }, { "epoch": 0.4398058112566086, "grad_norm": 0.9652009606361389, "learning_rate": 1.241193468276849e-05, "loss": 1.0355, "step": 30530 }, { "epoch": 0.4399498681879079, "grad_norm": 0.6148524880409241, "learning_rate": 1.2407406479725179e-05, "loss": 1.0355, "step": 30540 }, { "epoch": 0.4400939251192071, "grad_norm": 0.5160987973213196, "learning_rate": 1.2402877752622689e-05, "loss": 1.0306, "step": 30550 }, { "epoch": 0.44023798205050635, "grad_norm": 0.5704976916313171, "learning_rate": 1.239834850244686e-05, "loss": 1.0531, "step": 30560 }, { "epoch": 0.4403820389818056, "grad_norm": 0.608162522315979, "learning_rate": 1.2393818730183647e-05, "loss": 1.029, "step": 30570 }, { "epoch": 0.4405260959131049, "grad_norm": 0.6319136619567871, "learning_rate": 1.2389288436819118e-05, "loss": 1.0027, "step": 30580 }, { "epoch": 0.4406701528444041, "grad_norm": 0.6754544377326965, "learning_rate": 1.2384757623339458e-05, "loss": 1.0246, "step": 30590 }, { "epoch": 0.44081420977570335, "grad_norm": 0.882371723651886, "learning_rate": 1.2380226290730959e-05, "loss": 1.0206, "step": 30600 }, { "epoch": 0.4409582667070026, "grad_norm": 0.6008939743041992, "learning_rate": 1.2375694439980036e-05, "loss": 1.0049, "step": 30610 }, { "epoch": 0.4411023236383019, "grad_norm": 0.5888626575469971, "learning_rate": 1.2371162072073202e-05, "loss": 1.0442, "step": 30620 }, { "epoch": 0.4412463805696011, "grad_norm": 0.5692532658576965, "learning_rate": 1.2366629187997096e-05, "loss": 1.0428, "step": 30630 }, { "epoch": 0.44139043750090035, "grad_norm": 0.5510519742965698, "learning_rate": 1.2362095788738464e-05, "loss": 1.0398, "step": 30640 }, { "epoch": 0.4415344944321996, "grad_norm": 0.5545852780342102, "learning_rate": 1.2357561875284166e-05, "loss": 1.0053, "step": 30650 }, { "epoch": 0.4416785513634989, "grad_norm": 0.5512057542800903, "learning_rate": 1.2353027448621169e-05, "loss": 1.0344, "step": 30660 }, { "epoch": 0.4418226082947981, "grad_norm": 0.608013391494751, "learning_rate": 1.2348492509736554e-05, "loss": 1.0538, "step": 30670 }, { "epoch": 0.44196666522609734, "grad_norm": 0.6366941928863525, "learning_rate": 1.234395705961752e-05, "loss": 1.0188, "step": 30680 }, { "epoch": 0.4421107221573966, "grad_norm": 0.530868649482727, "learning_rate": 1.2339421099251367e-05, "loss": 1.0388, "step": 30690 }, { "epoch": 0.44225477908869587, "grad_norm": 0.5997594594955444, "learning_rate": 1.2334884629625515e-05, "loss": 1.0111, "step": 30700 }, { "epoch": 0.4423988360199951, "grad_norm": 0.8025027513504028, "learning_rate": 1.2330347651727485e-05, "loss": 1.0087, "step": 30710 }, { "epoch": 0.44254289295129434, "grad_norm": 0.538600504398346, "learning_rate": 1.2325810166544923e-05, "loss": 1.02, "step": 30720 }, { "epoch": 0.4426869498825936, "grad_norm": 0.5707302689552307, "learning_rate": 1.2321272175065571e-05, "loss": 1.0435, "step": 30730 }, { "epoch": 0.44283100681389287, "grad_norm": 0.6243785619735718, "learning_rate": 1.2316733678277286e-05, "loss": 1.0324, "step": 30740 }, { "epoch": 0.4429750637451921, "grad_norm": 0.5545337796211243, "learning_rate": 1.2312194677168041e-05, "loss": 1.038, "step": 30750 }, { "epoch": 0.44311912067649134, "grad_norm": 0.6085401773452759, "learning_rate": 1.230765517272591e-05, "loss": 1.0137, "step": 30760 }, { "epoch": 0.4432631776077906, "grad_norm": 0.5168399214744568, "learning_rate": 1.2303115165939086e-05, "loss": 1.0088, "step": 30770 }, { "epoch": 0.44340723453908987, "grad_norm": 0.5611743927001953, "learning_rate": 1.2298574657795856e-05, "loss": 1.0207, "step": 30780 }, { "epoch": 0.4435512914703891, "grad_norm": 0.5813621878623962, "learning_rate": 1.2294033649284635e-05, "loss": 1.0046, "step": 30790 }, { "epoch": 0.44369534840168834, "grad_norm": 0.5414305925369263, "learning_rate": 1.2289492141393937e-05, "loss": 1.032, "step": 30800 }, { "epoch": 0.4438394053329876, "grad_norm": 0.5455127358436584, "learning_rate": 1.2284950135112382e-05, "loss": 1.0306, "step": 30810 }, { "epoch": 0.44398346226428687, "grad_norm": 0.5503478050231934, "learning_rate": 1.22804076314287e-05, "loss": 1.0046, "step": 30820 }, { "epoch": 0.4441275191955861, "grad_norm": 0.5376619100570679, "learning_rate": 1.2275864631331737e-05, "loss": 1.0277, "step": 30830 }, { "epoch": 0.44427157612688534, "grad_norm": 0.5993817448616028, "learning_rate": 1.2271321135810442e-05, "loss": 1.0183, "step": 30840 }, { "epoch": 0.4444156330581846, "grad_norm": 0.6439812183380127, "learning_rate": 1.2266777145853866e-05, "loss": 1.0425, "step": 30850 }, { "epoch": 0.44455968998948386, "grad_norm": 0.5761047601699829, "learning_rate": 1.2262232662451175e-05, "loss": 1.051, "step": 30860 }, { "epoch": 0.44470374692078307, "grad_norm": 0.553991436958313, "learning_rate": 1.225768768659164e-05, "loss": 1.018, "step": 30870 }, { "epoch": 0.44484780385208234, "grad_norm": 0.7612801790237427, "learning_rate": 1.2253142219264644e-05, "loss": 1.0281, "step": 30880 }, { "epoch": 0.4449918607833816, "grad_norm": 0.6323403120040894, "learning_rate": 1.2248596261459666e-05, "loss": 1.0406, "step": 30890 }, { "epoch": 0.44513591771468086, "grad_norm": 0.5343186259269714, "learning_rate": 1.2244049814166303e-05, "loss": 1.0222, "step": 30900 }, { "epoch": 0.44527997464598007, "grad_norm": 0.5989657640457153, "learning_rate": 1.2239502878374255e-05, "loss": 1.0303, "step": 30910 }, { "epoch": 0.44542403157727933, "grad_norm": 0.5075747966766357, "learning_rate": 1.2234955455073319e-05, "loss": 1.0193, "step": 30920 }, { "epoch": 0.4455680885085786, "grad_norm": 0.5390758514404297, "learning_rate": 1.2230407545253414e-05, "loss": 1.0204, "step": 30930 }, { "epoch": 0.44571214543987786, "grad_norm": 0.5804110765457153, "learning_rate": 1.222585914990455e-05, "loss": 1.0334, "step": 30940 }, { "epoch": 0.44585620237117707, "grad_norm": 3.104297637939453, "learning_rate": 1.2221310270016865e-05, "loss": 1.0631, "step": 30950 }, { "epoch": 0.44600025930247633, "grad_norm": 0.5167530179023743, "learning_rate": 1.221676090658057e-05, "loss": 1.0337, "step": 30960 }, { "epoch": 0.4461443162337756, "grad_norm": 0.5759089589118958, "learning_rate": 1.2212211060586008e-05, "loss": 1.0183, "step": 30970 }, { "epoch": 0.44628837316507486, "grad_norm": 0.5175536870956421, "learning_rate": 1.2207660733023613e-05, "loss": 1.0247, "step": 30980 }, { "epoch": 0.44643243009637407, "grad_norm": 0.519580602645874, "learning_rate": 1.2203109924883933e-05, "loss": 1.0394, "step": 30990 }, { "epoch": 0.44657648702767333, "grad_norm": 0.6472852826118469, "learning_rate": 1.2198558637157616e-05, "loss": 1.0076, "step": 31000 }, { "epoch": 0.4467205439589726, "grad_norm": 0.5493713021278381, "learning_rate": 1.2194006870835408e-05, "loss": 1.0292, "step": 31010 }, { "epoch": 0.44686460089027186, "grad_norm": 0.5693567991256714, "learning_rate": 1.2189454626908176e-05, "loss": 1.0217, "step": 31020 }, { "epoch": 0.44700865782157106, "grad_norm": 0.6454607844352722, "learning_rate": 1.2184901906366872e-05, "loss": 1.0142, "step": 31030 }, { "epoch": 0.44715271475287033, "grad_norm": 0.7012841105461121, "learning_rate": 1.2180348710202565e-05, "loss": 0.9985, "step": 31040 }, { "epoch": 0.4472967716841696, "grad_norm": 0.6283087134361267, "learning_rate": 1.2175795039406422e-05, "loss": 1.0346, "step": 31050 }, { "epoch": 0.44744082861546886, "grad_norm": 0.7161290645599365, "learning_rate": 1.2171240894969714e-05, "loss": 1.0284, "step": 31060 }, { "epoch": 0.44758488554676806, "grad_norm": 0.6098495125770569, "learning_rate": 1.2166686277883815e-05, "loss": 1.0147, "step": 31070 }, { "epoch": 0.4477289424780673, "grad_norm": 0.6446240544319153, "learning_rate": 1.21621311891402e-05, "loss": 1.0227, "step": 31080 }, { "epoch": 0.4478729994093666, "grad_norm": 0.5811617970466614, "learning_rate": 1.2157575629730457e-05, "loss": 1.021, "step": 31090 }, { "epoch": 0.44801705634066585, "grad_norm": 0.6839554309844971, "learning_rate": 1.215301960064626e-05, "loss": 1.031, "step": 31100 }, { "epoch": 0.44816111327196506, "grad_norm": 0.5694627165794373, "learning_rate": 1.2148463102879398e-05, "loss": 1.0507, "step": 31110 }, { "epoch": 0.4483051702032643, "grad_norm": 0.6384556889533997, "learning_rate": 1.2143906137421755e-05, "loss": 1.0203, "step": 31120 }, { "epoch": 0.4484492271345636, "grad_norm": 0.5794015526771545, "learning_rate": 1.2139348705265321e-05, "loss": 1.0188, "step": 31130 }, { "epoch": 0.44859328406586285, "grad_norm": 0.5144084692001343, "learning_rate": 1.2134790807402189e-05, "loss": 1.0178, "step": 31140 }, { "epoch": 0.44873734099716206, "grad_norm": 0.5534980297088623, "learning_rate": 1.2130232444824543e-05, "loss": 1.0415, "step": 31150 }, { "epoch": 0.4488813979284613, "grad_norm": 0.48926809430122375, "learning_rate": 1.2125673618524684e-05, "loss": 1.0094, "step": 31160 }, { "epoch": 0.4490254548597606, "grad_norm": 0.6032081246376038, "learning_rate": 1.2121114329495003e-05, "loss": 1.0162, "step": 31170 }, { "epoch": 0.44916951179105985, "grad_norm": 0.5034138560295105, "learning_rate": 1.2116554578727993e-05, "loss": 1.0238, "step": 31180 }, { "epoch": 0.44931356872235906, "grad_norm": 0.6093688011169434, "learning_rate": 1.2111994367216244e-05, "loss": 1.0209, "step": 31190 }, { "epoch": 0.4494576256536583, "grad_norm": 0.6029537320137024, "learning_rate": 1.2107433695952462e-05, "loss": 1.0096, "step": 31200 }, { "epoch": 0.4496016825849576, "grad_norm": 0.5483503937721252, "learning_rate": 1.2102872565929435e-05, "loss": 1.034, "step": 31210 }, { "epoch": 0.44974573951625685, "grad_norm": 0.590434193611145, "learning_rate": 1.2098310978140057e-05, "loss": 1.0231, "step": 31220 }, { "epoch": 0.44988979644755606, "grad_norm": 0.6409574747085571, "learning_rate": 1.2093748933577328e-05, "loss": 1.0312, "step": 31230 }, { "epoch": 0.4500338533788553, "grad_norm": 0.5614639520645142, "learning_rate": 1.208918643323434e-05, "loss": 0.9887, "step": 31240 }, { "epoch": 0.4501779103101546, "grad_norm": 0.621324896812439, "learning_rate": 1.2084623478104285e-05, "loss": 1.0357, "step": 31250 }, { "epoch": 0.45032196724145385, "grad_norm": 0.5756137371063232, "learning_rate": 1.2080060069180452e-05, "loss": 1.0237, "step": 31260 }, { "epoch": 0.45046602417275305, "grad_norm": 0.5862130522727966, "learning_rate": 1.2075496207456242e-05, "loss": 1.0317, "step": 31270 }, { "epoch": 0.4506100811040523, "grad_norm": 0.6214578151702881, "learning_rate": 1.2070931893925138e-05, "loss": 1.0306, "step": 31280 }, { "epoch": 0.4507541380353516, "grad_norm": 0.5534659624099731, "learning_rate": 1.2066367129580727e-05, "loss": 1.0242, "step": 31290 }, { "epoch": 0.45089819496665084, "grad_norm": 0.5505697131156921, "learning_rate": 1.2061801915416698e-05, "loss": 1.0337, "step": 31300 }, { "epoch": 0.45104225189795005, "grad_norm": 0.6099808216094971, "learning_rate": 1.2057236252426834e-05, "loss": 1.0237, "step": 31310 }, { "epoch": 0.4511863088292493, "grad_norm": 0.5561302304267883, "learning_rate": 1.205267014160502e-05, "loss": 1.0099, "step": 31320 }, { "epoch": 0.4513303657605486, "grad_norm": 0.5727132558822632, "learning_rate": 1.2048103583945233e-05, "loss": 1.0377, "step": 31330 }, { "epoch": 0.45147442269184784, "grad_norm": 0.576480507850647, "learning_rate": 1.2043536580441546e-05, "loss": 1.0063, "step": 31340 }, { "epoch": 0.45161847962314705, "grad_norm": 0.6703700423240662, "learning_rate": 1.2038969132088139e-05, "loss": 1.0376, "step": 31350 }, { "epoch": 0.4517625365544463, "grad_norm": 0.6149612069129944, "learning_rate": 1.2034401239879279e-05, "loss": 1.0205, "step": 31360 }, { "epoch": 0.4519065934857456, "grad_norm": 0.683622419834137, "learning_rate": 1.2029832904809333e-05, "loss": 1.0164, "step": 31370 }, { "epoch": 0.45205065041704484, "grad_norm": 0.6892789602279663, "learning_rate": 1.2025264127872764e-05, "loss": 1.0329, "step": 31380 }, { "epoch": 0.45219470734834405, "grad_norm": 0.5351040959358215, "learning_rate": 1.2020694910064137e-05, "loss": 1.0384, "step": 31390 }, { "epoch": 0.4523387642796433, "grad_norm": 0.6014117002487183, "learning_rate": 1.2016125252378098e-05, "loss": 1.0268, "step": 31400 }, { "epoch": 0.4524828212109426, "grad_norm": 0.6096225380897522, "learning_rate": 1.2011555155809407e-05, "loss": 1.0224, "step": 31410 }, { "epoch": 0.45262687814224184, "grad_norm": 0.6388968229293823, "learning_rate": 1.2006984621352906e-05, "loss": 1.0193, "step": 31420 }, { "epoch": 0.45277093507354105, "grad_norm": 0.5522310733795166, "learning_rate": 1.2002413650003545e-05, "loss": 1.0332, "step": 31430 }, { "epoch": 0.4529149920048403, "grad_norm": 0.7236217260360718, "learning_rate": 1.199784224275635e-05, "loss": 1.0283, "step": 31440 }, { "epoch": 0.4530590489361396, "grad_norm": 0.6079700589179993, "learning_rate": 1.1993270400606462e-05, "loss": 1.0207, "step": 31450 }, { "epoch": 0.45320310586743884, "grad_norm": 0.4999813735485077, "learning_rate": 1.1988698124549102e-05, "loss": 1.0106, "step": 31460 }, { "epoch": 0.45334716279873805, "grad_norm": 0.5409412384033203, "learning_rate": 1.1984125415579596e-05, "loss": 1.021, "step": 31470 }, { "epoch": 0.4534912197300373, "grad_norm": 0.5420812964439392, "learning_rate": 1.197955227469336e-05, "loss": 1.0084, "step": 31480 }, { "epoch": 0.4536352766613366, "grad_norm": 0.5634649991989136, "learning_rate": 1.1974978702885898e-05, "loss": 1.0361, "step": 31490 }, { "epoch": 0.45377933359263584, "grad_norm": 0.7330309748649597, "learning_rate": 1.1970404701152822e-05, "loss": 1.0362, "step": 31500 }, { "epoch": 0.45392339052393504, "grad_norm": 0.53804612159729, "learning_rate": 1.196583027048982e-05, "loss": 1.0525, "step": 31510 }, { "epoch": 0.4540674474552343, "grad_norm": 0.5847958326339722, "learning_rate": 1.196125541189269e-05, "loss": 1.022, "step": 31520 }, { "epoch": 0.45421150438653357, "grad_norm": 0.6122546792030334, "learning_rate": 1.1956680126357308e-05, "loss": 1.0401, "step": 31530 }, { "epoch": 0.45435556131783283, "grad_norm": 0.6637943387031555, "learning_rate": 1.1952104414879658e-05, "loss": 1.0365, "step": 31540 }, { "epoch": 0.45449961824913204, "grad_norm": 0.6475297212600708, "learning_rate": 1.1947528278455807e-05, "loss": 1.0001, "step": 31550 }, { "epoch": 0.4546436751804313, "grad_norm": 0.6540977358818054, "learning_rate": 1.1942951718081912e-05, "loss": 1.0261, "step": 31560 }, { "epoch": 0.45478773211173057, "grad_norm": 0.497886061668396, "learning_rate": 1.1938374734754237e-05, "loss": 1.0266, "step": 31570 }, { "epoch": 0.45493178904302983, "grad_norm": 0.5904746055603027, "learning_rate": 1.1933797329469117e-05, "loss": 1.0093, "step": 31580 }, { "epoch": 0.45507584597432904, "grad_norm": 0.5959451794624329, "learning_rate": 1.1929219503222997e-05, "loss": 1.0248, "step": 31590 }, { "epoch": 0.4552199029056283, "grad_norm": 0.7836245894432068, "learning_rate": 1.1924641257012403e-05, "loss": 1.0199, "step": 31600 }, { "epoch": 0.45536395983692757, "grad_norm": 0.6108161211013794, "learning_rate": 1.192006259183396e-05, "loss": 1.0467, "step": 31610 }, { "epoch": 0.4555080167682268, "grad_norm": 0.5521235466003418, "learning_rate": 1.191548350868438e-05, "loss": 1.0296, "step": 31620 }, { "epoch": 0.45565207369952604, "grad_norm": 0.5361019372940063, "learning_rate": 1.191090400856046e-05, "loss": 1.0242, "step": 31630 }, { "epoch": 0.4557961306308253, "grad_norm": 0.5625245571136475, "learning_rate": 1.19063240924591e-05, "loss": 1.0098, "step": 31640 }, { "epoch": 0.45594018756212457, "grad_norm": 0.5830609202384949, "learning_rate": 1.1901743761377285e-05, "loss": 1.0268, "step": 31650 }, { "epoch": 0.4560842444934238, "grad_norm": 0.5644638538360596, "learning_rate": 1.1897163016312085e-05, "loss": 1.0124, "step": 31660 }, { "epoch": 0.45622830142472304, "grad_norm": 0.5439152717590332, "learning_rate": 1.1892581858260665e-05, "loss": 0.9989, "step": 31670 }, { "epoch": 0.4563723583560223, "grad_norm": 0.5330991148948669, "learning_rate": 1.1888000288220288e-05, "loss": 1.0315, "step": 31680 }, { "epoch": 0.45651641528732156, "grad_norm": 0.6343188881874084, "learning_rate": 1.1883418307188292e-05, "loss": 1.0204, "step": 31690 }, { "epoch": 0.45666047221862077, "grad_norm": 0.6865444183349609, "learning_rate": 1.1878835916162111e-05, "loss": 1.025, "step": 31700 }, { "epoch": 0.45680452914992004, "grad_norm": 0.5597052574157715, "learning_rate": 1.187425311613927e-05, "loss": 1.0154, "step": 31710 }, { "epoch": 0.4569485860812193, "grad_norm": 0.6149247288703918, "learning_rate": 1.186966990811738e-05, "loss": 1.0293, "step": 31720 }, { "epoch": 0.45709264301251856, "grad_norm": 0.5930432081222534, "learning_rate": 1.1865086293094146e-05, "loss": 1.026, "step": 31730 }, { "epoch": 0.45723669994381777, "grad_norm": 0.6211383938789368, "learning_rate": 1.1860502272067352e-05, "loss": 1.0215, "step": 31740 }, { "epoch": 0.45738075687511703, "grad_norm": 0.6969831585884094, "learning_rate": 1.1855917846034879e-05, "loss": 1.035, "step": 31750 }, { "epoch": 0.4575248138064163, "grad_norm": 0.7310178875923157, "learning_rate": 1.1851333015994696e-05, "loss": 1.032, "step": 31760 }, { "epoch": 0.45766887073771556, "grad_norm": 0.5876138806343079, "learning_rate": 1.1846747782944852e-05, "loss": 1.0438, "step": 31770 }, { "epoch": 0.45781292766901477, "grad_norm": 0.566362738609314, "learning_rate": 1.1842162147883492e-05, "loss": 1.034, "step": 31780 }, { "epoch": 0.45795698460031403, "grad_norm": 0.45695582032203674, "learning_rate": 1.1837576111808846e-05, "loss": 1.0179, "step": 31790 }, { "epoch": 0.4581010415316133, "grad_norm": 0.5838852524757385, "learning_rate": 1.1832989675719231e-05, "loss": 1.0158, "step": 31800 }, { "epoch": 0.45824509846291256, "grad_norm": 0.563360333442688, "learning_rate": 1.1828402840613045e-05, "loss": 1.0113, "step": 31810 }, { "epoch": 0.45838915539421177, "grad_norm": 0.5359305739402771, "learning_rate": 1.1823815607488789e-05, "loss": 1.0321, "step": 31820 }, { "epoch": 0.45853321232551103, "grad_norm": 0.6095662713050842, "learning_rate": 1.1819227977345032e-05, "loss": 1.0093, "step": 31830 }, { "epoch": 0.4586772692568103, "grad_norm": 0.5786446928977966, "learning_rate": 1.1814639951180443e-05, "loss": 1.015, "step": 31840 }, { "epoch": 0.45882132618810956, "grad_norm": 0.6685968637466431, "learning_rate": 1.1810051529993765e-05, "loss": 1.0243, "step": 31850 }, { "epoch": 0.45896538311940877, "grad_norm": 0.5776757597923279, "learning_rate": 1.1805462714783842e-05, "loss": 1.0309, "step": 31860 }, { "epoch": 0.45910944005070803, "grad_norm": 0.5636850595474243, "learning_rate": 1.180087350654959e-05, "loss": 1.0263, "step": 31870 }, { "epoch": 0.4592534969820073, "grad_norm": 0.5590022802352905, "learning_rate": 1.179628390629002e-05, "loss": 1.0022, "step": 31880 }, { "epoch": 0.45939755391330656, "grad_norm": 0.6480368375778198, "learning_rate": 1.1791693915004223e-05, "loss": 1.0125, "step": 31890 }, { "epoch": 0.45954161084460576, "grad_norm": 0.5446181297302246, "learning_rate": 1.178710353369138e-05, "loss": 1.038, "step": 31900 }, { "epoch": 0.459685667775905, "grad_norm": 0.545379638671875, "learning_rate": 1.1782512763350748e-05, "loss": 1.019, "step": 31910 }, { "epoch": 0.4598297247072043, "grad_norm": 0.5600593686103821, "learning_rate": 1.1777921604981677e-05, "loss": 1.002, "step": 31920 }, { "epoch": 0.45997378163850355, "grad_norm": 0.5947697162628174, "learning_rate": 1.17733300595836e-05, "loss": 1.011, "step": 31930 }, { "epoch": 0.46011783856980276, "grad_norm": 0.5739585757255554, "learning_rate": 1.1768738128156033e-05, "loss": 1.024, "step": 31940 }, { "epoch": 0.460261895501102, "grad_norm": 0.5938307642936707, "learning_rate": 1.1764145811698571e-05, "loss": 1.0214, "step": 31950 }, { "epoch": 0.4604059524324013, "grad_norm": 0.5985384583473206, "learning_rate": 1.1759553111210906e-05, "loss": 1.0321, "step": 31960 }, { "epoch": 0.46055000936370055, "grad_norm": 0.4748883843421936, "learning_rate": 1.1754960027692801e-05, "loss": 1.0317, "step": 31970 }, { "epoch": 0.46069406629499976, "grad_norm": 0.6445233225822449, "learning_rate": 1.1750366562144109e-05, "loss": 1.0444, "step": 31980 }, { "epoch": 0.460838123226299, "grad_norm": 0.5735971331596375, "learning_rate": 1.174577271556476e-05, "loss": 1.0375, "step": 31990 }, { "epoch": 0.4609821801575983, "grad_norm": 0.597209632396698, "learning_rate": 1.1741178488954777e-05, "loss": 1.0271, "step": 32000 }, { "epoch": 0.46112623708889755, "grad_norm": 0.5384397506713867, "learning_rate": 1.1736583883314254e-05, "loss": 1.0414, "step": 32010 }, { "epoch": 0.46127029402019676, "grad_norm": 0.6700142025947571, "learning_rate": 1.1731988899643374e-05, "loss": 1.0253, "step": 32020 }, { "epoch": 0.461414350951496, "grad_norm": 0.5831916928291321, "learning_rate": 1.1727393538942407e-05, "loss": 1.0199, "step": 32030 }, { "epoch": 0.4615584078827953, "grad_norm": 0.49577924609184265, "learning_rate": 1.1722797802211693e-05, "loss": 1.0225, "step": 32040 }, { "epoch": 0.46170246481409455, "grad_norm": 0.5894416570663452, "learning_rate": 1.171820169045167e-05, "loss": 1.0188, "step": 32050 }, { "epoch": 0.46184652174539376, "grad_norm": 0.5564287900924683, "learning_rate": 1.1713605204662834e-05, "loss": 1.0465, "step": 32060 }, { "epoch": 0.461990578676693, "grad_norm": 0.6189201474189758, "learning_rate": 1.1709008345845786e-05, "loss": 1.0252, "step": 32070 }, { "epoch": 0.4621346356079923, "grad_norm": 0.5556772351264954, "learning_rate": 1.17044111150012e-05, "loss": 1.0326, "step": 32080 }, { "epoch": 0.46227869253929155, "grad_norm": 0.6864781379699707, "learning_rate": 1.1699813513129823e-05, "loss": 1.0486, "step": 32090 }, { "epoch": 0.46242274947059075, "grad_norm": 0.580782413482666, "learning_rate": 1.1695215541232497e-05, "loss": 1.021, "step": 32100 }, { "epoch": 0.46256680640189, "grad_norm": 0.6461674571037292, "learning_rate": 1.1690617200310128e-05, "loss": 1.0224, "step": 32110 }, { "epoch": 0.4627108633331893, "grad_norm": 0.533871054649353, "learning_rate": 1.1686018491363722e-05, "loss": 1.0225, "step": 32120 }, { "epoch": 0.46285492026448855, "grad_norm": 0.6044811010360718, "learning_rate": 1.1681419415394344e-05, "loss": 1.0316, "step": 32130 }, { "epoch": 0.46299897719578775, "grad_norm": 0.6584555506706238, "learning_rate": 1.1676819973403158e-05, "loss": 1.0336, "step": 32140 }, { "epoch": 0.463143034127087, "grad_norm": 0.6285313963890076, "learning_rate": 1.1672220166391392e-05, "loss": 1.0362, "step": 32150 }, { "epoch": 0.4632870910583863, "grad_norm": 0.5741084814071655, "learning_rate": 1.1667619995360368e-05, "loss": 1.0185, "step": 32160 }, { "epoch": 0.46343114798968554, "grad_norm": 0.5344382524490356, "learning_rate": 1.1663019461311475e-05, "loss": 1.0215, "step": 32170 }, { "epoch": 0.46357520492098475, "grad_norm": 0.6228933334350586, "learning_rate": 1.1658418565246183e-05, "loss": 1.043, "step": 32180 }, { "epoch": 0.463719261852284, "grad_norm": 0.7229593992233276, "learning_rate": 1.1653817308166055e-05, "loss": 0.9985, "step": 32190 }, { "epoch": 0.4638633187835833, "grad_norm": 0.56514972448349, "learning_rate": 1.1649215691072706e-05, "loss": 1.0177, "step": 32200 }, { "epoch": 0.46400737571488254, "grad_norm": 0.5896420478820801, "learning_rate": 1.1644613714967857e-05, "loss": 1.0303, "step": 32210 }, { "epoch": 0.46415143264618175, "grad_norm": 0.5764533877372742, "learning_rate": 1.1640011380853285e-05, "loss": 1.0082, "step": 32220 }, { "epoch": 0.464295489577481, "grad_norm": 0.6431474685668945, "learning_rate": 1.1635408689730862e-05, "loss": 1.0407, "step": 32230 }, { "epoch": 0.4644395465087803, "grad_norm": 0.5689139366149902, "learning_rate": 1.1630805642602527e-05, "loss": 1.0358, "step": 32240 }, { "epoch": 0.46458360344007954, "grad_norm": 0.530646026134491, "learning_rate": 1.1626202240470302e-05, "loss": 1.0332, "step": 32250 }, { "epoch": 0.46472766037137875, "grad_norm": 0.642479658126831, "learning_rate": 1.162159848433628e-05, "loss": 1.033, "step": 32260 }, { "epoch": 0.464871717302678, "grad_norm": 0.5798544883728027, "learning_rate": 1.1616994375202642e-05, "loss": 1.0399, "step": 32270 }, { "epoch": 0.4650157742339773, "grad_norm": 0.5376886129379272, "learning_rate": 1.1612389914071633e-05, "loss": 1.0265, "step": 32280 }, { "epoch": 0.46515983116527654, "grad_norm": 0.6260908842086792, "learning_rate": 1.1607785101945582e-05, "loss": 1.0245, "step": 32290 }, { "epoch": 0.46530388809657575, "grad_norm": 0.597516655921936, "learning_rate": 1.1603179939826896e-05, "loss": 1.0122, "step": 32300 }, { "epoch": 0.465447945027875, "grad_norm": 0.5453872680664062, "learning_rate": 1.1598574428718049e-05, "loss": 1.0166, "step": 32310 }, { "epoch": 0.4655920019591743, "grad_norm": 0.5635013580322266, "learning_rate": 1.1593968569621604e-05, "loss": 1.0116, "step": 32320 }, { "epoch": 0.46573605889047354, "grad_norm": 0.6421080827713013, "learning_rate": 1.1589362363540189e-05, "loss": 1.023, "step": 32330 }, { "epoch": 0.46588011582177274, "grad_norm": 0.7370414137840271, "learning_rate": 1.1584755811476511e-05, "loss": 1.047, "step": 32340 }, { "epoch": 0.466024172753072, "grad_norm": 0.6795259714126587, "learning_rate": 1.1580148914433359e-05, "loss": 1.0235, "step": 32350 }, { "epoch": 0.46616822968437127, "grad_norm": 0.5798462629318237, "learning_rate": 1.1575541673413582e-05, "loss": 1.0129, "step": 32360 }, { "epoch": 0.46631228661567053, "grad_norm": 0.6684741973876953, "learning_rate": 1.157093408942012e-05, "loss": 1.0319, "step": 32370 }, { "epoch": 0.46645634354696974, "grad_norm": 0.53980952501297, "learning_rate": 1.1566326163455979e-05, "loss": 1.0186, "step": 32380 }, { "epoch": 0.466600400478269, "grad_norm": 0.6894479990005493, "learning_rate": 1.1561717896524238e-05, "loss": 1.0438, "step": 32390 }, { "epoch": 0.46674445740956827, "grad_norm": 0.6048723459243774, "learning_rate": 1.1557109289628052e-05, "loss": 1.0076, "step": 32400 }, { "epoch": 0.46688851434086753, "grad_norm": 0.7372193932533264, "learning_rate": 1.1552500343770658e-05, "loss": 1.029, "step": 32410 }, { "epoch": 0.46703257127216674, "grad_norm": 0.5819730758666992, "learning_rate": 1.1547891059955356e-05, "loss": 1.012, "step": 32420 }, { "epoch": 0.467176628203466, "grad_norm": 0.6845980882644653, "learning_rate": 1.154328143918552e-05, "loss": 1.0259, "step": 32430 }, { "epoch": 0.46732068513476527, "grad_norm": 0.6390533447265625, "learning_rate": 1.1538671482464608e-05, "loss": 1.0455, "step": 32440 }, { "epoch": 0.46746474206606453, "grad_norm": 0.6333168745040894, "learning_rate": 1.1534061190796137e-05, "loss": 1.0301, "step": 32450 }, { "epoch": 0.46760879899736374, "grad_norm": 0.5310613512992859, "learning_rate": 1.152945056518371e-05, "loss": 1.0354, "step": 32460 }, { "epoch": 0.467752855928663, "grad_norm": 0.5582602620124817, "learning_rate": 1.152483960663099e-05, "loss": 1.0252, "step": 32470 }, { "epoch": 0.46789691285996227, "grad_norm": 0.5743705630302429, "learning_rate": 1.1520228316141729e-05, "loss": 1.0403, "step": 32480 }, { "epoch": 0.46804096979126153, "grad_norm": 0.5354068875312805, "learning_rate": 1.151561669471973e-05, "loss": 1.0301, "step": 32490 }, { "epoch": 0.46818502672256074, "grad_norm": 0.6218494772911072, "learning_rate": 1.1511004743368886e-05, "loss": 1.0412, "step": 32500 }, { "epoch": 0.46832908365386, "grad_norm": 0.5262079834938049, "learning_rate": 1.1506392463093155e-05, "loss": 1.0183, "step": 32510 }, { "epoch": 0.46847314058515926, "grad_norm": 0.7889408469200134, "learning_rate": 1.1501779854896564e-05, "loss": 1.0526, "step": 32520 }, { "epoch": 0.46861719751645853, "grad_norm": 0.671790361404419, "learning_rate": 1.1497166919783216e-05, "loss": 1.0363, "step": 32530 }, { "epoch": 0.46876125444775774, "grad_norm": 0.6060637831687927, "learning_rate": 1.1492553658757281e-05, "loss": 1.0227, "step": 32540 }, { "epoch": 0.468905311379057, "grad_norm": 0.5498833060264587, "learning_rate": 1.1487940072823007e-05, "loss": 1.0177, "step": 32550 }, { "epoch": 0.46904936831035626, "grad_norm": 0.5617979764938354, "learning_rate": 1.1483326162984704e-05, "loss": 1.0172, "step": 32560 }, { "epoch": 0.4691934252416555, "grad_norm": 0.5950319170951843, "learning_rate": 1.1478711930246757e-05, "loss": 1.0409, "step": 32570 }, { "epoch": 0.46933748217295473, "grad_norm": 0.6482478976249695, "learning_rate": 1.1474097375613624e-05, "loss": 0.9901, "step": 32580 }, { "epoch": 0.469481539104254, "grad_norm": 0.533887505531311, "learning_rate": 1.1469482500089826e-05, "loss": 1.0164, "step": 32590 }, { "epoch": 0.46962559603555326, "grad_norm": 0.5833592414855957, "learning_rate": 1.1464867304679961e-05, "loss": 1.0464, "step": 32600 }, { "epoch": 0.4697696529668525, "grad_norm": 0.5561684370040894, "learning_rate": 1.1460251790388688e-05, "loss": 1.0359, "step": 32610 }, { "epoch": 0.46991370989815173, "grad_norm": 0.702983558177948, "learning_rate": 1.1455635958220748e-05, "loss": 1.0174, "step": 32620 }, { "epoch": 0.470057766829451, "grad_norm": 0.575308620929718, "learning_rate": 1.1451019809180938e-05, "loss": 1.0305, "step": 32630 }, { "epoch": 0.47020182376075026, "grad_norm": 0.6303951144218445, "learning_rate": 1.1446403344274133e-05, "loss": 1.0359, "step": 32640 }, { "epoch": 0.4703458806920495, "grad_norm": 0.6195650100708008, "learning_rate": 1.1441786564505275e-05, "loss": 1.0303, "step": 32650 }, { "epoch": 0.47048993762334873, "grad_norm": 0.5987343192100525, "learning_rate": 1.1437169470879368e-05, "loss": 1.0285, "step": 32660 }, { "epoch": 0.470633994554648, "grad_norm": 0.589579164981842, "learning_rate": 1.1432552064401497e-05, "loss": 1.0139, "step": 32670 }, { "epoch": 0.47077805148594726, "grad_norm": 0.5205335021018982, "learning_rate": 1.14279343460768e-05, "loss": 1.0344, "step": 32680 }, { "epoch": 0.4709221084172465, "grad_norm": 0.5689437985420227, "learning_rate": 1.1423316316910496e-05, "loss": 1.0214, "step": 32690 }, { "epoch": 0.47106616534854573, "grad_norm": 0.5881633758544922, "learning_rate": 1.1418697977907862e-05, "loss": 1.022, "step": 32700 }, { "epoch": 0.471210222279845, "grad_norm": 0.582982063293457, "learning_rate": 1.1414079330074252e-05, "loss": 1.0009, "step": 32710 }, { "epoch": 0.47135427921114426, "grad_norm": 0.6023359298706055, "learning_rate": 1.140946037441508e-05, "loss": 1.0255, "step": 32720 }, { "epoch": 0.4714983361424435, "grad_norm": 0.6213319301605225, "learning_rate": 1.1404841111935824e-05, "loss": 1.0354, "step": 32730 }, { "epoch": 0.4716423930737427, "grad_norm": 0.5460777878761292, "learning_rate": 1.1400221543642043e-05, "loss": 1.0209, "step": 32740 }, { "epoch": 0.471786450005042, "grad_norm": 0.6849192380905151, "learning_rate": 1.1395601670539344e-05, "loss": 1.0097, "step": 32750 }, { "epoch": 0.47193050693634125, "grad_norm": 0.5484505891799927, "learning_rate": 1.1390981493633415e-05, "loss": 1.0063, "step": 32760 }, { "epoch": 0.4720745638676405, "grad_norm": 0.6390265226364136, "learning_rate": 1.1386361013930002e-05, "loss": 1.0419, "step": 32770 }, { "epoch": 0.4722186207989397, "grad_norm": 0.6577069163322449, "learning_rate": 1.1381740232434925e-05, "loss": 1.0162, "step": 32780 }, { "epoch": 0.472362677730239, "grad_norm": 0.5977217555046082, "learning_rate": 1.1377119150154058e-05, "loss": 1.022, "step": 32790 }, { "epoch": 0.47250673466153825, "grad_norm": 0.5634925365447998, "learning_rate": 1.137249776809335e-05, "loss": 1.0225, "step": 32800 }, { "epoch": 0.4726507915928375, "grad_norm": 0.5434150099754333, "learning_rate": 1.1367876087258806e-05, "loss": 1.0155, "step": 32810 }, { "epoch": 0.4727948485241367, "grad_norm": 0.5795437693595886, "learning_rate": 1.1363254108656513e-05, "loss": 1.0211, "step": 32820 }, { "epoch": 0.472938905455436, "grad_norm": 0.6080933213233948, "learning_rate": 1.1358631833292609e-05, "loss": 1.0364, "step": 32830 }, { "epoch": 0.47308296238673525, "grad_norm": 0.5317662954330444, "learning_rate": 1.1354009262173292e-05, "loss": 1.0074, "step": 32840 }, { "epoch": 0.4732270193180345, "grad_norm": 0.686359703540802, "learning_rate": 1.1349386396304842e-05, "loss": 1.0229, "step": 32850 }, { "epoch": 0.4733710762493337, "grad_norm": 0.6074622869491577, "learning_rate": 1.1344763236693583e-05, "loss": 1.0143, "step": 32860 }, { "epoch": 0.473515133180633, "grad_norm": 0.6043309569358826, "learning_rate": 1.1340139784345923e-05, "loss": 1.0402, "step": 32870 }, { "epoch": 0.47365919011193225, "grad_norm": 0.5316594243049622, "learning_rate": 1.1335516040268315e-05, "loss": 1.0117, "step": 32880 }, { "epoch": 0.4738032470432315, "grad_norm": 0.5331855416297913, "learning_rate": 1.1330892005467293e-05, "loss": 0.9849, "step": 32890 }, { "epoch": 0.4739473039745307, "grad_norm": 0.5734590888023376, "learning_rate": 1.132626768094944e-05, "loss": 1.0353, "step": 32900 }, { "epoch": 0.47409136090583, "grad_norm": 0.5718367099761963, "learning_rate": 1.1321643067721407e-05, "loss": 1.0156, "step": 32910 }, { "epoch": 0.47423541783712925, "grad_norm": 0.5481789708137512, "learning_rate": 1.1317018166789911e-05, "loss": 1.0032, "step": 32920 }, { "epoch": 0.4743794747684285, "grad_norm": 0.5542817115783691, "learning_rate": 1.1312392979161729e-05, "loss": 1.0224, "step": 32930 }, { "epoch": 0.4745235316997277, "grad_norm": 0.5004504323005676, "learning_rate": 1.1307767505843699e-05, "loss": 1.0404, "step": 32940 }, { "epoch": 0.474667588631027, "grad_norm": 0.5400373339653015, "learning_rate": 1.130314174784272e-05, "loss": 1.0238, "step": 32950 }, { "epoch": 0.47481164556232625, "grad_norm": 0.5386858582496643, "learning_rate": 1.1298515706165764e-05, "loss": 1.009, "step": 32960 }, { "epoch": 0.47495570249362545, "grad_norm": 0.6059908866882324, "learning_rate": 1.1293889381819854e-05, "loss": 1.0092, "step": 32970 }, { "epoch": 0.4750997594249247, "grad_norm": 0.5509114861488342, "learning_rate": 1.1289262775812069e-05, "loss": 1.0033, "step": 32980 }, { "epoch": 0.475243816356224, "grad_norm": 0.4810161888599396, "learning_rate": 1.1284635889149566e-05, "loss": 1.0172, "step": 32990 }, { "epoch": 0.47538787328752324, "grad_norm": 0.5593041777610779, "learning_rate": 1.1280008722839552e-05, "loss": 1.0277, "step": 33000 }, { "epoch": 0.47553193021882245, "grad_norm": 0.5800256133079529, "learning_rate": 1.1275381277889298e-05, "loss": 1.013, "step": 33010 }, { "epoch": 0.4756759871501217, "grad_norm": 0.6317630410194397, "learning_rate": 1.1270753555306129e-05, "loss": 1.0329, "step": 33020 }, { "epoch": 0.475820044081421, "grad_norm": 0.5637384057044983, "learning_rate": 1.1266125556097444e-05, "loss": 1.0178, "step": 33030 }, { "epoch": 0.47596410101272024, "grad_norm": 0.5414950251579285, "learning_rate": 1.1261497281270693e-05, "loss": 1.0151, "step": 33040 }, { "epoch": 0.47610815794401945, "grad_norm": 0.6262162327766418, "learning_rate": 1.1256868731833383e-05, "loss": 1.017, "step": 33050 }, { "epoch": 0.4762522148753187, "grad_norm": 0.6255475878715515, "learning_rate": 1.1252239908793093e-05, "loss": 1.0204, "step": 33060 }, { "epoch": 0.476396271806618, "grad_norm": 0.6530277132987976, "learning_rate": 1.1247610813157446e-05, "loss": 1.0071, "step": 33070 }, { "epoch": 0.47654032873791724, "grad_norm": 0.6401610970497131, "learning_rate": 1.1242981445934138e-05, "loss": 1.0194, "step": 33080 }, { "epoch": 0.47668438566921645, "grad_norm": 0.643420398235321, "learning_rate": 1.1238351808130911e-05, "loss": 1.0111, "step": 33090 }, { "epoch": 0.4768284426005157, "grad_norm": 0.5949244499206543, "learning_rate": 1.1233721900755583e-05, "loss": 1.0173, "step": 33100 }, { "epoch": 0.476972499531815, "grad_norm": 0.5837469696998596, "learning_rate": 1.1229091724816016e-05, "loss": 1.0326, "step": 33110 }, { "epoch": 0.47711655646311424, "grad_norm": 0.5850193500518799, "learning_rate": 1.1224461281320136e-05, "loss": 1.0177, "step": 33120 }, { "epoch": 0.47726061339441345, "grad_norm": 0.5880247950553894, "learning_rate": 1.1219830571275928e-05, "loss": 1.0042, "step": 33130 }, { "epoch": 0.4774046703257127, "grad_norm": 0.5486767292022705, "learning_rate": 1.1215199595691425e-05, "loss": 1.0246, "step": 33140 }, { "epoch": 0.477548727257012, "grad_norm": 0.6419997811317444, "learning_rate": 1.1210568355574743e-05, "loss": 1.0189, "step": 33150 }, { "epoch": 0.47769278418831124, "grad_norm": 0.6350610852241516, "learning_rate": 1.1205936851934025e-05, "loss": 1.0442, "step": 33160 }, { "epoch": 0.47783684111961044, "grad_norm": 0.6184735894203186, "learning_rate": 1.120130508577749e-05, "loss": 1.0016, "step": 33170 }, { "epoch": 0.4779808980509097, "grad_norm": 0.6295711994171143, "learning_rate": 1.1196673058113413e-05, "loss": 1.0218, "step": 33180 }, { "epoch": 0.47812495498220897, "grad_norm": 0.5689663887023926, "learning_rate": 1.1192040769950115e-05, "loss": 1.0158, "step": 33190 }, { "epoch": 0.47826901191350824, "grad_norm": 0.7592531442642212, "learning_rate": 1.1187408222295988e-05, "loss": 1.0292, "step": 33200 }, { "epoch": 0.47841306884480744, "grad_norm": 0.5784085988998413, "learning_rate": 1.118277541615947e-05, "loss": 1.0266, "step": 33210 }, { "epoch": 0.4785571257761067, "grad_norm": 0.637199878692627, "learning_rate": 1.1178142352549064e-05, "loss": 1.0219, "step": 33220 }, { "epoch": 0.47870118270740597, "grad_norm": 0.5977768301963806, "learning_rate": 1.1173509032473316e-05, "loss": 1.0314, "step": 33230 }, { "epoch": 0.47884523963870523, "grad_norm": 0.5544858574867249, "learning_rate": 1.1168875456940842e-05, "loss": 1.0043, "step": 33240 }, { "epoch": 0.47898929657000444, "grad_norm": 0.5969414114952087, "learning_rate": 1.1164241626960304e-05, "loss": 1.0297, "step": 33250 }, { "epoch": 0.4791333535013037, "grad_norm": 0.6712751984596252, "learning_rate": 1.1159607543540429e-05, "loss": 1.0134, "step": 33260 }, { "epoch": 0.47927741043260297, "grad_norm": 0.6637882590293884, "learning_rate": 1.1154973207689985e-05, "loss": 1.0377, "step": 33270 }, { "epoch": 0.47942146736390223, "grad_norm": 0.5263668298721313, "learning_rate": 1.1150338620417806e-05, "loss": 1.0034, "step": 33280 }, { "epoch": 0.47956552429520144, "grad_norm": 0.6545845866203308, "learning_rate": 1.1145703782732781e-05, "loss": 1.029, "step": 33290 }, { "epoch": 0.4797095812265007, "grad_norm": 0.5800336599349976, "learning_rate": 1.1141068695643845e-05, "loss": 1.0074, "step": 33300 }, { "epoch": 0.47985363815779997, "grad_norm": 0.5339868068695068, "learning_rate": 1.1136433360159996e-05, "loss": 1.0223, "step": 33310 }, { "epoch": 0.47999769508909923, "grad_norm": 0.6196845173835754, "learning_rate": 1.1131797777290281e-05, "loss": 1.036, "step": 33320 }, { "epoch": 0.48014175202039844, "grad_norm": 0.6900539994239807, "learning_rate": 1.1127161948043806e-05, "loss": 1.024, "step": 33330 }, { "epoch": 0.4802858089516977, "grad_norm": 0.635518491268158, "learning_rate": 1.1122525873429718e-05, "loss": 1.0273, "step": 33340 }, { "epoch": 0.48042986588299696, "grad_norm": 0.5423782467842102, "learning_rate": 1.1117889554457238e-05, "loss": 1.0158, "step": 33350 }, { "epoch": 0.48057392281429623, "grad_norm": 1.0638518333435059, "learning_rate": 1.111325299213562e-05, "loss": 1.0243, "step": 33360 }, { "epoch": 0.48071797974559544, "grad_norm": 0.6649375557899475, "learning_rate": 1.1108616187474186e-05, "loss": 1.0199, "step": 33370 }, { "epoch": 0.4808620366768947, "grad_norm": 0.5941753387451172, "learning_rate": 1.11039791414823e-05, "loss": 1.0284, "step": 33380 }, { "epoch": 0.48100609360819396, "grad_norm": 0.5361167192459106, "learning_rate": 1.1099341855169383e-05, "loss": 1.0069, "step": 33390 }, { "epoch": 0.4811501505394932, "grad_norm": 0.5737732648849487, "learning_rate": 1.1094704329544914e-05, "loss": 0.9989, "step": 33400 }, { "epoch": 0.48129420747079243, "grad_norm": 0.6742610931396484, "learning_rate": 1.109006656561841e-05, "loss": 1.0337, "step": 33410 }, { "epoch": 0.4814382644020917, "grad_norm": 0.5616894960403442, "learning_rate": 1.1085428564399453e-05, "loss": 1.0373, "step": 33420 }, { "epoch": 0.48158232133339096, "grad_norm": 0.5926318764686584, "learning_rate": 1.1080790326897668e-05, "loss": 1.0238, "step": 33430 }, { "epoch": 0.4817263782646902, "grad_norm": 0.5741323232650757, "learning_rate": 1.107615185412274e-05, "loss": 1.0243, "step": 33440 }, { "epoch": 0.48187043519598943, "grad_norm": 0.5415228009223938, "learning_rate": 1.1071513147084403e-05, "loss": 1.0177, "step": 33450 }, { "epoch": 0.4820144921272887, "grad_norm": 0.6227560043334961, "learning_rate": 1.1066874206792431e-05, "loss": 1.002, "step": 33460 }, { "epoch": 0.48215854905858796, "grad_norm": 0.6032860279083252, "learning_rate": 1.1062235034256663e-05, "loss": 1.0171, "step": 33470 }, { "epoch": 0.4823026059898872, "grad_norm": 0.5680851340293884, "learning_rate": 1.1057595630486984e-05, "loss": 1.0107, "step": 33480 }, { "epoch": 0.48244666292118643, "grad_norm": 0.7578117251396179, "learning_rate": 1.1052955996493321e-05, "loss": 1.0253, "step": 33490 }, { "epoch": 0.4825907198524857, "grad_norm": 0.5712920427322388, "learning_rate": 1.1048316133285667e-05, "loss": 1.0229, "step": 33500 }, { "epoch": 0.48273477678378496, "grad_norm": 0.6681069731712341, "learning_rate": 1.104367604187405e-05, "loss": 1.0232, "step": 33510 }, { "epoch": 0.4828788337150842, "grad_norm": 0.6597923040390015, "learning_rate": 1.1039035723268558e-05, "loss": 1.0236, "step": 33520 }, { "epoch": 0.48302289064638343, "grad_norm": 0.5945020318031311, "learning_rate": 1.103439517847932e-05, "loss": 1.0021, "step": 33530 }, { "epoch": 0.4831669475776827, "grad_norm": 0.6446981430053711, "learning_rate": 1.1029754408516525e-05, "loss": 1.0425, "step": 33540 }, { "epoch": 0.48331100450898196, "grad_norm": 0.5626109838485718, "learning_rate": 1.1025113414390398e-05, "loss": 0.9943, "step": 33550 }, { "epoch": 0.4834550614402812, "grad_norm": 0.680046021938324, "learning_rate": 1.1020472197111224e-05, "loss": 1.041, "step": 33560 }, { "epoch": 0.4835991183715804, "grad_norm": 0.5631656050682068, "learning_rate": 1.1015830757689327e-05, "loss": 1.0309, "step": 33570 }, { "epoch": 0.4837431753028797, "grad_norm": 0.5655036568641663, "learning_rate": 1.101118909713509e-05, "loss": 1.0381, "step": 33580 }, { "epoch": 0.48388723223417895, "grad_norm": 0.556452214717865, "learning_rate": 1.1006547216458937e-05, "loss": 1.0099, "step": 33590 }, { "epoch": 0.4840312891654782, "grad_norm": 0.6915555596351624, "learning_rate": 1.1001905116671334e-05, "loss": 1.0386, "step": 33600 }, { "epoch": 0.4841753460967774, "grad_norm": 0.5771707892417908, "learning_rate": 1.0997262798782815e-05, "loss": 1.0076, "step": 33610 }, { "epoch": 0.4843194030280767, "grad_norm": 0.9634688496589661, "learning_rate": 1.099262026380394e-05, "loss": 1.0201, "step": 33620 }, { "epoch": 0.48446345995937595, "grad_norm": 0.657273530960083, "learning_rate": 1.0987977512745327e-05, "loss": 1.046, "step": 33630 }, { "epoch": 0.4846075168906752, "grad_norm": 0.49862417578697205, "learning_rate": 1.0983334546617637e-05, "loss": 1.0432, "step": 33640 }, { "epoch": 0.4847515738219744, "grad_norm": 0.6185719966888428, "learning_rate": 1.0978691366431583e-05, "loss": 1.0118, "step": 33650 }, { "epoch": 0.4848956307532737, "grad_norm": 0.5877535939216614, "learning_rate": 1.097404797319792e-05, "loss": 1.0326, "step": 33660 }, { "epoch": 0.48503968768457295, "grad_norm": 0.5418586730957031, "learning_rate": 1.0969404367927448e-05, "loss": 1.0337, "step": 33670 }, { "epoch": 0.4851837446158722, "grad_norm": 0.5355377197265625, "learning_rate": 1.096476055163102e-05, "loss": 1.031, "step": 33680 }, { "epoch": 0.4853278015471714, "grad_norm": 0.5953190326690674, "learning_rate": 1.0960116525319526e-05, "loss": 1.0469, "step": 33690 }, { "epoch": 0.4854718584784707, "grad_norm": 0.621695876121521, "learning_rate": 1.0955472290003914e-05, "loss": 1.0275, "step": 33700 }, { "epoch": 0.48561591540976995, "grad_norm": 0.5835665464401245, "learning_rate": 1.0950827846695161e-05, "loss": 1.0165, "step": 33710 }, { "epoch": 0.4857599723410692, "grad_norm": 0.6365586519241333, "learning_rate": 1.0946183196404305e-05, "loss": 1.0236, "step": 33720 }, { "epoch": 0.4859040292723684, "grad_norm": 0.6362375617027283, "learning_rate": 1.0941538340142418e-05, "loss": 1.0102, "step": 33730 }, { "epoch": 0.4860480862036677, "grad_norm": 0.7200132608413696, "learning_rate": 1.0936893278920626e-05, "loss": 1.0244, "step": 33740 }, { "epoch": 0.48619214313496695, "grad_norm": 0.6358307600021362, "learning_rate": 1.0932248013750089e-05, "loss": 1.0335, "step": 33750 }, { "epoch": 0.4863362000662662, "grad_norm": 0.6214737892150879, "learning_rate": 1.0927602545642018e-05, "loss": 1.0381, "step": 33760 }, { "epoch": 0.4864802569975654, "grad_norm": 0.5125634074211121, "learning_rate": 1.0922956875607673e-05, "loss": 1.03, "step": 33770 }, { "epoch": 0.4866243139288647, "grad_norm": 0.6336681842803955, "learning_rate": 1.0918311004658346e-05, "loss": 1.03, "step": 33780 }, { "epoch": 0.48676837086016395, "grad_norm": 0.5649718642234802, "learning_rate": 1.0913664933805381e-05, "loss": 0.9968, "step": 33790 }, { "epoch": 0.4869124277914632, "grad_norm": 0.5822668075561523, "learning_rate": 1.0909018664060164e-05, "loss": 1.0357, "step": 33800 }, { "epoch": 0.4870564847227624, "grad_norm": 0.5345937609672546, "learning_rate": 1.0904372196434127e-05, "loss": 1.0025, "step": 33810 }, { "epoch": 0.4872005416540617, "grad_norm": 0.5745434761047363, "learning_rate": 1.0899725531938736e-05, "loss": 1.0289, "step": 33820 }, { "epoch": 0.48734459858536094, "grad_norm": 0.5992249250411987, "learning_rate": 1.089507867158551e-05, "loss": 1.0441, "step": 33830 }, { "epoch": 0.4874886555166602, "grad_norm": 0.6297547221183777, "learning_rate": 1.0890431616386004e-05, "loss": 1.0193, "step": 33840 }, { "epoch": 0.4876327124479594, "grad_norm": 0.5767676830291748, "learning_rate": 1.0885784367351817e-05, "loss": 1.0103, "step": 33850 }, { "epoch": 0.4877767693792587, "grad_norm": 0.5850902199745178, "learning_rate": 1.0881136925494593e-05, "loss": 1.0475, "step": 33860 }, { "epoch": 0.48792082631055794, "grad_norm": 0.5311706066131592, "learning_rate": 1.0876489291826015e-05, "loss": 1.0303, "step": 33870 }, { "epoch": 0.4880648832418572, "grad_norm": 0.5796907544136047, "learning_rate": 1.0871841467357813e-05, "loss": 1.017, "step": 33880 }, { "epoch": 0.4882089401731564, "grad_norm": 0.6047660708427429, "learning_rate": 1.0867193453101748e-05, "loss": 1.0114, "step": 33890 }, { "epoch": 0.4883529971044557, "grad_norm": 0.7163272500038147, "learning_rate": 1.0862545250069631e-05, "loss": 1.0197, "step": 33900 }, { "epoch": 0.48849705403575494, "grad_norm": 0.6854219436645508, "learning_rate": 1.085789685927331e-05, "loss": 1.0231, "step": 33910 }, { "epoch": 0.4886411109670542, "grad_norm": 0.5965855717658997, "learning_rate": 1.0853248281724682e-05, "loss": 1.003, "step": 33920 }, { "epoch": 0.4887851678983534, "grad_norm": 0.6373468041419983, "learning_rate": 1.0848599518435674e-05, "loss": 1.0242, "step": 33930 }, { "epoch": 0.4889292248296527, "grad_norm": 0.5388720631599426, "learning_rate": 1.0843950570418251e-05, "loss": 1.0382, "step": 33940 }, { "epoch": 0.48907328176095194, "grad_norm": 0.6404698491096497, "learning_rate": 1.0839301438684439e-05, "loss": 1.0324, "step": 33950 }, { "epoch": 0.4892173386922512, "grad_norm": 0.692626416683197, "learning_rate": 1.0834652124246276e-05, "loss": 1.0235, "step": 33960 }, { "epoch": 0.4893613956235504, "grad_norm": 0.5482460856437683, "learning_rate": 1.0830002628115863e-05, "loss": 1.0054, "step": 33970 }, { "epoch": 0.4895054525548497, "grad_norm": 0.5376763939857483, "learning_rate": 1.0825352951305324e-05, "loss": 1.0273, "step": 33980 }, { "epoch": 0.48964950948614894, "grad_norm": 0.5511714816093445, "learning_rate": 1.0820703094826837e-05, "loss": 1.0065, "step": 33990 }, { "epoch": 0.4897935664174482, "grad_norm": 0.7800193428993225, "learning_rate": 1.0816053059692607e-05, "loss": 1.0419, "step": 34000 }, { "epoch": 0.4899376233487474, "grad_norm": 0.6549273133277893, "learning_rate": 1.0811402846914881e-05, "loss": 1.0188, "step": 34010 }, { "epoch": 0.49008168028004667, "grad_norm": 0.5511816740036011, "learning_rate": 1.0806752457505949e-05, "loss": 1.0514, "step": 34020 }, { "epoch": 0.49022573721134594, "grad_norm": 0.6253392696380615, "learning_rate": 1.0802101892478139e-05, "loss": 1.0365, "step": 34030 }, { "epoch": 0.4903697941426452, "grad_norm": 0.5437934994697571, "learning_rate": 1.0797451152843813e-05, "loss": 1.0175, "step": 34040 }, { "epoch": 0.4905138510739444, "grad_norm": 0.580740213394165, "learning_rate": 1.0792800239615366e-05, "loss": 1.0405, "step": 34050 }, { "epoch": 0.49065790800524367, "grad_norm": 0.646485447883606, "learning_rate": 1.0788149153805247e-05, "loss": 1.036, "step": 34060 }, { "epoch": 0.49080196493654293, "grad_norm": 0.6106576919555664, "learning_rate": 1.078349789642593e-05, "loss": 1.0081, "step": 34070 }, { "epoch": 0.4909460218678422, "grad_norm": 0.6716938614845276, "learning_rate": 1.0778846468489927e-05, "loss": 1.0205, "step": 34080 }, { "epoch": 0.4910900787991414, "grad_norm": 0.6112418174743652, "learning_rate": 1.0774194871009795e-05, "loss": 1.003, "step": 34090 }, { "epoch": 0.49123413573044067, "grad_norm": 0.5627857446670532, "learning_rate": 1.076954310499812e-05, "loss": 1.0165, "step": 34100 }, { "epoch": 0.49137819266173993, "grad_norm": 0.5897433161735535, "learning_rate": 1.0764891171467527e-05, "loss": 1.0239, "step": 34110 }, { "epoch": 0.4915222495930392, "grad_norm": 0.5515845417976379, "learning_rate": 1.0760239071430674e-05, "loss": 1.0372, "step": 34120 }, { "epoch": 0.4916663065243384, "grad_norm": 0.5582568049430847, "learning_rate": 1.0755586805900264e-05, "loss": 1.0286, "step": 34130 }, { "epoch": 0.49181036345563767, "grad_norm": 0.5665695071220398, "learning_rate": 1.0750934375889031e-05, "loss": 1.0143, "step": 34140 }, { "epoch": 0.49195442038693693, "grad_norm": 0.6537351012229919, "learning_rate": 1.0746281782409741e-05, "loss": 1.0258, "step": 34150 }, { "epoch": 0.4920984773182362, "grad_norm": 0.5903180837631226, "learning_rate": 1.0741629026475204e-05, "loss": 1.0406, "step": 34160 }, { "epoch": 0.4922425342495354, "grad_norm": 0.5804015398025513, "learning_rate": 1.073697610909826e-05, "loss": 1.0212, "step": 34170 }, { "epoch": 0.49238659118083467, "grad_norm": 0.724968433380127, "learning_rate": 1.0732323031291782e-05, "loss": 1.0573, "step": 34180 }, { "epoch": 0.49253064811213393, "grad_norm": 0.627497136592865, "learning_rate": 1.0727669794068683e-05, "loss": 1.0238, "step": 34190 }, { "epoch": 0.4926747050434332, "grad_norm": 0.5886906981468201, "learning_rate": 1.0723016398441907e-05, "loss": 1.0327, "step": 34200 }, { "epoch": 0.4928187619747324, "grad_norm": 0.5589420795440674, "learning_rate": 1.0718362845424437e-05, "loss": 1.0396, "step": 34210 }, { "epoch": 0.49296281890603166, "grad_norm": 0.6463192105293274, "learning_rate": 1.0713709136029282e-05, "loss": 1.0181, "step": 34220 }, { "epoch": 0.4931068758373309, "grad_norm": 0.6273395419120789, "learning_rate": 1.0709055271269497e-05, "loss": 1.0283, "step": 34230 }, { "epoch": 0.4932509327686302, "grad_norm": 0.6272040009498596, "learning_rate": 1.0704401252158156e-05, "loss": 1.0094, "step": 34240 }, { "epoch": 0.4933949896999294, "grad_norm": 0.6010400056838989, "learning_rate": 1.0699747079708388e-05, "loss": 1.0057, "step": 34250 }, { "epoch": 0.49353904663122866, "grad_norm": 0.6007885336875916, "learning_rate": 1.0695092754933327e-05, "loss": 1.0289, "step": 34260 }, { "epoch": 0.4936831035625279, "grad_norm": 0.6088549494743347, "learning_rate": 1.0690438278846164e-05, "loss": 1.0276, "step": 34270 }, { "epoch": 0.49382716049382713, "grad_norm": 0.6549152731895447, "learning_rate": 1.0685783652460113e-05, "loss": 1.0132, "step": 34280 }, { "epoch": 0.4939712174251264, "grad_norm": 0.6156216859817505, "learning_rate": 1.0681128876788422e-05, "loss": 1.0263, "step": 34290 }, { "epoch": 0.49411527435642566, "grad_norm": 0.5971141457557678, "learning_rate": 1.0676473952844369e-05, "loss": 1.037, "step": 34300 }, { "epoch": 0.4942593312877249, "grad_norm": 0.5692846179008484, "learning_rate": 1.067181888164127e-05, "loss": 1.0347, "step": 34310 }, { "epoch": 0.49440338821902413, "grad_norm": 0.47606226801872253, "learning_rate": 1.0667163664192467e-05, "loss": 1.0308, "step": 34320 }, { "epoch": 0.4945474451503234, "grad_norm": 0.6196852922439575, "learning_rate": 1.0662508301511335e-05, "loss": 1.0273, "step": 34330 }, { "epoch": 0.49469150208162266, "grad_norm": 0.637514591217041, "learning_rate": 1.065785279461129e-05, "loss": 1.0484, "step": 34340 }, { "epoch": 0.4948355590129219, "grad_norm": 0.6438940763473511, "learning_rate": 1.0653197144505762e-05, "loss": 1.0269, "step": 34350 }, { "epoch": 0.49497961594422113, "grad_norm": 0.5789345502853394, "learning_rate": 1.0648541352208233e-05, "loss": 1.032, "step": 34360 }, { "epoch": 0.4951236728755204, "grad_norm": 0.635887086391449, "learning_rate": 1.0643885418732191e-05, "loss": 1.0272, "step": 34370 }, { "epoch": 0.49526772980681966, "grad_norm": 0.603382408618927, "learning_rate": 1.063922934509118e-05, "loss": 1.0058, "step": 34380 }, { "epoch": 0.4954117867381189, "grad_norm": 0.61844402551651, "learning_rate": 1.063457313229876e-05, "loss": 1.034, "step": 34390 }, { "epoch": 0.49555584366941813, "grad_norm": 0.6242319345474243, "learning_rate": 1.0629916781368519e-05, "loss": 1.0437, "step": 34400 }, { "epoch": 0.4956999006007174, "grad_norm": 0.5652687549591064, "learning_rate": 1.0625260293314086e-05, "loss": 1.0034, "step": 34410 }, { "epoch": 0.49584395753201665, "grad_norm": 0.7617804408073425, "learning_rate": 1.0620603669149113e-05, "loss": 1.0066, "step": 34420 }, { "epoch": 0.4959880144633159, "grad_norm": 0.5702570080757141, "learning_rate": 1.0615946909887285e-05, "loss": 1.0242, "step": 34430 }, { "epoch": 0.4961320713946151, "grad_norm": 0.5408039689064026, "learning_rate": 1.0611290016542307e-05, "loss": 1.01, "step": 34440 }, { "epoch": 0.4962761283259144, "grad_norm": 0.5169128179550171, "learning_rate": 1.0606632990127927e-05, "loss": 1.031, "step": 34450 }, { "epoch": 0.49642018525721365, "grad_norm": 0.6136772632598877, "learning_rate": 1.0601975831657913e-05, "loss": 1.028, "step": 34460 }, { "epoch": 0.4965642421885129, "grad_norm": 0.615149974822998, "learning_rate": 1.0597318542146067e-05, "loss": 1.0001, "step": 34470 }, { "epoch": 0.4967082991198121, "grad_norm": 0.6651378870010376, "learning_rate": 1.0592661122606211e-05, "loss": 1.0231, "step": 34480 }, { "epoch": 0.4968523560511114, "grad_norm": 0.677888810634613, "learning_rate": 1.0588003574052207e-05, "loss": 1.052, "step": 34490 }, { "epoch": 0.49699641298241065, "grad_norm": 0.6368032097816467, "learning_rate": 1.058334589749794e-05, "loss": 0.9976, "step": 34500 }, { "epoch": 0.4971404699137099, "grad_norm": 0.5970730185508728, "learning_rate": 1.057868809395731e-05, "loss": 1.0335, "step": 34510 }, { "epoch": 0.4972845268450091, "grad_norm": 0.6341341137886047, "learning_rate": 1.0574030164444272e-05, "loss": 0.9998, "step": 34520 }, { "epoch": 0.4974285837763084, "grad_norm": 0.5348761677742004, "learning_rate": 1.056937210997278e-05, "loss": 1.027, "step": 34530 }, { "epoch": 0.49757264070760765, "grad_norm": 0.7168810367584229, "learning_rate": 1.0564713931556838e-05, "loss": 1.0289, "step": 34540 }, { "epoch": 0.4977166976389069, "grad_norm": 0.6505715847015381, "learning_rate": 1.0560055630210462e-05, "loss": 1.0192, "step": 34550 }, { "epoch": 0.4978607545702061, "grad_norm": 0.6991880536079407, "learning_rate": 1.05553972069477e-05, "loss": 1.0298, "step": 34560 }, { "epoch": 0.4980048115015054, "grad_norm": 0.613568902015686, "learning_rate": 1.0550738662782632e-05, "loss": 1.0156, "step": 34570 }, { "epoch": 0.49814886843280465, "grad_norm": 0.5543942451477051, "learning_rate": 1.054607999872935e-05, "loss": 1.0038, "step": 34580 }, { "epoch": 0.4982929253641039, "grad_norm": 0.5577853918075562, "learning_rate": 1.054142121580199e-05, "loss": 1.0423, "step": 34590 }, { "epoch": 0.4984369822954031, "grad_norm": 0.5902289748191833, "learning_rate": 1.0536762315014695e-05, "loss": 0.9978, "step": 34600 }, { "epoch": 0.4985810392267024, "grad_norm": 0.6642874479293823, "learning_rate": 1.0532103297381653e-05, "loss": 1.03, "step": 34610 }, { "epoch": 0.49872509615800165, "grad_norm": 0.7516694664955139, "learning_rate": 1.0527444163917063e-05, "loss": 1.0166, "step": 34620 }, { "epoch": 0.4988691530893009, "grad_norm": 0.6212608814239502, "learning_rate": 1.0522784915635152e-05, "loss": 1.0402, "step": 34630 }, { "epoch": 0.4990132100206001, "grad_norm": 0.5182382464408875, "learning_rate": 1.0518125553550178e-05, "loss": 0.9924, "step": 34640 }, { "epoch": 0.4991572669518994, "grad_norm": 0.6416479349136353, "learning_rate": 1.051346607867642e-05, "loss": 1.0154, "step": 34650 }, { "epoch": 0.49930132388319864, "grad_norm": 0.6422526240348816, "learning_rate": 1.0508806492028178e-05, "loss": 1.0409, "step": 34660 }, { "epoch": 0.4994453808144979, "grad_norm": 0.6135118007659912, "learning_rate": 1.0504146794619783e-05, "loss": 1.0238, "step": 34670 }, { "epoch": 0.4995894377457971, "grad_norm": 0.5611364245414734, "learning_rate": 1.0499486987465584e-05, "loss": 1.0196, "step": 34680 }, { "epoch": 0.4997334946770964, "grad_norm": 0.5582852363586426, "learning_rate": 1.0494827071579961e-05, "loss": 1.0308, "step": 34690 }, { "epoch": 0.49987755160839564, "grad_norm": 0.5679566860198975, "learning_rate": 1.0490167047977306e-05, "loss": 1.0191, "step": 34700 }, { "epoch": 0.5000216085396949, "grad_norm": 0.570986270904541, "learning_rate": 1.048550691767205e-05, "loss": 1.0233, "step": 34710 }, { "epoch": 0.5001656654709942, "grad_norm": 0.6344956159591675, "learning_rate": 1.0480846681678635e-05, "loss": 1.0339, "step": 34720 }, { "epoch": 0.5003097224022934, "grad_norm": 0.6062827110290527, "learning_rate": 1.0476186341011532e-05, "loss": 1.029, "step": 34730 }, { "epoch": 0.5004537793335926, "grad_norm": 0.5521675944328308, "learning_rate": 1.0471525896685229e-05, "loss": 1.0248, "step": 34740 }, { "epoch": 0.5005978362648918, "grad_norm": 0.5854989886283875, "learning_rate": 1.0466865349714243e-05, "loss": 1.037, "step": 34750 }, { "epoch": 0.5007418931961911, "grad_norm": 0.5962767601013184, "learning_rate": 1.0462204701113115e-05, "loss": 1.0155, "step": 34760 }, { "epoch": 0.5008859501274904, "grad_norm": 0.6587475538253784, "learning_rate": 1.0457543951896396e-05, "loss": 1.0172, "step": 34770 }, { "epoch": 0.5010300070587896, "grad_norm": 0.6376791596412659, "learning_rate": 1.045288310307867e-05, "loss": 1.0258, "step": 34780 }, { "epoch": 0.5011740639900889, "grad_norm": 0.5904030799865723, "learning_rate": 1.0448222155674542e-05, "loss": 1.0061, "step": 34790 }, { "epoch": 0.5013181209213882, "grad_norm": 0.5428909659385681, "learning_rate": 1.0443561110698635e-05, "loss": 1.0313, "step": 34800 }, { "epoch": 0.5014621778526874, "grad_norm": 0.5389203429222107, "learning_rate": 1.0438899969165588e-05, "loss": 1.0197, "step": 34810 }, { "epoch": 0.5016062347839866, "grad_norm": 0.5955771207809448, "learning_rate": 1.0434238732090076e-05, "loss": 1.0094, "step": 34820 }, { "epoch": 0.5017502917152858, "grad_norm": 0.5243788361549377, "learning_rate": 1.0429577400486784e-05, "loss": 1.017, "step": 34830 }, { "epoch": 0.5018943486465851, "grad_norm": 0.5687932968139648, "learning_rate": 1.0424915975370416e-05, "loss": 1.0045, "step": 34840 }, { "epoch": 0.5020384055778844, "grad_norm": 0.5824376940727234, "learning_rate": 1.0420254457755699e-05, "loss": 1.0205, "step": 34850 }, { "epoch": 0.5021824625091836, "grad_norm": 0.6246141195297241, "learning_rate": 1.0415592848657388e-05, "loss": 1.0298, "step": 34860 }, { "epoch": 0.5023265194404829, "grad_norm": 0.6343791484832764, "learning_rate": 1.0410931149090247e-05, "loss": 1.0195, "step": 34870 }, { "epoch": 0.5024705763717822, "grad_norm": 0.5543587803840637, "learning_rate": 1.0406269360069058e-05, "loss": 1.0114, "step": 34880 }, { "epoch": 0.5026146333030814, "grad_norm": 0.5309613347053528, "learning_rate": 1.040160748260864e-05, "loss": 1.003, "step": 34890 }, { "epoch": 0.5027586902343806, "grad_norm": 0.5168134570121765, "learning_rate": 1.039694551772381e-05, "loss": 1.0155, "step": 34900 }, { "epoch": 0.5029027471656798, "grad_norm": 0.5596436858177185, "learning_rate": 1.039228346642942e-05, "loss": 1.0319, "step": 34910 }, { "epoch": 0.5030468040969791, "grad_norm": 0.6299206614494324, "learning_rate": 1.0387621329740327e-05, "loss": 1.0344, "step": 34920 }, { "epoch": 0.5031908610282784, "grad_norm": 0.5811081528663635, "learning_rate": 1.0382959108671422e-05, "loss": 1.0113, "step": 34930 }, { "epoch": 0.5033349179595776, "grad_norm": 0.6343690752983093, "learning_rate": 1.03782968042376e-05, "loss": 1.0361, "step": 34940 }, { "epoch": 0.5034789748908769, "grad_norm": 0.68972247838974, "learning_rate": 1.037363441745378e-05, "loss": 1.0302, "step": 34950 }, { "epoch": 0.5036230318221762, "grad_norm": 0.5717976093292236, "learning_rate": 1.0368971949334906e-05, "loss": 1.0431, "step": 34960 }, { "epoch": 0.5037670887534754, "grad_norm": 0.6288382411003113, "learning_rate": 1.0364309400895921e-05, "loss": 1.0018, "step": 34970 }, { "epoch": 0.5039111456847746, "grad_norm": 0.6394724249839783, "learning_rate": 1.0359646773151815e-05, "loss": 1.016, "step": 34980 }, { "epoch": 0.5040552026160738, "grad_norm": 0.5719991326332092, "learning_rate": 1.0354984067117562e-05, "loss": 1.0485, "step": 34990 }, { "epoch": 0.5041992595473731, "grad_norm": 0.5724058151245117, "learning_rate": 1.0350321283808176e-05, "loss": 1.0177, "step": 35000 }, { "epoch": 0.5043433164786724, "grad_norm": 0.5895094871520996, "learning_rate": 1.0345658424238676e-05, "loss": 1.0298, "step": 35010 }, { "epoch": 0.5044873734099716, "grad_norm": 0.6532093286514282, "learning_rate": 1.0340995489424108e-05, "loss": 1.0175, "step": 35020 }, { "epoch": 0.5046314303412709, "grad_norm": 0.548981785774231, "learning_rate": 1.0336332480379527e-05, "loss": 1.0035, "step": 35030 }, { "epoch": 0.5047754872725702, "grad_norm": 0.5826421976089478, "learning_rate": 1.0331669398120003e-05, "loss": 1.0139, "step": 35040 }, { "epoch": 0.5049195442038694, "grad_norm": 0.6140947937965393, "learning_rate": 1.0327006243660631e-05, "loss": 0.9947, "step": 35050 }, { "epoch": 0.5050636011351686, "grad_norm": 0.6252985000610352, "learning_rate": 1.0322343018016505e-05, "loss": 1.0032, "step": 35060 }, { "epoch": 0.5052076580664678, "grad_norm": 0.7099573016166687, "learning_rate": 1.0317679722202753e-05, "loss": 1.0339, "step": 35070 }, { "epoch": 0.5053517149977671, "grad_norm": 0.6047699451446533, "learning_rate": 1.0313016357234507e-05, "loss": 1.042, "step": 35080 }, { "epoch": 0.5054957719290664, "grad_norm": 0.4941195249557495, "learning_rate": 1.0308352924126918e-05, "loss": 1.02, "step": 35090 }, { "epoch": 0.5056398288603656, "grad_norm": 0.6888431906700134, "learning_rate": 1.0303689423895154e-05, "loss": 1.0059, "step": 35100 }, { "epoch": 0.5057838857916649, "grad_norm": 0.576938807964325, "learning_rate": 1.0299025857554388e-05, "loss": 1.0056, "step": 35110 }, { "epoch": 0.5059279427229642, "grad_norm": 0.5949046015739441, "learning_rate": 1.0294362226119821e-05, "loss": 1.0313, "step": 35120 }, { "epoch": 0.5060719996542634, "grad_norm": 0.6250497102737427, "learning_rate": 1.0289698530606655e-05, "loss": 1.0393, "step": 35130 }, { "epoch": 0.5062160565855626, "grad_norm": 0.5415104031562805, "learning_rate": 1.0285034772030118e-05, "loss": 1.0405, "step": 35140 }, { "epoch": 0.5063601135168618, "grad_norm": 0.5248748064041138, "learning_rate": 1.028037095140544e-05, "loss": 1.0341, "step": 35150 }, { "epoch": 0.5065041704481611, "grad_norm": 0.5896631479263306, "learning_rate": 1.0275707069747877e-05, "loss": 1.0092, "step": 35160 }, { "epoch": 0.5066482273794604, "grad_norm": 0.6074146628379822, "learning_rate": 1.0271043128072689e-05, "loss": 1.0019, "step": 35170 }, { "epoch": 0.5067922843107596, "grad_norm": 0.5902014374732971, "learning_rate": 1.0266379127395148e-05, "loss": 1.0069, "step": 35180 }, { "epoch": 0.5069363412420589, "grad_norm": 0.5882689356803894, "learning_rate": 1.0261715068730547e-05, "loss": 1.0195, "step": 35190 }, { "epoch": 0.5070803981733581, "grad_norm": 0.641704797744751, "learning_rate": 1.0257050953094185e-05, "loss": 1.0367, "step": 35200 }, { "epoch": 0.5072244551046574, "grad_norm": 0.6412422060966492, "learning_rate": 1.025238678150138e-05, "loss": 1.0184, "step": 35210 }, { "epoch": 0.5073685120359566, "grad_norm": 0.7109228372573853, "learning_rate": 1.024772255496745e-05, "loss": 1.0031, "step": 35220 }, { "epoch": 0.5075125689672558, "grad_norm": 0.5909984111785889, "learning_rate": 1.0243058274507737e-05, "loss": 1.0108, "step": 35230 }, { "epoch": 0.5076566258985551, "grad_norm": 0.6337960362434387, "learning_rate": 1.0238393941137594e-05, "loss": 1.0462, "step": 35240 }, { "epoch": 0.5078006828298544, "grad_norm": 0.6581072807312012, "learning_rate": 1.0233729555872378e-05, "loss": 1.0186, "step": 35250 }, { "epoch": 0.5079447397611536, "grad_norm": 0.6113468408584595, "learning_rate": 1.022906511972746e-05, "loss": 1.033, "step": 35260 }, { "epoch": 0.5080887966924529, "grad_norm": 0.6098412871360779, "learning_rate": 1.0224400633718226e-05, "loss": 1.0161, "step": 35270 }, { "epoch": 0.5082328536237521, "grad_norm": 0.5138407945632935, "learning_rate": 1.0219736098860071e-05, "loss": 1.0242, "step": 35280 }, { "epoch": 0.5083769105550514, "grad_norm": 0.5745452046394348, "learning_rate": 1.0215071516168395e-05, "loss": 1.0172, "step": 35290 }, { "epoch": 0.5085209674863506, "grad_norm": 0.5427656173706055, "learning_rate": 1.0210406886658622e-05, "loss": 1.0169, "step": 35300 }, { "epoch": 0.5086650244176498, "grad_norm": 0.5485026240348816, "learning_rate": 1.020574221134617e-05, "loss": 1.0222, "step": 35310 }, { "epoch": 0.5088090813489491, "grad_norm": 0.5079200863838196, "learning_rate": 1.0201077491246477e-05, "loss": 1.0131, "step": 35320 }, { "epoch": 0.5089531382802484, "grad_norm": 0.5675054788589478, "learning_rate": 1.0196412727374985e-05, "loss": 1.0168, "step": 35330 }, { "epoch": 0.5090971952115476, "grad_norm": 0.6245012879371643, "learning_rate": 1.0191747920747155e-05, "loss": 1.0033, "step": 35340 }, { "epoch": 0.5092412521428469, "grad_norm": 0.573479175567627, "learning_rate": 1.0187083072378448e-05, "loss": 1.0219, "step": 35350 }, { "epoch": 0.5093853090741461, "grad_norm": 0.6171622276306152, "learning_rate": 1.0182418183284335e-05, "loss": 1.0157, "step": 35360 }, { "epoch": 0.5095293660054454, "grad_norm": 0.5683724880218506, "learning_rate": 1.0177753254480303e-05, "loss": 1.0207, "step": 35370 }, { "epoch": 0.5096734229367446, "grad_norm": 0.6862166523933411, "learning_rate": 1.0173088286981837e-05, "loss": 1.0102, "step": 35380 }, { "epoch": 0.5098174798680438, "grad_norm": 0.6910626292228699, "learning_rate": 1.016842328180444e-05, "loss": 0.9967, "step": 35390 }, { "epoch": 0.5099615367993431, "grad_norm": 0.6231151223182678, "learning_rate": 1.0163758239963615e-05, "loss": 1.0224, "step": 35400 }, { "epoch": 0.5101055937306423, "grad_norm": 0.5692195892333984, "learning_rate": 1.0159093162474885e-05, "loss": 1.0038, "step": 35410 }, { "epoch": 0.5102496506619416, "grad_norm": 0.5317984819412231, "learning_rate": 1.0154428050353766e-05, "loss": 1.0122, "step": 35420 }, { "epoch": 0.5103937075932409, "grad_norm": 0.6425004005432129, "learning_rate": 1.0149762904615787e-05, "loss": 1.0318, "step": 35430 }, { "epoch": 0.5105377645245401, "grad_norm": 0.5365632772445679, "learning_rate": 1.0145097726276494e-05, "loss": 1.0341, "step": 35440 }, { "epoch": 0.5106818214558394, "grad_norm": 0.6597155928611755, "learning_rate": 1.0140432516351426e-05, "loss": 1.0348, "step": 35450 }, { "epoch": 0.5108258783871386, "grad_norm": 0.6425182223320007, "learning_rate": 1.013576727585614e-05, "loss": 1.0259, "step": 35460 }, { "epoch": 0.5109699353184378, "grad_norm": 0.5821989178657532, "learning_rate": 1.0131102005806186e-05, "loss": 0.995, "step": 35470 }, { "epoch": 0.5111139922497371, "grad_norm": 0.5270401835441589, "learning_rate": 1.0126436707217137e-05, "loss": 1.0172, "step": 35480 }, { "epoch": 0.5112580491810363, "grad_norm": 0.6593245267868042, "learning_rate": 1.012177138110456e-05, "loss": 1.0218, "step": 35490 }, { "epoch": 0.5114021061123356, "grad_norm": 0.628489077091217, "learning_rate": 1.0117106028484034e-05, "loss": 1.0383, "step": 35500 }, { "epoch": 0.5115461630436349, "grad_norm": 0.57815021276474, "learning_rate": 1.011244065037114e-05, "loss": 1.0133, "step": 35510 }, { "epoch": 0.5116902199749341, "grad_norm": 0.6188305020332336, "learning_rate": 1.0107775247781466e-05, "loss": 1.0236, "step": 35520 }, { "epoch": 0.5118342769062334, "grad_norm": 0.5830217003822327, "learning_rate": 1.0103109821730612e-05, "loss": 1.0076, "step": 35530 }, { "epoch": 0.5119783338375326, "grad_norm": 0.6795600652694702, "learning_rate": 1.0098444373234169e-05, "loss": 1.0349, "step": 35540 }, { "epoch": 0.5121223907688318, "grad_norm": 0.5690765976905823, "learning_rate": 1.0093778903307744e-05, "loss": 1.0115, "step": 35550 }, { "epoch": 0.5122664477001311, "grad_norm": 0.5599063038825989, "learning_rate": 1.0089113412966944e-05, "loss": 1.0372, "step": 35560 }, { "epoch": 0.5124105046314303, "grad_norm": 0.6547520160675049, "learning_rate": 1.0084447903227386e-05, "loss": 1.0256, "step": 35570 }, { "epoch": 0.5125545615627296, "grad_norm": 0.6033701300621033, "learning_rate": 1.0079782375104685e-05, "loss": 1.0372, "step": 35580 }, { "epoch": 0.5126986184940289, "grad_norm": 0.6120834350585938, "learning_rate": 1.0075116829614458e-05, "loss": 1.0205, "step": 35590 }, { "epoch": 0.5128426754253281, "grad_norm": 0.5898239612579346, "learning_rate": 1.0070451267772338e-05, "loss": 0.9922, "step": 35600 }, { "epoch": 0.5129867323566273, "grad_norm": 0.5969788432121277, "learning_rate": 1.0065785690593946e-05, "loss": 1.0304, "step": 35610 }, { "epoch": 0.5131307892879265, "grad_norm": 0.5550798773765564, "learning_rate": 1.0061120099094917e-05, "loss": 1.0057, "step": 35620 }, { "epoch": 0.5132748462192258, "grad_norm": 0.5665841102600098, "learning_rate": 1.0056454494290884e-05, "loss": 1.0097, "step": 35630 }, { "epoch": 0.5134189031505251, "grad_norm": 0.5943402647972107, "learning_rate": 1.0051788877197488e-05, "loss": 1.0073, "step": 35640 }, { "epoch": 0.5135629600818243, "grad_norm": 0.6273964643478394, "learning_rate": 1.0047123248830369e-05, "loss": 1.0515, "step": 35650 }, { "epoch": 0.5137070170131236, "grad_norm": 0.8690207004547119, "learning_rate": 1.0042457610205168e-05, "loss": 1.0301, "step": 35660 }, { "epoch": 0.5138510739444229, "grad_norm": 0.6672706007957458, "learning_rate": 1.0037791962337531e-05, "loss": 1.0015, "step": 35670 }, { "epoch": 0.5139951308757221, "grad_norm": 0.5607325434684753, "learning_rate": 1.0033126306243104e-05, "loss": 1.0194, "step": 35680 }, { "epoch": 0.5141391878070213, "grad_norm": 0.6259201169013977, "learning_rate": 1.002846064293754e-05, "loss": 1.0093, "step": 35690 }, { "epoch": 0.5142832447383205, "grad_norm": 0.6983173489570618, "learning_rate": 1.0023794973436483e-05, "loss": 1.0134, "step": 35700 }, { "epoch": 0.5144273016696198, "grad_norm": 0.5167803168296814, "learning_rate": 1.0019129298755593e-05, "loss": 1.0212, "step": 35710 }, { "epoch": 0.5145713586009191, "grad_norm": 0.5578393936157227, "learning_rate": 1.0014463619910513e-05, "loss": 1.0078, "step": 35720 }, { "epoch": 0.5147154155322183, "grad_norm": 0.5481727123260498, "learning_rate": 1.0009797937916905e-05, "loss": 1.0314, "step": 35730 }, { "epoch": 0.5148594724635176, "grad_norm": 0.5455896258354187, "learning_rate": 1.000513225379042e-05, "loss": 1.0023, "step": 35740 }, { "epoch": 0.5150035293948169, "grad_norm": 0.6557184457778931, "learning_rate": 1.0000466568546716e-05, "loss": 1.0311, "step": 35750 }, { "epoch": 0.5151475863261161, "grad_norm": 0.7582526206970215, "learning_rate": 9.995800883201446e-06, "loss": 0.9772, "step": 35760 }, { "epoch": 0.5152916432574153, "grad_norm": 0.5622434616088867, "learning_rate": 9.991135198770266e-06, "loss": 1.0282, "step": 35770 }, { "epoch": 0.5154357001887145, "grad_norm": 0.5806698799133301, "learning_rate": 9.98646951626883e-06, "loss": 1.0392, "step": 35780 }, { "epoch": 0.5155797571200138, "grad_norm": 0.5843561291694641, "learning_rate": 9.981803836712794e-06, "loss": 1.0311, "step": 35790 }, { "epoch": 0.5157238140513131, "grad_norm": 0.5436504483222961, "learning_rate": 9.977138161117813e-06, "loss": 1.0078, "step": 35800 }, { "epoch": 0.5158678709826123, "grad_norm": 0.6146788001060486, "learning_rate": 9.972472490499536e-06, "loss": 1.039, "step": 35810 }, { "epoch": 0.5160119279139116, "grad_norm": 0.5172025561332703, "learning_rate": 9.967806825873621e-06, "loss": 1.0053, "step": 35820 }, { "epoch": 0.5161559848452109, "grad_norm": 0.5545276999473572, "learning_rate": 9.963141168255714e-06, "loss": 1.0309, "step": 35830 }, { "epoch": 0.5163000417765101, "grad_norm": 0.5263173580169678, "learning_rate": 9.958475518661467e-06, "loss": 1.0071, "step": 35840 }, { "epoch": 0.5164440987078093, "grad_norm": 0.6488673686981201, "learning_rate": 9.95380987810653e-06, "loss": 1.0285, "step": 35850 }, { "epoch": 0.5165881556391085, "grad_norm": 0.6554338932037354, "learning_rate": 9.949144247606538e-06, "loss": 1.0137, "step": 35860 }, { "epoch": 0.5167322125704078, "grad_norm": 0.5225178003311157, "learning_rate": 9.94447862817715e-06, "loss": 1.029, "step": 35870 }, { "epoch": 0.5168762695017071, "grad_norm": 0.6226567029953003, "learning_rate": 9.939813020833995e-06, "loss": 1.0378, "step": 35880 }, { "epoch": 0.5170203264330063, "grad_norm": 0.6217008233070374, "learning_rate": 9.935147426592712e-06, "loss": 0.9921, "step": 35890 }, { "epoch": 0.5171643833643056, "grad_norm": 0.6131475567817688, "learning_rate": 9.930481846468944e-06, "loss": 1.0511, "step": 35900 }, { "epoch": 0.5173084402956049, "grad_norm": 0.5917423367500305, "learning_rate": 9.925816281478318e-06, "loss": 1.0134, "step": 35910 }, { "epoch": 0.5174524972269041, "grad_norm": 0.6239330768585205, "learning_rate": 9.921150732636461e-06, "loss": 1.0211, "step": 35920 }, { "epoch": 0.5175965541582033, "grad_norm": 0.6124416589736938, "learning_rate": 9.916485200959005e-06, "loss": 1.006, "step": 35930 }, { "epoch": 0.5177406110895025, "grad_norm": 0.5896172523498535, "learning_rate": 9.911819687461567e-06, "loss": 1.0296, "step": 35940 }, { "epoch": 0.5178846680208018, "grad_norm": 0.6174232959747314, "learning_rate": 9.907154193159768e-06, "loss": 1.0337, "step": 35950 }, { "epoch": 0.5180287249521011, "grad_norm": 0.6552242636680603, "learning_rate": 9.902488719069221e-06, "loss": 1.0386, "step": 35960 }, { "epoch": 0.5181727818834003, "grad_norm": 0.6238149404525757, "learning_rate": 9.897823266205532e-06, "loss": 1.0338, "step": 35970 }, { "epoch": 0.5183168388146996, "grad_norm": 0.6092976331710815, "learning_rate": 9.893157835584309e-06, "loss": 1.0355, "step": 35980 }, { "epoch": 0.5184608957459989, "grad_norm": 0.6302141547203064, "learning_rate": 9.888492428221156e-06, "loss": 1.0335, "step": 35990 }, { "epoch": 0.5186049526772981, "grad_norm": 0.5200178623199463, "learning_rate": 9.883827045131656e-06, "loss": 1.022, "step": 36000 }, { "epoch": 0.5187490096085973, "grad_norm": 0.5777044296264648, "learning_rate": 9.87916168733141e-06, "loss": 1.0219, "step": 36010 }, { "epoch": 0.5188930665398965, "grad_norm": 0.5704400539398193, "learning_rate": 9.874496355836e-06, "loss": 1.0066, "step": 36020 }, { "epoch": 0.5190371234711958, "grad_norm": 0.5675187706947327, "learning_rate": 9.869831051660997e-06, "loss": 1.0325, "step": 36030 }, { "epoch": 0.5191811804024951, "grad_norm": 0.5844601392745972, "learning_rate": 9.86516577582198e-06, "loss": 1.0187, "step": 36040 }, { "epoch": 0.5193252373337943, "grad_norm": 0.6567334532737732, "learning_rate": 9.860500529334512e-06, "loss": 1.0397, "step": 36050 }, { "epoch": 0.5194692942650936, "grad_norm": 0.6387859582901001, "learning_rate": 9.855835313214156e-06, "loss": 1.0088, "step": 36060 }, { "epoch": 0.5196133511963928, "grad_norm": 0.5364038348197937, "learning_rate": 9.851170128476464e-06, "loss": 1.0275, "step": 36070 }, { "epoch": 0.5197574081276921, "grad_norm": 0.5949365496635437, "learning_rate": 9.846504976136981e-06, "loss": 1.0256, "step": 36080 }, { "epoch": 0.5199014650589913, "grad_norm": 0.718833327293396, "learning_rate": 9.841839857211249e-06, "loss": 1.0149, "step": 36090 }, { "epoch": 0.5200455219902905, "grad_norm": 0.540953516960144, "learning_rate": 9.837174772714797e-06, "loss": 1.0208, "step": 36100 }, { "epoch": 0.5201895789215898, "grad_norm": 0.7185535430908203, "learning_rate": 9.83250972366315e-06, "loss": 1.0198, "step": 36110 }, { "epoch": 0.520333635852889, "grad_norm": 0.5654457807540894, "learning_rate": 9.827844711071826e-06, "loss": 1.0394, "step": 36120 }, { "epoch": 0.5204776927841883, "grad_norm": 0.6072353720664978, "learning_rate": 9.823179735956337e-06, "loss": 1.0043, "step": 36130 }, { "epoch": 0.5206217497154876, "grad_norm": 0.6299914121627808, "learning_rate": 9.818514799332174e-06, "loss": 1.0225, "step": 36140 }, { "epoch": 0.5207658066467868, "grad_norm": 0.5617902278900146, "learning_rate": 9.813849902214841e-06, "loss": 1.0307, "step": 36150 }, { "epoch": 0.5209098635780861, "grad_norm": 0.556522011756897, "learning_rate": 9.809185045619812e-06, "loss": 1.0163, "step": 36160 }, { "epoch": 0.5210539205093853, "grad_norm": 0.6089711785316467, "learning_rate": 9.80452023056257e-06, "loss": 1.0104, "step": 36170 }, { "epoch": 0.5211979774406845, "grad_norm": 0.5726987719535828, "learning_rate": 9.799855458058577e-06, "loss": 1.019, "step": 36180 }, { "epoch": 0.5213420343719838, "grad_norm": 0.6261593103408813, "learning_rate": 9.795190729123288e-06, "loss": 1.0262, "step": 36190 }, { "epoch": 0.521486091303283, "grad_norm": 0.6123784184455872, "learning_rate": 9.790526044772152e-06, "loss": 1.0351, "step": 36200 }, { "epoch": 0.5216301482345823, "grad_norm": 0.623529314994812, "learning_rate": 9.785861406020607e-06, "loss": 1.0525, "step": 36210 }, { "epoch": 0.5217742051658816, "grad_norm": 0.5684060454368591, "learning_rate": 9.781196813884079e-06, "loss": 1.0217, "step": 36220 }, { "epoch": 0.5219182620971808, "grad_norm": 0.49141550064086914, "learning_rate": 9.776532269377986e-06, "loss": 1.0235, "step": 36230 }, { "epoch": 0.5220623190284801, "grad_norm": 0.5897905230522156, "learning_rate": 9.77186777351774e-06, "loss": 1.0118, "step": 36240 }, { "epoch": 0.5222063759597793, "grad_norm": 0.5766646265983582, "learning_rate": 9.767203327318723e-06, "loss": 1.017, "step": 36250 }, { "epoch": 0.5223504328910785, "grad_norm": 0.6184492111206055, "learning_rate": 9.76253893179634e-06, "loss": 0.9995, "step": 36260 }, { "epoch": 0.5224944898223778, "grad_norm": 0.5500518679618835, "learning_rate": 9.757874587965948e-06, "loss": 1.0252, "step": 36270 }, { "epoch": 0.522638546753677, "grad_norm": 0.5596465468406677, "learning_rate": 9.753210296842924e-06, "loss": 1.021, "step": 36280 }, { "epoch": 0.5227826036849763, "grad_norm": 0.6081998944282532, "learning_rate": 9.748546059442613e-06, "loss": 1.014, "step": 36290 }, { "epoch": 0.5229266606162756, "grad_norm": 0.5707460641860962, "learning_rate": 9.743881876780353e-06, "loss": 1.0102, "step": 36300 }, { "epoch": 0.5230707175475748, "grad_norm": 0.5708239078521729, "learning_rate": 9.739217749871476e-06, "loss": 1.0477, "step": 36310 }, { "epoch": 0.5232147744788741, "grad_norm": 0.8087396621704102, "learning_rate": 9.7345536797313e-06, "loss": 1.0201, "step": 36320 }, { "epoch": 0.5233588314101733, "grad_norm": 0.5321927666664124, "learning_rate": 9.729889667375123e-06, "loss": 1.0304, "step": 36330 }, { "epoch": 0.5235028883414725, "grad_norm": 0.5228332877159119, "learning_rate": 9.725225713818242e-06, "loss": 0.9842, "step": 36340 }, { "epoch": 0.5236469452727718, "grad_norm": 0.54778653383255, "learning_rate": 9.72056182007593e-06, "loss": 1.0389, "step": 36350 }, { "epoch": 0.523791002204071, "grad_norm": 0.6246420741081238, "learning_rate": 9.715897987163457e-06, "loss": 1.0301, "step": 36360 }, { "epoch": 0.5239350591353703, "grad_norm": 0.6011121273040771, "learning_rate": 9.711234216096071e-06, "loss": 1.0303, "step": 36370 }, { "epoch": 0.5240791160666696, "grad_norm": 0.6510663032531738, "learning_rate": 9.706570507889013e-06, "loss": 0.9917, "step": 36380 }, { "epoch": 0.5242231729979688, "grad_norm": 0.5987183451652527, "learning_rate": 9.701906863557509e-06, "loss": 1.0175, "step": 36390 }, { "epoch": 0.5243672299292681, "grad_norm": 0.5923366546630859, "learning_rate": 9.69724328411677e-06, "loss": 1.0189, "step": 36400 }, { "epoch": 0.5245112868605672, "grad_norm": 0.5356194376945496, "learning_rate": 9.692579770581986e-06, "loss": 0.991, "step": 36410 }, { "epoch": 0.5246553437918665, "grad_norm": 0.6098308563232422, "learning_rate": 9.687916323968352e-06, "loss": 1.0258, "step": 36420 }, { "epoch": 0.5247994007231658, "grad_norm": 0.5701761245727539, "learning_rate": 9.683252945291027e-06, "loss": 1.0266, "step": 36430 }, { "epoch": 0.524943457654465, "grad_norm": 0.627974808216095, "learning_rate": 9.678589635565162e-06, "loss": 1.0131, "step": 36440 }, { "epoch": 0.5250875145857643, "grad_norm": 0.6633008122444153, "learning_rate": 9.673926395805903e-06, "loss": 1.0196, "step": 36450 }, { "epoch": 0.5252315715170636, "grad_norm": 0.5533243417739868, "learning_rate": 9.66926322702837e-06, "loss": 1.0284, "step": 36460 }, { "epoch": 0.5253756284483628, "grad_norm": 0.6051493883132935, "learning_rate": 9.664600130247667e-06, "loss": 1.0272, "step": 36470 }, { "epoch": 0.5255196853796621, "grad_norm": 0.6262309551239014, "learning_rate": 9.65993710647889e-06, "loss": 1.0189, "step": 36480 }, { "epoch": 0.5256637423109612, "grad_norm": 0.5696543455123901, "learning_rate": 9.655274156737114e-06, "loss": 1.0353, "step": 36490 }, { "epoch": 0.5258077992422605, "grad_norm": 0.6242706179618835, "learning_rate": 9.6506112820374e-06, "loss": 1.0196, "step": 36500 }, { "epoch": 0.5259518561735598, "grad_norm": 0.6055695414543152, "learning_rate": 9.64594848339479e-06, "loss": 1.0171, "step": 36510 }, { "epoch": 0.526095913104859, "grad_norm": 0.6551834940910339, "learning_rate": 9.641285761824307e-06, "loss": 1.0006, "step": 36520 }, { "epoch": 0.5262399700361583, "grad_norm": 0.5793470144271851, "learning_rate": 9.636623118340968e-06, "loss": 1.0456, "step": 36530 }, { "epoch": 0.5263840269674576, "grad_norm": 0.6057597994804382, "learning_rate": 9.631960553959767e-06, "loss": 1.0198, "step": 36540 }, { "epoch": 0.5265280838987568, "grad_norm": 0.6657388806343079, "learning_rate": 9.62729806969567e-06, "loss": 1.0386, "step": 36550 }, { "epoch": 0.5266721408300561, "grad_norm": 0.6059638857841492, "learning_rate": 9.622635666563647e-06, "loss": 1.0233, "step": 36560 }, { "epoch": 0.5268161977613552, "grad_norm": 0.5604829788208008, "learning_rate": 9.617973345578632e-06, "loss": 0.9891, "step": 36570 }, { "epoch": 0.5269602546926545, "grad_norm": 0.5700094103813171, "learning_rate": 9.613311107755547e-06, "loss": 1.0126, "step": 36580 }, { "epoch": 0.5271043116239538, "grad_norm": 0.6769119501113892, "learning_rate": 9.608648954109302e-06, "loss": 1.0184, "step": 36590 }, { "epoch": 0.527248368555253, "grad_norm": 0.6489994525909424, "learning_rate": 9.60398688565478e-06, "loss": 1.0208, "step": 36600 }, { "epoch": 0.5273924254865523, "grad_norm": 0.580607533454895, "learning_rate": 9.599324903406851e-06, "loss": 0.9943, "step": 36610 }, { "epoch": 0.5275364824178516, "grad_norm": 0.7023763060569763, "learning_rate": 9.594663008380363e-06, "loss": 1.0256, "step": 36620 }, { "epoch": 0.5276805393491508, "grad_norm": 0.5354670286178589, "learning_rate": 9.590001201590144e-06, "loss": 1.0263, "step": 36630 }, { "epoch": 0.5278245962804501, "grad_norm": 0.5822432041168213, "learning_rate": 9.585339484051011e-06, "loss": 0.9996, "step": 36640 }, { "epoch": 0.5279686532117492, "grad_norm": 0.5705580115318298, "learning_rate": 9.580677856777753e-06, "loss": 1.0092, "step": 36650 }, { "epoch": 0.5281127101430485, "grad_norm": 0.5426855683326721, "learning_rate": 9.576016320785135e-06, "loss": 1.0097, "step": 36660 }, { "epoch": 0.5282567670743478, "grad_norm": 0.6067710518836975, "learning_rate": 9.571354877087921e-06, "loss": 1.0058, "step": 36670 }, { "epoch": 0.528400824005647, "grad_norm": 0.5888494849205017, "learning_rate": 9.566693526700835e-06, "loss": 0.9949, "step": 36680 }, { "epoch": 0.5285448809369463, "grad_norm": 0.5788161754608154, "learning_rate": 9.562032270638587e-06, "loss": 1.0075, "step": 36690 }, { "epoch": 0.5286889378682456, "grad_norm": 0.6695165634155273, "learning_rate": 9.557371109915878e-06, "loss": 1.0411, "step": 36700 }, { "epoch": 0.5288329947995448, "grad_norm": 0.6557411551475525, "learning_rate": 9.552710045547367e-06, "loss": 1.0201, "step": 36710 }, { "epoch": 0.5289770517308441, "grad_norm": 0.6170535087585449, "learning_rate": 9.548049078547715e-06, "loss": 1.0118, "step": 36720 }, { "epoch": 0.5291211086621432, "grad_norm": 0.6295254230499268, "learning_rate": 9.54338820993154e-06, "loss": 1.0351, "step": 36730 }, { "epoch": 0.5292651655934425, "grad_norm": 0.5872475504875183, "learning_rate": 9.538727440713451e-06, "loss": 1.0398, "step": 36740 }, { "epoch": 0.5294092225247418, "grad_norm": 0.5091297626495361, "learning_rate": 9.534066771908038e-06, "loss": 1.0198, "step": 36750 }, { "epoch": 0.529553279456041, "grad_norm": 0.6274429559707642, "learning_rate": 9.529406204529861e-06, "loss": 1.0141, "step": 36760 }, { "epoch": 0.5296973363873403, "grad_norm": 0.6691988706588745, "learning_rate": 9.524745739593459e-06, "loss": 1.0108, "step": 36770 }, { "epoch": 0.5298413933186396, "grad_norm": 0.6400824785232544, "learning_rate": 9.520085378113356e-06, "loss": 1.0095, "step": 36780 }, { "epoch": 0.5299854502499388, "grad_norm": 0.6180183291435242, "learning_rate": 9.515425121104047e-06, "loss": 1.0142, "step": 36790 }, { "epoch": 0.5301295071812381, "grad_norm": 0.5982281565666199, "learning_rate": 9.510764969579999e-06, "loss": 1.0265, "step": 36800 }, { "epoch": 0.5302735641125372, "grad_norm": 0.5235705375671387, "learning_rate": 9.506104924555674e-06, "loss": 1.0019, "step": 36810 }, { "epoch": 0.5304176210438365, "grad_norm": 0.6294726729393005, "learning_rate": 9.501444987045487e-06, "loss": 1.0044, "step": 36820 }, { "epoch": 0.5305616779751358, "grad_norm": 0.6173301935195923, "learning_rate": 9.496785158063855e-06, "loss": 1.0391, "step": 36830 }, { "epoch": 0.530705734906435, "grad_norm": 0.5367320775985718, "learning_rate": 9.492125438625151e-06, "loss": 0.9951, "step": 36840 }, { "epoch": 0.5308497918377343, "grad_norm": 0.7068583369255066, "learning_rate": 9.48746582974373e-06, "loss": 0.9917, "step": 36850 }, { "epoch": 0.5309938487690335, "grad_norm": 0.6781473755836487, "learning_rate": 9.482806332433931e-06, "loss": 1.0262, "step": 36860 }, { "epoch": 0.5311379057003328, "grad_norm": 0.6276537775993347, "learning_rate": 9.478146947710058e-06, "loss": 1.0212, "step": 36870 }, { "epoch": 0.5312819626316321, "grad_norm": 0.6010395288467407, "learning_rate": 9.473487676586392e-06, "loss": 1.0201, "step": 36880 }, { "epoch": 0.5314260195629312, "grad_norm": 0.6485746502876282, "learning_rate": 9.4688285200772e-06, "loss": 1.041, "step": 36890 }, { "epoch": 0.5315700764942305, "grad_norm": 0.6540676951408386, "learning_rate": 9.46416947919671e-06, "loss": 1.0225, "step": 36900 }, { "epoch": 0.5317141334255298, "grad_norm": 0.5813535451889038, "learning_rate": 9.45951055495913e-06, "loss": 1.0233, "step": 36910 }, { "epoch": 0.531858190356829, "grad_norm": 0.5422252416610718, "learning_rate": 9.45485174837865e-06, "loss": 1.0098, "step": 36920 }, { "epoch": 0.5320022472881283, "grad_norm": 0.6572553515434265, "learning_rate": 9.45019306046942e-06, "loss": 1.0414, "step": 36930 }, { "epoch": 0.5321463042194275, "grad_norm": 0.6319551467895508, "learning_rate": 9.44553449224558e-06, "loss": 1.0117, "step": 36940 }, { "epoch": 0.5322903611507268, "grad_norm": 0.6119747161865234, "learning_rate": 9.440876044721235e-06, "loss": 1.0091, "step": 36950 }, { "epoch": 0.532434418082026, "grad_norm": 0.5634505748748779, "learning_rate": 9.436217718910453e-06, "loss": 1.0233, "step": 36960 }, { "epoch": 0.5325784750133252, "grad_norm": 0.6269254684448242, "learning_rate": 9.431559515827304e-06, "loss": 1.0379, "step": 36970 }, { "epoch": 0.5327225319446245, "grad_norm": 0.7109168171882629, "learning_rate": 9.426901436485804e-06, "loss": 1.0136, "step": 36980 }, { "epoch": 0.5328665888759238, "grad_norm": 0.5518725514411926, "learning_rate": 9.422243481899955e-06, "loss": 1.0228, "step": 36990 }, { "epoch": 0.533010645807223, "grad_norm": 0.6496334671974182, "learning_rate": 9.417585653083729e-06, "loss": 1.0124, "step": 37000 }, { "epoch": 0.5331547027385223, "grad_norm": 0.5915142297744751, "learning_rate": 9.412927951051075e-06, "loss": 1.0227, "step": 37010 }, { "epoch": 0.5332987596698215, "grad_norm": 0.526029109954834, "learning_rate": 9.408270376815904e-06, "loss": 1.0271, "step": 37020 }, { "epoch": 0.5334428166011208, "grad_norm": 0.728598415851593, "learning_rate": 9.40361293139211e-06, "loss": 1.0282, "step": 37030 }, { "epoch": 0.53358687353242, "grad_norm": 0.6483193039894104, "learning_rate": 9.398955615793552e-06, "loss": 1.031, "step": 37040 }, { "epoch": 0.5337309304637192, "grad_norm": 0.5777032971382141, "learning_rate": 9.394298431034068e-06, "loss": 1.0444, "step": 37050 }, { "epoch": 0.5338749873950185, "grad_norm": 0.6195807456970215, "learning_rate": 9.38964137812746e-06, "loss": 1.0227, "step": 37060 }, { "epoch": 0.5340190443263177, "grad_norm": 1.3067591190338135, "learning_rate": 9.384984458087502e-06, "loss": 1.0208, "step": 37070 }, { "epoch": 0.534163101257617, "grad_norm": 0.6034696698188782, "learning_rate": 9.380327671927945e-06, "loss": 1.03, "step": 37080 }, { "epoch": 0.5343071581889163, "grad_norm": 0.5766692757606506, "learning_rate": 9.375671020662506e-06, "loss": 1.0335, "step": 37090 }, { "epoch": 0.5344512151202155, "grad_norm": 0.568112850189209, "learning_rate": 9.37101450530487e-06, "loss": 1.0324, "step": 37100 }, { "epoch": 0.5345952720515148, "grad_norm": 0.6545660495758057, "learning_rate": 9.366358126868704e-06, "loss": 1.0358, "step": 37110 }, { "epoch": 0.534739328982814, "grad_norm": 0.5522094368934631, "learning_rate": 9.361701886367631e-06, "loss": 1.007, "step": 37120 }, { "epoch": 0.5348833859141132, "grad_norm": 0.6215907335281372, "learning_rate": 9.357045784815248e-06, "loss": 1.01, "step": 37130 }, { "epoch": 0.5350274428454125, "grad_norm": 0.5886874198913574, "learning_rate": 9.352389823225133e-06, "loss": 1.0249, "step": 37140 }, { "epoch": 0.5351714997767117, "grad_norm": 0.5882801413536072, "learning_rate": 9.347734002610815e-06, "loss": 1.0173, "step": 37150 }, { "epoch": 0.535315556708011, "grad_norm": 0.6239109039306641, "learning_rate": 9.343078323985811e-06, "loss": 1.0131, "step": 37160 }, { "epoch": 0.5354596136393103, "grad_norm": 1.379630446434021, "learning_rate": 9.338422788363592e-06, "loss": 1.0173, "step": 37170 }, { "epoch": 0.5356036705706095, "grad_norm": 0.552575409412384, "learning_rate": 9.333767396757605e-06, "loss": 1.0072, "step": 37180 }, { "epoch": 0.5357477275019088, "grad_norm": 0.6046473383903503, "learning_rate": 9.329112150181267e-06, "loss": 1.0255, "step": 37190 }, { "epoch": 0.535891784433208, "grad_norm": 0.6556302905082703, "learning_rate": 9.32445704964796e-06, "loss": 1.0379, "step": 37200 }, { "epoch": 0.5360358413645072, "grad_norm": 0.6306343078613281, "learning_rate": 9.319802096171027e-06, "loss": 1.0471, "step": 37210 }, { "epoch": 0.5361798982958065, "grad_norm": 0.5234145522117615, "learning_rate": 9.315147290763804e-06, "loss": 1.0176, "step": 37220 }, { "epoch": 0.5363239552271057, "grad_norm": 0.6313613653182983, "learning_rate": 9.310492634439563e-06, "loss": 1.0286, "step": 37230 }, { "epoch": 0.536468012158405, "grad_norm": 0.6118576526641846, "learning_rate": 9.305838128211561e-06, "loss": 1.0173, "step": 37240 }, { "epoch": 0.5366120690897043, "grad_norm": 0.6831321716308594, "learning_rate": 9.301183773093027e-06, "loss": 1.0193, "step": 37250 }, { "epoch": 0.5367561260210035, "grad_norm": 0.677731454372406, "learning_rate": 9.296529570097144e-06, "loss": 1.0434, "step": 37260 }, { "epoch": 0.5369001829523028, "grad_norm": 0.5814643502235413, "learning_rate": 9.291875520237068e-06, "loss": 1.0105, "step": 37270 }, { "epoch": 0.537044239883602, "grad_norm": 0.6593078970909119, "learning_rate": 9.287221624525926e-06, "loss": 1.0187, "step": 37280 }, { "epoch": 0.5371882968149012, "grad_norm": 0.6539261341094971, "learning_rate": 9.2825678839768e-06, "loss": 1.0189, "step": 37290 }, { "epoch": 0.5373323537462005, "grad_norm": 0.5706778764724731, "learning_rate": 9.277914299602751e-06, "loss": 1.0185, "step": 37300 }, { "epoch": 0.5374764106774997, "grad_norm": 0.6825644969940186, "learning_rate": 9.273260872416797e-06, "loss": 1.0336, "step": 37310 }, { "epoch": 0.537620467608799, "grad_norm": 0.6412229537963867, "learning_rate": 9.268607603431924e-06, "loss": 1.0211, "step": 37320 }, { "epoch": 0.5377645245400983, "grad_norm": 0.5434486865997314, "learning_rate": 9.263954493661089e-06, "loss": 1.0397, "step": 37330 }, { "epoch": 0.5379085814713975, "grad_norm": 0.6290640234947205, "learning_rate": 9.259301544117208e-06, "loss": 1.0232, "step": 37340 }, { "epoch": 0.5380526384026968, "grad_norm": 0.5575625896453857, "learning_rate": 9.254648755813158e-06, "loss": 1.0141, "step": 37350 }, { "epoch": 0.538196695333996, "grad_norm": 0.598358690738678, "learning_rate": 9.249996129761797e-06, "loss": 1.0191, "step": 37360 }, { "epoch": 0.5383407522652952, "grad_norm": 0.6271530389785767, "learning_rate": 9.245343666975925e-06, "loss": 1.008, "step": 37370 }, { "epoch": 0.5384848091965945, "grad_norm": 0.5654636025428772, "learning_rate": 9.240691368468334e-06, "loss": 1.0376, "step": 37380 }, { "epoch": 0.5386288661278937, "grad_norm": 0.5937756299972534, "learning_rate": 9.236039235251755e-06, "loss": 1.0006, "step": 37390 }, { "epoch": 0.538772923059193, "grad_norm": 0.6633250117301941, "learning_rate": 9.231387268338893e-06, "loss": 1.0344, "step": 37400 }, { "epoch": 0.5389169799904923, "grad_norm": 0.5502985715866089, "learning_rate": 9.226735468742422e-06, "loss": 1.0333, "step": 37410 }, { "epoch": 0.5390610369217915, "grad_norm": 0.7371459007263184, "learning_rate": 9.222083837474973e-06, "loss": 1.0179, "step": 37420 }, { "epoch": 0.5392050938530908, "grad_norm": 0.6678913831710815, "learning_rate": 9.217432375549139e-06, "loss": 1.0201, "step": 37430 }, { "epoch": 0.5393491507843899, "grad_norm": 0.5894361734390259, "learning_rate": 9.212781083977483e-06, "loss": 1.0252, "step": 37440 }, { "epoch": 0.5394932077156892, "grad_norm": 0.5343698263168335, "learning_rate": 9.208129963772526e-06, "loss": 1.0109, "step": 37450 }, { "epoch": 0.5396372646469885, "grad_norm": 0.5272938013076782, "learning_rate": 9.203479015946749e-06, "loss": 1.0229, "step": 37460 }, { "epoch": 0.5397813215782877, "grad_norm": 0.59951251745224, "learning_rate": 9.198828241512604e-06, "loss": 1.0244, "step": 37470 }, { "epoch": 0.539925378509587, "grad_norm": 0.648590087890625, "learning_rate": 9.194177641482498e-06, "loss": 1.0296, "step": 37480 }, { "epoch": 0.5400694354408863, "grad_norm": 0.5928313136100769, "learning_rate": 9.189527216868803e-06, "loss": 1.0143, "step": 37490 }, { "epoch": 0.5402134923721855, "grad_norm": 0.5701433420181274, "learning_rate": 9.184876968683857e-06, "loss": 0.9927, "step": 37500 }, { "epoch": 0.5403575493034848, "grad_norm": 0.6457295417785645, "learning_rate": 9.180226897939943e-06, "loss": 1.0295, "step": 37510 }, { "epoch": 0.5405016062347839, "grad_norm": 0.6101258993148804, "learning_rate": 9.17557700564933e-06, "loss": 1.0108, "step": 37520 }, { "epoch": 0.5406456631660832, "grad_norm": 0.6400893330574036, "learning_rate": 9.17092729282423e-06, "loss": 1.0145, "step": 37530 }, { "epoch": 0.5407897200973825, "grad_norm": 0.6215956211090088, "learning_rate": 9.166277760476816e-06, "loss": 1.0132, "step": 37540 }, { "epoch": 0.5409337770286817, "grad_norm": 0.5928720235824585, "learning_rate": 9.161628409619237e-06, "loss": 1.0219, "step": 37550 }, { "epoch": 0.541077833959981, "grad_norm": 0.959450364112854, "learning_rate": 9.156979241263587e-06, "loss": 1.0046, "step": 37560 }, { "epoch": 0.5412218908912803, "grad_norm": 0.672532856464386, "learning_rate": 9.152330256421926e-06, "loss": 1.0412, "step": 37570 }, { "epoch": 0.5413659478225795, "grad_norm": 0.5589152574539185, "learning_rate": 9.147681456106275e-06, "loss": 1.0102, "step": 37580 }, { "epoch": 0.5415100047538788, "grad_norm": 0.5893039107322693, "learning_rate": 9.14303284132861e-06, "loss": 1.0068, "step": 37590 }, { "epoch": 0.5416540616851779, "grad_norm": 0.7029055953025818, "learning_rate": 9.138384413100879e-06, "loss": 1.0132, "step": 37600 }, { "epoch": 0.5417981186164772, "grad_norm": 0.6449434757232666, "learning_rate": 9.133736172434973e-06, "loss": 1.0129, "step": 37610 }, { "epoch": 0.5419421755477765, "grad_norm": 0.6333742141723633, "learning_rate": 9.129088120342749e-06, "loss": 1.0305, "step": 37620 }, { "epoch": 0.5420862324790757, "grad_norm": 0.6525691747665405, "learning_rate": 9.124440257836031e-06, "loss": 1.0266, "step": 37630 }, { "epoch": 0.542230289410375, "grad_norm": 0.6647065281867981, "learning_rate": 9.119792585926593e-06, "loss": 1.0265, "step": 37640 }, { "epoch": 0.5423743463416743, "grad_norm": 0.5731229186058044, "learning_rate": 9.11514510562616e-06, "loss": 1.0033, "step": 37650 }, { "epoch": 0.5425184032729735, "grad_norm": 0.5933045744895935, "learning_rate": 9.110497817946436e-06, "loss": 1.0069, "step": 37660 }, { "epoch": 0.5426624602042728, "grad_norm": 0.6093741655349731, "learning_rate": 9.105850723899065e-06, "loss": 1.002, "step": 37670 }, { "epoch": 0.5428065171355719, "grad_norm": 0.570600152015686, "learning_rate": 9.101203824495655e-06, "loss": 1.0284, "step": 37680 }, { "epoch": 0.5429505740668712, "grad_norm": 0.5646870732307434, "learning_rate": 9.096557120747775e-06, "loss": 1.0093, "step": 37690 }, { "epoch": 0.5430946309981705, "grad_norm": 0.6539302468299866, "learning_rate": 9.091910613666945e-06, "loss": 1.0299, "step": 37700 }, { "epoch": 0.5432386879294697, "grad_norm": 0.547149658203125, "learning_rate": 9.087264304264648e-06, "loss": 1.0283, "step": 37710 }, { "epoch": 0.543382744860769, "grad_norm": 0.5525507926940918, "learning_rate": 9.082618193552323e-06, "loss": 1.0208, "step": 37720 }, { "epoch": 0.5435268017920682, "grad_norm": 0.5298338532447815, "learning_rate": 9.077972282541358e-06, "loss": 1.0146, "step": 37730 }, { "epoch": 0.5436708587233675, "grad_norm": 0.5054857730865479, "learning_rate": 9.07332657224311e-06, "loss": 1.0214, "step": 37740 }, { "epoch": 0.5438149156546668, "grad_norm": 0.6309981942176819, "learning_rate": 9.068681063668886e-06, "loss": 1.0446, "step": 37750 }, { "epoch": 0.5439589725859659, "grad_norm": 0.6483060121536255, "learning_rate": 9.064035757829941e-06, "loss": 1.0095, "step": 37760 }, { "epoch": 0.5441030295172652, "grad_norm": 0.577060341835022, "learning_rate": 9.059390655737508e-06, "loss": 0.9945, "step": 37770 }, { "epoch": 0.5442470864485645, "grad_norm": 0.6026631593704224, "learning_rate": 9.05474575840275e-06, "loss": 1.0073, "step": 37780 }, { "epoch": 0.5443911433798637, "grad_norm": 0.6109444499015808, "learning_rate": 9.050101066836798e-06, "loss": 1.0399, "step": 37790 }, { "epoch": 0.544535200311163, "grad_norm": 0.6298561096191406, "learning_rate": 9.045456582050743e-06, "loss": 1.0078, "step": 37800 }, { "epoch": 0.5446792572424622, "grad_norm": 0.6335732340812683, "learning_rate": 9.040812305055619e-06, "loss": 1.0087, "step": 37810 }, { "epoch": 0.5448233141737615, "grad_norm": 0.5873390436172485, "learning_rate": 9.036168236862426e-06, "loss": 1.0298, "step": 37820 }, { "epoch": 0.5449673711050608, "grad_norm": 0.5091093182563782, "learning_rate": 9.03152437848211e-06, "loss": 1.0363, "step": 37830 }, { "epoch": 0.5451114280363599, "grad_norm": 0.6849727034568787, "learning_rate": 9.02688073092558e-06, "loss": 1.0126, "step": 37840 }, { "epoch": 0.5452554849676592, "grad_norm": 0.7993967533111572, "learning_rate": 9.022237295203689e-06, "loss": 1.0182, "step": 37850 }, { "epoch": 0.5453995418989585, "grad_norm": 0.6131024956703186, "learning_rate": 9.01759407232725e-06, "loss": 1.0363, "step": 37860 }, { "epoch": 0.5455435988302577, "grad_norm": 0.5998750925064087, "learning_rate": 9.01295106330703e-06, "loss": 1.0188, "step": 37870 }, { "epoch": 0.545687655761557, "grad_norm": 0.7071616649627686, "learning_rate": 9.008308269153747e-06, "loss": 1.021, "step": 37880 }, { "epoch": 0.5458317126928562, "grad_norm": 0.5982471108436584, "learning_rate": 9.003665690878076e-06, "loss": 1.0386, "step": 37890 }, { "epoch": 0.5459757696241555, "grad_norm": 0.6303359866142273, "learning_rate": 8.999023329490634e-06, "loss": 1.0283, "step": 37900 }, { "epoch": 0.5461198265554548, "grad_norm": 0.5582040548324585, "learning_rate": 8.99438118600201e-06, "loss": 1.0347, "step": 37910 }, { "epoch": 0.5462638834867539, "grad_norm": 0.6476597189903259, "learning_rate": 8.989739261422722e-06, "loss": 0.9984, "step": 37920 }, { "epoch": 0.5464079404180532, "grad_norm": 0.6058054566383362, "learning_rate": 8.985097556763269e-06, "loss": 1.0102, "step": 37930 }, { "epoch": 0.5465519973493524, "grad_norm": 0.5832399129867554, "learning_rate": 8.980456073034074e-06, "loss": 1.0269, "step": 37940 }, { "epoch": 0.5466960542806517, "grad_norm": 0.6475029587745667, "learning_rate": 8.975814811245525e-06, "loss": 1.0046, "step": 37950 }, { "epoch": 0.546840111211951, "grad_norm": 0.5944557785987854, "learning_rate": 8.971173772407967e-06, "loss": 1.0239, "step": 37960 }, { "epoch": 0.5469841681432502, "grad_norm": 0.637930691242218, "learning_rate": 8.966532957531684e-06, "loss": 1.0331, "step": 37970 }, { "epoch": 0.5471282250745495, "grad_norm": 0.6158673167228699, "learning_rate": 8.961892367626918e-06, "loss": 1.0276, "step": 37980 }, { "epoch": 0.5472722820058488, "grad_norm": 0.6054377555847168, "learning_rate": 8.957252003703865e-06, "loss": 1.0443, "step": 37990 }, { "epoch": 0.5474163389371479, "grad_norm": 0.5525330305099487, "learning_rate": 8.952611866772665e-06, "loss": 1.0318, "step": 38000 }, { "epoch": 0.5475603958684472, "grad_norm": 0.5811078548431396, "learning_rate": 8.947971957843412e-06, "loss": 1.0214, "step": 38010 }, { "epoch": 0.5477044527997464, "grad_norm": 0.597833514213562, "learning_rate": 8.943332277926154e-06, "loss": 1.0117, "step": 38020 }, { "epoch": 0.5478485097310457, "grad_norm": 0.5681318044662476, "learning_rate": 8.938692828030877e-06, "loss": 0.993, "step": 38030 }, { "epoch": 0.547992566662345, "grad_norm": 0.555644690990448, "learning_rate": 8.934053609167535e-06, "loss": 1.0162, "step": 38040 }, { "epoch": 0.5481366235936442, "grad_norm": 0.6647034883499146, "learning_rate": 8.92941462234602e-06, "loss": 1.0087, "step": 38050 }, { "epoch": 0.5482806805249435, "grad_norm": 0.6532979607582092, "learning_rate": 8.924775868576166e-06, "loss": 1.0149, "step": 38060 }, { "epoch": 0.5484247374562428, "grad_norm": 0.5512877702713013, "learning_rate": 8.92013734886778e-06, "loss": 1.0309, "step": 38070 }, { "epoch": 0.5485687943875419, "grad_norm": 0.6826861500740051, "learning_rate": 8.915499064230593e-06, "loss": 1.0214, "step": 38080 }, { "epoch": 0.5487128513188412, "grad_norm": 0.5934279561042786, "learning_rate": 8.910861015674297e-06, "loss": 1.015, "step": 38090 }, { "epoch": 0.5488569082501404, "grad_norm": 0.7294298410415649, "learning_rate": 8.906223204208535e-06, "loss": 0.9978, "step": 38100 }, { "epoch": 0.5490009651814397, "grad_norm": 0.6452908515930176, "learning_rate": 8.901585630842894e-06, "loss": 1.0172, "step": 38110 }, { "epoch": 0.549145022112739, "grad_norm": 0.7616612911224365, "learning_rate": 8.896948296586905e-06, "loss": 1.0127, "step": 38120 }, { "epoch": 0.5492890790440382, "grad_norm": 0.5813748240470886, "learning_rate": 8.89231120245006e-06, "loss": 1.0176, "step": 38130 }, { "epoch": 0.5494331359753375, "grad_norm": 0.6211594343185425, "learning_rate": 8.88767434944178e-06, "loss": 1.0237, "step": 38140 }, { "epoch": 0.5495771929066368, "grad_norm": 0.6435821056365967, "learning_rate": 8.883037738571453e-06, "loss": 1.0501, "step": 38150 }, { "epoch": 0.5497212498379359, "grad_norm": 0.5843872427940369, "learning_rate": 8.878401370848406e-06, "loss": 1.0338, "step": 38160 }, { "epoch": 0.5498653067692352, "grad_norm": 0.5781148076057434, "learning_rate": 8.8737652472819e-06, "loss": 1.0203, "step": 38170 }, { "epoch": 0.5500093637005344, "grad_norm": 0.588342547416687, "learning_rate": 8.86912936888117e-06, "loss": 1.0159, "step": 38180 }, { "epoch": 0.5501534206318337, "grad_norm": 0.5943087339401245, "learning_rate": 8.864493736655373e-06, "loss": 1.0093, "step": 38190 }, { "epoch": 0.550297477563133, "grad_norm": 0.6095810532569885, "learning_rate": 8.859858351613624e-06, "loss": 1.0255, "step": 38200 }, { "epoch": 0.5504415344944322, "grad_norm": 0.6482893824577332, "learning_rate": 8.855223214764986e-06, "loss": 1.021, "step": 38210 }, { "epoch": 0.5505855914257315, "grad_norm": 0.5073266625404358, "learning_rate": 8.850588327118461e-06, "loss": 1.0388, "step": 38220 }, { "epoch": 0.5507296483570306, "grad_norm": 0.5247241258621216, "learning_rate": 8.845953689682999e-06, "loss": 1.001, "step": 38230 }, { "epoch": 0.5508737052883299, "grad_norm": 0.6343132853507996, "learning_rate": 8.841319303467502e-06, "loss": 1.0319, "step": 38240 }, { "epoch": 0.5510177622196292, "grad_norm": 0.5247365832328796, "learning_rate": 8.836685169480805e-06, "loss": 1.0214, "step": 38250 }, { "epoch": 0.5511618191509284, "grad_norm": 0.6870401501655579, "learning_rate": 8.832051288731701e-06, "loss": 1.0393, "step": 38260 }, { "epoch": 0.5513058760822277, "grad_norm": 0.6118807792663574, "learning_rate": 8.82741766222892e-06, "loss": 1.0278, "step": 38270 }, { "epoch": 0.551449933013527, "grad_norm": 0.5001921057701111, "learning_rate": 8.822784290981136e-06, "loss": 1.0279, "step": 38280 }, { "epoch": 0.5515939899448262, "grad_norm": 0.5460012555122375, "learning_rate": 8.818151175996974e-06, "loss": 0.9962, "step": 38290 }, { "epoch": 0.5517380468761255, "grad_norm": 0.5944710373878479, "learning_rate": 8.813518318285e-06, "loss": 1.0026, "step": 38300 }, { "epoch": 0.5518821038074246, "grad_norm": 0.6419718861579895, "learning_rate": 8.808885718853713e-06, "loss": 1.0186, "step": 38310 }, { "epoch": 0.5520261607387239, "grad_norm": 0.5534757375717163, "learning_rate": 8.804253378711583e-06, "loss": 1.037, "step": 38320 }, { "epoch": 0.5521702176700232, "grad_norm": 0.5966949462890625, "learning_rate": 8.799621298866992e-06, "loss": 1.0134, "step": 38330 }, { "epoch": 0.5523142746013224, "grad_norm": 0.5333777666091919, "learning_rate": 8.794989480328284e-06, "loss": 1.0146, "step": 38340 }, { "epoch": 0.5524583315326217, "grad_norm": 0.6215424537658691, "learning_rate": 8.790357924103745e-06, "loss": 1.0216, "step": 38350 }, { "epoch": 0.552602388463921, "grad_norm": 0.5424696207046509, "learning_rate": 8.785726631201596e-06, "loss": 1.0054, "step": 38360 }, { "epoch": 0.5527464453952202, "grad_norm": 0.6233113408088684, "learning_rate": 8.781095602630013e-06, "loss": 1.0158, "step": 38370 }, { "epoch": 0.5528905023265195, "grad_norm": 0.6187631487846375, "learning_rate": 8.776464839397099e-06, "loss": 1.0449, "step": 38380 }, { "epoch": 0.5530345592578186, "grad_norm": 0.7542943954467773, "learning_rate": 8.77183434251091e-06, "loss": 1.0275, "step": 38390 }, { "epoch": 0.5531786161891179, "grad_norm": 0.6114190816879272, "learning_rate": 8.767204112979446e-06, "loss": 1.0139, "step": 38400 }, { "epoch": 0.5533226731204172, "grad_norm": 0.6118826270103455, "learning_rate": 8.762574151810639e-06, "loss": 1.0123, "step": 38410 }, { "epoch": 0.5534667300517164, "grad_norm": 0.6880956292152405, "learning_rate": 8.757944460012367e-06, "loss": 1.0474, "step": 38420 }, { "epoch": 0.5536107869830157, "grad_norm": 0.5714311003684998, "learning_rate": 8.753315038592453e-06, "loss": 1.0332, "step": 38430 }, { "epoch": 0.553754843914315, "grad_norm": 0.5913240909576416, "learning_rate": 8.74868588855866e-06, "loss": 1.0102, "step": 38440 }, { "epoch": 0.5538989008456142, "grad_norm": 0.6488410830497742, "learning_rate": 8.74405701091868e-06, "loss": 1.0199, "step": 38450 }, { "epoch": 0.5540429577769135, "grad_norm": 0.7068957686424255, "learning_rate": 8.73942840668017e-06, "loss": 1.0234, "step": 38460 }, { "epoch": 0.5541870147082126, "grad_norm": 0.5866946578025818, "learning_rate": 8.7348000768507e-06, "loss": 1.0068, "step": 38470 }, { "epoch": 0.5543310716395119, "grad_norm": 0.6052505373954773, "learning_rate": 8.730172022437806e-06, "loss": 1.0189, "step": 38480 }, { "epoch": 0.5544751285708112, "grad_norm": 0.6822125315666199, "learning_rate": 8.725544244448943e-06, "loss": 0.9985, "step": 38490 }, { "epoch": 0.5546191855021104, "grad_norm": 0.5761526226997375, "learning_rate": 8.720916743891516e-06, "loss": 1.0262, "step": 38500 }, { "epoch": 0.5547632424334097, "grad_norm": 0.5955058336257935, "learning_rate": 8.71628952177287e-06, "loss": 1.0238, "step": 38510 }, { "epoch": 0.554907299364709, "grad_norm": 0.547618567943573, "learning_rate": 8.711662579100286e-06, "loss": 1.0246, "step": 38520 }, { "epoch": 0.5550513562960082, "grad_norm": 0.6268759965896606, "learning_rate": 8.707035916880985e-06, "loss": 1.0133, "step": 38530 }, { "epoch": 0.5551954132273075, "grad_norm": 0.5458856821060181, "learning_rate": 8.70240953612213e-06, "loss": 1.0192, "step": 38540 }, { "epoch": 0.5553394701586066, "grad_norm": 0.6251266598701477, "learning_rate": 8.697783437830817e-06, "loss": 1.0445, "step": 38550 }, { "epoch": 0.5554835270899059, "grad_norm": 0.5710606575012207, "learning_rate": 8.693157623014085e-06, "loss": 1.0103, "step": 38560 }, { "epoch": 0.5556275840212052, "grad_norm": 0.6082000732421875, "learning_rate": 8.688532092678914e-06, "loss": 1.0202, "step": 38570 }, { "epoch": 0.5557716409525044, "grad_norm": 0.5642730593681335, "learning_rate": 8.68390684783221e-06, "loss": 1.0211, "step": 38580 }, { "epoch": 0.5559156978838037, "grad_norm": 0.5714201927185059, "learning_rate": 8.679281889480833e-06, "loss": 1.0094, "step": 38590 }, { "epoch": 0.556059754815103, "grad_norm": 0.5635415315628052, "learning_rate": 8.674657218631573e-06, "loss": 1.0095, "step": 38600 }, { "epoch": 0.5562038117464022, "grad_norm": 0.6005164384841919, "learning_rate": 8.670032836291144e-06, "loss": 1.0325, "step": 38610 }, { "epoch": 0.5563478686777015, "grad_norm": 0.6503455638885498, "learning_rate": 8.665408743466227e-06, "loss": 1.0041, "step": 38620 }, { "epoch": 0.5564919256090006, "grad_norm": 0.6297188401222229, "learning_rate": 8.660784941163415e-06, "loss": 1.0167, "step": 38630 }, { "epoch": 0.5566359825402999, "grad_norm": 0.5665830373764038, "learning_rate": 8.656161430389243e-06, "loss": 1.0255, "step": 38640 }, { "epoch": 0.5567800394715992, "grad_norm": 0.625270664691925, "learning_rate": 8.651538212150194e-06, "loss": 1.0129, "step": 38650 }, { "epoch": 0.5569240964028984, "grad_norm": 0.5851070284843445, "learning_rate": 8.646915287452672e-06, "loss": 1.0318, "step": 38660 }, { "epoch": 0.5570681533341977, "grad_norm": 0.6134948134422302, "learning_rate": 8.642292657303024e-06, "loss": 1.016, "step": 38670 }, { "epoch": 0.5572122102654969, "grad_norm": 0.6355244517326355, "learning_rate": 8.637670322707537e-06, "loss": 1.0134, "step": 38680 }, { "epoch": 0.5573562671967962, "grad_norm": 0.6000000834465027, "learning_rate": 8.633048284672427e-06, "loss": 1.0254, "step": 38690 }, { "epoch": 0.5575003241280955, "grad_norm": 0.6704204082489014, "learning_rate": 8.62842654420385e-06, "loss": 1.0106, "step": 38700 }, { "epoch": 0.5576443810593946, "grad_norm": 0.6079708337783813, "learning_rate": 8.623805102307894e-06, "loss": 1.0194, "step": 38710 }, { "epoch": 0.5577884379906939, "grad_norm": 0.6132945418357849, "learning_rate": 8.619183959990577e-06, "loss": 1.0171, "step": 38720 }, { "epoch": 0.5579324949219931, "grad_norm": 0.5544208288192749, "learning_rate": 8.61456311825787e-06, "loss": 1.0318, "step": 38730 }, { "epoch": 0.5580765518532924, "grad_norm": 0.6078389883041382, "learning_rate": 8.609942578115657e-06, "loss": 1.0017, "step": 38740 }, { "epoch": 0.5582206087845917, "grad_norm": 0.6498771905899048, "learning_rate": 8.605322340569768e-06, "loss": 0.9872, "step": 38750 }, { "epoch": 0.5583646657158909, "grad_norm": 0.6382272243499756, "learning_rate": 8.600702406625968e-06, "loss": 1.0337, "step": 38760 }, { "epoch": 0.5585087226471902, "grad_norm": 0.5370135307312012, "learning_rate": 8.596082777289951e-06, "loss": 1.017, "step": 38770 }, { "epoch": 0.5586527795784895, "grad_norm": 0.6954641342163086, "learning_rate": 8.591463453567344e-06, "loss": 1.0291, "step": 38780 }, { "epoch": 0.5587968365097886, "grad_norm": 0.5990657806396484, "learning_rate": 8.586844436463714e-06, "loss": 1.0099, "step": 38790 }, { "epoch": 0.5589408934410879, "grad_norm": 0.6652782559394836, "learning_rate": 8.582225726984554e-06, "loss": 1.0046, "step": 38800 }, { "epoch": 0.5590849503723871, "grad_norm": 0.536297619342804, "learning_rate": 8.577607326135296e-06, "loss": 1.0215, "step": 38810 }, { "epoch": 0.5592290073036864, "grad_norm": 0.6417962312698364, "learning_rate": 8.572989234921302e-06, "loss": 1.0219, "step": 38820 }, { "epoch": 0.5593730642349857, "grad_norm": 0.5660361647605896, "learning_rate": 8.568371454347865e-06, "loss": 1.0373, "step": 38830 }, { "epoch": 0.5595171211662849, "grad_norm": 0.6714792847633362, "learning_rate": 8.563753985420213e-06, "loss": 1.0255, "step": 38840 }, { "epoch": 0.5596611780975842, "grad_norm": 0.5720904469490051, "learning_rate": 8.559136829143509e-06, "loss": 1.0114, "step": 38850 }, { "epoch": 0.5598052350288835, "grad_norm": 0.5660542845726013, "learning_rate": 8.554519986522834e-06, "loss": 1.0315, "step": 38860 }, { "epoch": 0.5599492919601826, "grad_norm": 0.5986666679382324, "learning_rate": 8.549903458563223e-06, "loss": 1.0192, "step": 38870 }, { "epoch": 0.5600933488914819, "grad_norm": 0.5407901406288147, "learning_rate": 8.545287246269624e-06, "loss": 1.0218, "step": 38880 }, { "epoch": 0.5602374058227811, "grad_norm": 0.6441916227340698, "learning_rate": 8.540671350646922e-06, "loss": 1.0291, "step": 38890 }, { "epoch": 0.5603814627540804, "grad_norm": 0.5515192747116089, "learning_rate": 8.536055772699936e-06, "loss": 1.0116, "step": 38900 }, { "epoch": 0.5605255196853797, "grad_norm": 0.6288701295852661, "learning_rate": 8.531440513433412e-06, "loss": 1.0225, "step": 38910 }, { "epoch": 0.5606695766166789, "grad_norm": 0.6084778904914856, "learning_rate": 8.526825573852031e-06, "loss": 1.0242, "step": 38920 }, { "epoch": 0.5608136335479782, "grad_norm": 0.5939294099807739, "learning_rate": 8.522210954960402e-06, "loss": 1.0129, "step": 38930 }, { "epoch": 0.5609576904792775, "grad_norm": 0.5160016417503357, "learning_rate": 8.517596657763058e-06, "loss": 1.0129, "step": 38940 }, { "epoch": 0.5611017474105766, "grad_norm": 0.5904541015625, "learning_rate": 8.512982683264474e-06, "loss": 1.047, "step": 38950 }, { "epoch": 0.5612458043418759, "grad_norm": 0.7362788915634155, "learning_rate": 8.508369032469046e-06, "loss": 1.0259, "step": 38960 }, { "epoch": 0.5613898612731751, "grad_norm": 0.5475773811340332, "learning_rate": 8.503755706381102e-06, "loss": 1.0202, "step": 38970 }, { "epoch": 0.5615339182044744, "grad_norm": 0.7003915309906006, "learning_rate": 8.499142706004902e-06, "loss": 1.0291, "step": 38980 }, { "epoch": 0.5616779751357737, "grad_norm": 0.6023433804512024, "learning_rate": 8.494530032344631e-06, "loss": 1.0329, "step": 38990 }, { "epoch": 0.5618220320670729, "grad_norm": 0.6179013848304749, "learning_rate": 8.489917686404399e-06, "loss": 1.0038, "step": 39000 }, { "epoch": 0.5619660889983722, "grad_norm": 0.5738324522972107, "learning_rate": 8.48530566918826e-06, "loss": 1.0077, "step": 39010 }, { "epoch": 0.5621101459296715, "grad_norm": 0.6717596650123596, "learning_rate": 8.480693981700177e-06, "loss": 1.0261, "step": 39020 }, { "epoch": 0.5622542028609706, "grad_norm": 0.5486605763435364, "learning_rate": 8.47608262494406e-06, "loss": 1.0282, "step": 39030 }, { "epoch": 0.5623982597922699, "grad_norm": 0.5822954177856445, "learning_rate": 8.471471599923734e-06, "loss": 1.0205, "step": 39040 }, { "epoch": 0.5625423167235691, "grad_norm": 0.5565518736839294, "learning_rate": 8.46686090764295e-06, "loss": 1.0209, "step": 39050 }, { "epoch": 0.5626863736548684, "grad_norm": 0.6196978688240051, "learning_rate": 8.462250549105398e-06, "loss": 0.9893, "step": 39060 }, { "epoch": 0.5628304305861677, "grad_norm": 0.5636383295059204, "learning_rate": 8.45764052531469e-06, "loss": 1.0217, "step": 39070 }, { "epoch": 0.5629744875174669, "grad_norm": 0.6413935422897339, "learning_rate": 8.453030837274361e-06, "loss": 1.0175, "step": 39080 }, { "epoch": 0.5631185444487662, "grad_norm": 0.6890947818756104, "learning_rate": 8.448421485987879e-06, "loss": 1.0189, "step": 39090 }, { "epoch": 0.5632626013800655, "grad_norm": 0.6520262360572815, "learning_rate": 8.443812472458639e-06, "loss": 1.0184, "step": 39100 }, { "epoch": 0.5634066583113646, "grad_norm": 0.5905381441116333, "learning_rate": 8.43920379768995e-06, "loss": 1.0186, "step": 39110 }, { "epoch": 0.5635507152426639, "grad_norm": 0.5987008810043335, "learning_rate": 8.434595462685066e-06, "loss": 1.0303, "step": 39120 }, { "epoch": 0.5636947721739631, "grad_norm": 0.6232080459594727, "learning_rate": 8.429987468447152e-06, "loss": 0.9983, "step": 39130 }, { "epoch": 0.5638388291052624, "grad_norm": 0.5960439443588257, "learning_rate": 8.425379815979312e-06, "loss": 1.0218, "step": 39140 }, { "epoch": 0.5639828860365617, "grad_norm": 0.6017009019851685, "learning_rate": 8.420772506284565e-06, "loss": 1.0276, "step": 39150 }, { "epoch": 0.5641269429678609, "grad_norm": 0.6649116277694702, "learning_rate": 8.416165540365851e-06, "loss": 1.0369, "step": 39160 }, { "epoch": 0.5642709998991602, "grad_norm": 0.531055748462677, "learning_rate": 8.411558919226058e-06, "loss": 1.0049, "step": 39170 }, { "epoch": 0.5644150568304594, "grad_norm": 0.5994618535041809, "learning_rate": 8.406952643867971e-06, "loss": 1.0194, "step": 39180 }, { "epoch": 0.5645591137617586, "grad_norm": 0.5408610701560974, "learning_rate": 8.402346715294318e-06, "loss": 1.0259, "step": 39190 }, { "epoch": 0.5647031706930579, "grad_norm": 0.5614040493965149, "learning_rate": 8.397741134507746e-06, "loss": 1.0216, "step": 39200 }, { "epoch": 0.5648472276243571, "grad_norm": 0.6325517892837524, "learning_rate": 8.393135902510827e-06, "loss": 1.0164, "step": 39210 }, { "epoch": 0.5649912845556564, "grad_norm": 0.5664529204368591, "learning_rate": 8.388531020306052e-06, "loss": 1.0293, "step": 39220 }, { "epoch": 0.5651353414869557, "grad_norm": 0.5711817741394043, "learning_rate": 8.383926488895847e-06, "loss": 1.0219, "step": 39230 }, { "epoch": 0.5652793984182549, "grad_norm": 0.6091551780700684, "learning_rate": 8.379322309282549e-06, "loss": 1.0097, "step": 39240 }, { "epoch": 0.5654234553495542, "grad_norm": 0.6787410974502563, "learning_rate": 8.374718482468429e-06, "loss": 1.0396, "step": 39250 }, { "epoch": 0.5655675122808534, "grad_norm": 0.5833188891410828, "learning_rate": 8.370115009455675e-06, "loss": 1.0235, "step": 39260 }, { "epoch": 0.5657115692121526, "grad_norm": 0.6172269582748413, "learning_rate": 8.365511891246397e-06, "loss": 1.0174, "step": 39270 }, { "epoch": 0.5658556261434519, "grad_norm": 0.6320672035217285, "learning_rate": 8.360909128842635e-06, "loss": 1.0466, "step": 39280 }, { "epoch": 0.5659996830747511, "grad_norm": 0.6311730742454529, "learning_rate": 8.356306723246344e-06, "loss": 1.0224, "step": 39290 }, { "epoch": 0.5661437400060504, "grad_norm": 0.6683375239372253, "learning_rate": 8.351704675459403e-06, "loss": 1.0202, "step": 39300 }, { "epoch": 0.5662877969373497, "grad_norm": 0.5916944742202759, "learning_rate": 8.347102986483616e-06, "loss": 1.0169, "step": 39310 }, { "epoch": 0.5664318538686489, "grad_norm": 0.5895016193389893, "learning_rate": 8.34250165732071e-06, "loss": 1.0346, "step": 39320 }, { "epoch": 0.5665759107999482, "grad_norm": 0.6572079062461853, "learning_rate": 8.337900688972322e-06, "loss": 1.0159, "step": 39330 }, { "epoch": 0.5667199677312474, "grad_norm": 0.5720778107643127, "learning_rate": 8.33330008244003e-06, "loss": 1.0176, "step": 39340 }, { "epoch": 0.5668640246625466, "grad_norm": 0.6016258597373962, "learning_rate": 8.328699838725315e-06, "loss": 1.014, "step": 39350 }, { "epoch": 0.5670080815938459, "grad_norm": 0.7664455771446228, "learning_rate": 8.324099958829593e-06, "loss": 1.0235, "step": 39360 }, { "epoch": 0.5671521385251451, "grad_norm": 0.5646085143089294, "learning_rate": 8.319500443754189e-06, "loss": 1.0343, "step": 39370 }, { "epoch": 0.5672961954564444, "grad_norm": 0.5941106081008911, "learning_rate": 8.314901294500356e-06, "loss": 1.0136, "step": 39380 }, { "epoch": 0.5674402523877436, "grad_norm": 0.6006180047988892, "learning_rate": 8.310302512069265e-06, "loss": 1.0105, "step": 39390 }, { "epoch": 0.5675843093190429, "grad_norm": 0.5841224789619446, "learning_rate": 8.305704097462011e-06, "loss": 1.0177, "step": 39400 }, { "epoch": 0.5677283662503422, "grad_norm": 0.8114508390426636, "learning_rate": 8.301106051679595e-06, "loss": 1.0129, "step": 39410 }, { "epoch": 0.5678724231816414, "grad_norm": 0.5688908696174622, "learning_rate": 8.296508375722963e-06, "loss": 1.0179, "step": 39420 }, { "epoch": 0.5680164801129406, "grad_norm": 0.5788629055023193, "learning_rate": 8.291911070592955e-06, "loss": 1.0178, "step": 39430 }, { "epoch": 0.5681605370442399, "grad_norm": 0.5609277486801147, "learning_rate": 8.28731413729034e-06, "loss": 1.0384, "step": 39440 }, { "epoch": 0.5683045939755391, "grad_norm": 0.69785475730896, "learning_rate": 8.282717576815816e-06, "loss": 1.0213, "step": 39450 }, { "epoch": 0.5684486509068384, "grad_norm": 0.6239933371543884, "learning_rate": 8.278121390169981e-06, "loss": 1.011, "step": 39460 }, { "epoch": 0.5685927078381376, "grad_norm": 0.5641295313835144, "learning_rate": 8.273525578353371e-06, "loss": 1.0264, "step": 39470 }, { "epoch": 0.5687367647694369, "grad_norm": 0.615608811378479, "learning_rate": 8.268930142366425e-06, "loss": 1.0126, "step": 39480 }, { "epoch": 0.5688808217007362, "grad_norm": 0.5850027799606323, "learning_rate": 8.264335083209502e-06, "loss": 1.0081, "step": 39490 }, { "epoch": 0.5690248786320354, "grad_norm": 0.5991293787956238, "learning_rate": 8.259740401882892e-06, "loss": 1.0207, "step": 39500 }, { "epoch": 0.5691689355633346, "grad_norm": 0.5950531959533691, "learning_rate": 8.255146099386791e-06, "loss": 0.9916, "step": 39510 }, { "epoch": 0.5693129924946339, "grad_norm": 0.6409483551979065, "learning_rate": 8.25055217672131e-06, "loss": 1.024, "step": 39520 }, { "epoch": 0.5694570494259331, "grad_norm": 0.602638304233551, "learning_rate": 8.24595863488649e-06, "loss": 1.0202, "step": 39530 }, { "epoch": 0.5696011063572324, "grad_norm": 0.636147677898407, "learning_rate": 8.241365474882279e-06, "loss": 0.9985, "step": 39540 }, { "epoch": 0.5697451632885316, "grad_norm": 0.5812684893608093, "learning_rate": 8.236772697708537e-06, "loss": 1.0187, "step": 39550 }, { "epoch": 0.5698892202198309, "grad_norm": 0.7413349151611328, "learning_rate": 8.232180304365063e-06, "loss": 1.0333, "step": 39560 }, { "epoch": 0.5700332771511302, "grad_norm": 0.5983883142471313, "learning_rate": 8.227588295851542e-06, "loss": 1.0297, "step": 39570 }, { "epoch": 0.5701773340824293, "grad_norm": 0.6596150994300842, "learning_rate": 8.222996673167605e-06, "loss": 1.0081, "step": 39580 }, { "epoch": 0.5703213910137286, "grad_norm": 0.6228833794593811, "learning_rate": 8.218405437312776e-06, "loss": 1.0255, "step": 39590 }, { "epoch": 0.5704654479450278, "grad_norm": 0.5467576384544373, "learning_rate": 8.213814589286506e-06, "loss": 1.0071, "step": 39600 }, { "epoch": 0.5706095048763271, "grad_norm": 0.6596918106079102, "learning_rate": 8.209224130088159e-06, "loss": 1.0227, "step": 39610 }, { "epoch": 0.5707535618076264, "grad_norm": 0.5788407921791077, "learning_rate": 8.204634060717016e-06, "loss": 1.0154, "step": 39620 }, { "epoch": 0.5708976187389256, "grad_norm": 0.6458188891410828, "learning_rate": 8.20004438217227e-06, "loss": 1.0196, "step": 39630 }, { "epoch": 0.5710416756702249, "grad_norm": 0.5770555138587952, "learning_rate": 8.195455095453034e-06, "loss": 1.0107, "step": 39640 }, { "epoch": 0.5711857326015242, "grad_norm": 0.5458569526672363, "learning_rate": 8.190866201558329e-06, "loss": 1.0212, "step": 39650 }, { "epoch": 0.5713297895328233, "grad_norm": 0.5996442437171936, "learning_rate": 8.186277701487092e-06, "loss": 1.0139, "step": 39660 }, { "epoch": 0.5714738464641226, "grad_norm": 0.6025555729866028, "learning_rate": 8.181689596238182e-06, "loss": 1.0094, "step": 39670 }, { "epoch": 0.5716179033954218, "grad_norm": 0.6486698389053345, "learning_rate": 8.177101886810361e-06, "loss": 1.0082, "step": 39680 }, { "epoch": 0.5717619603267211, "grad_norm": 0.5293046832084656, "learning_rate": 8.172514574202315e-06, "loss": 1.0315, "step": 39690 }, { "epoch": 0.5719060172580204, "grad_norm": 0.5688088536262512, "learning_rate": 8.167927659412636e-06, "loss": 1.0206, "step": 39700 }, { "epoch": 0.5720500741893196, "grad_norm": 0.6041375398635864, "learning_rate": 8.16334114343983e-06, "loss": 1.0173, "step": 39710 }, { "epoch": 0.5721941311206189, "grad_norm": 0.6499061584472656, "learning_rate": 8.15875502728232e-06, "loss": 1.0233, "step": 39720 }, { "epoch": 0.5723381880519182, "grad_norm": 0.6422781944274902, "learning_rate": 8.154169311938444e-06, "loss": 1.0212, "step": 39730 }, { "epoch": 0.5724822449832173, "grad_norm": 0.5883256196975708, "learning_rate": 8.149583998406442e-06, "loss": 1.0222, "step": 39740 }, { "epoch": 0.5726263019145166, "grad_norm": 0.6779961585998535, "learning_rate": 8.14499908768448e-06, "loss": 1.0258, "step": 39750 }, { "epoch": 0.5727703588458158, "grad_norm": 0.6148270964622498, "learning_rate": 8.140414580770626e-06, "loss": 1.0124, "step": 39760 }, { "epoch": 0.5729144157771151, "grad_norm": 0.6151580810546875, "learning_rate": 8.135830478662862e-06, "loss": 1.0236, "step": 39770 }, { "epoch": 0.5730584727084144, "grad_norm": 0.6776831150054932, "learning_rate": 8.131246782359088e-06, "loss": 0.9998, "step": 39780 }, { "epoch": 0.5732025296397136, "grad_norm": 0.5680509805679321, "learning_rate": 8.12666349285711e-06, "loss": 1.0156, "step": 39790 }, { "epoch": 0.5733465865710129, "grad_norm": 0.59038907289505, "learning_rate": 8.122080611154648e-06, "loss": 1.0111, "step": 39800 }, { "epoch": 0.5734906435023122, "grad_norm": 0.5553265810012817, "learning_rate": 8.117498138249334e-06, "loss": 1.0138, "step": 39810 }, { "epoch": 0.5736347004336113, "grad_norm": 0.5936552286148071, "learning_rate": 8.112916075138699e-06, "loss": 1.0273, "step": 39820 }, { "epoch": 0.5737787573649106, "grad_norm": 0.5887789130210876, "learning_rate": 8.108334422820209e-06, "loss": 1.0294, "step": 39830 }, { "epoch": 0.5739228142962098, "grad_norm": 0.5883054137229919, "learning_rate": 8.103753182291218e-06, "loss": 1.0328, "step": 39840 }, { "epoch": 0.5740668712275091, "grad_norm": 0.6597990393638611, "learning_rate": 8.099172354548997e-06, "loss": 0.9939, "step": 39850 }, { "epoch": 0.5742109281588084, "grad_norm": 0.5739467144012451, "learning_rate": 8.094591940590736e-06, "loss": 1.0016, "step": 39860 }, { "epoch": 0.5743549850901076, "grad_norm": 0.7081357836723328, "learning_rate": 8.090011941413523e-06, "loss": 1.0383, "step": 39870 }, { "epoch": 0.5744990420214069, "grad_norm": 0.5651097297668457, "learning_rate": 8.08543235801436e-06, "loss": 1.0135, "step": 39880 }, { "epoch": 0.5746430989527062, "grad_norm": 0.5493505001068115, "learning_rate": 8.080853191390163e-06, "loss": 1.0137, "step": 39890 }, { "epoch": 0.5747871558840053, "grad_norm": 0.46422478556632996, "learning_rate": 8.07627444253775e-06, "loss": 1.0405, "step": 39900 }, { "epoch": 0.5749312128153046, "grad_norm": 0.6994962692260742, "learning_rate": 8.071696112453856e-06, "loss": 1.0172, "step": 39910 }, { "epoch": 0.5750752697466038, "grad_norm": 0.5448575019836426, "learning_rate": 8.067118202135116e-06, "loss": 1.0244, "step": 39920 }, { "epoch": 0.5752193266779031, "grad_norm": 0.6547740697860718, "learning_rate": 8.062540712578079e-06, "loss": 1.0255, "step": 39930 }, { "epoch": 0.5753633836092024, "grad_norm": 0.5272354483604431, "learning_rate": 8.057963644779203e-06, "loss": 1.0259, "step": 39940 }, { "epoch": 0.5755074405405016, "grad_norm": 0.6416448950767517, "learning_rate": 8.053386999734853e-06, "loss": 1.0147, "step": 39950 }, { "epoch": 0.5756514974718009, "grad_norm": 0.5450991988182068, "learning_rate": 8.048810778441292e-06, "loss": 1.0131, "step": 39960 }, { "epoch": 0.5757955544031002, "grad_norm": 0.6209630966186523, "learning_rate": 8.044234981894716e-06, "loss": 1.0338, "step": 39970 }, { "epoch": 0.5759396113343993, "grad_norm": 0.5697899460792542, "learning_rate": 8.039659611091201e-06, "loss": 0.9978, "step": 39980 }, { "epoch": 0.5760836682656986, "grad_norm": 0.7120861411094666, "learning_rate": 8.035084667026745e-06, "loss": 1.0166, "step": 39990 }, { "epoch": 0.5762277251969978, "grad_norm": 0.6045985221862793, "learning_rate": 8.030510150697252e-06, "loss": 1.0234, "step": 40000 }, { "epoch": 0.5763717821282971, "grad_norm": 0.6500576734542847, "learning_rate": 8.025936063098529e-06, "loss": 1.0437, "step": 40010 }, { "epoch": 0.5765158390595964, "grad_norm": 0.581506073474884, "learning_rate": 8.021362405226295e-06, "loss": 1.0293, "step": 40020 }, { "epoch": 0.5766598959908956, "grad_norm": 0.5469965934753418, "learning_rate": 8.016789178076169e-06, "loss": 1.0253, "step": 40030 }, { "epoch": 0.5768039529221949, "grad_norm": 0.5983647108078003, "learning_rate": 8.012216382643681e-06, "loss": 1.0006, "step": 40040 }, { "epoch": 0.5769480098534941, "grad_norm": 0.6301031708717346, "learning_rate": 8.007644019924264e-06, "loss": 1.0354, "step": 40050 }, { "epoch": 0.5770920667847933, "grad_norm": 0.6649234294891357, "learning_rate": 8.00307209091326e-06, "loss": 1.0477, "step": 40060 }, { "epoch": 0.5772361237160926, "grad_norm": 0.577212929725647, "learning_rate": 7.998500596605913e-06, "loss": 1.0236, "step": 40070 }, { "epoch": 0.5773801806473918, "grad_norm": 0.6051406860351562, "learning_rate": 7.993929537997377e-06, "loss": 1.0237, "step": 40080 }, { "epoch": 0.5775242375786911, "grad_norm": 0.5686997771263123, "learning_rate": 7.989358916082709e-06, "loss": 1.0298, "step": 40090 }, { "epoch": 0.5776682945099904, "grad_norm": 0.6738128066062927, "learning_rate": 7.984788731856862e-06, "loss": 1.0051, "step": 40100 }, { "epoch": 0.5778123514412896, "grad_norm": 0.5925938487052917, "learning_rate": 7.980218986314715e-06, "loss": 1.0238, "step": 40110 }, { "epoch": 0.5779564083725889, "grad_norm": 0.5643150210380554, "learning_rate": 7.975649680451024e-06, "loss": 1.014, "step": 40120 }, { "epoch": 0.5781004653038881, "grad_norm": 0.5942860841751099, "learning_rate": 7.97108081526048e-06, "loss": 1.0026, "step": 40130 }, { "epoch": 0.5782445222351873, "grad_norm": 0.6095684170722961, "learning_rate": 7.96651239173765e-06, "loss": 1.0163, "step": 40140 }, { "epoch": 0.5783885791664866, "grad_norm": 0.621647298336029, "learning_rate": 7.961944410877017e-06, "loss": 1.0221, "step": 40150 }, { "epoch": 0.5785326360977858, "grad_norm": 0.613663911819458, "learning_rate": 7.957376873672976e-06, "loss": 1.0195, "step": 40160 }, { "epoch": 0.5786766930290851, "grad_norm": 0.5918740034103394, "learning_rate": 7.952809781119809e-06, "loss": 0.9951, "step": 40170 }, { "epoch": 0.5788207499603844, "grad_norm": 0.5893607139587402, "learning_rate": 7.94824313421171e-06, "loss": 1.0272, "step": 40180 }, { "epoch": 0.5789648068916836, "grad_norm": 0.7715088725090027, "learning_rate": 7.943676933942779e-06, "loss": 1.0101, "step": 40190 }, { "epoch": 0.5791088638229829, "grad_norm": 0.6940677762031555, "learning_rate": 7.939111181307016e-06, "loss": 1.0139, "step": 40200 }, { "epoch": 0.5792529207542821, "grad_norm": 0.5966167449951172, "learning_rate": 7.93454587729831e-06, "loss": 1.0219, "step": 40210 }, { "epoch": 0.5793969776855813, "grad_norm": 0.6023448705673218, "learning_rate": 7.929981022910483e-06, "loss": 1.0295, "step": 40220 }, { "epoch": 0.5795410346168806, "grad_norm": 0.7437616586685181, "learning_rate": 7.925416619137223e-06, "loss": 1.0075, "step": 40230 }, { "epoch": 0.5796850915481798, "grad_norm": 0.5959208607673645, "learning_rate": 7.920852666972151e-06, "loss": 1.0185, "step": 40240 }, { "epoch": 0.5798291484794791, "grad_norm": 0.5967977046966553, "learning_rate": 7.916289167408773e-06, "loss": 1.0219, "step": 40250 }, { "epoch": 0.5799732054107783, "grad_norm": 0.6647666096687317, "learning_rate": 7.911726121440495e-06, "loss": 1.0124, "step": 40260 }, { "epoch": 0.5801172623420776, "grad_norm": 0.5860805511474609, "learning_rate": 7.907163530060633e-06, "loss": 1.0203, "step": 40270 }, { "epoch": 0.5802613192733769, "grad_norm": 0.5214429497718811, "learning_rate": 7.9026013942624e-06, "loss": 1.0174, "step": 40280 }, { "epoch": 0.5804053762046761, "grad_norm": 0.8571519255638123, "learning_rate": 7.89803971503891e-06, "loss": 1.0173, "step": 40290 }, { "epoch": 0.5805494331359753, "grad_norm": 0.7096226215362549, "learning_rate": 7.893478493383178e-06, "loss": 1.0248, "step": 40300 }, { "epoch": 0.5806934900672746, "grad_norm": 0.5956733226776123, "learning_rate": 7.888917730288119e-06, "loss": 1.0308, "step": 40310 }, { "epoch": 0.5808375469985738, "grad_norm": 0.5794342756271362, "learning_rate": 7.884357426746547e-06, "loss": 1.0284, "step": 40320 }, { "epoch": 0.5809816039298731, "grad_norm": 0.590449333190918, "learning_rate": 7.879797583751177e-06, "loss": 1.017, "step": 40330 }, { "epoch": 0.5811256608611723, "grad_norm": 0.5910434722900391, "learning_rate": 7.875238202294623e-06, "loss": 1.0041, "step": 40340 }, { "epoch": 0.5812697177924716, "grad_norm": 0.8716016411781311, "learning_rate": 7.870679283369406e-06, "loss": 1.0237, "step": 40350 }, { "epoch": 0.5814137747237709, "grad_norm": 0.5578383207321167, "learning_rate": 7.866120827967935e-06, "loss": 1.0137, "step": 40360 }, { "epoch": 0.5815578316550701, "grad_norm": 0.6493157148361206, "learning_rate": 7.861562837082518e-06, "loss": 1.0199, "step": 40370 }, { "epoch": 0.5817018885863693, "grad_norm": 0.5964250564575195, "learning_rate": 7.857005311705377e-06, "loss": 1.0359, "step": 40380 }, { "epoch": 0.5818459455176686, "grad_norm": 0.7357345223426819, "learning_rate": 7.852448252828616e-06, "loss": 1.0286, "step": 40390 }, { "epoch": 0.5819900024489678, "grad_norm": 0.6434196829795837, "learning_rate": 7.84789166144424e-06, "loss": 1.0342, "step": 40400 }, { "epoch": 0.5821340593802671, "grad_norm": 0.642611026763916, "learning_rate": 7.843335538544165e-06, "loss": 1.009, "step": 40410 }, { "epoch": 0.5822781163115663, "grad_norm": 0.6158607602119446, "learning_rate": 7.838779885120193e-06, "loss": 1.0187, "step": 40420 }, { "epoch": 0.5824221732428656, "grad_norm": 0.6186838150024414, "learning_rate": 7.834224702164022e-06, "loss": 1.019, "step": 40430 }, { "epoch": 0.5825662301741649, "grad_norm": 0.6436375975608826, "learning_rate": 7.82966999066726e-06, "loss": 0.9933, "step": 40440 }, { "epoch": 0.5827102871054641, "grad_norm": 0.7230498194694519, "learning_rate": 7.8251157516214e-06, "loss": 1.0291, "step": 40450 }, { "epoch": 0.5828543440367633, "grad_norm": 0.6493755578994751, "learning_rate": 7.820561986017838e-06, "loss": 1.0078, "step": 40460 }, { "epoch": 0.5829984009680625, "grad_norm": 0.5933564305305481, "learning_rate": 7.816008694847867e-06, "loss": 1.0167, "step": 40470 }, { "epoch": 0.5831424578993618, "grad_norm": 0.6012160181999207, "learning_rate": 7.811455879102673e-06, "loss": 1.0276, "step": 40480 }, { "epoch": 0.5832865148306611, "grad_norm": 0.6838956475257874, "learning_rate": 7.806903539773344e-06, "loss": 1.0081, "step": 40490 }, { "epoch": 0.5834305717619603, "grad_norm": 0.5689899921417236, "learning_rate": 7.802351677850865e-06, "loss": 1.0194, "step": 40500 }, { "epoch": 0.5835746286932596, "grad_norm": 0.7117645144462585, "learning_rate": 7.797800294326104e-06, "loss": 0.9945, "step": 40510 }, { "epoch": 0.5837186856245589, "grad_norm": 0.6395501494407654, "learning_rate": 7.793249390189843e-06, "loss": 0.9716, "step": 40520 }, { "epoch": 0.5838627425558581, "grad_norm": 0.5645208358764648, "learning_rate": 7.788698966432746e-06, "loss": 1.0202, "step": 40530 }, { "epoch": 0.5840067994871573, "grad_norm": 0.7860140800476074, "learning_rate": 7.784149024045378e-06, "loss": 1.0158, "step": 40540 }, { "epoch": 0.5841508564184565, "grad_norm": 0.5279279351234436, "learning_rate": 7.7795995640182e-06, "loss": 1.0267, "step": 40550 }, { "epoch": 0.5842949133497558, "grad_norm": 0.5274000763893127, "learning_rate": 7.775050587341566e-06, "loss": 1.0154, "step": 40560 }, { "epoch": 0.5844389702810551, "grad_norm": 0.5330712795257568, "learning_rate": 7.770502095005728e-06, "loss": 1.0367, "step": 40570 }, { "epoch": 0.5845830272123543, "grad_norm": 0.6997164487838745, "learning_rate": 7.765954088000829e-06, "loss": 1.0196, "step": 40580 }, { "epoch": 0.5847270841436536, "grad_norm": 0.6270565986633301, "learning_rate": 7.761406567316901e-06, "loss": 1.0052, "step": 40590 }, { "epoch": 0.5848711410749529, "grad_norm": 0.5885422229766846, "learning_rate": 7.756859533943886e-06, "loss": 1.0377, "step": 40600 }, { "epoch": 0.5850151980062521, "grad_norm": 0.5697892308235168, "learning_rate": 7.75231298887161e-06, "loss": 0.9945, "step": 40610 }, { "epoch": 0.5851592549375513, "grad_norm": 0.6186272501945496, "learning_rate": 7.74776693308978e-06, "loss": 1.0199, "step": 40620 }, { "epoch": 0.5853033118688505, "grad_norm": 0.6105360388755798, "learning_rate": 7.743221367588026e-06, "loss": 1.0317, "step": 40630 }, { "epoch": 0.5854473688001498, "grad_norm": 0.6047841906547546, "learning_rate": 7.738676293355847e-06, "loss": 1.0216, "step": 40640 }, { "epoch": 0.5855914257314491, "grad_norm": 0.6993504166603088, "learning_rate": 7.734131711382643e-06, "loss": 1.0026, "step": 40650 }, { "epoch": 0.5857354826627483, "grad_norm": 0.5617213845252991, "learning_rate": 7.729587622657707e-06, "loss": 0.9951, "step": 40660 }, { "epoch": 0.5858795395940476, "grad_norm": 0.6542547345161438, "learning_rate": 7.725044028170225e-06, "loss": 1.0054, "step": 40670 }, { "epoch": 0.5860235965253469, "grad_norm": 0.5995214581489563, "learning_rate": 7.720500928909276e-06, "loss": 1.033, "step": 40680 }, { "epoch": 0.5861676534566461, "grad_norm": 0.5970878601074219, "learning_rate": 7.715958325863831e-06, "loss": 0.9994, "step": 40690 }, { "epoch": 0.5863117103879453, "grad_norm": 0.5853875875473022, "learning_rate": 7.711416220022747e-06, "loss": 1.0091, "step": 40700 }, { "epoch": 0.5864557673192445, "grad_norm": 0.6795983910560608, "learning_rate": 7.706874612374781e-06, "loss": 1.0137, "step": 40710 }, { "epoch": 0.5865998242505438, "grad_norm": 0.5505003929138184, "learning_rate": 7.702333503908582e-06, "loss": 1.0143, "step": 40720 }, { "epoch": 0.5867438811818431, "grad_norm": 0.6230047345161438, "learning_rate": 7.697792895612676e-06, "loss": 1.0478, "step": 40730 }, { "epoch": 0.5868879381131423, "grad_norm": 0.6251411437988281, "learning_rate": 7.693252788475503e-06, "loss": 1.0301, "step": 40740 }, { "epoch": 0.5870319950444416, "grad_norm": 0.5603410005569458, "learning_rate": 7.688713183485379e-06, "loss": 1.0227, "step": 40750 }, { "epoch": 0.5871760519757409, "grad_norm": 0.5499513745307922, "learning_rate": 7.684174081630504e-06, "loss": 1.0216, "step": 40760 }, { "epoch": 0.5873201089070401, "grad_norm": 0.5878617763519287, "learning_rate": 7.67963548389899e-06, "loss": 1.0204, "step": 40770 }, { "epoch": 0.5874641658383393, "grad_norm": 0.672378420829773, "learning_rate": 7.675097391278815e-06, "loss": 1.0213, "step": 40780 }, { "epoch": 0.5876082227696385, "grad_norm": 0.7302099466323853, "learning_rate": 7.670559804757874e-06, "loss": 1.0247, "step": 40790 }, { "epoch": 0.5877522797009378, "grad_norm": 0.5528120994567871, "learning_rate": 7.666022725323924e-06, "loss": 0.9972, "step": 40800 }, { "epoch": 0.5878963366322371, "grad_norm": 0.6364361643791199, "learning_rate": 7.66148615396463e-06, "loss": 1.0111, "step": 40810 }, { "epoch": 0.5880403935635363, "grad_norm": 0.6810481548309326, "learning_rate": 7.656950091667542e-06, "loss": 1.0189, "step": 40820 }, { "epoch": 0.5881844504948356, "grad_norm": 0.5953986048698425, "learning_rate": 7.652414539420096e-06, "loss": 1.0248, "step": 40830 }, { "epoch": 0.5883285074261349, "grad_norm": 0.7078180909156799, "learning_rate": 7.647879498209617e-06, "loss": 1.0214, "step": 40840 }, { "epoch": 0.588472564357434, "grad_norm": 0.5777971744537354, "learning_rate": 7.643344969023326e-06, "loss": 1.008, "step": 40850 }, { "epoch": 0.5886166212887333, "grad_norm": 0.5660359263420105, "learning_rate": 7.638810952848328e-06, "loss": 1.016, "step": 40860 }, { "epoch": 0.5887606782200325, "grad_norm": 0.5861753821372986, "learning_rate": 7.634277450671607e-06, "loss": 1.0284, "step": 40870 }, { "epoch": 0.5889047351513318, "grad_norm": 0.677720844745636, "learning_rate": 7.629744463480053e-06, "loss": 0.9968, "step": 40880 }, { "epoch": 0.5890487920826311, "grad_norm": 0.5892419815063477, "learning_rate": 7.625211992260431e-06, "loss": 0.9945, "step": 40890 }, { "epoch": 0.5891928490139303, "grad_norm": 0.6533917188644409, "learning_rate": 7.6206800379994e-06, "loss": 1.0413, "step": 40900 }, { "epoch": 0.5893369059452296, "grad_norm": 0.6236209273338318, "learning_rate": 7.616148601683503e-06, "loss": 1.0205, "step": 40910 }, { "epoch": 0.5894809628765288, "grad_norm": 0.6106456518173218, "learning_rate": 7.611617684299164e-06, "loss": 1.0172, "step": 40920 }, { "epoch": 0.589625019807828, "grad_norm": 0.7330796718597412, "learning_rate": 7.607087286832714e-06, "loss": 1.0321, "step": 40930 }, { "epoch": 0.5897690767391273, "grad_norm": 0.6348506212234497, "learning_rate": 7.602557410270349e-06, "loss": 1.0331, "step": 40940 }, { "epoch": 0.5899131336704265, "grad_norm": 0.665501594543457, "learning_rate": 7.598028055598161e-06, "loss": 1.0295, "step": 40950 }, { "epoch": 0.5900571906017258, "grad_norm": 0.6182237267494202, "learning_rate": 7.593499223802132e-06, "loss": 0.9935, "step": 40960 }, { "epoch": 0.590201247533025, "grad_norm": 0.765225350856781, "learning_rate": 7.588970915868126e-06, "loss": 1.0425, "step": 40970 }, { "epoch": 0.5903453044643243, "grad_norm": 0.6308184862136841, "learning_rate": 7.584443132781887e-06, "loss": 0.9983, "step": 40980 }, { "epoch": 0.5904893613956236, "grad_norm": 0.7328409552574158, "learning_rate": 7.5799158755290595e-06, "loss": 1.0314, "step": 40990 }, { "epoch": 0.5906334183269228, "grad_norm": 0.5854573249816895, "learning_rate": 7.575389145095157e-06, "loss": 1.0183, "step": 41000 }, { "epoch": 0.590777475258222, "grad_norm": 0.6093853116035461, "learning_rate": 7.570862942465593e-06, "loss": 1.0023, "step": 41010 }, { "epoch": 0.5909215321895213, "grad_norm": 0.637779951095581, "learning_rate": 7.566337268625657e-06, "loss": 1.0249, "step": 41020 }, { "epoch": 0.5910655891208205, "grad_norm": 0.6483780741691589, "learning_rate": 7.561812124560521e-06, "loss": 1.016, "step": 41030 }, { "epoch": 0.5912096460521198, "grad_norm": 0.6446595788002014, "learning_rate": 7.557287511255255e-06, "loss": 1.0353, "step": 41040 }, { "epoch": 0.591353702983419, "grad_norm": 0.6424724459648132, "learning_rate": 7.552763429694802e-06, "loss": 1.0421, "step": 41050 }, { "epoch": 0.5914977599147183, "grad_norm": 0.6585966348648071, "learning_rate": 7.548239880863984e-06, "loss": 1.0247, "step": 41060 }, { "epoch": 0.5916418168460176, "grad_norm": 0.6525462865829468, "learning_rate": 7.543716865747528e-06, "loss": 0.9947, "step": 41070 }, { "epoch": 0.5917858737773168, "grad_norm": 0.5727991461753845, "learning_rate": 7.539194385330024e-06, "loss": 1.0184, "step": 41080 }, { "epoch": 0.591929930708616, "grad_norm": 0.6609602570533752, "learning_rate": 7.534672440595955e-06, "loss": 1.0233, "step": 41090 }, { "epoch": 0.5920739876399153, "grad_norm": 0.6003077030181885, "learning_rate": 7.530151032529687e-06, "loss": 1.028, "step": 41100 }, { "epoch": 0.5922180445712145, "grad_norm": 0.7327446341514587, "learning_rate": 7.5256301621154645e-06, "loss": 1.0289, "step": 41110 }, { "epoch": 0.5923621015025138, "grad_norm": 0.6378503441810608, "learning_rate": 7.521109830337427e-06, "loss": 1.0174, "step": 41120 }, { "epoch": 0.592506158433813, "grad_norm": 0.640765905380249, "learning_rate": 7.516590038179581e-06, "loss": 1.0193, "step": 41130 }, { "epoch": 0.5926502153651123, "grad_norm": 0.6114451289176941, "learning_rate": 7.512070786625823e-06, "loss": 1.0193, "step": 41140 }, { "epoch": 0.5927942722964116, "grad_norm": 0.614895224571228, "learning_rate": 7.507552076659935e-06, "loss": 1.0146, "step": 41150 }, { "epoch": 0.5929383292277108, "grad_norm": 0.6712912321090698, "learning_rate": 7.5030339092655805e-06, "loss": 1.0069, "step": 41160 }, { "epoch": 0.59308238615901, "grad_norm": 0.6977723836898804, "learning_rate": 7.498516285426291e-06, "loss": 1.0238, "step": 41170 }, { "epoch": 0.5932264430903093, "grad_norm": 0.6460567712783813, "learning_rate": 7.493999206125504e-06, "loss": 1.0366, "step": 41180 }, { "epoch": 0.5933705000216085, "grad_norm": 0.5149868726730347, "learning_rate": 7.4894826723465185e-06, "loss": 1.0075, "step": 41190 }, { "epoch": 0.5935145569529078, "grad_norm": 0.6791356205940247, "learning_rate": 7.48496668507252e-06, "loss": 1.0297, "step": 41200 }, { "epoch": 0.593658613884207, "grad_norm": 0.6235713362693787, "learning_rate": 7.480451245286582e-06, "loss": 1.0368, "step": 41210 }, { "epoch": 0.5938026708155063, "grad_norm": 0.6560283303260803, "learning_rate": 7.4759363539716475e-06, "loss": 1.0177, "step": 41220 }, { "epoch": 0.5939467277468056, "grad_norm": 0.6092167496681213, "learning_rate": 7.471422012110552e-06, "loss": 1.0235, "step": 41230 }, { "epoch": 0.5940907846781048, "grad_norm": 0.5454646944999695, "learning_rate": 7.466908220686002e-06, "loss": 1.016, "step": 41240 }, { "epoch": 0.594234841609404, "grad_norm": 0.687719464302063, "learning_rate": 7.4623949806805875e-06, "loss": 1.0207, "step": 41250 }, { "epoch": 0.5943788985407032, "grad_norm": 0.8912255764007568, "learning_rate": 7.457882293076781e-06, "loss": 1.0085, "step": 41260 }, { "epoch": 0.5945229554720025, "grad_norm": 0.596967875957489, "learning_rate": 7.4533701588569306e-06, "loss": 1.0137, "step": 41270 }, { "epoch": 0.5946670124033018, "grad_norm": 0.6342841386795044, "learning_rate": 7.448858579003264e-06, "loss": 1.0155, "step": 41280 }, { "epoch": 0.594811069334601, "grad_norm": 0.7018398642539978, "learning_rate": 7.444347554497894e-06, "loss": 1.0055, "step": 41290 }, { "epoch": 0.5949551262659003, "grad_norm": 0.5752993822097778, "learning_rate": 7.439837086322807e-06, "loss": 1.025, "step": 41300 }, { "epoch": 0.5950991831971996, "grad_norm": 0.5885398387908936, "learning_rate": 7.435327175459863e-06, "loss": 1.0178, "step": 41310 }, { "epoch": 0.5952432401284988, "grad_norm": 0.6342470645904541, "learning_rate": 7.430817822890821e-06, "loss": 1.0219, "step": 41320 }, { "epoch": 0.595387297059798, "grad_norm": 0.6557644009590149, "learning_rate": 7.4263090295972895e-06, "loss": 1.0171, "step": 41330 }, { "epoch": 0.5955313539910972, "grad_norm": 0.6283184289932251, "learning_rate": 7.4218007965607855e-06, "loss": 1.017, "step": 41340 }, { "epoch": 0.5956754109223965, "grad_norm": 0.586904764175415, "learning_rate": 7.417293124762679e-06, "loss": 1.0469, "step": 41350 }, { "epoch": 0.5958194678536958, "grad_norm": 0.6193545460700989, "learning_rate": 7.412786015184228e-06, "loss": 0.9911, "step": 41360 }, { "epoch": 0.595963524784995, "grad_norm": 0.7516299486160278, "learning_rate": 7.408279468806574e-06, "loss": 1.0397, "step": 41370 }, { "epoch": 0.5961075817162943, "grad_norm": 0.5326626896858215, "learning_rate": 7.403773486610726e-06, "loss": 1.0237, "step": 41380 }, { "epoch": 0.5962516386475936, "grad_norm": 0.6334313750267029, "learning_rate": 7.399268069577573e-06, "loss": 1.0405, "step": 41390 }, { "epoch": 0.5963956955788928, "grad_norm": 0.63750159740448, "learning_rate": 7.394763218687886e-06, "loss": 0.9907, "step": 41400 }, { "epoch": 0.596539752510192, "grad_norm": 0.640842080116272, "learning_rate": 7.390258934922308e-06, "loss": 1.0224, "step": 41410 }, { "epoch": 0.5966838094414912, "grad_norm": 0.5708299875259399, "learning_rate": 7.385755219261357e-06, "loss": 1.0164, "step": 41420 }, { "epoch": 0.5968278663727905, "grad_norm": 0.5873056650161743, "learning_rate": 7.381252072685432e-06, "loss": 1.009, "step": 41430 }, { "epoch": 0.5969719233040898, "grad_norm": 0.5703116059303284, "learning_rate": 7.376749496174802e-06, "loss": 1.0041, "step": 41440 }, { "epoch": 0.597115980235389, "grad_norm": 0.6429044008255005, "learning_rate": 7.3722474907096235e-06, "loss": 1.0084, "step": 41450 }, { "epoch": 0.5972600371666883, "grad_norm": 0.6282838582992554, "learning_rate": 7.367746057269917e-06, "loss": 1.0217, "step": 41460 }, { "epoch": 0.5974040940979876, "grad_norm": 0.5623292326927185, "learning_rate": 7.363245196835575e-06, "loss": 1.0143, "step": 41470 }, { "epoch": 0.5975481510292868, "grad_norm": 0.6059540510177612, "learning_rate": 7.358744910386388e-06, "loss": 1.0177, "step": 41480 }, { "epoch": 0.597692207960586, "grad_norm": 0.6665140390396118, "learning_rate": 7.354245198901995e-06, "loss": 1.0433, "step": 41490 }, { "epoch": 0.5978362648918852, "grad_norm": 0.7568016648292542, "learning_rate": 7.34974606336192e-06, "loss": 1.029, "step": 41500 }, { "epoch": 0.5979803218231845, "grad_norm": 0.6323282122612, "learning_rate": 7.345247504745571e-06, "loss": 1.0153, "step": 41510 }, { "epoch": 0.5981243787544838, "grad_norm": 0.539384126663208, "learning_rate": 7.340749524032217e-06, "loss": 1.0182, "step": 41520 }, { "epoch": 0.598268435685783, "grad_norm": 0.6559541821479797, "learning_rate": 7.336252122201005e-06, "loss": 1.023, "step": 41530 }, { "epoch": 0.5984124926170823, "grad_norm": 0.5994787812232971, "learning_rate": 7.331755300230963e-06, "loss": 1.0097, "step": 41540 }, { "epoch": 0.5985565495483816, "grad_norm": 0.6237807273864746, "learning_rate": 7.32725905910098e-06, "loss": 1.0167, "step": 41550 }, { "epoch": 0.5987006064796808, "grad_norm": 0.6201650500297546, "learning_rate": 7.3227633997898314e-06, "loss": 1.0167, "step": 41560 }, { "epoch": 0.59884466341098, "grad_norm": 0.6548067331314087, "learning_rate": 7.318268323276161e-06, "loss": 1.0083, "step": 41570 }, { "epoch": 0.5989887203422792, "grad_norm": 0.7271896600723267, "learning_rate": 7.313773830538475e-06, "loss": 1.0256, "step": 41580 }, { "epoch": 0.5991327772735785, "grad_norm": 0.6019732356071472, "learning_rate": 7.309279922555178e-06, "loss": 1.0147, "step": 41590 }, { "epoch": 0.5992768342048778, "grad_norm": 0.6316788792610168, "learning_rate": 7.30478660030452e-06, "loss": 1.0212, "step": 41600 }, { "epoch": 0.599420891136177, "grad_norm": 0.7106907367706299, "learning_rate": 7.3002938647646336e-06, "loss": 1.0165, "step": 41610 }, { "epoch": 0.5995649480674763, "grad_norm": 0.7058002352714539, "learning_rate": 7.295801716913537e-06, "loss": 1.0231, "step": 41620 }, { "epoch": 0.5997090049987756, "grad_norm": 0.5363566279411316, "learning_rate": 7.291310157729101e-06, "loss": 1.0047, "step": 41630 }, { "epoch": 0.5998530619300748, "grad_norm": 0.6057727932929993, "learning_rate": 7.286819188189075e-06, "loss": 1.0115, "step": 41640 }, { "epoch": 0.599997118861374, "grad_norm": 0.6345871090888977, "learning_rate": 7.282328809271084e-06, "loss": 1.0186, "step": 41650 }, { "epoch": 0.6001411757926732, "grad_norm": 0.6517109274864197, "learning_rate": 7.277839021952621e-06, "loss": 1.0287, "step": 41660 }, { "epoch": 0.6002852327239725, "grad_norm": 0.5592643618583679, "learning_rate": 7.273349827211052e-06, "loss": 1.0158, "step": 41670 }, { "epoch": 0.6004292896552718, "grad_norm": 0.6520673632621765, "learning_rate": 7.268861226023612e-06, "loss": 1.0457, "step": 41680 }, { "epoch": 0.600573346586571, "grad_norm": 0.6690886616706848, "learning_rate": 7.264373219367402e-06, "loss": 1.0384, "step": 41690 }, { "epoch": 0.6007174035178703, "grad_norm": 0.5864752531051636, "learning_rate": 7.259885808219409e-06, "loss": 1.0268, "step": 41700 }, { "epoch": 0.6008614604491695, "grad_norm": 0.6361005306243896, "learning_rate": 7.255398993556477e-06, "loss": 1.0331, "step": 41710 }, { "epoch": 0.6010055173804688, "grad_norm": 0.7044946551322937, "learning_rate": 7.250912776355317e-06, "loss": 1.0202, "step": 41720 }, { "epoch": 0.601149574311768, "grad_norm": 0.5614191889762878, "learning_rate": 7.246427157592526e-06, "loss": 1.0063, "step": 41730 }, { "epoch": 0.6012936312430672, "grad_norm": 0.7571386694908142, "learning_rate": 7.241942138244557e-06, "loss": 1.0251, "step": 41740 }, { "epoch": 0.6014376881743665, "grad_norm": 0.6337618231773376, "learning_rate": 7.237457719287735e-06, "loss": 1.0114, "step": 41750 }, { "epoch": 0.6015817451056658, "grad_norm": 0.5836769342422485, "learning_rate": 7.232973901698259e-06, "loss": 1.0166, "step": 41760 }, { "epoch": 0.601725802036965, "grad_norm": 0.6328428983688354, "learning_rate": 7.228490686452192e-06, "loss": 1.0107, "step": 41770 }, { "epoch": 0.6018698589682643, "grad_norm": 0.6117316484451294, "learning_rate": 7.224008074525472e-06, "loss": 1.012, "step": 41780 }, { "epoch": 0.6020139158995635, "grad_norm": 0.6610920429229736, "learning_rate": 7.219526066893898e-06, "loss": 1.0401, "step": 41790 }, { "epoch": 0.6021579728308628, "grad_norm": 0.636726975440979, "learning_rate": 7.215044664533143e-06, "loss": 1.0295, "step": 41800 }, { "epoch": 0.602302029762162, "grad_norm": 0.5862812995910645, "learning_rate": 7.210563868418746e-06, "loss": 1.0029, "step": 41810 }, { "epoch": 0.6024460866934612, "grad_norm": 0.7004019618034363, "learning_rate": 7.206083679526116e-06, "loss": 1.0148, "step": 41820 }, { "epoch": 0.6025901436247605, "grad_norm": 0.6236006617546082, "learning_rate": 7.2016040988305235e-06, "loss": 1.0302, "step": 41830 }, { "epoch": 0.6027342005560598, "grad_norm": 0.6367349028587341, "learning_rate": 7.1971251273071175e-06, "loss": 1.0014, "step": 41840 }, { "epoch": 0.602878257487359, "grad_norm": 0.6152836084365845, "learning_rate": 7.192646765930908e-06, "loss": 1.0532, "step": 41850 }, { "epoch": 0.6030223144186583, "grad_norm": 0.6188142895698547, "learning_rate": 7.188169015676762e-06, "loss": 1.034, "step": 41860 }, { "epoch": 0.6031663713499575, "grad_norm": 0.5735546350479126, "learning_rate": 7.183691877519441e-06, "loss": 1.0185, "step": 41870 }, { "epoch": 0.6033104282812568, "grad_norm": 0.5709615349769592, "learning_rate": 7.17921535243354e-06, "loss": 1.0239, "step": 41880 }, { "epoch": 0.603454485212556, "grad_norm": 0.5740513205528259, "learning_rate": 7.17473944139355e-06, "loss": 1.0162, "step": 41890 }, { "epoch": 0.6035985421438552, "grad_norm": 0.5819230079650879, "learning_rate": 7.170264145373806e-06, "loss": 1.0214, "step": 41900 }, { "epoch": 0.6037425990751545, "grad_norm": 0.49948909878730774, "learning_rate": 7.165789465348521e-06, "loss": 1.0277, "step": 41910 }, { "epoch": 0.6038866560064537, "grad_norm": 0.7643324136734009, "learning_rate": 7.161315402291773e-06, "loss": 1.0223, "step": 41920 }, { "epoch": 0.604030712937753, "grad_norm": 0.6517783403396606, "learning_rate": 7.156841957177503e-06, "loss": 1.0227, "step": 41930 }, { "epoch": 0.6041747698690523, "grad_norm": 0.6694622039794922, "learning_rate": 7.152369130979514e-06, "loss": 1.0044, "step": 41940 }, { "epoch": 0.6043188268003515, "grad_norm": 0.5882413983345032, "learning_rate": 7.147896924671485e-06, "loss": 1.0138, "step": 41950 }, { "epoch": 0.6044628837316508, "grad_norm": 0.775355875492096, "learning_rate": 7.14342533922695e-06, "loss": 1.0195, "step": 41960 }, { "epoch": 0.60460694066295, "grad_norm": 0.6620295643806458, "learning_rate": 7.1389543756193115e-06, "loss": 1.0273, "step": 41970 }, { "epoch": 0.6047509975942492, "grad_norm": 0.6419587731361389, "learning_rate": 7.134484034821838e-06, "loss": 1.0224, "step": 41980 }, { "epoch": 0.6048950545255485, "grad_norm": 0.5880239605903625, "learning_rate": 7.130014317807658e-06, "loss": 1.014, "step": 41990 }, { "epoch": 0.6050391114568477, "grad_norm": 0.5835565328598022, "learning_rate": 7.1255452255497725e-06, "loss": 1.0388, "step": 42000 }, { "epoch": 0.605183168388147, "grad_norm": 0.5859873294830322, "learning_rate": 7.121076759021041e-06, "loss": 1.012, "step": 42010 }, { "epoch": 0.6053272253194463, "grad_norm": 0.6449272632598877, "learning_rate": 7.116608919194177e-06, "loss": 1.0003, "step": 42020 }, { "epoch": 0.6054712822507455, "grad_norm": 0.6205201745033264, "learning_rate": 7.112141707041782e-06, "loss": 1.0274, "step": 42030 }, { "epoch": 0.6056153391820448, "grad_norm": 0.6808909773826599, "learning_rate": 7.107675123536295e-06, "loss": 1.0246, "step": 42040 }, { "epoch": 0.605759396113344, "grad_norm": 0.6364637017250061, "learning_rate": 7.103209169650034e-06, "loss": 1.0184, "step": 42050 }, { "epoch": 0.6059034530446432, "grad_norm": 0.8138787150382996, "learning_rate": 7.098743846355177e-06, "loss": 1.0194, "step": 42060 }, { "epoch": 0.6060475099759425, "grad_norm": 0.6228765845298767, "learning_rate": 7.094279154623762e-06, "loss": 1.0058, "step": 42070 }, { "epoch": 0.6061915669072417, "grad_norm": 0.616649329662323, "learning_rate": 7.089815095427689e-06, "loss": 1.0056, "step": 42080 }, { "epoch": 0.606335623838541, "grad_norm": 0.529268205165863, "learning_rate": 7.085351669738724e-06, "loss": 1.011, "step": 42090 }, { "epoch": 0.6064796807698403, "grad_norm": 0.6124671697616577, "learning_rate": 7.0808888785284934e-06, "loss": 1.0158, "step": 42100 }, { "epoch": 0.6066237377011395, "grad_norm": 0.6179755330085754, "learning_rate": 7.076426722768486e-06, "loss": 1.0192, "step": 42110 }, { "epoch": 0.6067677946324388, "grad_norm": 0.6596282720565796, "learning_rate": 7.071965203430051e-06, "loss": 1.0272, "step": 42120 }, { "epoch": 0.606911851563738, "grad_norm": 0.7349212765693665, "learning_rate": 7.067504321484393e-06, "loss": 1.0299, "step": 42130 }, { "epoch": 0.6070559084950372, "grad_norm": 0.6352936625480652, "learning_rate": 7.063044077902598e-06, "loss": 1.0095, "step": 42140 }, { "epoch": 0.6071999654263365, "grad_norm": 0.5938249230384827, "learning_rate": 7.0585844736555895e-06, "loss": 0.9927, "step": 42150 }, { "epoch": 0.6073440223576357, "grad_norm": 0.6572223901748657, "learning_rate": 7.054125509714161e-06, "loss": 1.0236, "step": 42160 }, { "epoch": 0.607488079288935, "grad_norm": 0.5948213934898376, "learning_rate": 7.049667187048974e-06, "loss": 0.9959, "step": 42170 }, { "epoch": 0.6076321362202343, "grad_norm": 0.6174025535583496, "learning_rate": 7.045209506630539e-06, "loss": 1.0259, "step": 42180 }, { "epoch": 0.6077761931515335, "grad_norm": 0.500306248664856, "learning_rate": 7.040752469429233e-06, "loss": 1.0236, "step": 42190 }, { "epoch": 0.6079202500828327, "grad_norm": 0.6655672788619995, "learning_rate": 7.036296076415292e-06, "loss": 1.0338, "step": 42200 }, { "epoch": 0.6080643070141319, "grad_norm": 0.6235907673835754, "learning_rate": 7.03184032855881e-06, "loss": 1.027, "step": 42210 }, { "epoch": 0.6082083639454312, "grad_norm": 0.610526442527771, "learning_rate": 7.0273852268297436e-06, "loss": 1.0035, "step": 42220 }, { "epoch": 0.6083524208767305, "grad_norm": 0.6061323881149292, "learning_rate": 7.022930772197907e-06, "loss": 1.0097, "step": 42230 }, { "epoch": 0.6084964778080297, "grad_norm": 0.5258249044418335, "learning_rate": 7.01847696563297e-06, "loss": 1.0219, "step": 42240 }, { "epoch": 0.608640534739329, "grad_norm": 0.6494345664978027, "learning_rate": 7.01402380810447e-06, "loss": 1.0222, "step": 42250 }, { "epoch": 0.6087845916706283, "grad_norm": 0.7211364507675171, "learning_rate": 7.009571300581797e-06, "loss": 1.0248, "step": 42260 }, { "epoch": 0.6089286486019275, "grad_norm": 0.7034059762954712, "learning_rate": 7.005119444034193e-06, "loss": 1.0098, "step": 42270 }, { "epoch": 0.6090727055332267, "grad_norm": 0.5944812297821045, "learning_rate": 7.000668239430777e-06, "loss": 1.022, "step": 42280 }, { "epoch": 0.6092167624645259, "grad_norm": 0.6565080881118774, "learning_rate": 6.996217687740509e-06, "loss": 1.023, "step": 42290 }, { "epoch": 0.6093608193958252, "grad_norm": 0.6511583924293518, "learning_rate": 6.99176778993221e-06, "loss": 1.0014, "step": 42300 }, { "epoch": 0.6095048763271245, "grad_norm": 0.6816222071647644, "learning_rate": 6.987318546974567e-06, "loss": 1.0163, "step": 42310 }, { "epoch": 0.6096489332584237, "grad_norm": 0.6753540635108948, "learning_rate": 6.9828699598361135e-06, "loss": 1.0092, "step": 42320 }, { "epoch": 0.609792990189723, "grad_norm": 0.661775529384613, "learning_rate": 6.978422029485251e-06, "loss": 1.0315, "step": 42330 }, { "epoch": 0.6099370471210223, "grad_norm": 0.5916600823402405, "learning_rate": 6.973974756890231e-06, "loss": 1.0193, "step": 42340 }, { "epoch": 0.6100811040523215, "grad_norm": 0.6581755876541138, "learning_rate": 6.969528143019158e-06, "loss": 1.0378, "step": 42350 }, { "epoch": 0.6102251609836207, "grad_norm": 0.6407250761985779, "learning_rate": 6.965082188840007e-06, "loss": 1.0293, "step": 42360 }, { "epoch": 0.6103692179149199, "grad_norm": 0.604726254940033, "learning_rate": 6.9606368953205965e-06, "loss": 1.025, "step": 42370 }, { "epoch": 0.6105132748462192, "grad_norm": 0.6147050261497498, "learning_rate": 6.956192263428603e-06, "loss": 1.0104, "step": 42380 }, { "epoch": 0.6106573317775185, "grad_norm": 0.6224717497825623, "learning_rate": 6.951748294131567e-06, "loss": 1.0144, "step": 42390 }, { "epoch": 0.6108013887088177, "grad_norm": 0.7217327356338501, "learning_rate": 6.947304988396879e-06, "loss": 1.0395, "step": 42400 }, { "epoch": 0.610945445640117, "grad_norm": 0.6371968388557434, "learning_rate": 6.942862347191777e-06, "loss": 1.0182, "step": 42410 }, { "epoch": 0.6110895025714163, "grad_norm": 0.5764115452766418, "learning_rate": 6.938420371483374e-06, "loss": 1.0339, "step": 42420 }, { "epoch": 0.6112335595027155, "grad_norm": 0.5761376023292542, "learning_rate": 6.9339790622386165e-06, "loss": 1.0158, "step": 42430 }, { "epoch": 0.6113776164340147, "grad_norm": 0.5998538732528687, "learning_rate": 6.929538420424327e-06, "loss": 1.0219, "step": 42440 }, { "epoch": 0.6115216733653139, "grad_norm": 0.599068284034729, "learning_rate": 6.925098447007163e-06, "loss": 1.0374, "step": 42450 }, { "epoch": 0.6116657302966132, "grad_norm": 0.6097978949546814, "learning_rate": 6.920659142953649e-06, "loss": 0.9993, "step": 42460 }, { "epoch": 0.6118097872279125, "grad_norm": 0.5823416113853455, "learning_rate": 6.916220509230161e-06, "loss": 1.0009, "step": 42470 }, { "epoch": 0.6119538441592117, "grad_norm": 0.5852572917938232, "learning_rate": 6.9117825468029276e-06, "loss": 1.0335, "step": 42480 }, { "epoch": 0.612097901090511, "grad_norm": 0.6183345913887024, "learning_rate": 6.90734525663803e-06, "loss": 1.031, "step": 42490 }, { "epoch": 0.6122419580218103, "grad_norm": 0.7070101499557495, "learning_rate": 6.902908639701408e-06, "loss": 1.0149, "step": 42500 }, { "epoch": 0.6123860149531095, "grad_norm": 0.6024273037910461, "learning_rate": 6.898472696958853e-06, "loss": 1.0142, "step": 42510 }, { "epoch": 0.6125300718844087, "grad_norm": 0.5777057409286499, "learning_rate": 6.894037429376e-06, "loss": 1.0045, "step": 42520 }, { "epoch": 0.6126741288157079, "grad_norm": 0.7443797588348389, "learning_rate": 6.889602837918357e-06, "loss": 1.0165, "step": 42530 }, { "epoch": 0.6128181857470072, "grad_norm": 0.5777792930603027, "learning_rate": 6.885168923551265e-06, "loss": 0.9996, "step": 42540 }, { "epoch": 0.6129622426783065, "grad_norm": 0.5409834980964661, "learning_rate": 6.880735687239932e-06, "loss": 1.0398, "step": 42550 }, { "epoch": 0.6131062996096057, "grad_norm": 0.6305665373802185, "learning_rate": 6.876303129949411e-06, "loss": 1.0203, "step": 42560 }, { "epoch": 0.613250356540905, "grad_norm": 0.6016412377357483, "learning_rate": 6.871871252644602e-06, "loss": 1.0159, "step": 42570 }, { "epoch": 0.6133944134722042, "grad_norm": 0.6214472651481628, "learning_rate": 6.867440056290274e-06, "loss": 1.0169, "step": 42580 }, { "epoch": 0.6135384704035035, "grad_norm": 0.6808599829673767, "learning_rate": 6.863009541851031e-06, "loss": 1.0195, "step": 42590 }, { "epoch": 0.6136825273348027, "grad_norm": 0.6278876662254333, "learning_rate": 6.858579710291333e-06, "loss": 1.0107, "step": 42600 }, { "epoch": 0.6138265842661019, "grad_norm": 0.5400162935256958, "learning_rate": 6.854150562575499e-06, "loss": 1.0244, "step": 42610 }, { "epoch": 0.6139706411974012, "grad_norm": 0.6503427624702454, "learning_rate": 6.84972209966769e-06, "loss": 1.0059, "step": 42620 }, { "epoch": 0.6141146981287005, "grad_norm": 0.5955107808113098, "learning_rate": 6.845294322531919e-06, "loss": 1.0378, "step": 42630 }, { "epoch": 0.6142587550599997, "grad_norm": 0.5728549361228943, "learning_rate": 6.840867232132058e-06, "loss": 0.9921, "step": 42640 }, { "epoch": 0.614402811991299, "grad_norm": 0.9009476900100708, "learning_rate": 6.836440829431818e-06, "loss": 1.0149, "step": 42650 }, { "epoch": 0.6145468689225982, "grad_norm": 0.7177078723907471, "learning_rate": 6.83201511539477e-06, "loss": 1.029, "step": 42660 }, { "epoch": 0.6146909258538975, "grad_norm": 0.6054080128669739, "learning_rate": 6.827590090984331e-06, "loss": 1.0007, "step": 42670 }, { "epoch": 0.6148349827851967, "grad_norm": 0.611545979976654, "learning_rate": 6.82316575716376e-06, "loss": 1.0054, "step": 42680 }, { "epoch": 0.6149790397164959, "grad_norm": 0.5639975666999817, "learning_rate": 6.818742114896185e-06, "loss": 1.0116, "step": 42690 }, { "epoch": 0.6151230966477952, "grad_norm": 0.5729649662971497, "learning_rate": 6.814319165144564e-06, "loss": 1.0003, "step": 42700 }, { "epoch": 0.6152671535790945, "grad_norm": 0.5668555498123169, "learning_rate": 6.809896908871713e-06, "loss": 1.0205, "step": 42710 }, { "epoch": 0.6154112105103937, "grad_norm": 0.5614668130874634, "learning_rate": 6.8054753470402975e-06, "loss": 1.0034, "step": 42720 }, { "epoch": 0.615555267441693, "grad_norm": 0.5896543264389038, "learning_rate": 6.801054480612832e-06, "loss": 0.9952, "step": 42730 }, { "epoch": 0.6156993243729922, "grad_norm": 0.6210322380065918, "learning_rate": 6.7966343105516755e-06, "loss": 1.0286, "step": 42740 }, { "epoch": 0.6158433813042915, "grad_norm": 0.6250858306884766, "learning_rate": 6.79221483781904e-06, "loss": 1.0118, "step": 42750 }, { "epoch": 0.6159874382355907, "grad_norm": 0.6140739321708679, "learning_rate": 6.7877960633769824e-06, "loss": 1.032, "step": 42760 }, { "epoch": 0.6161314951668899, "grad_norm": 0.5905389785766602, "learning_rate": 6.783377988187407e-06, "loss": 1.0275, "step": 42770 }, { "epoch": 0.6162755520981892, "grad_norm": 0.6868119239807129, "learning_rate": 6.778960613212073e-06, "loss": 1.0188, "step": 42780 }, { "epoch": 0.6164196090294884, "grad_norm": 0.6418941617012024, "learning_rate": 6.774543939412579e-06, "loss": 1.0118, "step": 42790 }, { "epoch": 0.6165636659607877, "grad_norm": 0.6080740094184875, "learning_rate": 6.770127967750374e-06, "loss": 1.0286, "step": 42800 }, { "epoch": 0.616707722892087, "grad_norm": 0.6652899384498596, "learning_rate": 6.765712699186757e-06, "loss": 1.0483, "step": 42810 }, { "epoch": 0.6168517798233862, "grad_norm": 0.6463445425033569, "learning_rate": 6.761298134682862e-06, "loss": 1.0196, "step": 42820 }, { "epoch": 0.6169958367546855, "grad_norm": 0.658723771572113, "learning_rate": 6.756884275199691e-06, "loss": 1.0043, "step": 42830 }, { "epoch": 0.6171398936859847, "grad_norm": 0.6873076558113098, "learning_rate": 6.7524711216980745e-06, "loss": 1.0129, "step": 42840 }, { "epoch": 0.6172839506172839, "grad_norm": 0.5704846382141113, "learning_rate": 6.748058675138691e-06, "loss": 1.0447, "step": 42850 }, { "epoch": 0.6174280075485832, "grad_norm": 0.6206031441688538, "learning_rate": 6.743646936482075e-06, "loss": 1.0307, "step": 42860 }, { "epoch": 0.6175720644798824, "grad_norm": 0.6279870271682739, "learning_rate": 6.739235906688602e-06, "loss": 1.0138, "step": 42870 }, { "epoch": 0.6177161214111817, "grad_norm": 0.657698392868042, "learning_rate": 6.734825586718485e-06, "loss": 1.0203, "step": 42880 }, { "epoch": 0.617860178342481, "grad_norm": 0.5187119245529175, "learning_rate": 6.730415977531799e-06, "loss": 1.0308, "step": 42890 }, { "epoch": 0.6180042352737802, "grad_norm": 0.5883438587188721, "learning_rate": 6.726007080088447e-06, "loss": 1.0359, "step": 42900 }, { "epoch": 0.6181482922050795, "grad_norm": 0.5652337670326233, "learning_rate": 6.721598895348192e-06, "loss": 1.0271, "step": 42910 }, { "epoch": 0.6182923491363786, "grad_norm": 0.6093305945396423, "learning_rate": 6.717191424270631e-06, "loss": 1.0251, "step": 42920 }, { "epoch": 0.6184364060676779, "grad_norm": 0.5216379165649414, "learning_rate": 6.712784667815209e-06, "loss": 1.006, "step": 42930 }, { "epoch": 0.6185804629989772, "grad_norm": 0.6014308333396912, "learning_rate": 6.70837862694122e-06, "loss": 1.0324, "step": 42940 }, { "epoch": 0.6187245199302764, "grad_norm": 0.6771434545516968, "learning_rate": 6.703973302607798e-06, "loss": 1.01, "step": 42950 }, { "epoch": 0.6188685768615757, "grad_norm": 0.778387188911438, "learning_rate": 6.699568695773913e-06, "loss": 1.0241, "step": 42960 }, { "epoch": 0.619012633792875, "grad_norm": 0.5566554665565491, "learning_rate": 6.6951648073984e-06, "loss": 1.0038, "step": 42970 }, { "epoch": 0.6191566907241742, "grad_norm": 0.5816327333450317, "learning_rate": 6.690761638439915e-06, "loss": 1.0235, "step": 42980 }, { "epoch": 0.6193007476554735, "grad_norm": 0.5478515625, "learning_rate": 6.686359189856969e-06, "loss": 1.0165, "step": 42990 }, { "epoch": 0.6194448045867726, "grad_norm": 0.6159368753433228, "learning_rate": 6.681957462607918e-06, "loss": 1.0179, "step": 43000 }, { "epoch": 0.6195888615180719, "grad_norm": 0.580099880695343, "learning_rate": 6.6775564576509534e-06, "loss": 1.018, "step": 43010 }, { "epoch": 0.6197329184493712, "grad_norm": 0.6514555215835571, "learning_rate": 6.6731561759441155e-06, "loss": 1.0196, "step": 43020 }, { "epoch": 0.6198769753806704, "grad_norm": 0.61688232421875, "learning_rate": 6.668756618445285e-06, "loss": 1.0349, "step": 43030 }, { "epoch": 0.6200210323119697, "grad_norm": 0.721116840839386, "learning_rate": 6.664357786112181e-06, "loss": 1.0309, "step": 43040 }, { "epoch": 0.620165089243269, "grad_norm": 0.5536653995513916, "learning_rate": 6.659959679902376e-06, "loss": 1.015, "step": 43050 }, { "epoch": 0.6203091461745682, "grad_norm": 0.5684986114501953, "learning_rate": 6.655562300773273e-06, "loss": 1.0214, "step": 43060 }, { "epoch": 0.6204532031058675, "grad_norm": 0.6205838322639465, "learning_rate": 6.651165649682115e-06, "loss": 1.0194, "step": 43070 }, { "epoch": 0.6205972600371666, "grad_norm": 0.6007439494132996, "learning_rate": 6.646769727586004e-06, "loss": 1.0302, "step": 43080 }, { "epoch": 0.6207413169684659, "grad_norm": 0.7089323401451111, "learning_rate": 6.642374535441865e-06, "loss": 1.0239, "step": 43090 }, { "epoch": 0.6208853738997652, "grad_norm": 0.7257370948791504, "learning_rate": 6.637980074206467e-06, "loss": 1.0189, "step": 43100 }, { "epoch": 0.6210294308310644, "grad_norm": 0.5735003352165222, "learning_rate": 6.6335863448364305e-06, "loss": 1.0265, "step": 43110 }, { "epoch": 0.6211734877623637, "grad_norm": 0.5978228449821472, "learning_rate": 6.629193348288205e-06, "loss": 1.0179, "step": 43120 }, { "epoch": 0.621317544693663, "grad_norm": 0.6640300750732422, "learning_rate": 6.624801085518089e-06, "loss": 1.0142, "step": 43130 }, { "epoch": 0.6214616016249622, "grad_norm": 0.6534985303878784, "learning_rate": 6.620409557482216e-06, "loss": 1.0149, "step": 43140 }, { "epoch": 0.6216056585562615, "grad_norm": 0.608762800693512, "learning_rate": 6.616018765136558e-06, "loss": 1.0123, "step": 43150 }, { "epoch": 0.6217497154875606, "grad_norm": 0.5738025903701782, "learning_rate": 6.611628709436937e-06, "loss": 1.0383, "step": 43160 }, { "epoch": 0.6218937724188599, "grad_norm": 0.5874068140983582, "learning_rate": 6.607239391339002e-06, "loss": 0.9957, "step": 43170 }, { "epoch": 0.6220378293501592, "grad_norm": 0.6023411750793457, "learning_rate": 6.602850811798245e-06, "loss": 1.0301, "step": 43180 }, { "epoch": 0.6221818862814584, "grad_norm": 0.8262518048286438, "learning_rate": 6.598462971770006e-06, "loss": 1.0215, "step": 43190 }, { "epoch": 0.6223259432127577, "grad_norm": 0.7225953936576843, "learning_rate": 6.594075872209456e-06, "loss": 1.0256, "step": 43200 }, { "epoch": 0.622470000144057, "grad_norm": 0.5936123132705688, "learning_rate": 6.589689514071598e-06, "loss": 1.0334, "step": 43210 }, { "epoch": 0.6226140570753562, "grad_norm": 0.6910178065299988, "learning_rate": 6.585303898311293e-06, "loss": 1.001, "step": 43220 }, { "epoch": 0.6227581140066555, "grad_norm": 0.5673627853393555, "learning_rate": 6.580919025883218e-06, "loss": 0.9986, "step": 43230 }, { "epoch": 0.6229021709379546, "grad_norm": 0.5227172374725342, "learning_rate": 6.5765348977419106e-06, "loss": 1.0018, "step": 43240 }, { "epoch": 0.6230462278692539, "grad_norm": 0.572830080986023, "learning_rate": 6.572151514841727e-06, "loss": 1.0269, "step": 43250 }, { "epoch": 0.6231902848005532, "grad_norm": 0.6719442009925842, "learning_rate": 6.567768878136869e-06, "loss": 1.0249, "step": 43260 }, { "epoch": 0.6233343417318524, "grad_norm": 0.5868476629257202, "learning_rate": 6.563386988581382e-06, "loss": 1.0237, "step": 43270 }, { "epoch": 0.6234783986631517, "grad_norm": 0.5998367071151733, "learning_rate": 6.559005847129139e-06, "loss": 1.0069, "step": 43280 }, { "epoch": 0.623622455594451, "grad_norm": 0.6530718207359314, "learning_rate": 6.554625454733852e-06, "loss": 1.0166, "step": 43290 }, { "epoch": 0.6237665125257502, "grad_norm": 0.6501248478889465, "learning_rate": 6.550245812349077e-06, "loss": 0.9883, "step": 43300 }, { "epoch": 0.6239105694570495, "grad_norm": 0.5071753263473511, "learning_rate": 6.545866920928199e-06, "loss": 1.0081, "step": 43310 }, { "epoch": 0.6240546263883486, "grad_norm": 0.6153305172920227, "learning_rate": 6.5414887814244396e-06, "loss": 1.0083, "step": 43320 }, { "epoch": 0.6241986833196479, "grad_norm": 0.6175336837768555, "learning_rate": 6.537111394790865e-06, "loss": 0.9848, "step": 43330 }, { "epoch": 0.6243427402509472, "grad_norm": 0.7310288548469543, "learning_rate": 6.532734761980368e-06, "loss": 1.0238, "step": 43340 }, { "epoch": 0.6244867971822464, "grad_norm": 0.7438796758651733, "learning_rate": 6.5283588839456836e-06, "loss": 1.0042, "step": 43350 }, { "epoch": 0.6246308541135457, "grad_norm": 0.6033474206924438, "learning_rate": 6.523983761639381e-06, "loss": 1.0002, "step": 43360 }, { "epoch": 0.624774911044845, "grad_norm": 0.5989162921905518, "learning_rate": 6.519609396013855e-06, "loss": 1.0127, "step": 43370 }, { "epoch": 0.6249189679761442, "grad_norm": 0.6061357259750366, "learning_rate": 6.515235788021359e-06, "loss": 1.0082, "step": 43380 }, { "epoch": 0.6250630249074435, "grad_norm": 0.80193692445755, "learning_rate": 6.510862938613958e-06, "loss": 1.0086, "step": 43390 }, { "epoch": 0.6252070818387426, "grad_norm": 0.6099424362182617, "learning_rate": 6.506490848743559e-06, "loss": 1.0282, "step": 43400 }, { "epoch": 0.6253511387700419, "grad_norm": 0.630855143070221, "learning_rate": 6.502119519361911e-06, "loss": 1.0116, "step": 43410 }, { "epoch": 0.6254951957013412, "grad_norm": 0.5582447648048401, "learning_rate": 6.4977489514205915e-06, "loss": 1.0261, "step": 43420 }, { "epoch": 0.6256392526326404, "grad_norm": 0.6460801362991333, "learning_rate": 6.4933791458710095e-06, "loss": 1.0183, "step": 43430 }, { "epoch": 0.6257833095639397, "grad_norm": 0.6204429268836975, "learning_rate": 6.489010103664415e-06, "loss": 1.0305, "step": 43440 }, { "epoch": 0.625927366495239, "grad_norm": 0.5879707336425781, "learning_rate": 6.484641825751885e-06, "loss": 1.0109, "step": 43450 }, { "epoch": 0.6260714234265382, "grad_norm": 0.6646779775619507, "learning_rate": 6.480274313084338e-06, "loss": 1.0228, "step": 43460 }, { "epoch": 0.6262154803578374, "grad_norm": 0.5505615472793579, "learning_rate": 6.475907566612518e-06, "loss": 1.0212, "step": 43470 }, { "epoch": 0.6263595372891366, "grad_norm": 0.6140030026435852, "learning_rate": 6.471541587287003e-06, "loss": 1.0113, "step": 43480 }, { "epoch": 0.6265035942204359, "grad_norm": 0.5819932818412781, "learning_rate": 6.467176376058213e-06, "loss": 1.0014, "step": 43490 }, { "epoch": 0.6266476511517352, "grad_norm": 0.574243426322937, "learning_rate": 6.4628119338763904e-06, "loss": 1.0067, "step": 43500 }, { "epoch": 0.6267917080830344, "grad_norm": 0.5761387348175049, "learning_rate": 6.45844826169161e-06, "loss": 1.0292, "step": 43510 }, { "epoch": 0.6269357650143337, "grad_norm": 0.639982283115387, "learning_rate": 6.454085360453792e-06, "loss": 0.976, "step": 43520 }, { "epoch": 0.6270798219456329, "grad_norm": 0.6161608695983887, "learning_rate": 6.449723231112674e-06, "loss": 1.027, "step": 43530 }, { "epoch": 0.6272238788769322, "grad_norm": 0.678606390953064, "learning_rate": 6.445361874617831e-06, "loss": 1.0406, "step": 43540 }, { "epoch": 0.6273679358082314, "grad_norm": 0.601266622543335, "learning_rate": 6.441001291918673e-06, "loss": 1.0126, "step": 43550 }, { "epoch": 0.6275119927395306, "grad_norm": 0.601625919342041, "learning_rate": 6.436641483964434e-06, "loss": 1.0175, "step": 43560 }, { "epoch": 0.6276560496708299, "grad_norm": 0.5323131084442139, "learning_rate": 6.432282451704191e-06, "loss": 1.0012, "step": 43570 }, { "epoch": 0.6278001066021291, "grad_norm": 0.6979629993438721, "learning_rate": 6.427924196086841e-06, "loss": 1.0329, "step": 43580 }, { "epoch": 0.6279441635334284, "grad_norm": 0.5388063788414001, "learning_rate": 6.423566718061116e-06, "loss": 1.0152, "step": 43590 }, { "epoch": 0.6280882204647277, "grad_norm": 0.649337112903595, "learning_rate": 6.41921001857558e-06, "loss": 1.0278, "step": 43600 }, { "epoch": 0.6282322773960269, "grad_norm": 0.6489685773849487, "learning_rate": 6.414854098578629e-06, "loss": 1.0177, "step": 43610 }, { "epoch": 0.6283763343273262, "grad_norm": 0.6887600421905518, "learning_rate": 6.410498959018476e-06, "loss": 1.0225, "step": 43620 }, { "epoch": 0.6285203912586254, "grad_norm": 0.7380510568618774, "learning_rate": 6.4061446008431895e-06, "loss": 1.0289, "step": 43630 }, { "epoch": 0.6286644481899246, "grad_norm": 0.6783304810523987, "learning_rate": 6.401791025000646e-06, "loss": 1.052, "step": 43640 }, { "epoch": 0.6288085051212239, "grad_norm": 0.6634083390235901, "learning_rate": 6.397438232438553e-06, "loss": 1.0182, "step": 43650 }, { "epoch": 0.6289525620525231, "grad_norm": 0.6285718679428101, "learning_rate": 6.393086224104464e-06, "loss": 1.0127, "step": 43660 }, { "epoch": 0.6290966189838224, "grad_norm": 0.5838115215301514, "learning_rate": 6.388735000945745e-06, "loss": 1.0106, "step": 43670 }, { "epoch": 0.6292406759151217, "grad_norm": 0.5893847942352295, "learning_rate": 6.3843845639096e-06, "loss": 1.0177, "step": 43680 }, { "epoch": 0.6293847328464209, "grad_norm": 0.6196548342704773, "learning_rate": 6.380034913943058e-06, "loss": 1.0028, "step": 43690 }, { "epoch": 0.6295287897777202, "grad_norm": 0.6291428804397583, "learning_rate": 6.375686051992975e-06, "loss": 1.0266, "step": 43700 }, { "epoch": 0.6296728467090194, "grad_norm": 0.6694349050521851, "learning_rate": 6.3713379790060445e-06, "loss": 1.0248, "step": 43710 }, { "epoch": 0.6298169036403186, "grad_norm": 0.6268457174301147, "learning_rate": 6.366990695928778e-06, "loss": 1.0144, "step": 43720 }, { "epoch": 0.6299609605716179, "grad_norm": 0.652484655380249, "learning_rate": 6.3626442037075175e-06, "loss": 1.0055, "step": 43730 }, { "epoch": 0.6301050175029171, "grad_norm": 0.6141695380210876, "learning_rate": 6.358298503288438e-06, "loss": 1.0028, "step": 43740 }, { "epoch": 0.6302490744342164, "grad_norm": 0.6055964827537537, "learning_rate": 6.35395359561754e-06, "loss": 1.0261, "step": 43750 }, { "epoch": 0.6303931313655157, "grad_norm": 0.5862328410148621, "learning_rate": 6.349609481640641e-06, "loss": 1.0067, "step": 43760 }, { "epoch": 0.6305371882968149, "grad_norm": 0.6251682639122009, "learning_rate": 6.345266162303405e-06, "loss": 1.0071, "step": 43770 }, { "epoch": 0.6306812452281142, "grad_norm": 0.6669208407402039, "learning_rate": 6.340923638551304e-06, "loss": 1.0134, "step": 43780 }, { "epoch": 0.6308253021594133, "grad_norm": 0.6178964376449585, "learning_rate": 6.336581911329656e-06, "loss": 1.0239, "step": 43790 }, { "epoch": 0.6309693590907126, "grad_norm": 0.7305578589439392, "learning_rate": 6.332240981583586e-06, "loss": 1.0052, "step": 43800 }, { "epoch": 0.6311134160220119, "grad_norm": 0.6335142850875854, "learning_rate": 6.327900850258057e-06, "loss": 1.0306, "step": 43810 }, { "epoch": 0.6312574729533111, "grad_norm": 0.641200840473175, "learning_rate": 6.323561518297857e-06, "loss": 1.0324, "step": 43820 }, { "epoch": 0.6314015298846104, "grad_norm": 0.599928617477417, "learning_rate": 6.319222986647599e-06, "loss": 1.0069, "step": 43830 }, { "epoch": 0.6315455868159097, "grad_norm": 0.5591249465942383, "learning_rate": 6.314885256251717e-06, "loss": 1.0023, "step": 43840 }, { "epoch": 0.6316896437472089, "grad_norm": 0.58012455701828, "learning_rate": 6.310548328054482e-06, "loss": 1.0185, "step": 43850 }, { "epoch": 0.6318337006785082, "grad_norm": 0.5824377536773682, "learning_rate": 6.30621220299998e-06, "loss": 1.0052, "step": 43860 }, { "epoch": 0.6319777576098073, "grad_norm": 0.6389300227165222, "learning_rate": 6.3018768820321216e-06, "loss": 1.0307, "step": 43870 }, { "epoch": 0.6321218145411066, "grad_norm": 0.586188793182373, "learning_rate": 6.297542366094653e-06, "loss": 1.0025, "step": 43880 }, { "epoch": 0.6322658714724059, "grad_norm": 0.6241875290870667, "learning_rate": 6.293208656131135e-06, "loss": 1.0152, "step": 43890 }, { "epoch": 0.6324099284037051, "grad_norm": 0.6493820548057556, "learning_rate": 6.28887575308496e-06, "loss": 1.0012, "step": 43900 }, { "epoch": 0.6325539853350044, "grad_norm": 0.6596702933311462, "learning_rate": 6.284543657899338e-06, "loss": 1.0102, "step": 43910 }, { "epoch": 0.6326980422663037, "grad_norm": 0.6422639489173889, "learning_rate": 6.280212371517302e-06, "loss": 1.0222, "step": 43920 }, { "epoch": 0.6328420991976029, "grad_norm": 0.6775549650192261, "learning_rate": 6.275881894881726e-06, "loss": 1.0628, "step": 43930 }, { "epoch": 0.6329861561289022, "grad_norm": 0.5756018161773682, "learning_rate": 6.271552228935284e-06, "loss": 1.0296, "step": 43940 }, { "epoch": 0.6331302130602013, "grad_norm": 0.5965772867202759, "learning_rate": 6.267223374620486e-06, "loss": 1.0149, "step": 43950 }, { "epoch": 0.6332742699915006, "grad_norm": 0.5774609446525574, "learning_rate": 6.262895332879666e-06, "loss": 1.01, "step": 43960 }, { "epoch": 0.6334183269227999, "grad_norm": 0.6798967719078064, "learning_rate": 6.258568104654981e-06, "loss": 1.0075, "step": 43970 }, { "epoch": 0.6335623838540991, "grad_norm": 0.621671736240387, "learning_rate": 6.254241690888403e-06, "loss": 1.0107, "step": 43980 }, { "epoch": 0.6337064407853984, "grad_norm": 0.5781314969062805, "learning_rate": 6.249916092521738e-06, "loss": 1.032, "step": 43990 }, { "epoch": 0.6338504977166977, "grad_norm": 0.5990108251571655, "learning_rate": 6.245591310496606e-06, "loss": 1.0236, "step": 44000 }, { "epoch": 0.6339945546479969, "grad_norm": 0.5996828079223633, "learning_rate": 6.241267345754456e-06, "loss": 0.9997, "step": 44010 }, { "epoch": 0.6341386115792962, "grad_norm": 0.6769276857376099, "learning_rate": 6.236944199236555e-06, "loss": 1.0181, "step": 44020 }, { "epoch": 0.6342826685105953, "grad_norm": 0.5939728021621704, "learning_rate": 6.2326218718839836e-06, "loss": 0.9951, "step": 44030 }, { "epoch": 0.6344267254418946, "grad_norm": 0.5858749151229858, "learning_rate": 6.228300364637665e-06, "loss": 1.0081, "step": 44040 }, { "epoch": 0.6345707823731939, "grad_norm": 0.6309555172920227, "learning_rate": 6.223979678438325e-06, "loss": 1.0111, "step": 44050 }, { "epoch": 0.6347148393044931, "grad_norm": 0.5809255242347717, "learning_rate": 6.219659814226515e-06, "loss": 1.0231, "step": 44060 }, { "epoch": 0.6348588962357924, "grad_norm": 0.6437588334083557, "learning_rate": 6.21534077294262e-06, "loss": 1.0263, "step": 44070 }, { "epoch": 0.6350029531670917, "grad_norm": 0.5920225977897644, "learning_rate": 6.211022555526829e-06, "loss": 1.0422, "step": 44080 }, { "epoch": 0.6351470100983909, "grad_norm": 0.6045619249343872, "learning_rate": 6.206705162919154e-06, "loss": 1.0228, "step": 44090 }, { "epoch": 0.6352910670296902, "grad_norm": 0.5552858710289001, "learning_rate": 6.202388596059439e-06, "loss": 1.0113, "step": 44100 }, { "epoch": 0.6354351239609893, "grad_norm": 0.7025449275970459, "learning_rate": 6.198072855887337e-06, "loss": 1.0206, "step": 44110 }, { "epoch": 0.6355791808922886, "grad_norm": 0.7095867395401001, "learning_rate": 6.193757943342329e-06, "loss": 0.9855, "step": 44120 }, { "epoch": 0.6357232378235879, "grad_norm": 0.6301383972167969, "learning_rate": 6.189443859363709e-06, "loss": 1.0123, "step": 44130 }, { "epoch": 0.6358672947548871, "grad_norm": 0.5972219705581665, "learning_rate": 6.185130604890592e-06, "loss": 1.0374, "step": 44140 }, { "epoch": 0.6360113516861864, "grad_norm": 0.649064838886261, "learning_rate": 6.180818180861918e-06, "loss": 1.0203, "step": 44150 }, { "epoch": 0.6361554086174857, "grad_norm": 0.5843871235847473, "learning_rate": 6.176506588216445e-06, "loss": 1.0171, "step": 44160 }, { "epoch": 0.6362994655487849, "grad_norm": 0.6294156312942505, "learning_rate": 6.172195827892733e-06, "loss": 0.9945, "step": 44170 }, { "epoch": 0.6364435224800842, "grad_norm": 0.5691481828689575, "learning_rate": 6.167885900829193e-06, "loss": 1.0032, "step": 44180 }, { "epoch": 0.6365875794113833, "grad_norm": 0.5907384157180786, "learning_rate": 6.163576807964024e-06, "loss": 1.0088, "step": 44190 }, { "epoch": 0.6367316363426826, "grad_norm": 0.555263876914978, "learning_rate": 6.15926855023526e-06, "loss": 1.0111, "step": 44200 }, { "epoch": 0.6368756932739819, "grad_norm": 0.5968215465545654, "learning_rate": 6.154961128580752e-06, "loss": 1.0179, "step": 44210 }, { "epoch": 0.6370197502052811, "grad_norm": 0.6137616634368896, "learning_rate": 6.150654543938161e-06, "loss": 0.991, "step": 44220 }, { "epoch": 0.6371638071365804, "grad_norm": 0.5981063842773438, "learning_rate": 6.146348797244978e-06, "loss": 1.0065, "step": 44230 }, { "epoch": 0.6373078640678796, "grad_norm": 0.6552395820617676, "learning_rate": 6.142043889438499e-06, "loss": 1.0328, "step": 44240 }, { "epoch": 0.6374519209991789, "grad_norm": 0.5807239413261414, "learning_rate": 6.1377398214558435e-06, "loss": 1.0153, "step": 44250 }, { "epoch": 0.6375959779304782, "grad_norm": 0.626849353313446, "learning_rate": 6.1334365942339505e-06, "loss": 0.9922, "step": 44260 }, { "epoch": 0.6377400348617773, "grad_norm": 0.6650713086128235, "learning_rate": 6.129134208709572e-06, "loss": 1.0205, "step": 44270 }, { "epoch": 0.6378840917930766, "grad_norm": 0.6666887998580933, "learning_rate": 6.124832665819274e-06, "loss": 1.0311, "step": 44280 }, { "epoch": 0.6380281487243759, "grad_norm": 0.6410893201828003, "learning_rate": 6.1205319664994515e-06, "loss": 1.0218, "step": 44290 }, { "epoch": 0.6381722056556751, "grad_norm": 0.6107708811759949, "learning_rate": 6.116232111686302e-06, "loss": 1.027, "step": 44300 }, { "epoch": 0.6383162625869744, "grad_norm": 0.6178907155990601, "learning_rate": 6.1119331023158415e-06, "loss": 0.9946, "step": 44310 }, { "epoch": 0.6384603195182736, "grad_norm": 0.612699568271637, "learning_rate": 6.1076349393239134e-06, "loss": 1.0263, "step": 44320 }, { "epoch": 0.6386043764495729, "grad_norm": 0.6232872605323792, "learning_rate": 6.103337623646157e-06, "loss": 1.0208, "step": 44330 }, { "epoch": 0.6387484333808722, "grad_norm": 0.6625041365623474, "learning_rate": 6.099041156218053e-06, "loss": 1.0127, "step": 44340 }, { "epoch": 0.6388924903121713, "grad_norm": 0.614063560962677, "learning_rate": 6.094745537974873e-06, "loss": 1.0228, "step": 44350 }, { "epoch": 0.6390365472434706, "grad_norm": 0.6330206394195557, "learning_rate": 6.0904507698517145e-06, "loss": 1.0347, "step": 44360 }, { "epoch": 0.6391806041747699, "grad_norm": 0.5692284107208252, "learning_rate": 6.086156852783493e-06, "loss": 1.0066, "step": 44370 }, { "epoch": 0.6393246611060691, "grad_norm": 0.6342631578445435, "learning_rate": 6.081863787704932e-06, "loss": 0.9999, "step": 44380 }, { "epoch": 0.6394687180373684, "grad_norm": 0.5760719776153564, "learning_rate": 6.077571575550574e-06, "loss": 1.0193, "step": 44390 }, { "epoch": 0.6396127749686676, "grad_norm": 0.626284658908844, "learning_rate": 6.0732802172547745e-06, "loss": 1.011, "step": 44400 }, { "epoch": 0.6397568318999669, "grad_norm": 0.6053497195243835, "learning_rate": 6.068989713751703e-06, "loss": 1.0192, "step": 44410 }, { "epoch": 0.6399008888312662, "grad_norm": 0.6346369385719299, "learning_rate": 6.06470006597534e-06, "loss": 1.0104, "step": 44420 }, { "epoch": 0.6400449457625653, "grad_norm": 0.6166449189186096, "learning_rate": 6.060411274859488e-06, "loss": 1.0118, "step": 44430 }, { "epoch": 0.6401890026938646, "grad_norm": 0.7997186779975891, "learning_rate": 6.056123341337752e-06, "loss": 1.0358, "step": 44440 }, { "epoch": 0.6403330596251638, "grad_norm": 0.71084064245224, "learning_rate": 6.051836266343561e-06, "loss": 1.0074, "step": 44450 }, { "epoch": 0.6404771165564631, "grad_norm": 0.7153636813163757, "learning_rate": 6.0475500508101525e-06, "loss": 1.0221, "step": 44460 }, { "epoch": 0.6406211734877624, "grad_norm": 0.5669187903404236, "learning_rate": 6.043264695670568e-06, "loss": 1.0162, "step": 44470 }, { "epoch": 0.6407652304190616, "grad_norm": 0.6377536058425903, "learning_rate": 6.038980201857681e-06, "loss": 1.0407, "step": 44480 }, { "epoch": 0.6409092873503609, "grad_norm": 0.5607938766479492, "learning_rate": 6.0346965703041594e-06, "loss": 1.0044, "step": 44490 }, { "epoch": 0.6410533442816602, "grad_norm": 0.5617562532424927, "learning_rate": 6.030413801942492e-06, "loss": 1.015, "step": 44500 }, { "epoch": 0.6411974012129593, "grad_norm": 0.6788965463638306, "learning_rate": 6.026131897704981e-06, "loss": 1.01, "step": 44510 }, { "epoch": 0.6413414581442586, "grad_norm": 0.6697009801864624, "learning_rate": 6.021850858523737e-06, "loss": 1.0085, "step": 44520 }, { "epoch": 0.6414855150755578, "grad_norm": 0.7084874510765076, "learning_rate": 6.01757068533068e-06, "loss": 1.0094, "step": 44530 }, { "epoch": 0.6416295720068571, "grad_norm": 0.7182435989379883, "learning_rate": 6.01329137905755e-06, "loss": 1.0516, "step": 44540 }, { "epoch": 0.6417736289381564, "grad_norm": 0.6141085028648376, "learning_rate": 6.0090129406358874e-06, "loss": 1.0195, "step": 44550 }, { "epoch": 0.6419176858694556, "grad_norm": 0.633694589138031, "learning_rate": 6.004735370997055e-06, "loss": 1.0002, "step": 44560 }, { "epoch": 0.6420617428007549, "grad_norm": 0.7533735632896423, "learning_rate": 6.000458671072218e-06, "loss": 1.042, "step": 44570 }, { "epoch": 0.6422057997320542, "grad_norm": 0.5945227146148682, "learning_rate": 5.996182841792349e-06, "loss": 1.0289, "step": 44580 }, { "epoch": 0.6423498566633533, "grad_norm": 0.6342747211456299, "learning_rate": 5.9919078840882485e-06, "loss": 1.0154, "step": 44590 }, { "epoch": 0.6424939135946526, "grad_norm": 0.6527490615844727, "learning_rate": 5.987633798890509e-06, "loss": 1.0011, "step": 44600 }, { "epoch": 0.6426379705259518, "grad_norm": 0.6016778349876404, "learning_rate": 5.983360587129537e-06, "loss": 1.0321, "step": 44610 }, { "epoch": 0.6427820274572511, "grad_norm": 0.584942102432251, "learning_rate": 5.979088249735559e-06, "loss": 1.0102, "step": 44620 }, { "epoch": 0.6429260843885504, "grad_norm": 0.6678653359413147, "learning_rate": 5.974816787638601e-06, "loss": 1.006, "step": 44630 }, { "epoch": 0.6430701413198496, "grad_norm": 0.6083000898361206, "learning_rate": 5.970546201768498e-06, "loss": 1.0164, "step": 44640 }, { "epoch": 0.6432141982511489, "grad_norm": 0.6137006878852844, "learning_rate": 5.966276493054903e-06, "loss": 1.0366, "step": 44650 }, { "epoch": 0.6433582551824482, "grad_norm": 0.6589402556419373, "learning_rate": 5.962007662427267e-06, "loss": 0.9968, "step": 44660 }, { "epoch": 0.6435023121137473, "grad_norm": 0.6604875326156616, "learning_rate": 5.957739710814863e-06, "loss": 1.0236, "step": 44670 }, { "epoch": 0.6436463690450466, "grad_norm": 0.6565546989440918, "learning_rate": 5.953472639146759e-06, "loss": 1.0022, "step": 44680 }, { "epoch": 0.6437904259763458, "grad_norm": 0.650482714176178, "learning_rate": 5.949206448351838e-06, "loss": 1.0101, "step": 44690 }, { "epoch": 0.6439344829076451, "grad_norm": 0.7212525010108948, "learning_rate": 5.944941139358795e-06, "loss": 1.0131, "step": 44700 }, { "epoch": 0.6440785398389444, "grad_norm": 0.7000559568405151, "learning_rate": 5.9406767130961275e-06, "loss": 1.0256, "step": 44710 }, { "epoch": 0.6442225967702436, "grad_norm": 0.6735440492630005, "learning_rate": 5.936413170492137e-06, "loss": 1.0272, "step": 44720 }, { "epoch": 0.6443666537015429, "grad_norm": 0.569595992565155, "learning_rate": 5.9321505124749455e-06, "loss": 0.9982, "step": 44730 }, { "epoch": 0.6445107106328422, "grad_norm": 0.7124947309494019, "learning_rate": 5.927888739972471e-06, "loss": 1.0342, "step": 44740 }, { "epoch": 0.6446547675641413, "grad_norm": 0.6878064274787903, "learning_rate": 5.923627853912441e-06, "loss": 1.0333, "step": 44750 }, { "epoch": 0.6447988244954406, "grad_norm": 0.5419682860374451, "learning_rate": 5.919367855222396e-06, "loss": 1.002, "step": 44760 }, { "epoch": 0.6449428814267398, "grad_norm": 0.6019362211227417, "learning_rate": 5.915108744829674e-06, "loss": 0.9952, "step": 44770 }, { "epoch": 0.6450869383580391, "grad_norm": 0.6707798838615417, "learning_rate": 5.910850523661429e-06, "loss": 1.0258, "step": 44780 }, { "epoch": 0.6452309952893384, "grad_norm": 0.5261653065681458, "learning_rate": 5.906593192644615e-06, "loss": 1.0214, "step": 44790 }, { "epoch": 0.6453750522206376, "grad_norm": 0.6842942833900452, "learning_rate": 5.902336752705992e-06, "loss": 1.0161, "step": 44800 }, { "epoch": 0.6455191091519369, "grad_norm": 0.6605587005615234, "learning_rate": 5.8980812047721335e-06, "loss": 1.0134, "step": 44810 }, { "epoch": 0.645663166083236, "grad_norm": 0.5872989892959595, "learning_rate": 5.89382654976941e-06, "loss": 1.0131, "step": 44820 }, { "epoch": 0.6458072230145353, "grad_norm": 0.6560587882995605, "learning_rate": 5.889572788624001e-06, "loss": 1.0239, "step": 44830 }, { "epoch": 0.6459512799458346, "grad_norm": 0.5824592709541321, "learning_rate": 5.885319922261894e-06, "loss": 1.0144, "step": 44840 }, { "epoch": 0.6460953368771338, "grad_norm": 0.5636458396911621, "learning_rate": 5.881067951608881e-06, "loss": 1.024, "step": 44850 }, { "epoch": 0.6462393938084331, "grad_norm": 0.6905114650726318, "learning_rate": 5.876816877590546e-06, "loss": 1.0265, "step": 44860 }, { "epoch": 0.6463834507397324, "grad_norm": 0.6577576398849487, "learning_rate": 5.872566701132305e-06, "loss": 1.0203, "step": 44870 }, { "epoch": 0.6465275076710316, "grad_norm": 0.6715118288993835, "learning_rate": 5.8683174231593485e-06, "loss": 1.0153, "step": 44880 }, { "epoch": 0.6466715646023309, "grad_norm": 0.6628469228744507, "learning_rate": 5.8640690445966985e-06, "loss": 1.0208, "step": 44890 }, { "epoch": 0.64681562153363, "grad_norm": 0.7258967161178589, "learning_rate": 5.8598215663691606e-06, "loss": 1.0065, "step": 44900 }, { "epoch": 0.6469596784649293, "grad_norm": 0.6283655762672424, "learning_rate": 5.8555749894013514e-06, "loss": 1.0174, "step": 44910 }, { "epoch": 0.6471037353962286, "grad_norm": 0.6564602851867676, "learning_rate": 5.8513293146176975e-06, "loss": 1.0164, "step": 44920 }, { "epoch": 0.6472477923275278, "grad_norm": 0.7157777547836304, "learning_rate": 5.847084542942419e-06, "loss": 1.0132, "step": 44930 }, { "epoch": 0.6473918492588271, "grad_norm": 0.7218253016471863, "learning_rate": 5.842840675299546e-06, "loss": 1.0143, "step": 44940 }, { "epoch": 0.6475359061901264, "grad_norm": 0.5819610953330994, "learning_rate": 5.838597712612911e-06, "loss": 1.0164, "step": 44950 }, { "epoch": 0.6476799631214256, "grad_norm": 0.5980701446533203, "learning_rate": 5.834355655806148e-06, "loss": 1.0173, "step": 44960 }, { "epoch": 0.6478240200527249, "grad_norm": 0.6305130124092102, "learning_rate": 5.830114505802693e-06, "loss": 1.0087, "step": 44970 }, { "epoch": 0.647968076984024, "grad_norm": 0.621622622013092, "learning_rate": 5.825874263525787e-06, "loss": 1.0086, "step": 44980 }, { "epoch": 0.6481121339153233, "grad_norm": 0.6889428496360779, "learning_rate": 5.821634929898468e-06, "loss": 1.0297, "step": 44990 }, { "epoch": 0.6482561908466226, "grad_norm": 0.6638681292533875, "learning_rate": 5.817396505843586e-06, "loss": 1.0162, "step": 45000 }, { "epoch": 0.6484002477779218, "grad_norm": 0.57753586769104, "learning_rate": 5.813158992283788e-06, "loss": 1.0267, "step": 45010 }, { "epoch": 0.6485443047092211, "grad_norm": 0.634385883808136, "learning_rate": 5.808922390141519e-06, "loss": 1.0385, "step": 45020 }, { "epoch": 0.6486883616405204, "grad_norm": 0.571789562702179, "learning_rate": 5.804686700339031e-06, "loss": 1.0261, "step": 45030 }, { "epoch": 0.6488324185718196, "grad_norm": 0.6051955819129944, "learning_rate": 5.800451923798372e-06, "loss": 1.027, "step": 45040 }, { "epoch": 0.6489764755031189, "grad_norm": 0.6441025137901306, "learning_rate": 5.796218061441393e-06, "loss": 0.9946, "step": 45050 }, { "epoch": 0.649120532434418, "grad_norm": 0.701108992099762, "learning_rate": 5.79198511418976e-06, "loss": 0.998, "step": 45060 }, { "epoch": 0.6492645893657173, "grad_norm": 0.6444332599639893, "learning_rate": 5.787753082964913e-06, "loss": 1.0082, "step": 45070 }, { "epoch": 0.6494086462970166, "grad_norm": 0.6186667084693909, "learning_rate": 5.783521968688107e-06, "loss": 1.0365, "step": 45080 }, { "epoch": 0.6495527032283158, "grad_norm": 0.7480693459510803, "learning_rate": 5.779291772280408e-06, "loss": 1.0116, "step": 45090 }, { "epoch": 0.6496967601596151, "grad_norm": 0.6325919032096863, "learning_rate": 5.775062494662664e-06, "loss": 1.0168, "step": 45100 }, { "epoch": 0.6498408170909143, "grad_norm": 0.7014506459236145, "learning_rate": 5.770834136755532e-06, "loss": 1.0212, "step": 45110 }, { "epoch": 0.6499848740222136, "grad_norm": 0.6091514825820923, "learning_rate": 5.766606699479468e-06, "loss": 1.0341, "step": 45120 }, { "epoch": 0.6501289309535129, "grad_norm": 0.6190800666809082, "learning_rate": 5.7623801837547215e-06, "loss": 0.9984, "step": 45130 }, { "epoch": 0.650272987884812, "grad_norm": 0.7025150656700134, "learning_rate": 5.7581545905013546e-06, "loss": 1.0082, "step": 45140 }, { "epoch": 0.6504170448161113, "grad_norm": 0.641904354095459, "learning_rate": 5.753929920639216e-06, "loss": 1.0048, "step": 45150 }, { "epoch": 0.6505611017474106, "grad_norm": 0.7571387887001038, "learning_rate": 5.749706175087961e-06, "loss": 1.0437, "step": 45160 }, { "epoch": 0.6507051586787098, "grad_norm": 0.6438396573066711, "learning_rate": 5.745483354767038e-06, "loss": 1.012, "step": 45170 }, { "epoch": 0.6508492156100091, "grad_norm": 0.6127173900604248, "learning_rate": 5.741261460595697e-06, "loss": 1.0125, "step": 45180 }, { "epoch": 0.6509932725413083, "grad_norm": 0.568905234336853, "learning_rate": 5.737040493492985e-06, "loss": 0.9998, "step": 45190 }, { "epoch": 0.6511373294726076, "grad_norm": 0.6029191017150879, "learning_rate": 5.732820454377757e-06, "loss": 1.002, "step": 45200 }, { "epoch": 0.6512813864039069, "grad_norm": 0.6624503135681152, "learning_rate": 5.72860134416864e-06, "loss": 1.0013, "step": 45210 }, { "epoch": 0.651425443335206, "grad_norm": 0.7227169275283813, "learning_rate": 5.724383163784093e-06, "loss": 1.0325, "step": 45220 }, { "epoch": 0.6515695002665053, "grad_norm": 0.7109741568565369, "learning_rate": 5.720165914142348e-06, "loss": 1.0249, "step": 45230 }, { "epoch": 0.6517135571978045, "grad_norm": 0.7473984956741333, "learning_rate": 5.715949596161443e-06, "loss": 1.027, "step": 45240 }, { "epoch": 0.6518576141291038, "grad_norm": 0.6912552118301392, "learning_rate": 5.7117342107592135e-06, "loss": 1.0129, "step": 45250 }, { "epoch": 0.6520016710604031, "grad_norm": 0.5782814621925354, "learning_rate": 5.707519758853288e-06, "loss": 1.0191, "step": 45260 }, { "epoch": 0.6521457279917023, "grad_norm": 0.6410259008407593, "learning_rate": 5.703306241361094e-06, "loss": 1.015, "step": 45270 }, { "epoch": 0.6522897849230016, "grad_norm": 0.6248414516448975, "learning_rate": 5.699093659199861e-06, "loss": 1.0051, "step": 45280 }, { "epoch": 0.6524338418543009, "grad_norm": 0.6174207925796509, "learning_rate": 5.694882013286609e-06, "loss": 1.0286, "step": 45290 }, { "epoch": 0.6525778987856, "grad_norm": 0.6624664664268494, "learning_rate": 5.690671304538151e-06, "loss": 1.0518, "step": 45300 }, { "epoch": 0.6527219557168993, "grad_norm": 0.6410439014434814, "learning_rate": 5.6864615338711046e-06, "loss": 1.008, "step": 45310 }, { "epoch": 0.6528660126481985, "grad_norm": 0.6226246356964111, "learning_rate": 5.682252702201874e-06, "loss": 1.0026, "step": 45320 }, { "epoch": 0.6530100695794978, "grad_norm": 0.5791779160499573, "learning_rate": 5.678044810446668e-06, "loss": 1.0417, "step": 45330 }, { "epoch": 0.6531541265107971, "grad_norm": 0.676311731338501, "learning_rate": 5.673837859521492e-06, "loss": 1.0227, "step": 45340 }, { "epoch": 0.6532981834420963, "grad_norm": 0.6526930332183838, "learning_rate": 5.669631850342124e-06, "loss": 1.009, "step": 45350 }, { "epoch": 0.6534422403733956, "grad_norm": 0.5468018651008606, "learning_rate": 5.665426783824171e-06, "loss": 1.0152, "step": 45360 }, { "epoch": 0.6535862973046949, "grad_norm": 0.5868613123893738, "learning_rate": 5.66122266088301e-06, "loss": 1.0262, "step": 45370 }, { "epoch": 0.653730354235994, "grad_norm": 0.6021547913551331, "learning_rate": 5.6570194824338235e-06, "loss": 1.0243, "step": 45380 }, { "epoch": 0.6538744111672933, "grad_norm": 0.6306912302970886, "learning_rate": 5.652817249391583e-06, "loss": 1.0468, "step": 45390 }, { "epoch": 0.6540184680985925, "grad_norm": 0.6520081758499146, "learning_rate": 5.648615962671058e-06, "loss": 1.0283, "step": 45400 }, { "epoch": 0.6541625250298918, "grad_norm": 0.5919010639190674, "learning_rate": 5.644415623186807e-06, "loss": 1.0154, "step": 45410 }, { "epoch": 0.6543065819611911, "grad_norm": 0.637642502784729, "learning_rate": 5.640216231853195e-06, "loss": 1.0315, "step": 45420 }, { "epoch": 0.6544506388924903, "grad_norm": 0.6275111436843872, "learning_rate": 5.636017789584365e-06, "loss": 1.0067, "step": 45430 }, { "epoch": 0.6545946958237896, "grad_norm": 0.7290439605712891, "learning_rate": 5.6318202972942595e-06, "loss": 1.0065, "step": 45440 }, { "epoch": 0.6547387527550889, "grad_norm": 0.5982494354248047, "learning_rate": 5.627623755896618e-06, "loss": 1.0209, "step": 45450 }, { "epoch": 0.654882809686388, "grad_norm": 0.7246947288513184, "learning_rate": 5.6234281663049625e-06, "loss": 1.0423, "step": 45460 }, { "epoch": 0.6550268666176873, "grad_norm": 0.6050679087638855, "learning_rate": 5.619233529432626e-06, "loss": 0.9994, "step": 45470 }, { "epoch": 0.6551709235489865, "grad_norm": 0.600634753704071, "learning_rate": 5.61503984619272e-06, "loss": 1.0036, "step": 45480 }, { "epoch": 0.6553149804802858, "grad_norm": 0.9534344673156738, "learning_rate": 5.610847117498141e-06, "loss": 1.0101, "step": 45490 }, { "epoch": 0.6554590374115851, "grad_norm": 0.6611275672912598, "learning_rate": 5.6066553442616026e-06, "loss": 0.9951, "step": 45500 }, { "epoch": 0.6556030943428843, "grad_norm": 0.6024879217147827, "learning_rate": 5.602464527395587e-06, "loss": 1.0037, "step": 45510 }, { "epoch": 0.6557471512741836, "grad_norm": 0.557040274143219, "learning_rate": 5.59827466781238e-06, "loss": 1.0151, "step": 45520 }, { "epoch": 0.6558912082054829, "grad_norm": 0.721174418926239, "learning_rate": 5.594085766424057e-06, "loss": 1.0248, "step": 45530 }, { "epoch": 0.656035265136782, "grad_norm": 0.7238805294036865, "learning_rate": 5.589897824142479e-06, "loss": 1.0108, "step": 45540 }, { "epoch": 0.6561793220680813, "grad_norm": 0.6601713299751282, "learning_rate": 5.5857108418793095e-06, "loss": 1.027, "step": 45550 }, { "epoch": 0.6563233789993805, "grad_norm": 0.7457177639007568, "learning_rate": 5.581524820545996e-06, "loss": 1.024, "step": 45560 }, { "epoch": 0.6564674359306798, "grad_norm": 0.7239423990249634, "learning_rate": 5.577339761053775e-06, "loss": 1.0035, "step": 45570 }, { "epoch": 0.6566114928619791, "grad_norm": 0.5816134214401245, "learning_rate": 5.573155664313679e-06, "loss": 1.022, "step": 45580 }, { "epoch": 0.6567555497932783, "grad_norm": 0.6314454674720764, "learning_rate": 5.5689725312365254e-06, "loss": 1.0348, "step": 45590 }, { "epoch": 0.6568996067245776, "grad_norm": 0.6644384860992432, "learning_rate": 5.564790362732922e-06, "loss": 1.023, "step": 45600 }, { "epoch": 0.6570436636558769, "grad_norm": 0.6335042119026184, "learning_rate": 5.560609159713282e-06, "loss": 1.0395, "step": 45610 }, { "epoch": 0.657187720587176, "grad_norm": 0.616644561290741, "learning_rate": 5.556428923087781e-06, "loss": 1.0313, "step": 45620 }, { "epoch": 0.6573317775184753, "grad_norm": 0.6697009205818176, "learning_rate": 5.552249653766401e-06, "loss": 1.0231, "step": 45630 }, { "epoch": 0.6574758344497745, "grad_norm": 0.6900887489318848, "learning_rate": 5.548071352658918e-06, "loss": 1.0255, "step": 45640 }, { "epoch": 0.6576198913810738, "grad_norm": 0.6656466722488403, "learning_rate": 5.543894020674887e-06, "loss": 1.0222, "step": 45650 }, { "epoch": 0.6577639483123731, "grad_norm": 0.6971928477287292, "learning_rate": 5.5397176587236575e-06, "loss": 1.0136, "step": 45660 }, { "epoch": 0.6579080052436723, "grad_norm": 0.630372166633606, "learning_rate": 5.535542267714362e-06, "loss": 1.0155, "step": 45670 }, { "epoch": 0.6580520621749716, "grad_norm": 0.6088457703590393, "learning_rate": 5.531367848555925e-06, "loss": 1.0325, "step": 45680 }, { "epoch": 0.6581961191062708, "grad_norm": 0.6991166472434998, "learning_rate": 5.527194402157065e-06, "loss": 1.037, "step": 45690 }, { "epoch": 0.65834017603757, "grad_norm": 0.7558432221412659, "learning_rate": 5.5230219294262834e-06, "loss": 1.009, "step": 45700 }, { "epoch": 0.6584842329688693, "grad_norm": 0.6258352398872375, "learning_rate": 5.518850431271867e-06, "loss": 0.9941, "step": 45710 }, { "epoch": 0.6586282899001685, "grad_norm": 0.7778695821762085, "learning_rate": 5.514679908601894e-06, "loss": 1.0382, "step": 45720 }, { "epoch": 0.6587723468314678, "grad_norm": 0.5646911263465881, "learning_rate": 5.510510362324228e-06, "loss": 1.0158, "step": 45730 }, { "epoch": 0.6589164037627671, "grad_norm": 0.6628832817077637, "learning_rate": 5.5063417933465224e-06, "loss": 1.0066, "step": 45740 }, { "epoch": 0.6590604606940663, "grad_norm": 0.7023528814315796, "learning_rate": 5.5021742025762245e-06, "loss": 1.0069, "step": 45750 }, { "epoch": 0.6592045176253656, "grad_norm": 0.6527752876281738, "learning_rate": 5.498007590920549e-06, "loss": 1.0201, "step": 45760 }, { "epoch": 0.6593485745566648, "grad_norm": 0.611611545085907, "learning_rate": 5.493841959286519e-06, "loss": 1.0115, "step": 45770 }, { "epoch": 0.659492631487964, "grad_norm": 0.5528119206428528, "learning_rate": 5.489677308580932e-06, "loss": 1.0375, "step": 45780 }, { "epoch": 0.6596366884192633, "grad_norm": 0.6203455924987793, "learning_rate": 5.485513639710375e-06, "loss": 1.0061, "step": 45790 }, { "epoch": 0.6597807453505625, "grad_norm": 0.6796348690986633, "learning_rate": 5.481350953581221e-06, "loss": 1.0151, "step": 45800 }, { "epoch": 0.6599248022818618, "grad_norm": 0.6706281900405884, "learning_rate": 5.47718925109963e-06, "loss": 1.0089, "step": 45810 }, { "epoch": 0.660068859213161, "grad_norm": 0.6729138493537903, "learning_rate": 5.473028533171543e-06, "loss": 1.0127, "step": 45820 }, { "epoch": 0.6602129161444603, "grad_norm": 0.6415603756904602, "learning_rate": 5.468868800702698e-06, "loss": 0.9967, "step": 45830 }, { "epoch": 0.6603569730757596, "grad_norm": 0.565737247467041, "learning_rate": 5.464710054598607e-06, "loss": 1.0264, "step": 45840 }, { "epoch": 0.6605010300070588, "grad_norm": 0.6968961358070374, "learning_rate": 5.460552295764575e-06, "loss": 1.0321, "step": 45850 }, { "epoch": 0.660645086938358, "grad_norm": 0.638938844203949, "learning_rate": 5.456395525105686e-06, "loss": 1.0028, "step": 45860 }, { "epoch": 0.6607891438696573, "grad_norm": 0.7692632675170898, "learning_rate": 5.452239743526808e-06, "loss": 1.001, "step": 45870 }, { "epoch": 0.6609332008009565, "grad_norm": 0.7068073749542236, "learning_rate": 5.448084951932604e-06, "loss": 1.0274, "step": 45880 }, { "epoch": 0.6610772577322558, "grad_norm": 0.5941410064697266, "learning_rate": 5.443931151227517e-06, "loss": 1.0312, "step": 45890 }, { "epoch": 0.661221314663555, "grad_norm": 0.6203605532646179, "learning_rate": 5.43977834231576e-06, "loss": 1.0148, "step": 45900 }, { "epoch": 0.6613653715948543, "grad_norm": 0.6441105008125305, "learning_rate": 5.435626526101354e-06, "loss": 1.0096, "step": 45910 }, { "epoch": 0.6615094285261536, "grad_norm": 0.8502092957496643, "learning_rate": 5.4314757034880876e-06, "loss": 1.0068, "step": 45920 }, { "epoch": 0.6616534854574528, "grad_norm": 0.6751788258552551, "learning_rate": 5.4273258753795365e-06, "loss": 1.0128, "step": 45930 }, { "epoch": 0.661797542388752, "grad_norm": 0.6241414546966553, "learning_rate": 5.423177042679064e-06, "loss": 1.0185, "step": 45940 }, { "epoch": 0.6619415993200513, "grad_norm": 0.5784713625907898, "learning_rate": 5.419029206289811e-06, "loss": 1.0159, "step": 45950 }, { "epoch": 0.6620856562513505, "grad_norm": 0.6208174824714661, "learning_rate": 5.414882367114701e-06, "loss": 1.0197, "step": 45960 }, { "epoch": 0.6622297131826498, "grad_norm": 0.6757817268371582, "learning_rate": 5.4107365260564525e-06, "loss": 1.0115, "step": 45970 }, { "epoch": 0.662373770113949, "grad_norm": 0.6738228797912598, "learning_rate": 5.406591684017552e-06, "loss": 0.9987, "step": 45980 }, { "epoch": 0.6625178270452483, "grad_norm": 0.5967774391174316, "learning_rate": 5.402447841900278e-06, "loss": 1.0077, "step": 45990 }, { "epoch": 0.6626618839765476, "grad_norm": 0.6555769443511963, "learning_rate": 5.398305000606685e-06, "loss": 1.0269, "step": 46000 }, { "epoch": 0.6628059409078468, "grad_norm": 0.6483225226402283, "learning_rate": 5.394163161038608e-06, "loss": 0.996, "step": 46010 }, { "epoch": 0.662949997839146, "grad_norm": 0.6243937611579895, "learning_rate": 5.390022324097681e-06, "loss": 1.0199, "step": 46020 }, { "epoch": 0.6630940547704453, "grad_norm": 0.614417314529419, "learning_rate": 5.385882490685297e-06, "loss": 1.0502, "step": 46030 }, { "epoch": 0.6632381117017445, "grad_norm": 0.7182544469833374, "learning_rate": 5.381743661702638e-06, "loss": 1.0235, "step": 46040 }, { "epoch": 0.6633821686330438, "grad_norm": 0.6448560357093811, "learning_rate": 5.377605838050679e-06, "loss": 1.0316, "step": 46050 }, { "epoch": 0.663526225564343, "grad_norm": 0.61222904920578, "learning_rate": 5.373469020630162e-06, "loss": 1.0101, "step": 46060 }, { "epoch": 0.6636702824956423, "grad_norm": 0.6609721183776855, "learning_rate": 5.369333210341618e-06, "loss": 1.0079, "step": 46070 }, { "epoch": 0.6638143394269416, "grad_norm": 0.6307268738746643, "learning_rate": 5.365198408085352e-06, "loss": 1.0144, "step": 46080 }, { "epoch": 0.6639583963582408, "grad_norm": 0.5823897123336792, "learning_rate": 5.361064614761455e-06, "loss": 1.0328, "step": 46090 }, { "epoch": 0.66410245328954, "grad_norm": 0.645470380783081, "learning_rate": 5.356931831269798e-06, "loss": 1.0163, "step": 46100 }, { "epoch": 0.6642465102208392, "grad_norm": 0.6113483309745789, "learning_rate": 5.3528000585100305e-06, "loss": 1.0279, "step": 46110 }, { "epoch": 0.6643905671521385, "grad_norm": 0.5694353580474854, "learning_rate": 5.348669297381583e-06, "loss": 1.026, "step": 46120 }, { "epoch": 0.6645346240834378, "grad_norm": 0.5848360657691956, "learning_rate": 5.344539548783665e-06, "loss": 1.0331, "step": 46130 }, { "epoch": 0.664678681014737, "grad_norm": 0.633795440196991, "learning_rate": 5.3404108136152634e-06, "loss": 1.0262, "step": 46140 }, { "epoch": 0.6648227379460363, "grad_norm": 0.5898723006248474, "learning_rate": 5.336283092775145e-06, "loss": 1.0066, "step": 46150 }, { "epoch": 0.6649667948773356, "grad_norm": 0.5714325308799744, "learning_rate": 5.332156387161871e-06, "loss": 1.0232, "step": 46160 }, { "epoch": 0.6651108518086347, "grad_norm": 0.5904276967048645, "learning_rate": 5.3280306976737515e-06, "loss": 0.9951, "step": 46170 }, { "epoch": 0.665254908739934, "grad_norm": 0.6491442918777466, "learning_rate": 5.323906025208898e-06, "loss": 1.0156, "step": 46180 }, { "epoch": 0.6653989656712332, "grad_norm": 0.5478079319000244, "learning_rate": 5.319782370665198e-06, "loss": 1.0167, "step": 46190 }, { "epoch": 0.6655430226025325, "grad_norm": 0.6193534731864929, "learning_rate": 5.315659734940315e-06, "loss": 0.9997, "step": 46200 }, { "epoch": 0.6656870795338318, "grad_norm": 0.6399695873260498, "learning_rate": 5.311538118931685e-06, "loss": 1.0263, "step": 46210 }, { "epoch": 0.665831136465131, "grad_norm": 0.6257351636886597, "learning_rate": 5.30741752353653e-06, "loss": 1.0472, "step": 46220 }, { "epoch": 0.6659751933964303, "grad_norm": 0.6219946146011353, "learning_rate": 5.3032979496518424e-06, "loss": 1.0001, "step": 46230 }, { "epoch": 0.6661192503277296, "grad_norm": 0.6141021251678467, "learning_rate": 5.2991793981744024e-06, "loss": 1.0091, "step": 46240 }, { "epoch": 0.6662633072590287, "grad_norm": 0.8189109563827515, "learning_rate": 5.295061870000761e-06, "loss": 1.0213, "step": 46250 }, { "epoch": 0.666407364190328, "grad_norm": 0.5471169352531433, "learning_rate": 5.290945366027245e-06, "loss": 1.0116, "step": 46260 }, { "epoch": 0.6665514211216272, "grad_norm": 0.6610715985298157, "learning_rate": 5.286829887149961e-06, "loss": 1.0138, "step": 46270 }, { "epoch": 0.6666954780529265, "grad_norm": 0.6075613498687744, "learning_rate": 5.282715434264794e-06, "loss": 0.9949, "step": 46280 }, { "epoch": 0.6668395349842258, "grad_norm": 0.7018479704856873, "learning_rate": 5.2786020082673975e-06, "loss": 1.014, "step": 46290 }, { "epoch": 0.666983591915525, "grad_norm": 0.6411354541778564, "learning_rate": 5.274489610053219e-06, "loss": 1.016, "step": 46300 }, { "epoch": 0.6671276488468243, "grad_norm": 0.6892482042312622, "learning_rate": 5.270378240517456e-06, "loss": 1.0397, "step": 46310 }, { "epoch": 0.6672717057781236, "grad_norm": 0.6826410889625549, "learning_rate": 5.26626790055511e-06, "loss": 1.0184, "step": 46320 }, { "epoch": 0.6674157627094227, "grad_norm": 0.641666054725647, "learning_rate": 5.262158591060939e-06, "loss": 1.021, "step": 46330 }, { "epoch": 0.667559819640722, "grad_norm": 0.6144629716873169, "learning_rate": 5.2580503129294855e-06, "loss": 1.0263, "step": 46340 }, { "epoch": 0.6677038765720212, "grad_norm": 0.7096126079559326, "learning_rate": 5.2539430670550625e-06, "loss": 1.0034, "step": 46350 }, { "epoch": 0.6678479335033205, "grad_norm": 0.6550100445747375, "learning_rate": 5.249836854331763e-06, "loss": 1.0139, "step": 46360 }, { "epoch": 0.6679919904346198, "grad_norm": 0.6665602922439575, "learning_rate": 5.245731675653447e-06, "loss": 1.0414, "step": 46370 }, { "epoch": 0.668136047365919, "grad_norm": 0.5964598059654236, "learning_rate": 5.241627531913765e-06, "loss": 1.0247, "step": 46380 }, { "epoch": 0.6682801042972183, "grad_norm": 0.798501193523407, "learning_rate": 5.237524424006125e-06, "loss": 1.0345, "step": 46390 }, { "epoch": 0.6684241612285176, "grad_norm": 0.5867642760276794, "learning_rate": 5.233422352823721e-06, "loss": 0.9878, "step": 46400 }, { "epoch": 0.6685682181598167, "grad_norm": 0.5655377507209778, "learning_rate": 5.229321319259517e-06, "loss": 1.0238, "step": 46410 }, { "epoch": 0.668712275091116, "grad_norm": 0.6586424708366394, "learning_rate": 5.225221324206244e-06, "loss": 0.9973, "step": 46420 }, { "epoch": 0.6688563320224152, "grad_norm": 0.579627275466919, "learning_rate": 5.221122368556426e-06, "loss": 1.0146, "step": 46430 }, { "epoch": 0.6690003889537145, "grad_norm": 0.6135485172271729, "learning_rate": 5.217024453202348e-06, "loss": 1.0023, "step": 46440 }, { "epoch": 0.6691444458850138, "grad_norm": 0.7205511331558228, "learning_rate": 5.212927579036058e-06, "loss": 1.0071, "step": 46450 }, { "epoch": 0.669288502816313, "grad_norm": 0.5820243954658508, "learning_rate": 5.208831746949398e-06, "loss": 1.018, "step": 46460 }, { "epoch": 0.6694325597476123, "grad_norm": 0.6837009787559509, "learning_rate": 5.204736957833975e-06, "loss": 1.0251, "step": 46470 }, { "epoch": 0.6695766166789116, "grad_norm": 0.6436097025871277, "learning_rate": 5.200643212581165e-06, "loss": 1.0201, "step": 46480 }, { "epoch": 0.6697206736102107, "grad_norm": 0.5506840348243713, "learning_rate": 5.196550512082119e-06, "loss": 1.013, "step": 46490 }, { "epoch": 0.66986473054151, "grad_norm": 0.6284917593002319, "learning_rate": 5.192458857227764e-06, "loss": 1.0183, "step": 46500 }, { "epoch": 0.6700087874728092, "grad_norm": 0.5801482796669006, "learning_rate": 5.18836824890879e-06, "loss": 1.0164, "step": 46510 }, { "epoch": 0.6701528444041085, "grad_norm": 0.6342283487319946, "learning_rate": 5.184278688015678e-06, "loss": 1.0397, "step": 46520 }, { "epoch": 0.6702969013354078, "grad_norm": 0.6485182642936707, "learning_rate": 5.180190175438662e-06, "loss": 0.9983, "step": 46530 }, { "epoch": 0.670440958266707, "grad_norm": 0.614905059337616, "learning_rate": 5.176102712067755e-06, "loss": 1.0289, "step": 46540 }, { "epoch": 0.6705850151980063, "grad_norm": 0.6317541003227234, "learning_rate": 5.172016298792743e-06, "loss": 1.0153, "step": 46550 }, { "epoch": 0.6707290721293055, "grad_norm": 0.5996819734573364, "learning_rate": 5.167930936503176e-06, "loss": 1.0203, "step": 46560 }, { "epoch": 0.6708731290606047, "grad_norm": 0.7232657074928284, "learning_rate": 5.163846626088394e-06, "loss": 1.028, "step": 46570 }, { "epoch": 0.671017185991904, "grad_norm": 0.6078417897224426, "learning_rate": 5.159763368437482e-06, "loss": 0.9903, "step": 46580 }, { "epoch": 0.6711612429232032, "grad_norm": 0.6394873261451721, "learning_rate": 5.15568116443931e-06, "loss": 1.0242, "step": 46590 }, { "epoch": 0.6713052998545025, "grad_norm": 0.6627945899963379, "learning_rate": 5.151600014982525e-06, "loss": 1.0312, "step": 46600 }, { "epoch": 0.6714493567858018, "grad_norm": 0.7143929600715637, "learning_rate": 5.1475199209555325e-06, "loss": 1.0465, "step": 46610 }, { "epoch": 0.671593413717101, "grad_norm": 0.7025628685951233, "learning_rate": 5.143440883246514e-06, "loss": 1.0271, "step": 46620 }, { "epoch": 0.6717374706484003, "grad_norm": 0.6601547002792358, "learning_rate": 5.1393629027434186e-06, "loss": 1.0001, "step": 46630 }, { "epoch": 0.6718815275796995, "grad_norm": 0.6126776933670044, "learning_rate": 5.135285980333962e-06, "loss": 1.0093, "step": 46640 }, { "epoch": 0.6720255845109987, "grad_norm": 0.6820292472839355, "learning_rate": 5.131210116905645e-06, "loss": 1.0215, "step": 46650 }, { "epoch": 0.672169641442298, "grad_norm": 0.6530098915100098, "learning_rate": 5.127135313345719e-06, "loss": 0.9993, "step": 46660 }, { "epoch": 0.6723136983735972, "grad_norm": 0.5953645706176758, "learning_rate": 5.123061570541214e-06, "loss": 1.0027, "step": 46670 }, { "epoch": 0.6724577553048965, "grad_norm": 0.6368499994277954, "learning_rate": 5.118988889378927e-06, "loss": 1.0375, "step": 46680 }, { "epoch": 0.6726018122361958, "grad_norm": 0.699311375617981, "learning_rate": 5.114917270745426e-06, "loss": 1.0243, "step": 46690 }, { "epoch": 0.672745869167495, "grad_norm": 0.7033451795578003, "learning_rate": 5.110846715527042e-06, "loss": 1.0249, "step": 46700 }, { "epoch": 0.6728899260987943, "grad_norm": 0.5182821154594421, "learning_rate": 5.106777224609889e-06, "loss": 1.0024, "step": 46710 }, { "epoch": 0.6730339830300935, "grad_norm": 0.5992767214775085, "learning_rate": 5.1027087988798294e-06, "loss": 1.0258, "step": 46720 }, { "epoch": 0.6731780399613927, "grad_norm": 0.6217204928398132, "learning_rate": 5.098641439222502e-06, "loss": 1.0065, "step": 46730 }, { "epoch": 0.673322096892692, "grad_norm": 0.6292192935943604, "learning_rate": 5.094575146523322e-06, "loss": 1.0067, "step": 46740 }, { "epoch": 0.6734661538239912, "grad_norm": 0.6448467969894409, "learning_rate": 5.090509921667463e-06, "loss": 1.023, "step": 46750 }, { "epoch": 0.6736102107552905, "grad_norm": 0.850192666053772, "learning_rate": 5.086445765539867e-06, "loss": 1.0341, "step": 46760 }, { "epoch": 0.6737542676865897, "grad_norm": 0.6021695733070374, "learning_rate": 5.082382679025246e-06, "loss": 1.0352, "step": 46770 }, { "epoch": 0.673898324617889, "grad_norm": 0.5934077501296997, "learning_rate": 5.0783206630080725e-06, "loss": 0.9999, "step": 46780 }, { "epoch": 0.6740423815491883, "grad_norm": 0.5981540083885193, "learning_rate": 5.074259718372601e-06, "loss": 0.996, "step": 46790 }, { "epoch": 0.6741864384804875, "grad_norm": 0.678403377532959, "learning_rate": 5.0701998460028365e-06, "loss": 0.9898, "step": 46800 }, { "epoch": 0.6743304954117867, "grad_norm": 0.6320533156394958, "learning_rate": 5.066141046782561e-06, "loss": 0.9972, "step": 46810 }, { "epoch": 0.674474552343086, "grad_norm": 0.5987135767936707, "learning_rate": 5.062083321595316e-06, "loss": 1.0253, "step": 46820 }, { "epoch": 0.6746186092743852, "grad_norm": 0.6316280961036682, "learning_rate": 5.058026671324413e-06, "loss": 1.0177, "step": 46830 }, { "epoch": 0.6747626662056845, "grad_norm": 0.622235119342804, "learning_rate": 5.053971096852925e-06, "loss": 1.013, "step": 46840 }, { "epoch": 0.6749067231369837, "grad_norm": 0.6065994501113892, "learning_rate": 5.049916599063708e-06, "loss": 1.0176, "step": 46850 }, { "epoch": 0.675050780068283, "grad_norm": 0.6721647381782532, "learning_rate": 5.045863178839352e-06, "loss": 1.0146, "step": 46860 }, { "epoch": 0.6751948369995823, "grad_norm": 0.6812637448310852, "learning_rate": 5.041810837062244e-06, "loss": 1.0335, "step": 46870 }, { "epoch": 0.6753388939308815, "grad_norm": 0.7654386162757874, "learning_rate": 5.037759574614518e-06, "loss": 1.0149, "step": 46880 }, { "epoch": 0.6754829508621807, "grad_norm": 0.6054534912109375, "learning_rate": 5.033709392378079e-06, "loss": 0.9968, "step": 46890 }, { "epoch": 0.67562700779348, "grad_norm": 0.6277738213539124, "learning_rate": 5.029660291234595e-06, "loss": 1.0318, "step": 46900 }, { "epoch": 0.6757710647247792, "grad_norm": 0.6766389012336731, "learning_rate": 5.025612272065499e-06, "loss": 0.9958, "step": 46910 }, { "epoch": 0.6759151216560785, "grad_norm": 0.6448894739151001, "learning_rate": 5.021565335751985e-06, "loss": 1.0266, "step": 46920 }, { "epoch": 0.6760591785873777, "grad_norm": 0.6101272702217102, "learning_rate": 5.017519483175026e-06, "loss": 0.9839, "step": 46930 }, { "epoch": 0.676203235518677, "grad_norm": 0.6158223748207092, "learning_rate": 5.013474715215345e-06, "loss": 1.0151, "step": 46940 }, { "epoch": 0.6763472924499763, "grad_norm": 0.6153269410133362, "learning_rate": 5.009431032753422e-06, "loss": 1.024, "step": 46950 }, { "epoch": 0.6764913493812755, "grad_norm": 0.6961709260940552, "learning_rate": 5.005388436669522e-06, "loss": 1.0205, "step": 46960 }, { "epoch": 0.6766354063125747, "grad_norm": 0.6553806662559509, "learning_rate": 5.001346927843655e-06, "loss": 1.0261, "step": 46970 }, { "epoch": 0.676779463243874, "grad_norm": 0.7360643744468689, "learning_rate": 4.99730650715561e-06, "loss": 1.0089, "step": 46980 }, { "epoch": 0.6769235201751732, "grad_norm": 0.6369828581809998, "learning_rate": 4.993267175484928e-06, "loss": 1.018, "step": 46990 }, { "epoch": 0.6770675771064725, "grad_norm": 0.6678699851036072, "learning_rate": 4.9892289337109065e-06, "loss": 1.034, "step": 47000 }, { "epoch": 0.6772116340377717, "grad_norm": 0.6522915959358215, "learning_rate": 4.985191782712628e-06, "loss": 1.0115, "step": 47010 }, { "epoch": 0.677355690969071, "grad_norm": 0.6694148182868958, "learning_rate": 4.981155723368918e-06, "loss": 1.0043, "step": 47020 }, { "epoch": 0.6774997479003703, "grad_norm": 0.6815006136894226, "learning_rate": 4.977120756558372e-06, "loss": 0.9957, "step": 47030 }, { "epoch": 0.6776438048316695, "grad_norm": 0.7229025363922119, "learning_rate": 4.973086883159347e-06, "loss": 1.0124, "step": 47040 }, { "epoch": 0.6777878617629687, "grad_norm": 0.6131969690322876, "learning_rate": 4.9690541040499605e-06, "loss": 1.0133, "step": 47050 }, { "epoch": 0.6779319186942679, "grad_norm": 0.6573668122291565, "learning_rate": 4.965022420108091e-06, "loss": 1.0062, "step": 47060 }, { "epoch": 0.6780759756255672, "grad_norm": 0.5714235305786133, "learning_rate": 4.960991832211386e-06, "loss": 1.0364, "step": 47070 }, { "epoch": 0.6782200325568665, "grad_norm": 0.6412636637687683, "learning_rate": 4.956962341237246e-06, "loss": 1.0069, "step": 47080 }, { "epoch": 0.6783640894881657, "grad_norm": 0.6453899145126343, "learning_rate": 4.952933948062836e-06, "loss": 1.0163, "step": 47090 }, { "epoch": 0.678508146419465, "grad_norm": 0.6912153363227844, "learning_rate": 4.948906653565081e-06, "loss": 1.0182, "step": 47100 }, { "epoch": 0.6786522033507643, "grad_norm": 0.5970426201820374, "learning_rate": 4.944880458620663e-06, "loss": 1.0248, "step": 47110 }, { "epoch": 0.6787962602820635, "grad_norm": 0.7003148794174194, "learning_rate": 4.940855364106043e-06, "loss": 1.012, "step": 47120 }, { "epoch": 0.6789403172133627, "grad_norm": 0.7518492937088013, "learning_rate": 4.9368313708974144e-06, "loss": 0.9998, "step": 47130 }, { "epoch": 0.6790843741446619, "grad_norm": 0.6701333522796631, "learning_rate": 4.932808479870747e-06, "loss": 1.0081, "step": 47140 }, { "epoch": 0.6792284310759612, "grad_norm": 0.6354277729988098, "learning_rate": 4.928786691901774e-06, "loss": 1.0086, "step": 47150 }, { "epoch": 0.6793724880072605, "grad_norm": 0.6811854839324951, "learning_rate": 4.924766007865982e-06, "loss": 0.9978, "step": 47160 }, { "epoch": 0.6795165449385597, "grad_norm": 0.8547168374061584, "learning_rate": 4.920746428638617e-06, "loss": 1.009, "step": 47170 }, { "epoch": 0.679660601869859, "grad_norm": 0.7384396195411682, "learning_rate": 4.916727955094685e-06, "loss": 1.0118, "step": 47180 }, { "epoch": 0.6798046588011583, "grad_norm": 0.6684306263923645, "learning_rate": 4.912710588108951e-06, "loss": 1.0177, "step": 47190 }, { "epoch": 0.6799487157324575, "grad_norm": 0.7980669140815735, "learning_rate": 4.908694328555945e-06, "loss": 1.0269, "step": 47200 }, { "epoch": 0.6800927726637567, "grad_norm": 0.6420135498046875, "learning_rate": 4.904679177309951e-06, "loss": 1.0206, "step": 47210 }, { "epoch": 0.6802368295950559, "grad_norm": 0.5965870022773743, "learning_rate": 4.900665135245008e-06, "loss": 1.016, "step": 47220 }, { "epoch": 0.6803808865263552, "grad_norm": 0.6588281393051147, "learning_rate": 4.896652203234922e-06, "loss": 1.0155, "step": 47230 }, { "epoch": 0.6805249434576545, "grad_norm": 0.6783021688461304, "learning_rate": 4.89264038215325e-06, "loss": 1.0047, "step": 47240 }, { "epoch": 0.6806690003889537, "grad_norm": 0.6871261596679688, "learning_rate": 4.888629672873306e-06, "loss": 1.0126, "step": 47250 }, { "epoch": 0.680813057320253, "grad_norm": 0.6095453500747681, "learning_rate": 4.884620076268179e-06, "loss": 1.0229, "step": 47260 }, { "epoch": 0.6809571142515523, "grad_norm": 0.6512758135795593, "learning_rate": 4.880611593210691e-06, "loss": 1.0221, "step": 47270 }, { "epoch": 0.6811011711828515, "grad_norm": 0.5744214057922363, "learning_rate": 4.876604224573432e-06, "loss": 1.0143, "step": 47280 }, { "epoch": 0.6812452281141507, "grad_norm": 0.7972787022590637, "learning_rate": 4.872597971228762e-06, "loss": 1.0161, "step": 47290 }, { "epoch": 0.6813892850454499, "grad_norm": 0.7407034039497375, "learning_rate": 4.86859283404878e-06, "loss": 1.0169, "step": 47300 }, { "epoch": 0.6815333419767492, "grad_norm": 0.5624092817306519, "learning_rate": 4.86458881390535e-06, "loss": 1.0317, "step": 47310 }, { "epoch": 0.6816773989080485, "grad_norm": 0.6977633833885193, "learning_rate": 4.860585911670093e-06, "loss": 1.0119, "step": 47320 }, { "epoch": 0.6818214558393477, "grad_norm": 0.6715294122695923, "learning_rate": 4.85658412821438e-06, "loss": 1.0192, "step": 47330 }, { "epoch": 0.681965512770647, "grad_norm": 0.6478626728057861, "learning_rate": 4.852583464409354e-06, "loss": 1.0003, "step": 47340 }, { "epoch": 0.6821095697019463, "grad_norm": 0.6032857894897461, "learning_rate": 4.848583921125898e-06, "loss": 1.0095, "step": 47350 }, { "epoch": 0.6822536266332455, "grad_norm": 0.6184641122817993, "learning_rate": 4.844585499234659e-06, "loss": 1.0115, "step": 47360 }, { "epoch": 0.6823976835645447, "grad_norm": 0.6499150395393372, "learning_rate": 4.840588199606037e-06, "loss": 1.0388, "step": 47370 }, { "epoch": 0.6825417404958439, "grad_norm": 0.6112958788871765, "learning_rate": 4.8365920231101905e-06, "loss": 1.0212, "step": 47380 }, { "epoch": 0.6826857974271432, "grad_norm": 0.567670464515686, "learning_rate": 4.832596970617027e-06, "loss": 0.9819, "step": 47390 }, { "epoch": 0.6828298543584425, "grad_norm": 0.67822265625, "learning_rate": 4.828603042996227e-06, "loss": 1.0072, "step": 47400 }, { "epoch": 0.6829739112897417, "grad_norm": 0.6570102572441101, "learning_rate": 4.824610241117198e-06, "loss": 1.0118, "step": 47410 }, { "epoch": 0.683117968221041, "grad_norm": 0.6754155158996582, "learning_rate": 4.820618565849128e-06, "loss": 1.0106, "step": 47420 }, { "epoch": 0.6832620251523402, "grad_norm": 0.5989766120910645, "learning_rate": 4.816628018060946e-06, "loss": 0.9924, "step": 47430 }, { "epoch": 0.6834060820836394, "grad_norm": 0.6305521130561829, "learning_rate": 4.812638598621341e-06, "loss": 1.0204, "step": 47440 }, { "epoch": 0.6835501390149387, "grad_norm": 0.6485593914985657, "learning_rate": 4.808650308398753e-06, "loss": 1.0256, "step": 47450 }, { "epoch": 0.6836941959462379, "grad_norm": 0.9397467374801636, "learning_rate": 4.804663148261377e-06, "loss": 1.029, "step": 47460 }, { "epoch": 0.6838382528775372, "grad_norm": 0.8456627726554871, "learning_rate": 4.8006771190771615e-06, "loss": 1.0245, "step": 47470 }, { "epoch": 0.6839823098088365, "grad_norm": 0.6226358413696289, "learning_rate": 4.796692221713816e-06, "loss": 1.0101, "step": 47480 }, { "epoch": 0.6841263667401357, "grad_norm": 0.5895450711250305, "learning_rate": 4.792708457038797e-06, "loss": 1.037, "step": 47490 }, { "epoch": 0.684270423671435, "grad_norm": 1.2034680843353271, "learning_rate": 4.788725825919306e-06, "loss": 1.0181, "step": 47500 } ], "logging_steps": 10, "max_steps": 69417, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.4867563694146191e+18, "train_batch_size": 24, "trial_name": null, "trial_params": null }