{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.711004156639685, "eval_steps": 3000.0, "global_step": 26000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.734631371691096e-05, "grad_norm": 5.938619613647461, "learning_rate": 0.0, "loss": 0.9967, "step": 1 }, { "epoch": 5.469262743382192e-05, "grad_norm": 16.76685905456543, "learning_rate": 1.8214936247723134e-08, "loss": 0.9799, "step": 2 }, { "epoch": 8.203894115073288e-05, "grad_norm": 44.9127311706543, "learning_rate": 3.642987249544627e-08, "loss": 3.8941, "step": 3 }, { "epoch": 0.00010938525486764385, "grad_norm": 5.505643367767334, "learning_rate": 5.4644808743169406e-08, "loss": 0.9438, "step": 4 }, { "epoch": 0.0001367315685845548, "grad_norm": 6.937468528747559, "learning_rate": 7.285974499089254e-08, "loss": 1.1025, "step": 5 }, { "epoch": 0.00016407788230146577, "grad_norm": 6.357686996459961, "learning_rate": 9.107468123861569e-08, "loss": 1.0408, "step": 6 }, { "epoch": 0.00019142419601837673, "grad_norm": 5.867396354675293, "learning_rate": 1.0928961748633881e-07, "loss": 0.9964, "step": 7 }, { "epoch": 0.0002187705097352877, "grad_norm": 6.24326229095459, "learning_rate": 1.2750455373406196e-07, "loss": 0.9966, "step": 8 }, { "epoch": 0.0002461168234521986, "grad_norm": 6.399379253387451, "learning_rate": 1.4571948998178507e-07, "loss": 1.0776, "step": 9 }, { "epoch": 0.0002734631371691096, "grad_norm": 6.377708435058594, "learning_rate": 1.639344262295082e-07, "loss": 0.9379, "step": 10 }, { "epoch": 0.00030080945088602055, "grad_norm": 6.157576084136963, "learning_rate": 1.8214936247723137e-07, "loss": 0.9369, "step": 11 }, { "epoch": 0.00032815576460293154, "grad_norm": 5.895314693450928, "learning_rate": 2.0036429872495446e-07, "loss": 0.9642, "step": 12 }, { "epoch": 0.00035550207831984247, "grad_norm": 5.182945728302002, "learning_rate": 2.1857923497267762e-07, "loss": 0.8544, "step": 13 }, { "epoch": 0.00038284839203675346, "grad_norm": 6.515782833099365, "learning_rate": 2.3679417122040076e-07, "loss": 1.0016, "step": 14 }, { "epoch": 0.0004101947057536644, "grad_norm": 5.809843063354492, "learning_rate": 2.550091074681239e-07, "loss": 1.0004, "step": 15 }, { "epoch": 0.0004375410194705754, "grad_norm": 6.334860324859619, "learning_rate": 2.73224043715847e-07, "loss": 1.0335, "step": 16 }, { "epoch": 0.0004648873331874863, "grad_norm": 6.033939361572266, "learning_rate": 2.9143897996357015e-07, "loss": 0.994, "step": 17 }, { "epoch": 0.0004922336469043973, "grad_norm": 6.179076671600342, "learning_rate": 3.096539162112933e-07, "loss": 1.0065, "step": 18 }, { "epoch": 0.0005195799606213082, "grad_norm": 6.17287015914917, "learning_rate": 3.278688524590164e-07, "loss": 0.9743, "step": 19 }, { "epoch": 0.0005469262743382192, "grad_norm": 6.060050010681152, "learning_rate": 3.4608378870673953e-07, "loss": 1.0205, "step": 20 }, { "epoch": 0.0005742725880551302, "grad_norm": 6.558656692504883, "learning_rate": 3.6429872495446275e-07, "loss": 1.0461, "step": 21 }, { "epoch": 0.0006016189017720411, "grad_norm": 36.999332427978516, "learning_rate": 3.825136612021858e-07, "loss": 2.4766, "step": 22 }, { "epoch": 0.0006289652154889521, "grad_norm": 5.884373664855957, "learning_rate": 4.007285974499089e-07, "loss": 0.9291, "step": 23 }, { "epoch": 0.0006563115292058631, "grad_norm": 45.37116241455078, "learning_rate": 4.1894353369763213e-07, "loss": 3.8893, "step": 24 }, { "epoch": 0.000683657842922774, "grad_norm": 6.110790252685547, "learning_rate": 4.3715846994535524e-07, "loss": 1.0143, "step": 25 }, { "epoch": 0.0007110041566396849, "grad_norm": 5.324756145477295, "learning_rate": 4.5537340619307836e-07, "loss": 0.9379, "step": 26 }, { "epoch": 0.0007383504703565959, "grad_norm": 5.719489097595215, "learning_rate": 4.735883424408015e-07, "loss": 0.9687, "step": 27 }, { "epoch": 0.0007656967840735069, "grad_norm": 6.535736083984375, "learning_rate": 4.918032786885246e-07, "loss": 0.9723, "step": 28 }, { "epoch": 0.0007930430977904178, "grad_norm": 6.97694206237793, "learning_rate": 5.100182149362478e-07, "loss": 0.933, "step": 29 }, { "epoch": 0.0008203894115073288, "grad_norm": 5.993082046508789, "learning_rate": 5.28233151183971e-07, "loss": 0.958, "step": 30 }, { "epoch": 0.0008477357252242398, "grad_norm": 6.107892036437988, "learning_rate": 5.46448087431694e-07, "loss": 0.9702, "step": 31 }, { "epoch": 0.0008750820389411508, "grad_norm": 5.549534320831299, "learning_rate": 5.646630236794172e-07, "loss": 1.0048, "step": 32 }, { "epoch": 0.0009024283526580616, "grad_norm": 6.066496849060059, "learning_rate": 5.828779599271403e-07, "loss": 0.9723, "step": 33 }, { "epoch": 0.0009297746663749726, "grad_norm": 33.9055290222168, "learning_rate": 6.010928961748634e-07, "loss": 2.451, "step": 34 }, { "epoch": 0.0009571209800918836, "grad_norm": 6.5366950035095215, "learning_rate": 6.193078324225866e-07, "loss": 1.0835, "step": 35 }, { "epoch": 0.0009844672938087945, "grad_norm": 34.722373962402344, "learning_rate": 6.375227686703097e-07, "loss": 2.3379, "step": 36 }, { "epoch": 0.0010118136075257056, "grad_norm": 5.736315727233887, "learning_rate": 6.557377049180328e-07, "loss": 1.0079, "step": 37 }, { "epoch": 0.0010391599212426165, "grad_norm": 5.239593029022217, "learning_rate": 6.73952641165756e-07, "loss": 0.8625, "step": 38 }, { "epoch": 0.0010665062349595274, "grad_norm": 5.189340114593506, "learning_rate": 6.921675774134791e-07, "loss": 0.9045, "step": 39 }, { "epoch": 0.0010938525486764385, "grad_norm": 5.6916823387146, "learning_rate": 7.103825136612022e-07, "loss": 0.9471, "step": 40 }, { "epoch": 0.0011211988623933493, "grad_norm": 5.596184730529785, "learning_rate": 7.285974499089255e-07, "loss": 0.9726, "step": 41 }, { "epoch": 0.0011485451761102604, "grad_norm": 5.575604438781738, "learning_rate": 7.468123861566486e-07, "loss": 1.0248, "step": 42 }, { "epoch": 0.0011758914898271713, "grad_norm": 5.11284065246582, "learning_rate": 7.650273224043716e-07, "loss": 0.8911, "step": 43 }, { "epoch": 0.0012032378035440822, "grad_norm": 5.176600933074951, "learning_rate": 7.832422586520947e-07, "loss": 0.8664, "step": 44 }, { "epoch": 0.0012305841172609933, "grad_norm": 6.517277717590332, "learning_rate": 8.014571948998178e-07, "loss": 0.991, "step": 45 }, { "epoch": 0.0012579304309779042, "grad_norm": 5.424147129058838, "learning_rate": 8.196721311475409e-07, "loss": 0.9475, "step": 46 }, { "epoch": 0.001285276744694815, "grad_norm": 5.242556095123291, "learning_rate": 8.378870673952643e-07, "loss": 0.9655, "step": 47 }, { "epoch": 0.0013126230584117262, "grad_norm": 4.438906192779541, "learning_rate": 8.561020036429874e-07, "loss": 0.8005, "step": 48 }, { "epoch": 0.001339969372128637, "grad_norm": 5.766573905944824, "learning_rate": 8.743169398907105e-07, "loss": 0.9669, "step": 49 }, { "epoch": 0.001367315685845548, "grad_norm": 4.8595075607299805, "learning_rate": 8.925318761384336e-07, "loss": 0.8994, "step": 50 }, { "epoch": 0.001394661999562459, "grad_norm": 4.345108985900879, "learning_rate": 9.107468123861567e-07, "loss": 0.9035, "step": 51 }, { "epoch": 0.0014220083132793699, "grad_norm": 5.896066665649414, "learning_rate": 9.289617486338799e-07, "loss": 1.0628, "step": 52 }, { "epoch": 0.001449354626996281, "grad_norm": 5.0411529541015625, "learning_rate": 9.47176684881603e-07, "loss": 0.8411, "step": 53 }, { "epoch": 0.0014767009407131919, "grad_norm": 4.656637668609619, "learning_rate": 9.653916211293261e-07, "loss": 0.9152, "step": 54 }, { "epoch": 0.0015040472544301027, "grad_norm": 4.169492721557617, "learning_rate": 9.836065573770493e-07, "loss": 0.8733, "step": 55 }, { "epoch": 0.0015313935681470138, "grad_norm": 5.489083766937256, "learning_rate": 1.0018214936247724e-06, "loss": 0.8263, "step": 56 }, { "epoch": 0.0015587398818639247, "grad_norm": 4.702667236328125, "learning_rate": 1.0200364298724957e-06, "loss": 0.8597, "step": 57 }, { "epoch": 0.0015860861955808356, "grad_norm": 4.729521751403809, "learning_rate": 1.0382513661202188e-06, "loss": 0.925, "step": 58 }, { "epoch": 0.0016134325092977467, "grad_norm": 4.517162322998047, "learning_rate": 1.056466302367942e-06, "loss": 0.8613, "step": 59 }, { "epoch": 0.0016407788230146576, "grad_norm": 4.048605918884277, "learning_rate": 1.074681238615665e-06, "loss": 0.8587, "step": 60 }, { "epoch": 0.0016681251367315687, "grad_norm": 4.066154956817627, "learning_rate": 1.092896174863388e-06, "loss": 0.8416, "step": 61 }, { "epoch": 0.0016954714504484796, "grad_norm": 4.401191711425781, "learning_rate": 1.111111111111111e-06, "loss": 0.8169, "step": 62 }, { "epoch": 0.0017228177641653904, "grad_norm": 3.6019182205200195, "learning_rate": 1.1293260473588344e-06, "loss": 0.787, "step": 63 }, { "epoch": 0.0017501640778823015, "grad_norm": 4.063720703125, "learning_rate": 1.1475409836065575e-06, "loss": 0.8598, "step": 64 }, { "epoch": 0.0017775103915992124, "grad_norm": 3.704862356185913, "learning_rate": 1.1657559198542806e-06, "loss": 0.8062, "step": 65 }, { "epoch": 0.0018048567053161233, "grad_norm": 3.535989761352539, "learning_rate": 1.1839708561020037e-06, "loss": 0.7577, "step": 66 }, { "epoch": 0.0018322030190330344, "grad_norm": 4.233973026275635, "learning_rate": 1.2021857923497268e-06, "loss": 0.7713, "step": 67 }, { "epoch": 0.0018595493327499453, "grad_norm": 3.344144344329834, "learning_rate": 1.2204007285974501e-06, "loss": 0.7327, "step": 68 }, { "epoch": 0.0018868956464668564, "grad_norm": 24.606109619140625, "learning_rate": 1.2386156648451732e-06, "loss": 2.0641, "step": 69 }, { "epoch": 0.0019142419601837673, "grad_norm": 3.0568625926971436, "learning_rate": 1.2568306010928963e-06, "loss": 0.7, "step": 70 }, { "epoch": 0.0019415882739006781, "grad_norm": 3.3008697032928467, "learning_rate": 1.2750455373406195e-06, "loss": 0.7672, "step": 71 }, { "epoch": 0.001968934587617589, "grad_norm": 3.274148941040039, "learning_rate": 1.2932604735883426e-06, "loss": 0.7673, "step": 72 }, { "epoch": 0.0019962809013345, "grad_norm": 2.868929147720337, "learning_rate": 1.3114754098360657e-06, "loss": 0.6946, "step": 73 }, { "epoch": 0.002023627215051411, "grad_norm": 3.3864734172821045, "learning_rate": 1.3296903460837888e-06, "loss": 0.7872, "step": 74 }, { "epoch": 0.002050973528768322, "grad_norm": 2.855271100997925, "learning_rate": 1.347905282331512e-06, "loss": 0.7347, "step": 75 }, { "epoch": 0.002078319842485233, "grad_norm": 20.447223663330078, "learning_rate": 1.3661202185792352e-06, "loss": 1.9112, "step": 76 }, { "epoch": 0.002105666156202144, "grad_norm": 2.9347527027130127, "learning_rate": 1.3843351548269581e-06, "loss": 0.7762, "step": 77 }, { "epoch": 0.0021330124699190547, "grad_norm": 2.4694910049438477, "learning_rate": 1.4025500910746814e-06, "loss": 0.7016, "step": 78 }, { "epoch": 0.002160358783635966, "grad_norm": 2.874603033065796, "learning_rate": 1.4207650273224043e-06, "loss": 0.6416, "step": 79 }, { "epoch": 0.002187705097352877, "grad_norm": 2.6916370391845703, "learning_rate": 1.4389799635701277e-06, "loss": 0.7138, "step": 80 }, { "epoch": 0.0022150514110697876, "grad_norm": 18.070486068725586, "learning_rate": 1.457194899817851e-06, "loss": 1.8757, "step": 81 }, { "epoch": 0.0022423977247866987, "grad_norm": 2.735776662826538, "learning_rate": 1.4754098360655739e-06, "loss": 0.7352, "step": 82 }, { "epoch": 0.0022697440385036098, "grad_norm": 2.404538869857788, "learning_rate": 1.4936247723132972e-06, "loss": 0.7268, "step": 83 }, { "epoch": 0.002297090352220521, "grad_norm": 16.192590713500977, "learning_rate": 1.5118397085610201e-06, "loss": 1.7021, "step": 84 }, { "epoch": 0.0023244366659374315, "grad_norm": 2.263108253479004, "learning_rate": 1.5300546448087432e-06, "loss": 0.6977, "step": 85 }, { "epoch": 0.0023517829796543426, "grad_norm": 2.230760097503662, "learning_rate": 1.5482695810564663e-06, "loss": 0.6782, "step": 86 }, { "epoch": 0.0023791292933712537, "grad_norm": 2.3340771198272705, "learning_rate": 1.5664845173041894e-06, "loss": 0.6962, "step": 87 }, { "epoch": 0.0024064756070881644, "grad_norm": 2.3887627124786377, "learning_rate": 1.5846994535519128e-06, "loss": 0.6826, "step": 88 }, { "epoch": 0.0024338219208050755, "grad_norm": 14.187968254089355, "learning_rate": 1.6029143897996357e-06, "loss": 1.631, "step": 89 }, { "epoch": 0.0024611682345219866, "grad_norm": 2.174968957901001, "learning_rate": 1.621129326047359e-06, "loss": 0.6817, "step": 90 }, { "epoch": 0.0024885145482388973, "grad_norm": 2.3742117881774902, "learning_rate": 1.6393442622950819e-06, "loss": 0.6871, "step": 91 }, { "epoch": 0.0025158608619558083, "grad_norm": 2.2524850368499756, "learning_rate": 1.6575591985428052e-06, "loss": 0.6874, "step": 92 }, { "epoch": 0.0025432071756727194, "grad_norm": 2.3935434818267822, "learning_rate": 1.6757741347905285e-06, "loss": 0.6935, "step": 93 }, { "epoch": 0.00257055348938963, "grad_norm": 29.524770736694336, "learning_rate": 1.6939890710382514e-06, "loss": 2.5023, "step": 94 }, { "epoch": 0.002597899803106541, "grad_norm": 2.1521213054656982, "learning_rate": 1.7122040072859748e-06, "loss": 0.7052, "step": 95 }, { "epoch": 0.0026252461168234523, "grad_norm": 2.1503636837005615, "learning_rate": 1.7304189435336977e-06, "loss": 0.6978, "step": 96 }, { "epoch": 0.002652592430540363, "grad_norm": 2.0484979152679443, "learning_rate": 1.748633879781421e-06, "loss": 0.6666, "step": 97 }, { "epoch": 0.002679938744257274, "grad_norm": 2.0807528495788574, "learning_rate": 1.766848816029144e-06, "loss": 0.6589, "step": 98 }, { "epoch": 0.002707285057974185, "grad_norm": 2.114314556121826, "learning_rate": 1.7850637522768672e-06, "loss": 0.6867, "step": 99 }, { "epoch": 0.002734631371691096, "grad_norm": 2.3296399116516113, "learning_rate": 1.8032786885245903e-06, "loss": 0.6619, "step": 100 }, { "epoch": 0.002761977685408007, "grad_norm": 1.775242805480957, "learning_rate": 1.8214936247723134e-06, "loss": 0.6513, "step": 101 }, { "epoch": 0.002789323999124918, "grad_norm": 2.2609660625457764, "learning_rate": 1.8397085610200365e-06, "loss": 0.6812, "step": 102 }, { "epoch": 0.002816670312841829, "grad_norm": 2.1105332374572754, "learning_rate": 1.8579234972677599e-06, "loss": 0.6611, "step": 103 }, { "epoch": 0.0028440166265587398, "grad_norm": 3.0961241722106934, "learning_rate": 1.8761384335154828e-06, "loss": 0.7781, "step": 104 }, { "epoch": 0.002871362940275651, "grad_norm": 3.0465025901794434, "learning_rate": 1.894353369763206e-06, "loss": 0.7329, "step": 105 }, { "epoch": 0.002898709253992562, "grad_norm": 2.1586849689483643, "learning_rate": 1.912568306010929e-06, "loss": 0.6589, "step": 106 }, { "epoch": 0.0029260555677094726, "grad_norm": 2.0015177726745605, "learning_rate": 1.9307832422586523e-06, "loss": 0.6596, "step": 107 }, { "epoch": 0.0029534018814263837, "grad_norm": 2.9674532413482666, "learning_rate": 1.9489981785063756e-06, "loss": 0.7438, "step": 108 }, { "epoch": 0.002980748195143295, "grad_norm": 8.596776008605957, "learning_rate": 1.9672131147540985e-06, "loss": 1.405, "step": 109 }, { "epoch": 0.0030080945088602055, "grad_norm": 7.681109428405762, "learning_rate": 1.985428051001822e-06, "loss": 1.4124, "step": 110 }, { "epoch": 0.0030354408225771166, "grad_norm": 1.9990977048873901, "learning_rate": 2.0036429872495447e-06, "loss": 0.6563, "step": 111 }, { "epoch": 0.0030627871362940277, "grad_norm": 7.45889139175415, "learning_rate": 2.021857923497268e-06, "loss": 1.3611, "step": 112 }, { "epoch": 0.0030901334500109384, "grad_norm": 1.989665150642395, "learning_rate": 2.0400728597449914e-06, "loss": 0.6635, "step": 113 }, { "epoch": 0.0031174797637278494, "grad_norm": 2.0112738609313965, "learning_rate": 2.0582877959927143e-06, "loss": 0.6624, "step": 114 }, { "epoch": 0.0031448260774447605, "grad_norm": 2.179157018661499, "learning_rate": 2.0765027322404376e-06, "loss": 0.7226, "step": 115 }, { "epoch": 0.003172172391161671, "grad_norm": 2.12404727935791, "learning_rate": 2.0947176684881605e-06, "loss": 0.6514, "step": 116 }, { "epoch": 0.0031995187048785823, "grad_norm": 1.9384703636169434, "learning_rate": 2.112932604735884e-06, "loss": 0.6416, "step": 117 }, { "epoch": 0.0032268650185954934, "grad_norm": 5.398392677307129, "learning_rate": 2.1311475409836067e-06, "loss": 1.3243, "step": 118 }, { "epoch": 0.0032542113323124045, "grad_norm": 14.971677780151367, "learning_rate": 2.14936247723133e-06, "loss": 1.6451, "step": 119 }, { "epoch": 0.003281557646029315, "grad_norm": 2.092541217803955, "learning_rate": 2.167577413479053e-06, "loss": 0.6747, "step": 120 }, { "epoch": 0.0033089039597462263, "grad_norm": 2.1265053749084473, "learning_rate": 2.185792349726776e-06, "loss": 0.665, "step": 121 }, { "epoch": 0.0033362502734631374, "grad_norm": 1.872660756111145, "learning_rate": 2.204007285974499e-06, "loss": 0.7008, "step": 122 }, { "epoch": 0.003363596587180048, "grad_norm": 2.531712293624878, "learning_rate": 2.222222222222222e-06, "loss": 0.7428, "step": 123 }, { "epoch": 0.003390942900896959, "grad_norm": 1.7494702339172363, "learning_rate": 2.2404371584699454e-06, "loss": 0.6599, "step": 124 }, { "epoch": 0.00341828921461387, "grad_norm": 4.380643367767334, "learning_rate": 2.2586520947176687e-06, "loss": 1.244, "step": 125 }, { "epoch": 0.003445635528330781, "grad_norm": 2.557662010192871, "learning_rate": 2.2768670309653916e-06, "loss": 0.7289, "step": 126 }, { "epoch": 0.003472981842047692, "grad_norm": 1.8513528108596802, "learning_rate": 2.295081967213115e-06, "loss": 0.6426, "step": 127 }, { "epoch": 0.003500328155764603, "grad_norm": 1.9149812459945679, "learning_rate": 2.313296903460838e-06, "loss": 0.6682, "step": 128 }, { "epoch": 0.0035276744694815137, "grad_norm": 2.0619056224823, "learning_rate": 2.331511839708561e-06, "loss": 0.6965, "step": 129 }, { "epoch": 0.003555020783198425, "grad_norm": 1.80631422996521, "learning_rate": 2.3497267759562845e-06, "loss": 0.66, "step": 130 }, { "epoch": 0.003582367096915336, "grad_norm": 4.275056838989258, "learning_rate": 2.3679417122040074e-06, "loss": 0.6714, "step": 131 }, { "epoch": 0.0036097134106322466, "grad_norm": 1.7711447477340698, "learning_rate": 2.3861566484517307e-06, "loss": 0.6574, "step": 132 }, { "epoch": 0.0036370597243491577, "grad_norm": 2.2230842113494873, "learning_rate": 2.4043715846994536e-06, "loss": 0.7056, "step": 133 }, { "epoch": 0.003664406038066069, "grad_norm": 1.9362789392471313, "learning_rate": 2.422586520947177e-06, "loss": 0.6475, "step": 134 }, { "epoch": 0.0036917523517829795, "grad_norm": 2.0907552242279053, "learning_rate": 2.4408014571949003e-06, "loss": 0.6825, "step": 135 }, { "epoch": 0.0037190986654998905, "grad_norm": 1.7333886623382568, "learning_rate": 2.459016393442623e-06, "loss": 0.6425, "step": 136 }, { "epoch": 0.0037464449792168016, "grad_norm": 2.4887771606445312, "learning_rate": 2.4772313296903465e-06, "loss": 0.7333, "step": 137 }, { "epoch": 0.0037737912929337127, "grad_norm": 5.665512561798096, "learning_rate": 2.4954462659380694e-06, "loss": 1.0409, "step": 138 }, { "epoch": 0.0038011376066506234, "grad_norm": 1.7261035442352295, "learning_rate": 2.5136612021857927e-06, "loss": 0.6578, "step": 139 }, { "epoch": 0.0038284839203675345, "grad_norm": 2.019235610961914, "learning_rate": 2.5318761384335156e-06, "loss": 0.7075, "step": 140 }, { "epoch": 0.0038558302340844456, "grad_norm": 1.7048267126083374, "learning_rate": 2.550091074681239e-06, "loss": 0.6454, "step": 141 }, { "epoch": 0.0038831765478013563, "grad_norm": 2.639411211013794, "learning_rate": 2.5683060109289622e-06, "loss": 0.8111, "step": 142 }, { "epoch": 0.003910522861518267, "grad_norm": 1.8097383975982666, "learning_rate": 2.586520947176685e-06, "loss": 0.6684, "step": 143 }, { "epoch": 0.003937869175235178, "grad_norm": 3.1180150508880615, "learning_rate": 2.604735883424408e-06, "loss": 1.2085, "step": 144 }, { "epoch": 0.003965215488952089, "grad_norm": 1.8541127443313599, "learning_rate": 2.6229508196721314e-06, "loss": 0.6468, "step": 145 }, { "epoch": 0.003992561802669, "grad_norm": 1.838768720626831, "learning_rate": 2.6411657559198543e-06, "loss": 0.6297, "step": 146 }, { "epoch": 0.004019908116385911, "grad_norm": 2.52044677734375, "learning_rate": 2.6593806921675776e-06, "loss": 0.7259, "step": 147 }, { "epoch": 0.004047254430102822, "grad_norm": 1.7337758541107178, "learning_rate": 2.677595628415301e-06, "loss": 0.6669, "step": 148 }, { "epoch": 0.0040746007438197335, "grad_norm": 2.1618921756744385, "learning_rate": 2.695810564663024e-06, "loss": 0.7863, "step": 149 }, { "epoch": 0.004101947057536644, "grad_norm": 1.74651038646698, "learning_rate": 2.7140255009107467e-06, "loss": 0.6243, "step": 150 }, { "epoch": 0.004129293371253555, "grad_norm": 1.7981010675430298, "learning_rate": 2.7322404371584705e-06, "loss": 0.6355, "step": 151 }, { "epoch": 0.004156639684970466, "grad_norm": 2.8295369148254395, "learning_rate": 2.7504553734061934e-06, "loss": 1.1689, "step": 152 }, { "epoch": 0.004183985998687377, "grad_norm": 2.086019277572632, "learning_rate": 2.7686703096539162e-06, "loss": 0.6886, "step": 153 }, { "epoch": 0.004211332312404288, "grad_norm": 1.792688012123108, "learning_rate": 2.786885245901639e-06, "loss": 0.6573, "step": 154 }, { "epoch": 0.004238678626121199, "grad_norm": 2.0469329357147217, "learning_rate": 2.805100182149363e-06, "loss": 0.7107, "step": 155 }, { "epoch": 0.0042660249398381095, "grad_norm": 2.004525899887085, "learning_rate": 2.823315118397086e-06, "loss": 0.6023, "step": 156 }, { "epoch": 0.0042933712535550205, "grad_norm": 2.1307218074798584, "learning_rate": 2.8415300546448087e-06, "loss": 0.7036, "step": 157 }, { "epoch": 0.004320717567271932, "grad_norm": 1.990472435951233, "learning_rate": 2.8597449908925324e-06, "loss": 0.6738, "step": 158 }, { "epoch": 0.004348063880988843, "grad_norm": 2.0767645835876465, "learning_rate": 2.8779599271402553e-06, "loss": 0.6551, "step": 159 }, { "epoch": 0.004375410194705754, "grad_norm": 3.2353756427764893, "learning_rate": 2.8961748633879782e-06, "loss": 0.8712, "step": 160 }, { "epoch": 0.004402756508422665, "grad_norm": 1.9130470752716064, "learning_rate": 2.914389799635702e-06, "loss": 0.6508, "step": 161 }, { "epoch": 0.004430102822139575, "grad_norm": 2.443457841873169, "learning_rate": 2.932604735883425e-06, "loss": 0.6543, "step": 162 }, { "epoch": 0.004457449135856486, "grad_norm": 2.6727452278137207, "learning_rate": 2.9508196721311478e-06, "loss": 0.6855, "step": 163 }, { "epoch": 0.004484795449573397, "grad_norm": 1.7071540355682373, "learning_rate": 2.9690346083788707e-06, "loss": 0.6585, "step": 164 }, { "epoch": 0.0045121417632903085, "grad_norm": 1.7670332193374634, "learning_rate": 2.9872495446265944e-06, "loss": 0.6641, "step": 165 }, { "epoch": 0.0045394880770072196, "grad_norm": 1.9289405345916748, "learning_rate": 3.0054644808743173e-06, "loss": 0.7059, "step": 166 }, { "epoch": 0.004566834390724131, "grad_norm": 2.3050928115844727, "learning_rate": 3.0236794171220402e-06, "loss": 0.7544, "step": 167 }, { "epoch": 0.004594180704441042, "grad_norm": 2.5784950256347656, "learning_rate": 3.0418943533697635e-06, "loss": 1.1833, "step": 168 }, { "epoch": 0.004621527018157952, "grad_norm": 1.6477687358856201, "learning_rate": 3.0601092896174864e-06, "loss": 0.6421, "step": 169 }, { "epoch": 0.004648873331874863, "grad_norm": 1.9080559015274048, "learning_rate": 3.0783242258652098e-06, "loss": 0.6466, "step": 170 }, { "epoch": 0.004676219645591774, "grad_norm": 1.804215431213379, "learning_rate": 3.0965391621129327e-06, "loss": 0.6813, "step": 171 }, { "epoch": 0.004703565959308685, "grad_norm": 2.6034557819366455, "learning_rate": 3.114754098360656e-06, "loss": 0.6965, "step": 172 }, { "epoch": 0.004730912273025596, "grad_norm": 1.8251312971115112, "learning_rate": 3.132969034608379e-06, "loss": 0.6576, "step": 173 }, { "epoch": 0.0047582585867425075, "grad_norm": 1.8611531257629395, "learning_rate": 3.1511839708561022e-06, "loss": 0.6511, "step": 174 }, { "epoch": 0.004785604900459418, "grad_norm": 1.9919548034667969, "learning_rate": 3.1693989071038255e-06, "loss": 0.7135, "step": 175 }, { "epoch": 0.004812951214176329, "grad_norm": 1.9622056484222412, "learning_rate": 3.1876138433515484e-06, "loss": 0.6893, "step": 176 }, { "epoch": 0.00484029752789324, "grad_norm": 2.4982762336730957, "learning_rate": 3.2058287795992713e-06, "loss": 1.1837, "step": 177 }, { "epoch": 0.004867643841610151, "grad_norm": 3.8810086250305176, "learning_rate": 3.224043715846995e-06, "loss": 0.9723, "step": 178 }, { "epoch": 0.004894990155327062, "grad_norm": 1.974422574043274, "learning_rate": 3.242258652094718e-06, "loss": 0.7399, "step": 179 }, { "epoch": 0.004922336469043973, "grad_norm": 2.4604835510253906, "learning_rate": 3.260473588342441e-06, "loss": 1.1821, "step": 180 }, { "epoch": 0.004949682782760883, "grad_norm": 2.8263957500457764, "learning_rate": 3.2786885245901638e-06, "loss": 0.7593, "step": 181 }, { "epoch": 0.0049770290964777945, "grad_norm": 1.7623902559280396, "learning_rate": 3.2969034608378875e-06, "loss": 0.6487, "step": 182 }, { "epoch": 0.005004375410194706, "grad_norm": 1.8604713678359985, "learning_rate": 3.3151183970856104e-06, "loss": 0.6778, "step": 183 }, { "epoch": 0.005031721723911617, "grad_norm": 2.66776442527771, "learning_rate": 3.3333333333333333e-06, "loss": 0.8045, "step": 184 }, { "epoch": 0.005059068037628528, "grad_norm": 2.5802810192108154, "learning_rate": 3.351548269581057e-06, "loss": 1.125, "step": 185 }, { "epoch": 0.005086414351345439, "grad_norm": 1.5554087162017822, "learning_rate": 3.36976320582878e-06, "loss": 0.6452, "step": 186 }, { "epoch": 0.00511376066506235, "grad_norm": 2.122004985809326, "learning_rate": 3.387978142076503e-06, "loss": 0.6953, "step": 187 }, { "epoch": 0.00514110697877926, "grad_norm": 2.0167925357818604, "learning_rate": 3.4061930783242266e-06, "loss": 1.1279, "step": 188 }, { "epoch": 0.005168453292496171, "grad_norm": 1.9486920833587646, "learning_rate": 3.4244080145719495e-06, "loss": 0.6231, "step": 189 }, { "epoch": 0.005195799606213082, "grad_norm": 2.166785478591919, "learning_rate": 3.4426229508196724e-06, "loss": 0.7525, "step": 190 }, { "epoch": 0.0052231459199299935, "grad_norm": 2.059812307357788, "learning_rate": 3.4608378870673953e-06, "loss": 0.6272, "step": 191 }, { "epoch": 0.005250492233646905, "grad_norm": 2.0513839721679688, "learning_rate": 3.4790528233151186e-06, "loss": 0.6886, "step": 192 }, { "epoch": 0.005277838547363816, "grad_norm": 1.64750075340271, "learning_rate": 3.497267759562842e-06, "loss": 0.6327, "step": 193 }, { "epoch": 0.005305184861080726, "grad_norm": 1.946195363998413, "learning_rate": 3.515482695810565e-06, "loss": 0.6325, "step": 194 }, { "epoch": 0.005332531174797637, "grad_norm": 1.97189199924469, "learning_rate": 3.533697632058288e-06, "loss": 0.6834, "step": 195 }, { "epoch": 0.005359877488514548, "grad_norm": 2.2349486351013184, "learning_rate": 3.551912568306011e-06, "loss": 1.172, "step": 196 }, { "epoch": 0.005387223802231459, "grad_norm": 1.7677087783813477, "learning_rate": 3.5701275045537344e-06, "loss": 0.6179, "step": 197 }, { "epoch": 0.00541457011594837, "grad_norm": 2.1914658546447754, "learning_rate": 3.5883424408014573e-06, "loss": 0.7003, "step": 198 }, { "epoch": 0.005441916429665281, "grad_norm": 1.6845797300338745, "learning_rate": 3.6065573770491806e-06, "loss": 0.6579, "step": 199 }, { "epoch": 0.005469262743382192, "grad_norm": 2.104954719543457, "learning_rate": 3.6247723132969035e-06, "loss": 0.6753, "step": 200 }, { "epoch": 0.005496609057099103, "grad_norm": 1.8777859210968018, "learning_rate": 3.642987249544627e-06, "loss": 1.0768, "step": 201 }, { "epoch": 0.005523955370816014, "grad_norm": 1.898119568824768, "learning_rate": 3.66120218579235e-06, "loss": 1.0674, "step": 202 }, { "epoch": 0.005551301684532925, "grad_norm": 1.8132541179656982, "learning_rate": 3.679417122040073e-06, "loss": 0.6564, "step": 203 }, { "epoch": 0.005578647998249836, "grad_norm": 1.5471599102020264, "learning_rate": 3.697632058287796e-06, "loss": 0.6654, "step": 204 }, { "epoch": 0.005605994311966747, "grad_norm": 1.8580139875411987, "learning_rate": 3.7158469945355197e-06, "loss": 0.6811, "step": 205 }, { "epoch": 0.005633340625683658, "grad_norm": 2.2504525184631348, "learning_rate": 3.7340619307832426e-06, "loss": 0.7169, "step": 206 }, { "epoch": 0.0056606869394005685, "grad_norm": 1.9566928148269653, "learning_rate": 3.7522768670309655e-06, "loss": 0.6403, "step": 207 }, { "epoch": 0.0056880332531174796, "grad_norm": 1.788751482963562, "learning_rate": 3.7704918032786884e-06, "loss": 0.6359, "step": 208 }, { "epoch": 0.005715379566834391, "grad_norm": 1.729735255241394, "learning_rate": 3.788706739526412e-06, "loss": 0.6812, "step": 209 }, { "epoch": 0.005742725880551302, "grad_norm": 2.3899736404418945, "learning_rate": 3.806921675774135e-06, "loss": 0.8082, "step": 210 }, { "epoch": 0.005770072194268213, "grad_norm": 2.069024085998535, "learning_rate": 3.825136612021858e-06, "loss": 0.6624, "step": 211 }, { "epoch": 0.005797418507985124, "grad_norm": 1.7943648099899292, "learning_rate": 3.843351548269581e-06, "loss": 0.6654, "step": 212 }, { "epoch": 0.005824764821702034, "grad_norm": 1.6145939826965332, "learning_rate": 3.861566484517305e-06, "loss": 0.6367, "step": 213 }, { "epoch": 0.005852111135418945, "grad_norm": 1.835947871208191, "learning_rate": 3.879781420765028e-06, "loss": 0.6407, "step": 214 }, { "epoch": 0.005879457449135856, "grad_norm": 2.1978886127471924, "learning_rate": 3.897996357012751e-06, "loss": 0.6359, "step": 215 }, { "epoch": 0.0059068037628527675, "grad_norm": 1.7217575311660767, "learning_rate": 3.916211293260474e-06, "loss": 0.6237, "step": 216 }, { "epoch": 0.0059341500765696786, "grad_norm": 2.058400869369507, "learning_rate": 3.934426229508197e-06, "loss": 0.6654, "step": 217 }, { "epoch": 0.00596149639028659, "grad_norm": 1.561572790145874, "learning_rate": 3.9526411657559195e-06, "loss": 0.6631, "step": 218 }, { "epoch": 0.005988842704003501, "grad_norm": 1.756415605545044, "learning_rate": 3.970856102003644e-06, "loss": 0.6091, "step": 219 }, { "epoch": 0.006016189017720411, "grad_norm": 1.6328301429748535, "learning_rate": 3.989071038251366e-06, "loss": 0.6701, "step": 220 }, { "epoch": 0.006043535331437322, "grad_norm": 1.8569457530975342, "learning_rate": 4.0072859744990895e-06, "loss": 1.1117, "step": 221 }, { "epoch": 0.006070881645154233, "grad_norm": 1.866784930229187, "learning_rate": 4.025500910746813e-06, "loss": 0.643, "step": 222 }, { "epoch": 0.006098227958871144, "grad_norm": 1.5604426860809326, "learning_rate": 4.043715846994536e-06, "loss": 0.6765, "step": 223 }, { "epoch": 0.006125574272588055, "grad_norm": 1.5510385036468506, "learning_rate": 4.061930783242259e-06, "loss": 0.6673, "step": 224 }, { "epoch": 0.0061529205863049665, "grad_norm": 2.119959831237793, "learning_rate": 4.080145719489983e-06, "loss": 0.6662, "step": 225 }, { "epoch": 0.006180266900021877, "grad_norm": 2.0743179321289062, "learning_rate": 4.098360655737705e-06, "loss": 0.6202, "step": 226 }, { "epoch": 0.006207613213738788, "grad_norm": 2.1945598125457764, "learning_rate": 4.1165755919854286e-06, "loss": 0.7167, "step": 227 }, { "epoch": 0.006234959527455699, "grad_norm": 2.5172650814056396, "learning_rate": 4.134790528233151e-06, "loss": 0.7874, "step": 228 }, { "epoch": 0.00626230584117261, "grad_norm": 1.8052376508712769, "learning_rate": 4.153005464480875e-06, "loss": 0.6074, "step": 229 }, { "epoch": 0.006289652154889521, "grad_norm": 2.460679054260254, "learning_rate": 4.171220400728598e-06, "loss": 0.7689, "step": 230 }, { "epoch": 0.006316998468606432, "grad_norm": 1.8324936628341675, "learning_rate": 4.189435336976321e-06, "loss": 0.6491, "step": 231 }, { "epoch": 0.006344344782323342, "grad_norm": 1.5211830139160156, "learning_rate": 4.207650273224044e-06, "loss": 0.5929, "step": 232 }, { "epoch": 0.0063716910960402535, "grad_norm": 1.5962703227996826, "learning_rate": 4.225865209471768e-06, "loss": 0.6336, "step": 233 }, { "epoch": 0.006399037409757165, "grad_norm": 2.3511781692504883, "learning_rate": 4.24408014571949e-06, "loss": 0.6727, "step": 234 }, { "epoch": 0.006426383723474076, "grad_norm": 1.6107114553451538, "learning_rate": 4.2622950819672135e-06, "loss": 0.6776, "step": 235 }, { "epoch": 0.006453730037190987, "grad_norm": 1.5914514064788818, "learning_rate": 4.280510018214937e-06, "loss": 0.6364, "step": 236 }, { "epoch": 0.006481076350907898, "grad_norm": 1.828031301498413, "learning_rate": 4.29872495446266e-06, "loss": 0.7211, "step": 237 }, { "epoch": 0.006508422664624809, "grad_norm": 2.0611298084259033, "learning_rate": 4.316939890710383e-06, "loss": 0.6201, "step": 238 }, { "epoch": 0.006535768978341719, "grad_norm": 1.6278578042984009, "learning_rate": 4.335154826958106e-06, "loss": 0.6525, "step": 239 }, { "epoch": 0.00656311529205863, "grad_norm": 1.658782720565796, "learning_rate": 4.353369763205829e-06, "loss": 0.6606, "step": 240 }, { "epoch": 0.006590461605775541, "grad_norm": 1.7588530778884888, "learning_rate": 4.371584699453552e-06, "loss": 0.6457, "step": 241 }, { "epoch": 0.0066178079194924525, "grad_norm": 1.6235854625701904, "learning_rate": 4.389799635701276e-06, "loss": 1.0581, "step": 242 }, { "epoch": 0.006645154233209364, "grad_norm": 1.5812772512435913, "learning_rate": 4.408014571948998e-06, "loss": 1.0545, "step": 243 }, { "epoch": 0.006672500546926275, "grad_norm": 2.2516701221466064, "learning_rate": 4.426229508196722e-06, "loss": 0.6566, "step": 244 }, { "epoch": 0.006699846860643185, "grad_norm": 1.5599238872528076, "learning_rate": 4.444444444444444e-06, "loss": 0.6539, "step": 245 }, { "epoch": 0.006727193174360096, "grad_norm": 2.250652313232422, "learning_rate": 4.462659380692168e-06, "loss": 0.6466, "step": 246 }, { "epoch": 0.006754539488077007, "grad_norm": 1.589042067527771, "learning_rate": 4.480874316939891e-06, "loss": 0.6496, "step": 247 }, { "epoch": 0.006781885801793918, "grad_norm": 1.4981043338775635, "learning_rate": 4.499089253187614e-06, "loss": 0.6647, "step": 248 }, { "epoch": 0.006809232115510829, "grad_norm": 1.8438252210617065, "learning_rate": 4.5173041894353374e-06, "loss": 0.7086, "step": 249 }, { "epoch": 0.00683657842922774, "grad_norm": 1.6758575439453125, "learning_rate": 4.535519125683061e-06, "loss": 1.0609, "step": 250 }, { "epoch": 0.006863924742944651, "grad_norm": 1.715474247932434, "learning_rate": 4.553734061930783e-06, "loss": 0.6255, "step": 251 }, { "epoch": 0.006891271056661562, "grad_norm": 1.6316697597503662, "learning_rate": 4.571948998178507e-06, "loss": 0.6785, "step": 252 }, { "epoch": 0.006918617370378473, "grad_norm": 1.6253833770751953, "learning_rate": 4.59016393442623e-06, "loss": 0.6582, "step": 253 }, { "epoch": 0.006945963684095384, "grad_norm": 1.3764578104019165, "learning_rate": 4.608378870673953e-06, "loss": 0.6403, "step": 254 }, { "epoch": 0.006973309997812295, "grad_norm": 1.5877124071121216, "learning_rate": 4.626593806921676e-06, "loss": 0.6394, "step": 255 }, { "epoch": 0.007000656311529206, "grad_norm": 2.1105964183807373, "learning_rate": 4.6448087431694e-06, "loss": 0.7198, "step": 256 }, { "epoch": 0.007028002625246117, "grad_norm": 1.6296135187149048, "learning_rate": 4.663023679417122e-06, "loss": 0.6195, "step": 257 }, { "epoch": 0.0070553489389630275, "grad_norm": 4.6968865394592285, "learning_rate": 4.681238615664846e-06, "loss": 0.6739, "step": 258 }, { "epoch": 0.007082695252679939, "grad_norm": 1.7181528806686401, "learning_rate": 4.699453551912569e-06, "loss": 0.6795, "step": 259 }, { "epoch": 0.00711004156639685, "grad_norm": 1.746695876121521, "learning_rate": 4.717668488160292e-06, "loss": 0.6193, "step": 260 }, { "epoch": 0.007137387880113761, "grad_norm": 2.2145354747772217, "learning_rate": 4.735883424408015e-06, "loss": 0.6131, "step": 261 }, { "epoch": 0.007164734193830672, "grad_norm": 1.6698552370071411, "learning_rate": 4.754098360655738e-06, "loss": 0.6761, "step": 262 }, { "epoch": 0.007192080507547583, "grad_norm": 2.2171835899353027, "learning_rate": 4.772313296903461e-06, "loss": 0.666, "step": 263 }, { "epoch": 0.007219426821264493, "grad_norm": 1.9130802154541016, "learning_rate": 4.790528233151184e-06, "loss": 0.6188, "step": 264 }, { "epoch": 0.007246773134981404, "grad_norm": 1.5626901388168335, "learning_rate": 4.808743169398907e-06, "loss": 0.6679, "step": 265 }, { "epoch": 0.007274119448698315, "grad_norm": 1.6220868825912476, "learning_rate": 4.8269581056466305e-06, "loss": 0.6439, "step": 266 }, { "epoch": 0.0073014657624152265, "grad_norm": 1.5754534006118774, "learning_rate": 4.845173041894354e-06, "loss": 0.6259, "step": 267 }, { "epoch": 0.007328812076132138, "grad_norm": 1.6503762006759644, "learning_rate": 4.863387978142076e-06, "loss": 0.639, "step": 268 }, { "epoch": 0.007356158389849049, "grad_norm": 1.5638452768325806, "learning_rate": 4.8816029143898005e-06, "loss": 0.6017, "step": 269 }, { "epoch": 0.007383504703565959, "grad_norm": 1.5117802619934082, "learning_rate": 4.899817850637523e-06, "loss": 0.663, "step": 270 }, { "epoch": 0.00741085101728287, "grad_norm": 1.7946407794952393, "learning_rate": 4.918032786885246e-06, "loss": 0.6463, "step": 271 }, { "epoch": 0.007438197330999781, "grad_norm": 1.7408844232559204, "learning_rate": 4.936247723132969e-06, "loss": 0.6614, "step": 272 }, { "epoch": 0.007465543644716692, "grad_norm": 2.35311222076416, "learning_rate": 4.954462659380693e-06, "loss": 0.6371, "step": 273 }, { "epoch": 0.007492889958433603, "grad_norm": 1.5188924074172974, "learning_rate": 4.9726775956284154e-06, "loss": 0.6412, "step": 274 }, { "epoch": 0.007520236272150514, "grad_norm": 1.6389933824539185, "learning_rate": 4.990892531876139e-06, "loss": 0.6156, "step": 275 }, { "epoch": 0.0075475825858674255, "grad_norm": 2.0252020359039307, "learning_rate": 5.009107468123861e-06, "loss": 0.6894, "step": 276 }, { "epoch": 0.007574928899584336, "grad_norm": 1.8782418966293335, "learning_rate": 5.027322404371585e-06, "loss": 0.6731, "step": 277 }, { "epoch": 0.007602275213301247, "grad_norm": 1.4936527013778687, "learning_rate": 5.045537340619309e-06, "loss": 0.6293, "step": 278 }, { "epoch": 0.007629621527018158, "grad_norm": 1.6503119468688965, "learning_rate": 5.063752276867031e-06, "loss": 1.0946, "step": 279 }, { "epoch": 0.007656967840735069, "grad_norm": 2.374675989151001, "learning_rate": 5.0819672131147545e-06, "loss": 0.7104, "step": 280 }, { "epoch": 0.00768431415445198, "grad_norm": 2.137266159057617, "learning_rate": 5.100182149362478e-06, "loss": 0.6922, "step": 281 }, { "epoch": 0.007711660468168891, "grad_norm": 1.9660614728927612, "learning_rate": 5.1183970856102e-06, "loss": 0.706, "step": 282 }, { "epoch": 0.007739006781885801, "grad_norm": 2.4044439792633057, "learning_rate": 5.1366120218579245e-06, "loss": 0.6536, "step": 283 }, { "epoch": 0.0077663530956027125, "grad_norm": 2.1706626415252686, "learning_rate": 5.154826958105648e-06, "loss": 0.7327, "step": 284 }, { "epoch": 0.007793699409319624, "grad_norm": 1.8614171743392944, "learning_rate": 5.17304189435337e-06, "loss": 0.6295, "step": 285 }, { "epoch": 0.007821045723036534, "grad_norm": 1.6472492218017578, "learning_rate": 5.191256830601094e-06, "loss": 1.0719, "step": 286 }, { "epoch": 0.007848392036753445, "grad_norm": 1.6254693269729614, "learning_rate": 5.209471766848816e-06, "loss": 1.0334, "step": 287 }, { "epoch": 0.007875738350470356, "grad_norm": 1.4629831314086914, "learning_rate": 5.227686703096539e-06, "loss": 0.6381, "step": 288 }, { "epoch": 0.007903084664187267, "grad_norm": 1.7647664546966553, "learning_rate": 5.245901639344263e-06, "loss": 0.7077, "step": 289 }, { "epoch": 0.007930430977904178, "grad_norm": 1.873987078666687, "learning_rate": 5.264116575591985e-06, "loss": 0.6544, "step": 290 }, { "epoch": 0.00795777729162109, "grad_norm": 1.7911843061447144, "learning_rate": 5.2823315118397085e-06, "loss": 0.6239, "step": 291 }, { "epoch": 0.007985123605338, "grad_norm": 1.847502589225769, "learning_rate": 5.300546448087433e-06, "loss": 0.662, "step": 292 }, { "epoch": 0.008012469919054912, "grad_norm": 1.7268688678741455, "learning_rate": 5.318761384335155e-06, "loss": 1.0334, "step": 293 }, { "epoch": 0.008039816232771823, "grad_norm": 1.7712907791137695, "learning_rate": 5.3369763205828785e-06, "loss": 0.6275, "step": 294 }, { "epoch": 0.008067162546488734, "grad_norm": 1.7834700345993042, "learning_rate": 5.355191256830602e-06, "loss": 0.6785, "step": 295 }, { "epoch": 0.008094508860205645, "grad_norm": 1.4290049076080322, "learning_rate": 5.373406193078324e-06, "loss": 0.6298, "step": 296 }, { "epoch": 0.008121855173922556, "grad_norm": 1.8076955080032349, "learning_rate": 5.391621129326048e-06, "loss": 0.7945, "step": 297 }, { "epoch": 0.008149201487639467, "grad_norm": 2.176499128341675, "learning_rate": 5.409836065573772e-06, "loss": 0.6486, "step": 298 }, { "epoch": 0.008176547801356376, "grad_norm": 1.683600902557373, "learning_rate": 5.428051001821493e-06, "loss": 0.675, "step": 299 }, { "epoch": 0.008203894115073287, "grad_norm": 1.9409151077270508, "learning_rate": 5.446265938069218e-06, "loss": 0.7456, "step": 300 }, { "epoch": 0.008231240428790199, "grad_norm": 1.484804630279541, "learning_rate": 5.464480874316941e-06, "loss": 0.6162, "step": 301 }, { "epoch": 0.00825858674250711, "grad_norm": 1.3381857872009277, "learning_rate": 5.482695810564663e-06, "loss": 0.631, "step": 302 }, { "epoch": 0.00828593305622402, "grad_norm": 2.1298718452453613, "learning_rate": 5.500910746812387e-06, "loss": 0.6465, "step": 303 }, { "epoch": 0.008313279369940932, "grad_norm": 1.7044219970703125, "learning_rate": 5.519125683060109e-06, "loss": 0.6375, "step": 304 }, { "epoch": 0.008340625683657843, "grad_norm": 1.6025142669677734, "learning_rate": 5.5373406193078325e-06, "loss": 0.6489, "step": 305 }, { "epoch": 0.008367971997374754, "grad_norm": 2.011965274810791, "learning_rate": 5.555555555555557e-06, "loss": 0.7055, "step": 306 }, { "epoch": 0.008395318311091665, "grad_norm": 1.819584608078003, "learning_rate": 5.573770491803278e-06, "loss": 0.6198, "step": 307 }, { "epoch": 0.008422664624808576, "grad_norm": 1.6411529779434204, "learning_rate": 5.5919854280510025e-06, "loss": 0.635, "step": 308 }, { "epoch": 0.008450010938525487, "grad_norm": 1.631751298904419, "learning_rate": 5.610200364298726e-06, "loss": 1.0365, "step": 309 }, { "epoch": 0.008477357252242398, "grad_norm": 1.5843528509140015, "learning_rate": 5.628415300546448e-06, "loss": 0.5982, "step": 310 }, { "epoch": 0.00850470356595931, "grad_norm": 1.7126215696334839, "learning_rate": 5.646630236794172e-06, "loss": 0.6578, "step": 311 }, { "epoch": 0.008532049879676219, "grad_norm": 1.5280866622924805, "learning_rate": 5.664845173041895e-06, "loss": 0.5715, "step": 312 }, { "epoch": 0.00855939619339313, "grad_norm": 2.1759772300720215, "learning_rate": 5.683060109289617e-06, "loss": 0.6545, "step": 313 }, { "epoch": 0.008586742507110041, "grad_norm": 1.4769357442855835, "learning_rate": 5.701275045537341e-06, "loss": 0.6515, "step": 314 }, { "epoch": 0.008614088820826952, "grad_norm": 6.512805461883545, "learning_rate": 5.719489981785065e-06, "loss": 0.6564, "step": 315 }, { "epoch": 0.008641435134543863, "grad_norm": 1.8529338836669922, "learning_rate": 5.737704918032787e-06, "loss": 0.6365, "step": 316 }, { "epoch": 0.008668781448260774, "grad_norm": 1.9566774368286133, "learning_rate": 5.755919854280511e-06, "loss": 0.6377, "step": 317 }, { "epoch": 0.008696127761977685, "grad_norm": 1.5960988998413086, "learning_rate": 5.774134790528234e-06, "loss": 0.6228, "step": 318 }, { "epoch": 0.008723474075694597, "grad_norm": 1.7018399238586426, "learning_rate": 5.7923497267759565e-06, "loss": 0.6348, "step": 319 }, { "epoch": 0.008750820389411508, "grad_norm": 2.7132956981658936, "learning_rate": 5.81056466302368e-06, "loss": 1.0374, "step": 320 }, { "epoch": 0.008778166703128419, "grad_norm": 1.60355544090271, "learning_rate": 5.828779599271404e-06, "loss": 0.6216, "step": 321 }, { "epoch": 0.00880551301684533, "grad_norm": 1.6879910230636597, "learning_rate": 5.846994535519126e-06, "loss": 0.6399, "step": 322 }, { "epoch": 0.008832859330562241, "grad_norm": 1.7879387140274048, "learning_rate": 5.86520947176685e-06, "loss": 0.6098, "step": 323 }, { "epoch": 0.00886020564427915, "grad_norm": 2.25697660446167, "learning_rate": 5.883424408014572e-06, "loss": 0.6501, "step": 324 }, { "epoch": 0.008887551957996061, "grad_norm": 1.6348577737808228, "learning_rate": 5.9016393442622956e-06, "loss": 0.6391, "step": 325 }, { "epoch": 0.008914898271712973, "grad_norm": 1.7359694242477417, "learning_rate": 5.919854280510019e-06, "loss": 1.0526, "step": 326 }, { "epoch": 0.008942244585429884, "grad_norm": 1.7703478336334229, "learning_rate": 5.938069216757741e-06, "loss": 0.6376, "step": 327 }, { "epoch": 0.008969590899146795, "grad_norm": 1.6919001340866089, "learning_rate": 5.956284153005465e-06, "loss": 0.6494, "step": 328 }, { "epoch": 0.008996937212863706, "grad_norm": 1.8093780279159546, "learning_rate": 5.974499089253189e-06, "loss": 0.6272, "step": 329 }, { "epoch": 0.009024283526580617, "grad_norm": 1.6745080947875977, "learning_rate": 5.9927140255009105e-06, "loss": 0.645, "step": 330 }, { "epoch": 0.009051629840297528, "grad_norm": 1.8166394233703613, "learning_rate": 6.010928961748635e-06, "loss": 0.6953, "step": 331 }, { "epoch": 0.009078976154014439, "grad_norm": 1.6885511875152588, "learning_rate": 6.029143897996358e-06, "loss": 0.6096, "step": 332 }, { "epoch": 0.00910632246773135, "grad_norm": 1.7038642168045044, "learning_rate": 6.0473588342440805e-06, "loss": 1.0235, "step": 333 }, { "epoch": 0.009133668781448261, "grad_norm": 2.7220284938812256, "learning_rate": 6.065573770491804e-06, "loss": 0.6153, "step": 334 }, { "epoch": 0.009161015095165172, "grad_norm": 1.7179579734802246, "learning_rate": 6.083788706739527e-06, "loss": 0.677, "step": 335 }, { "epoch": 0.009188361408882083, "grad_norm": 1.5920093059539795, "learning_rate": 6.1020036429872496e-06, "loss": 0.5963, "step": 336 }, { "epoch": 0.009215707722598993, "grad_norm": 1.72212553024292, "learning_rate": 6.120218579234973e-06, "loss": 0.6514, "step": 337 }, { "epoch": 0.009243054036315904, "grad_norm": 2.0440919399261475, "learning_rate": 6.138433515482697e-06, "loss": 0.679, "step": 338 }, { "epoch": 0.009270400350032815, "grad_norm": 1.9958243370056152, "learning_rate": 6.1566484517304195e-06, "loss": 0.5958, "step": 339 }, { "epoch": 0.009297746663749726, "grad_norm": 2.263042688369751, "learning_rate": 6.174863387978143e-06, "loss": 0.648, "step": 340 }, { "epoch": 0.009325092977466637, "grad_norm": 1.7055268287658691, "learning_rate": 6.193078324225865e-06, "loss": 0.6885, "step": 341 }, { "epoch": 0.009352439291183548, "grad_norm": 1.4064819812774658, "learning_rate": 6.211293260473589e-06, "loss": 0.641, "step": 342 }, { "epoch": 0.00937978560490046, "grad_norm": 1.6975812911987305, "learning_rate": 6.229508196721312e-06, "loss": 0.645, "step": 343 }, { "epoch": 0.00940713191861737, "grad_norm": 1.5095428228378296, "learning_rate": 6.2477231329690345e-06, "loss": 0.6057, "step": 344 }, { "epoch": 0.009434478232334282, "grad_norm": 1.9968957901000977, "learning_rate": 6.265938069216758e-06, "loss": 0.6603, "step": 345 }, { "epoch": 0.009461824546051193, "grad_norm": 1.8031156063079834, "learning_rate": 6.284153005464482e-06, "loss": 0.6249, "step": 346 }, { "epoch": 0.009489170859768104, "grad_norm": 1.7426278591156006, "learning_rate": 6.3023679417122044e-06, "loss": 0.6339, "step": 347 }, { "epoch": 0.009516517173485015, "grad_norm": 1.7469505071640015, "learning_rate": 6.320582877959928e-06, "loss": 0.6105, "step": 348 }, { "epoch": 0.009543863487201926, "grad_norm": 1.63390052318573, "learning_rate": 6.338797814207651e-06, "loss": 0.6161, "step": 349 }, { "epoch": 0.009571209800918835, "grad_norm": 2.462480068206787, "learning_rate": 6.3570127504553735e-06, "loss": 0.6327, "step": 350 }, { "epoch": 0.009598556114635746, "grad_norm": 1.5378695726394653, "learning_rate": 6.375227686703097e-06, "loss": 0.7093, "step": 351 }, { "epoch": 0.009625902428352658, "grad_norm": 1.8323297500610352, "learning_rate": 6.393442622950821e-06, "loss": 0.6957, "step": 352 }, { "epoch": 0.009653248742069569, "grad_norm": 1.6953380107879639, "learning_rate": 6.411657559198543e-06, "loss": 0.6837, "step": 353 }, { "epoch": 0.00968059505578648, "grad_norm": 2.036919593811035, "learning_rate": 6.429872495446267e-06, "loss": 0.6759, "step": 354 }, { "epoch": 0.009707941369503391, "grad_norm": 1.5915874242782593, "learning_rate": 6.44808743169399e-06, "loss": 0.6016, "step": 355 }, { "epoch": 0.009735287683220302, "grad_norm": 1.6393182277679443, "learning_rate": 6.466302367941713e-06, "loss": 0.641, "step": 356 }, { "epoch": 0.009762633996937213, "grad_norm": 1.9494231939315796, "learning_rate": 6.484517304189436e-06, "loss": 0.6292, "step": 357 }, { "epoch": 0.009789980310654124, "grad_norm": 2.1368424892425537, "learning_rate": 6.5027322404371584e-06, "loss": 0.6282, "step": 358 }, { "epoch": 0.009817326624371035, "grad_norm": 1.6373475790023804, "learning_rate": 6.520947176684882e-06, "loss": 0.6201, "step": 359 }, { "epoch": 0.009844672938087946, "grad_norm": 1.6859676837921143, "learning_rate": 6.539162112932605e-06, "loss": 0.6154, "step": 360 }, { "epoch": 0.009872019251804857, "grad_norm": 1.6784815788269043, "learning_rate": 6.5573770491803276e-06, "loss": 0.6452, "step": 361 }, { "epoch": 0.009899365565521767, "grad_norm": 1.6525580883026123, "learning_rate": 6.575591985428052e-06, "loss": 0.6668, "step": 362 }, { "epoch": 0.009926711879238678, "grad_norm": 1.4792858362197876, "learning_rate": 6.593806921675775e-06, "loss": 0.6205, "step": 363 }, { "epoch": 0.009954058192955589, "grad_norm": 1.6061629056930542, "learning_rate": 6.6120218579234975e-06, "loss": 0.6131, "step": 364 }, { "epoch": 0.0099814045066725, "grad_norm": 1.6005804538726807, "learning_rate": 6.630236794171221e-06, "loss": 0.588, "step": 365 }, { "epoch": 0.010008750820389411, "grad_norm": 1.558019757270813, "learning_rate": 6.648451730418944e-06, "loss": 0.6345, "step": 366 }, { "epoch": 0.010036097134106322, "grad_norm": 4.506594181060791, "learning_rate": 6.666666666666667e-06, "loss": 1.0056, "step": 367 }, { "epoch": 0.010063443447823233, "grad_norm": 1.774777889251709, "learning_rate": 6.68488160291439e-06, "loss": 0.6297, "step": 368 }, { "epoch": 0.010090789761540144, "grad_norm": 1.914188027381897, "learning_rate": 6.703096539162114e-06, "loss": 0.6543, "step": 369 }, { "epoch": 0.010118136075257056, "grad_norm": 1.6796445846557617, "learning_rate": 6.721311475409837e-06, "loss": 0.6722, "step": 370 }, { "epoch": 0.010145482388973967, "grad_norm": 1.5464433431625366, "learning_rate": 6.73952641165756e-06, "loss": 1.0149, "step": 371 }, { "epoch": 0.010172828702690878, "grad_norm": 1.5594699382781982, "learning_rate": 6.757741347905283e-06, "loss": 0.6607, "step": 372 }, { "epoch": 0.010200175016407789, "grad_norm": 1.6294519901275635, "learning_rate": 6.775956284153006e-06, "loss": 1.0356, "step": 373 }, { "epoch": 0.0102275213301247, "grad_norm": 1.8038982152938843, "learning_rate": 6.794171220400729e-06, "loss": 0.9852, "step": 374 }, { "epoch": 0.01025486764384161, "grad_norm": 2.120496988296509, "learning_rate": 6.812386156648453e-06, "loss": 0.6517, "step": 375 }, { "epoch": 0.01028221395755852, "grad_norm": 1.902350902557373, "learning_rate": 6.830601092896175e-06, "loss": 0.6225, "step": 376 }, { "epoch": 0.010309560271275432, "grad_norm": 1.5392216444015503, "learning_rate": 6.848816029143899e-06, "loss": 0.6207, "step": 377 }, { "epoch": 0.010336906584992343, "grad_norm": 1.7870413064956665, "learning_rate": 6.8670309653916215e-06, "loss": 0.5912, "step": 378 }, { "epoch": 0.010364252898709254, "grad_norm": 1.710876226425171, "learning_rate": 6.885245901639345e-06, "loss": 1.0153, "step": 379 }, { "epoch": 0.010391599212426165, "grad_norm": 1.4367491006851196, "learning_rate": 6.903460837887068e-06, "loss": 0.6247, "step": 380 }, { "epoch": 0.010418945526143076, "grad_norm": 1.677118182182312, "learning_rate": 6.921675774134791e-06, "loss": 0.5935, "step": 381 }, { "epoch": 0.010446291839859987, "grad_norm": 1.4745246171951294, "learning_rate": 6.939890710382514e-06, "loss": 0.6161, "step": 382 }, { "epoch": 0.010473638153576898, "grad_norm": 1.750197410583496, "learning_rate": 6.958105646630237e-06, "loss": 1.0124, "step": 383 }, { "epoch": 0.01050098446729381, "grad_norm": 1.8201712369918823, "learning_rate": 6.97632058287796e-06, "loss": 0.6089, "step": 384 }, { "epoch": 0.01052833078101072, "grad_norm": 1.4141162633895874, "learning_rate": 6.994535519125684e-06, "loss": 0.6332, "step": 385 }, { "epoch": 0.010555677094727631, "grad_norm": 1.9573034048080444, "learning_rate": 7.012750455373407e-06, "loss": 0.6168, "step": 386 }, { "epoch": 0.010583023408444543, "grad_norm": 1.708060622215271, "learning_rate": 7.03096539162113e-06, "loss": 1.021, "step": 387 }, { "epoch": 0.010610369722161452, "grad_norm": 1.8217182159423828, "learning_rate": 7.049180327868853e-06, "loss": 0.5966, "step": 388 }, { "epoch": 0.010637716035878363, "grad_norm": 1.5981864929199219, "learning_rate": 7.067395264116576e-06, "loss": 0.5733, "step": 389 }, { "epoch": 0.010665062349595274, "grad_norm": 1.6148569583892822, "learning_rate": 7.085610200364299e-06, "loss": 0.5674, "step": 390 }, { "epoch": 0.010692408663312185, "grad_norm": 1.5123127698898315, "learning_rate": 7.103825136612022e-06, "loss": 0.5821, "step": 391 }, { "epoch": 0.010719754977029096, "grad_norm": 2.195260524749756, "learning_rate": 7.122040072859746e-06, "loss": 0.6577, "step": 392 }, { "epoch": 0.010747101290746007, "grad_norm": 1.5933287143707275, "learning_rate": 7.140255009107469e-06, "loss": 0.6191, "step": 393 }, { "epoch": 0.010774447604462918, "grad_norm": 1.4803516864776611, "learning_rate": 7.158469945355192e-06, "loss": 0.557, "step": 394 }, { "epoch": 0.01080179391817983, "grad_norm": 1.6220169067382812, "learning_rate": 7.176684881602915e-06, "loss": 0.6146, "step": 395 }, { "epoch": 0.01082914023189674, "grad_norm": 2.0507473945617676, "learning_rate": 7.194899817850638e-06, "loss": 0.6513, "step": 396 }, { "epoch": 0.010856486545613652, "grad_norm": 1.843501091003418, "learning_rate": 7.213114754098361e-06, "loss": 0.6303, "step": 397 }, { "epoch": 0.010883832859330563, "grad_norm": 1.7213014364242554, "learning_rate": 7.231329690346084e-06, "loss": 0.6171, "step": 398 }, { "epoch": 0.010911179173047474, "grad_norm": 1.7993099689483643, "learning_rate": 7.249544626593807e-06, "loss": 0.6099, "step": 399 }, { "epoch": 0.010938525486764383, "grad_norm": 1.564206600189209, "learning_rate": 7.267759562841531e-06, "loss": 0.573, "step": 400 }, { "epoch": 0.010965871800481294, "grad_norm": 1.7896406650543213, "learning_rate": 7.285974499089254e-06, "loss": 0.6144, "step": 401 }, { "epoch": 0.010993218114198205, "grad_norm": 4.106945991516113, "learning_rate": 7.304189435336977e-06, "loss": 0.6144, "step": 402 }, { "epoch": 0.011020564427915117, "grad_norm": 1.904344916343689, "learning_rate": 7.3224043715847e-06, "loss": 0.6306, "step": 403 }, { "epoch": 0.011047910741632028, "grad_norm": 1.8220164775848389, "learning_rate": 7.340619307832423e-06, "loss": 0.6165, "step": 404 }, { "epoch": 0.011075257055348939, "grad_norm": 1.9944884777069092, "learning_rate": 7.358834244080146e-06, "loss": 0.6295, "step": 405 }, { "epoch": 0.01110260336906585, "grad_norm": 1.63983952999115, "learning_rate": 7.3770491803278695e-06, "loss": 1.0066, "step": 406 }, { "epoch": 0.011129949682782761, "grad_norm": 2.317023754119873, "learning_rate": 7.395264116575592e-06, "loss": 0.6685, "step": 407 }, { "epoch": 0.011157295996499672, "grad_norm": 1.9672925472259521, "learning_rate": 7.413479052823316e-06, "loss": 0.6283, "step": 408 }, { "epoch": 0.011184642310216583, "grad_norm": 1.8350239992141724, "learning_rate": 7.4316939890710394e-06, "loss": 0.6457, "step": 409 }, { "epoch": 0.011211988623933494, "grad_norm": 1.7544620037078857, "learning_rate": 7.449908925318762e-06, "loss": 0.6237, "step": 410 }, { "epoch": 0.011239334937650405, "grad_norm": 1.7267030477523804, "learning_rate": 7.468123861566485e-06, "loss": 0.5932, "step": 411 }, { "epoch": 0.011266681251367316, "grad_norm": 1.5673305988311768, "learning_rate": 7.4863387978142085e-06, "loss": 1.036, "step": 412 }, { "epoch": 0.011294027565084226, "grad_norm": 1.6378867626190186, "learning_rate": 7.504553734061931e-06, "loss": 0.617, "step": 413 }, { "epoch": 0.011321373878801137, "grad_norm": 1.951187252998352, "learning_rate": 7.522768670309654e-06, "loss": 0.6254, "step": 414 }, { "epoch": 0.011348720192518048, "grad_norm": 1.4920920133590698, "learning_rate": 7.540983606557377e-06, "loss": 0.9953, "step": 415 }, { "epoch": 0.011376066506234959, "grad_norm": 2.197376012802124, "learning_rate": 7.559198542805101e-06, "loss": 0.6857, "step": 416 }, { "epoch": 0.01140341281995187, "grad_norm": 2.1594367027282715, "learning_rate": 7.577413479052824e-06, "loss": 0.6426, "step": 417 }, { "epoch": 0.011430759133668781, "grad_norm": 2.802342414855957, "learning_rate": 7.595628415300547e-06, "loss": 1.0479, "step": 418 }, { "epoch": 0.011458105447385692, "grad_norm": 2.210257053375244, "learning_rate": 7.61384335154827e-06, "loss": 0.6194, "step": 419 }, { "epoch": 0.011485451761102604, "grad_norm": 1.7724099159240723, "learning_rate": 7.632058287795994e-06, "loss": 0.6055, "step": 420 }, { "epoch": 0.011512798074819515, "grad_norm": 2.092611312866211, "learning_rate": 7.650273224043716e-06, "loss": 0.5781, "step": 421 }, { "epoch": 0.011540144388536426, "grad_norm": 1.672156572341919, "learning_rate": 7.66848816029144e-06, "loss": 0.6423, "step": 422 }, { "epoch": 0.011567490702253337, "grad_norm": 1.5338194370269775, "learning_rate": 7.686703096539163e-06, "loss": 0.6429, "step": 423 }, { "epoch": 0.011594837015970248, "grad_norm": 10.328383445739746, "learning_rate": 7.704918032786886e-06, "loss": 0.6328, "step": 424 }, { "epoch": 0.011622183329687159, "grad_norm": 1.6583948135375977, "learning_rate": 7.72313296903461e-06, "loss": 0.6206, "step": 425 }, { "epoch": 0.011649529643404068, "grad_norm": 2.018259048461914, "learning_rate": 7.741347905282333e-06, "loss": 0.6227, "step": 426 }, { "epoch": 0.01167687595712098, "grad_norm": 2.0524799823760986, "learning_rate": 7.759562841530056e-06, "loss": 0.6189, "step": 427 }, { "epoch": 0.01170422227083789, "grad_norm": 1.8657621145248413, "learning_rate": 7.77777777777778e-06, "loss": 0.6942, "step": 428 }, { "epoch": 0.011731568584554802, "grad_norm": 2.056906223297119, "learning_rate": 7.795992714025502e-06, "loss": 0.6387, "step": 429 }, { "epoch": 0.011758914898271713, "grad_norm": 1.6708306074142456, "learning_rate": 7.814207650273224e-06, "loss": 0.561, "step": 430 }, { "epoch": 0.011786261211988624, "grad_norm": 1.7703468799591064, "learning_rate": 7.832422586520947e-06, "loss": 0.6432, "step": 431 }, { "epoch": 0.011813607525705535, "grad_norm": 1.4810888767242432, "learning_rate": 7.85063752276867e-06, "loss": 0.5405, "step": 432 }, { "epoch": 0.011840953839422446, "grad_norm": 1.5895440578460693, "learning_rate": 7.868852459016394e-06, "loss": 0.6052, "step": 433 }, { "epoch": 0.011868300153139357, "grad_norm": 1.608431339263916, "learning_rate": 7.887067395264117e-06, "loss": 0.6264, "step": 434 }, { "epoch": 0.011895646466856268, "grad_norm": 1.9037538766860962, "learning_rate": 7.905282331511839e-06, "loss": 0.5997, "step": 435 }, { "epoch": 0.01192299278057318, "grad_norm": 3.3438408374786377, "learning_rate": 7.923497267759564e-06, "loss": 0.6345, "step": 436 }, { "epoch": 0.01195033909429009, "grad_norm": 1.8760719299316406, "learning_rate": 7.941712204007287e-06, "loss": 0.6328, "step": 437 }, { "epoch": 0.011977685408007002, "grad_norm": 1.5740036964416504, "learning_rate": 7.959927140255009e-06, "loss": 0.6161, "step": 438 }, { "epoch": 0.012005031721723911, "grad_norm": 1.942028522491455, "learning_rate": 7.978142076502732e-06, "loss": 0.6242, "step": 439 }, { "epoch": 0.012032378035440822, "grad_norm": 2.1734442710876465, "learning_rate": 7.996357012750456e-06, "loss": 0.6124, "step": 440 }, { "epoch": 0.012059724349157733, "grad_norm": 1.572441577911377, "learning_rate": 8.014571948998179e-06, "loss": 0.6121, "step": 441 }, { "epoch": 0.012087070662874644, "grad_norm": 1.7157375812530518, "learning_rate": 8.032786885245902e-06, "loss": 0.6537, "step": 442 }, { "epoch": 0.012114416976591555, "grad_norm": 2.0629794597625732, "learning_rate": 8.051001821493626e-06, "loss": 0.6311, "step": 443 }, { "epoch": 0.012141763290308466, "grad_norm": 1.8917216062545776, "learning_rate": 8.069216757741349e-06, "loss": 0.9799, "step": 444 }, { "epoch": 0.012169109604025377, "grad_norm": 2.519930601119995, "learning_rate": 8.087431693989072e-06, "loss": 1.027, "step": 445 }, { "epoch": 0.012196455917742289, "grad_norm": 3.4671809673309326, "learning_rate": 8.105646630236796e-06, "loss": 0.5605, "step": 446 }, { "epoch": 0.0122238022314592, "grad_norm": 1.5755757093429565, "learning_rate": 8.123861566484517e-06, "loss": 0.6017, "step": 447 }, { "epoch": 0.01225114854517611, "grad_norm": 1.7497553825378418, "learning_rate": 8.14207650273224e-06, "loss": 0.6094, "step": 448 }, { "epoch": 0.012278494858893022, "grad_norm": 1.9011578559875488, "learning_rate": 8.160291438979966e-06, "loss": 0.591, "step": 449 }, { "epoch": 0.012305841172609933, "grad_norm": 2.392381429672241, "learning_rate": 8.178506375227687e-06, "loss": 0.7369, "step": 450 }, { "epoch": 0.012333187486326842, "grad_norm": 1.849198818206787, "learning_rate": 8.19672131147541e-06, "loss": 0.6188, "step": 451 }, { "epoch": 0.012360533800043753, "grad_norm": 2.553251028060913, "learning_rate": 8.214936247723134e-06, "loss": 0.6394, "step": 452 }, { "epoch": 0.012387880113760665, "grad_norm": 1.8445122241973877, "learning_rate": 8.233151183970857e-06, "loss": 0.6244, "step": 453 }, { "epoch": 0.012415226427477576, "grad_norm": 1.6925734281539917, "learning_rate": 8.25136612021858e-06, "loss": 0.5791, "step": 454 }, { "epoch": 0.012442572741194487, "grad_norm": 1.6840429306030273, "learning_rate": 8.269581056466302e-06, "loss": 0.6414, "step": 455 }, { "epoch": 0.012469919054911398, "grad_norm": 2.078303098678589, "learning_rate": 8.287795992714025e-06, "loss": 0.7403, "step": 456 }, { "epoch": 0.012497265368628309, "grad_norm": 1.6699254512786865, "learning_rate": 8.30601092896175e-06, "loss": 0.5776, "step": 457 }, { "epoch": 0.01252461168234522, "grad_norm": 1.798624038696289, "learning_rate": 8.324225865209472e-06, "loss": 0.6022, "step": 458 }, { "epoch": 0.012551957996062131, "grad_norm": 2.1609156131744385, "learning_rate": 8.342440801457195e-06, "loss": 0.6253, "step": 459 }, { "epoch": 0.012579304309779042, "grad_norm": 1.9912021160125732, "learning_rate": 8.360655737704919e-06, "loss": 0.6044, "step": 460 }, { "epoch": 0.012606650623495953, "grad_norm": 3.7213034629821777, "learning_rate": 8.378870673952642e-06, "loss": 0.956, "step": 461 }, { "epoch": 0.012633996937212864, "grad_norm": 1.5884884595870972, "learning_rate": 8.397085610200365e-06, "loss": 0.6198, "step": 462 }, { "epoch": 0.012661343250929775, "grad_norm": 1.8302714824676514, "learning_rate": 8.415300546448089e-06, "loss": 0.6445, "step": 463 }, { "epoch": 0.012688689564646685, "grad_norm": 1.9601304531097412, "learning_rate": 8.43351548269581e-06, "loss": 0.6451, "step": 464 }, { "epoch": 0.012716035878363596, "grad_norm": 1.9069123268127441, "learning_rate": 8.451730418943535e-06, "loss": 0.6062, "step": 465 }, { "epoch": 0.012743382192080507, "grad_norm": 1.744845986366272, "learning_rate": 8.469945355191259e-06, "loss": 0.669, "step": 466 }, { "epoch": 0.012770728505797418, "grad_norm": 1.5203911066055298, "learning_rate": 8.48816029143898e-06, "loss": 0.5958, "step": 467 }, { "epoch": 0.01279807481951433, "grad_norm": 1.9661548137664795, "learning_rate": 8.506375227686704e-06, "loss": 0.6686, "step": 468 }, { "epoch": 0.01282542113323124, "grad_norm": 1.6890796422958374, "learning_rate": 8.524590163934427e-06, "loss": 0.6505, "step": 469 }, { "epoch": 0.012852767446948151, "grad_norm": 1.827732801437378, "learning_rate": 8.54280510018215e-06, "loss": 0.5826, "step": 470 }, { "epoch": 0.012880113760665063, "grad_norm": 1.8799326419830322, "learning_rate": 8.561020036429874e-06, "loss": 0.6135, "step": 471 }, { "epoch": 0.012907460074381974, "grad_norm": 1.5444114208221436, "learning_rate": 8.579234972677595e-06, "loss": 0.6358, "step": 472 }, { "epoch": 0.012934806388098885, "grad_norm": 1.8621654510498047, "learning_rate": 8.59744990892532e-06, "loss": 0.6168, "step": 473 }, { "epoch": 0.012962152701815796, "grad_norm": 1.9208109378814697, "learning_rate": 8.615664845173044e-06, "loss": 0.57, "step": 474 }, { "epoch": 0.012989499015532707, "grad_norm": 1.7068768739700317, "learning_rate": 8.633879781420765e-06, "loss": 0.6145, "step": 475 }, { "epoch": 0.013016845329249618, "grad_norm": 1.6348206996917725, "learning_rate": 8.652094717668488e-06, "loss": 0.5905, "step": 476 }, { "epoch": 0.013044191642966527, "grad_norm": 2.7936036586761475, "learning_rate": 8.670309653916212e-06, "loss": 0.7008, "step": 477 }, { "epoch": 0.013071537956683438, "grad_norm": 2.120164155960083, "learning_rate": 8.688524590163935e-06, "loss": 0.6252, "step": 478 }, { "epoch": 0.01309888427040035, "grad_norm": 2.245936393737793, "learning_rate": 8.706739526411658e-06, "loss": 0.6273, "step": 479 }, { "epoch": 0.01312623058411726, "grad_norm": 2.922571897506714, "learning_rate": 8.724954462659382e-06, "loss": 0.7303, "step": 480 }, { "epoch": 0.013153576897834172, "grad_norm": 2.046651601791382, "learning_rate": 8.743169398907103e-06, "loss": 0.6086, "step": 481 }, { "epoch": 0.013180923211551083, "grad_norm": 2.052096366882324, "learning_rate": 8.761384335154828e-06, "loss": 0.691, "step": 482 }, { "epoch": 0.013208269525267994, "grad_norm": 1.7093843221664429, "learning_rate": 8.779599271402552e-06, "loss": 0.6184, "step": 483 }, { "epoch": 0.013235615838984905, "grad_norm": 3.2971956729888916, "learning_rate": 8.797814207650273e-06, "loss": 0.6847, "step": 484 }, { "epoch": 0.013262962152701816, "grad_norm": 2.046957015991211, "learning_rate": 8.816029143897997e-06, "loss": 0.5756, "step": 485 }, { "epoch": 0.013290308466418727, "grad_norm": 1.7148982286453247, "learning_rate": 8.83424408014572e-06, "loss": 0.6314, "step": 486 }, { "epoch": 0.013317654780135638, "grad_norm": 2.0939295291900635, "learning_rate": 8.852459016393443e-06, "loss": 0.5858, "step": 487 }, { "epoch": 0.01334500109385255, "grad_norm": 1.9153310060501099, "learning_rate": 8.870673952641167e-06, "loss": 0.5763, "step": 488 }, { "epoch": 0.013372347407569459, "grad_norm": 2.844665765762329, "learning_rate": 8.888888888888888e-06, "loss": 0.5869, "step": 489 }, { "epoch": 0.01339969372128637, "grad_norm": 1.7968931198120117, "learning_rate": 8.907103825136613e-06, "loss": 0.6122, "step": 490 }, { "epoch": 0.013427040035003281, "grad_norm": 1.693071722984314, "learning_rate": 8.925318761384337e-06, "loss": 0.6005, "step": 491 }, { "epoch": 0.013454386348720192, "grad_norm": 1.883273959159851, "learning_rate": 8.943533697632058e-06, "loss": 0.6247, "step": 492 }, { "epoch": 0.013481732662437103, "grad_norm": 2.101813793182373, "learning_rate": 8.961748633879782e-06, "loss": 0.5211, "step": 493 }, { "epoch": 0.013509078976154014, "grad_norm": 2.1118907928466797, "learning_rate": 8.979963570127505e-06, "loss": 0.6006, "step": 494 }, { "epoch": 0.013536425289870925, "grad_norm": 1.8639864921569824, "learning_rate": 8.998178506375228e-06, "loss": 0.6156, "step": 495 }, { "epoch": 0.013563771603587836, "grad_norm": 1.668653964996338, "learning_rate": 9.016393442622952e-06, "loss": 0.6029, "step": 496 }, { "epoch": 0.013591117917304748, "grad_norm": 1.9176360368728638, "learning_rate": 9.034608378870675e-06, "loss": 0.5983, "step": 497 }, { "epoch": 0.013618464231021659, "grad_norm": 2.2462210655212402, "learning_rate": 9.052823315118398e-06, "loss": 0.7046, "step": 498 }, { "epoch": 0.01364581054473857, "grad_norm": 2.362370014190674, "learning_rate": 9.071038251366122e-06, "loss": 0.6172, "step": 499 }, { "epoch": 0.01367315685845548, "grad_norm": 1.8286558389663696, "learning_rate": 9.089253187613845e-06, "loss": 0.6358, "step": 500 }, { "epoch": 0.013700503172172392, "grad_norm": 2.8754656314849854, "learning_rate": 9.107468123861566e-06, "loss": 0.958, "step": 501 }, { "epoch": 0.013727849485889301, "grad_norm": 1.7401734590530396, "learning_rate": 9.12568306010929e-06, "loss": 0.5884, "step": 502 }, { "epoch": 0.013755195799606212, "grad_norm": 1.745208501815796, "learning_rate": 9.143897996357015e-06, "loss": 0.6013, "step": 503 }, { "epoch": 0.013782542113323124, "grad_norm": 2.1117706298828125, "learning_rate": 9.162112932604736e-06, "loss": 0.6309, "step": 504 }, { "epoch": 0.013809888427040035, "grad_norm": 1.9086681604385376, "learning_rate": 9.18032786885246e-06, "loss": 0.6175, "step": 505 }, { "epoch": 0.013837234740756946, "grad_norm": 1.7195324897766113, "learning_rate": 9.198542805100183e-06, "loss": 1.0001, "step": 506 }, { "epoch": 0.013864581054473857, "grad_norm": 2.0224761962890625, "learning_rate": 9.216757741347906e-06, "loss": 0.6235, "step": 507 }, { "epoch": 0.013891927368190768, "grad_norm": 2.107849597930908, "learning_rate": 9.23497267759563e-06, "loss": 0.6735, "step": 508 }, { "epoch": 0.013919273681907679, "grad_norm": 1.6668680906295776, "learning_rate": 9.253187613843351e-06, "loss": 0.5896, "step": 509 }, { "epoch": 0.01394661999562459, "grad_norm": 2.399165391921997, "learning_rate": 9.271402550091075e-06, "loss": 0.7606, "step": 510 }, { "epoch": 0.013973966309341501, "grad_norm": 1.5643649101257324, "learning_rate": 9.2896174863388e-06, "loss": 0.6452, "step": 511 }, { "epoch": 0.014001312623058412, "grad_norm": 1.8921451568603516, "learning_rate": 9.307832422586521e-06, "loss": 0.6241, "step": 512 }, { "epoch": 0.014028658936775323, "grad_norm": 2.006575345993042, "learning_rate": 9.326047358834245e-06, "loss": 0.6391, "step": 513 }, { "epoch": 0.014056005250492234, "grad_norm": 1.9596883058547974, "learning_rate": 9.344262295081968e-06, "loss": 0.5738, "step": 514 }, { "epoch": 0.014083351564209144, "grad_norm": 2.121260166168213, "learning_rate": 9.362477231329691e-06, "loss": 0.6402, "step": 515 }, { "epoch": 0.014110697877926055, "grad_norm": 1.5418176651000977, "learning_rate": 9.380692167577415e-06, "loss": 0.6102, "step": 516 }, { "epoch": 0.014138044191642966, "grad_norm": 1.6814466714859009, "learning_rate": 9.398907103825138e-06, "loss": 0.5196, "step": 517 }, { "epoch": 0.014165390505359877, "grad_norm": 1.6567835807800293, "learning_rate": 9.41712204007286e-06, "loss": 0.6436, "step": 518 }, { "epoch": 0.014192736819076788, "grad_norm": 1.7874743938446045, "learning_rate": 9.435336976320585e-06, "loss": 0.6294, "step": 519 }, { "epoch": 0.0142200831327937, "grad_norm": 1.9864863157272339, "learning_rate": 9.453551912568308e-06, "loss": 0.5989, "step": 520 }, { "epoch": 0.01424742944651061, "grad_norm": 1.65730881690979, "learning_rate": 9.47176684881603e-06, "loss": 0.6232, "step": 521 }, { "epoch": 0.014274775760227522, "grad_norm": 1.677111029624939, "learning_rate": 9.489981785063753e-06, "loss": 0.6185, "step": 522 }, { "epoch": 0.014302122073944433, "grad_norm": 1.7824264764785767, "learning_rate": 9.508196721311476e-06, "loss": 0.6007, "step": 523 }, { "epoch": 0.014329468387661344, "grad_norm": 1.7367056608200073, "learning_rate": 9.5264116575592e-06, "loss": 0.5747, "step": 524 }, { "epoch": 0.014356814701378255, "grad_norm": 1.6580387353897095, "learning_rate": 9.544626593806923e-06, "loss": 0.6318, "step": 525 }, { "epoch": 0.014384161015095166, "grad_norm": 2.0029189586639404, "learning_rate": 9.562841530054644e-06, "loss": 0.6388, "step": 526 }, { "epoch": 0.014411507328812075, "grad_norm": 1.9682220220565796, "learning_rate": 9.581056466302368e-06, "loss": 0.6352, "step": 527 }, { "epoch": 0.014438853642528986, "grad_norm": 1.8196934461593628, "learning_rate": 9.599271402550093e-06, "loss": 0.5964, "step": 528 }, { "epoch": 0.014466199956245897, "grad_norm": 1.915938377380371, "learning_rate": 9.617486338797814e-06, "loss": 0.5973, "step": 529 }, { "epoch": 0.014493546269962809, "grad_norm": 1.687279462814331, "learning_rate": 9.635701275045538e-06, "loss": 0.6245, "step": 530 }, { "epoch": 0.01452089258367972, "grad_norm": 1.8901585340499878, "learning_rate": 9.653916211293261e-06, "loss": 0.6626, "step": 531 }, { "epoch": 0.01454823889739663, "grad_norm": 1.8037502765655518, "learning_rate": 9.672131147540984e-06, "loss": 0.5876, "step": 532 }, { "epoch": 0.014575585211113542, "grad_norm": 1.809926986694336, "learning_rate": 9.690346083788708e-06, "loss": 0.6053, "step": 533 }, { "epoch": 0.014602931524830453, "grad_norm": 1.4816447496414185, "learning_rate": 9.708561020036431e-06, "loss": 0.5213, "step": 534 }, { "epoch": 0.014630277838547364, "grad_norm": 1.8421467542648315, "learning_rate": 9.726775956284153e-06, "loss": 0.5931, "step": 535 }, { "epoch": 0.014657624152264275, "grad_norm": 1.636500358581543, "learning_rate": 9.744990892531878e-06, "loss": 0.5944, "step": 536 }, { "epoch": 0.014684970465981186, "grad_norm": 2.213024377822876, "learning_rate": 9.763205828779601e-06, "loss": 0.5957, "step": 537 }, { "epoch": 0.014712316779698097, "grad_norm": 2.0084805488586426, "learning_rate": 9.781420765027323e-06, "loss": 0.6503, "step": 538 }, { "epoch": 0.014739663093415008, "grad_norm": 1.8866045475006104, "learning_rate": 9.799635701275046e-06, "loss": 0.6197, "step": 539 }, { "epoch": 0.014767009407131918, "grad_norm": 2.0386135578155518, "learning_rate": 9.81785063752277e-06, "loss": 0.6129, "step": 540 }, { "epoch": 0.014794355720848829, "grad_norm": 1.6639331579208374, "learning_rate": 9.836065573770493e-06, "loss": 0.6102, "step": 541 }, { "epoch": 0.01482170203456574, "grad_norm": 2.1899948120117188, "learning_rate": 9.854280510018216e-06, "loss": 0.6786, "step": 542 }, { "epoch": 0.014849048348282651, "grad_norm": 1.9370933771133423, "learning_rate": 9.872495446265938e-06, "loss": 0.7069, "step": 543 }, { "epoch": 0.014876394661999562, "grad_norm": 1.7391233444213867, "learning_rate": 9.890710382513663e-06, "loss": 0.6041, "step": 544 }, { "epoch": 0.014903740975716473, "grad_norm": 1.6887811422348022, "learning_rate": 9.908925318761386e-06, "loss": 0.6204, "step": 545 }, { "epoch": 0.014931087289433384, "grad_norm": 2.2312064170837402, "learning_rate": 9.927140255009108e-06, "loss": 0.6989, "step": 546 }, { "epoch": 0.014958433603150295, "grad_norm": 2.4457848072052, "learning_rate": 9.945355191256831e-06, "loss": 0.7207, "step": 547 }, { "epoch": 0.014985779916867207, "grad_norm": 1.7579666376113892, "learning_rate": 9.963570127504554e-06, "loss": 0.6277, "step": 548 }, { "epoch": 0.015013126230584118, "grad_norm": 2.0016815662384033, "learning_rate": 9.981785063752277e-06, "loss": 0.5723, "step": 549 }, { "epoch": 0.015040472544301029, "grad_norm": 2.2452337741851807, "learning_rate": 1e-05, "loss": 0.643, "step": 550 }, { "epoch": 0.01506781885801794, "grad_norm": 1.4889017343521118, "learning_rate": 1.0018214936247722e-05, "loss": 0.6024, "step": 551 }, { "epoch": 0.015095165171734851, "grad_norm": 1.8759735822677612, "learning_rate": 1.0036429872495447e-05, "loss": 0.5957, "step": 552 }, { "epoch": 0.01512251148545176, "grad_norm": 2.1522603034973145, "learning_rate": 1.005464480874317e-05, "loss": 0.6296, "step": 553 }, { "epoch": 0.015149857799168671, "grad_norm": 1.8790271282196045, "learning_rate": 1.0072859744990892e-05, "loss": 0.5934, "step": 554 }, { "epoch": 0.015177204112885583, "grad_norm": 2.569664478302002, "learning_rate": 1.0091074681238617e-05, "loss": 0.7336, "step": 555 }, { "epoch": 0.015204550426602494, "grad_norm": 4.4077324867248535, "learning_rate": 1.0109289617486339e-05, "loss": 0.633, "step": 556 }, { "epoch": 0.015231896740319405, "grad_norm": 1.7115191221237183, "learning_rate": 1.0127504553734062e-05, "loss": 0.6118, "step": 557 }, { "epoch": 0.015259243054036316, "grad_norm": 1.8829632997512817, "learning_rate": 1.0145719489981787e-05, "loss": 0.6195, "step": 558 }, { "epoch": 0.015286589367753227, "grad_norm": 1.889728307723999, "learning_rate": 1.0163934426229509e-05, "loss": 0.5982, "step": 559 }, { "epoch": 0.015313935681470138, "grad_norm": 2.2713723182678223, "learning_rate": 1.0182149362477232e-05, "loss": 0.7426, "step": 560 }, { "epoch": 0.015341281995187049, "grad_norm": 2.5601158142089844, "learning_rate": 1.0200364298724956e-05, "loss": 1.0047, "step": 561 }, { "epoch": 0.01536862830890396, "grad_norm": 1.8589191436767578, "learning_rate": 1.0218579234972679e-05, "loss": 0.6151, "step": 562 }, { "epoch": 0.015395974622620871, "grad_norm": 1.9352333545684814, "learning_rate": 1.02367941712204e-05, "loss": 0.6278, "step": 563 }, { "epoch": 0.015423320936337782, "grad_norm": 2.530712604522705, "learning_rate": 1.0255009107468126e-05, "loss": 0.572, "step": 564 }, { "epoch": 0.015450667250054693, "grad_norm": 1.7789582014083862, "learning_rate": 1.0273224043715849e-05, "loss": 0.6036, "step": 565 }, { "epoch": 0.015478013563771603, "grad_norm": 1.8528541326522827, "learning_rate": 1.029143897996357e-05, "loss": 0.5904, "step": 566 }, { "epoch": 0.015505359877488514, "grad_norm": 1.7233541011810303, "learning_rate": 1.0309653916211296e-05, "loss": 0.621, "step": 567 }, { "epoch": 0.015532706191205425, "grad_norm": 1.8580524921417236, "learning_rate": 1.0327868852459017e-05, "loss": 0.6062, "step": 568 }, { "epoch": 0.015560052504922336, "grad_norm": 2.710505247116089, "learning_rate": 1.034608378870674e-05, "loss": 0.5906, "step": 569 }, { "epoch": 0.015587398818639247, "grad_norm": 3.523505210876465, "learning_rate": 1.0364298724954462e-05, "loss": 0.9965, "step": 570 }, { "epoch": 0.015614745132356158, "grad_norm": 2.1988182067871094, "learning_rate": 1.0382513661202187e-05, "loss": 0.6123, "step": 571 }, { "epoch": 0.015642091446073068, "grad_norm": 2.1260428428649902, "learning_rate": 1.0400728597449909e-05, "loss": 0.9966, "step": 572 }, { "epoch": 0.01566943775978998, "grad_norm": 2.2015469074249268, "learning_rate": 1.0418943533697632e-05, "loss": 0.5785, "step": 573 }, { "epoch": 0.01569678407350689, "grad_norm": 2.4020638465881348, "learning_rate": 1.0437158469945357e-05, "loss": 0.6282, "step": 574 }, { "epoch": 0.0157241303872238, "grad_norm": 2.3281643390655518, "learning_rate": 1.0455373406193079e-05, "loss": 0.7323, "step": 575 }, { "epoch": 0.015751476700940712, "grad_norm": 1.9272328615188599, "learning_rate": 1.0473588342440802e-05, "loss": 0.6197, "step": 576 }, { "epoch": 0.015778823014657623, "grad_norm": 2.6680166721343994, "learning_rate": 1.0491803278688525e-05, "loss": 0.596, "step": 577 }, { "epoch": 0.015806169328374534, "grad_norm": 4.3603644371032715, "learning_rate": 1.0510018214936249e-05, "loss": 0.9443, "step": 578 }, { "epoch": 0.015833515642091445, "grad_norm": 2.234391450881958, "learning_rate": 1.052823315118397e-05, "loss": 0.4814, "step": 579 }, { "epoch": 0.015860861955808356, "grad_norm": 2.1491849422454834, "learning_rate": 1.0546448087431695e-05, "loss": 0.6013, "step": 580 }, { "epoch": 0.015888208269525268, "grad_norm": 2.0402190685272217, "learning_rate": 1.0564663023679417e-05, "loss": 0.6161, "step": 581 }, { "epoch": 0.01591555458324218, "grad_norm": 1.1923948526382446, "learning_rate": 1.058287795992714e-05, "loss": 0.498, "step": 582 }, { "epoch": 0.01594290089695909, "grad_norm": 1.742370843887329, "learning_rate": 1.0601092896174865e-05, "loss": 0.6146, "step": 583 }, { "epoch": 0.015970247210676, "grad_norm": 1.859079122543335, "learning_rate": 1.0619307832422587e-05, "loss": 0.5975, "step": 584 }, { "epoch": 0.015997593524392912, "grad_norm": 2.262883186340332, "learning_rate": 1.063752276867031e-05, "loss": 0.5835, "step": 585 }, { "epoch": 0.016024939838109823, "grad_norm": 1.687092900276184, "learning_rate": 1.0655737704918034e-05, "loss": 0.5865, "step": 586 }, { "epoch": 0.016052286151826734, "grad_norm": 2.0794131755828857, "learning_rate": 1.0673952641165757e-05, "loss": 0.6111, "step": 587 }, { "epoch": 0.016079632465543645, "grad_norm": 2.3732805252075195, "learning_rate": 1.0692167577413479e-05, "loss": 0.659, "step": 588 }, { "epoch": 0.016106978779260556, "grad_norm": 3.153456449508667, "learning_rate": 1.0710382513661204e-05, "loss": 0.5765, "step": 589 }, { "epoch": 0.016134325092977467, "grad_norm": 1.7986171245574951, "learning_rate": 1.0728597449908927e-05, "loss": 0.598, "step": 590 }, { "epoch": 0.01616167140669438, "grad_norm": 2.061277151107788, "learning_rate": 1.0746812386156649e-05, "loss": 0.5749, "step": 591 }, { "epoch": 0.01618901772041129, "grad_norm": 2.096442222595215, "learning_rate": 1.0765027322404374e-05, "loss": 0.5956, "step": 592 }, { "epoch": 0.0162163640341282, "grad_norm": 2.9815292358398438, "learning_rate": 1.0783242258652095e-05, "loss": 0.9954, "step": 593 }, { "epoch": 0.016243710347845112, "grad_norm": 2.57376766204834, "learning_rate": 1.0801457194899819e-05, "loss": 0.6309, "step": 594 }, { "epoch": 0.016271056661562023, "grad_norm": 2.9782330989837646, "learning_rate": 1.0819672131147544e-05, "loss": 0.5806, "step": 595 }, { "epoch": 0.016298402975278934, "grad_norm": 2.0297813415527344, "learning_rate": 1.0837887067395265e-05, "loss": 0.9321, "step": 596 }, { "epoch": 0.01632574928899584, "grad_norm": 1.7531825304031372, "learning_rate": 1.0856102003642987e-05, "loss": 0.6143, "step": 597 }, { "epoch": 0.016353095602712753, "grad_norm": 2.1456239223480225, "learning_rate": 1.0874316939890712e-05, "loss": 0.6058, "step": 598 }, { "epoch": 0.016380441916429664, "grad_norm": 2.0343434810638428, "learning_rate": 1.0892531876138435e-05, "loss": 0.5848, "step": 599 }, { "epoch": 0.016407788230146575, "grad_norm": 1.9890000820159912, "learning_rate": 1.0910746812386157e-05, "loss": 0.6997, "step": 600 }, { "epoch": 0.016435134543863486, "grad_norm": 1.6179338693618774, "learning_rate": 1.0928961748633882e-05, "loss": 0.581, "step": 601 }, { "epoch": 0.016462480857580397, "grad_norm": 1.899665117263794, "learning_rate": 1.0947176684881603e-05, "loss": 0.6103, "step": 602 }, { "epoch": 0.016489827171297308, "grad_norm": 2.693854331970215, "learning_rate": 1.0965391621129327e-05, "loss": 0.5953, "step": 603 }, { "epoch": 0.01651717348501422, "grad_norm": 1.8507592678070068, "learning_rate": 1.0983606557377052e-05, "loss": 0.6082, "step": 604 }, { "epoch": 0.01654451979873113, "grad_norm": 1.7392771244049072, "learning_rate": 1.1001821493624773e-05, "loss": 0.5902, "step": 605 }, { "epoch": 0.01657186611244804, "grad_norm": 5.072736740112305, "learning_rate": 1.1020036429872497e-05, "loss": 1.0491, "step": 606 }, { "epoch": 0.016599212426164953, "grad_norm": 1.6451514959335327, "learning_rate": 1.1038251366120218e-05, "loss": 0.5647, "step": 607 }, { "epoch": 0.016626558739881864, "grad_norm": 2.132192611694336, "learning_rate": 1.1056466302367943e-05, "loss": 0.952, "step": 608 }, { "epoch": 0.016653905053598775, "grad_norm": 1.8370542526245117, "learning_rate": 1.1074681238615665e-05, "loss": 0.6037, "step": 609 }, { "epoch": 0.016681251367315686, "grad_norm": 2.6385369300842285, "learning_rate": 1.1092896174863388e-05, "loss": 0.9929, "step": 610 }, { "epoch": 0.016708597681032597, "grad_norm": 2.051610231399536, "learning_rate": 1.1111111111111113e-05, "loss": 0.5836, "step": 611 }, { "epoch": 0.016735943994749508, "grad_norm": 2.5446372032165527, "learning_rate": 1.1129326047358835e-05, "loss": 0.5989, "step": 612 }, { "epoch": 0.01676329030846642, "grad_norm": 2.6966543197631836, "learning_rate": 1.1147540983606557e-05, "loss": 0.599, "step": 613 }, { "epoch": 0.01679063662218333, "grad_norm": 2.0735151767730713, "learning_rate": 1.1165755919854282e-05, "loss": 0.6039, "step": 614 }, { "epoch": 0.01681798293590024, "grad_norm": 1.6160295009613037, "learning_rate": 1.1183970856102005e-05, "loss": 0.6004, "step": 615 }, { "epoch": 0.016845329249617153, "grad_norm": 2.234527111053467, "learning_rate": 1.1202185792349727e-05, "loss": 0.5459, "step": 616 }, { "epoch": 0.016872675563334064, "grad_norm": 2.394163131713867, "learning_rate": 1.1220400728597452e-05, "loss": 0.6388, "step": 617 }, { "epoch": 0.016900021877050975, "grad_norm": 4.953054904937744, "learning_rate": 1.1238615664845173e-05, "loss": 0.4871, "step": 618 }, { "epoch": 0.016927368190767886, "grad_norm": 2.3722875118255615, "learning_rate": 1.1256830601092897e-05, "loss": 0.58, "step": 619 }, { "epoch": 0.016954714504484797, "grad_norm": 1.717796802520752, "learning_rate": 1.1275045537340622e-05, "loss": 0.5956, "step": 620 }, { "epoch": 0.016982060818201708, "grad_norm": 1.8528720140457153, "learning_rate": 1.1293260473588343e-05, "loss": 0.6118, "step": 621 }, { "epoch": 0.01700940713191862, "grad_norm": 2.2692184448242188, "learning_rate": 1.1311475409836066e-05, "loss": 0.6016, "step": 622 }, { "epoch": 0.017036753445635527, "grad_norm": 1.7750976085662842, "learning_rate": 1.132969034608379e-05, "loss": 0.6068, "step": 623 }, { "epoch": 0.017064099759352438, "grad_norm": 4.26057767868042, "learning_rate": 1.1347905282331513e-05, "loss": 1.0157, "step": 624 }, { "epoch": 0.01709144607306935, "grad_norm": 1.8367935419082642, "learning_rate": 1.1366120218579235e-05, "loss": 0.6125, "step": 625 }, { "epoch": 0.01711879238678626, "grad_norm": 1.832794189453125, "learning_rate": 1.138433515482696e-05, "loss": 0.5782, "step": 626 }, { "epoch": 0.01714613870050317, "grad_norm": 1.9771637916564941, "learning_rate": 1.1402550091074681e-05, "loss": 0.614, "step": 627 }, { "epoch": 0.017173485014220082, "grad_norm": 1.8115206956863403, "learning_rate": 1.1420765027322405e-05, "loss": 1.0012, "step": 628 }, { "epoch": 0.017200831327936993, "grad_norm": 1.8669912815093994, "learning_rate": 1.143897996357013e-05, "loss": 0.6795, "step": 629 }, { "epoch": 0.017228177641653904, "grad_norm": 1.98540198802948, "learning_rate": 1.1457194899817851e-05, "loss": 0.6171, "step": 630 }, { "epoch": 0.017255523955370815, "grad_norm": 1.9383270740509033, "learning_rate": 1.1475409836065575e-05, "loss": 0.6094, "step": 631 }, { "epoch": 0.017282870269087727, "grad_norm": 3.3543903827667236, "learning_rate": 1.1493624772313298e-05, "loss": 0.9727, "step": 632 }, { "epoch": 0.017310216582804638, "grad_norm": 1.8751909732818604, "learning_rate": 1.1511839708561021e-05, "loss": 0.6115, "step": 633 }, { "epoch": 0.01733756289652155, "grad_norm": 2.677769184112549, "learning_rate": 1.1530054644808743e-05, "loss": 0.9336, "step": 634 }, { "epoch": 0.01736490921023846, "grad_norm": 1.8036013841629028, "learning_rate": 1.1548269581056468e-05, "loss": 0.5805, "step": 635 }, { "epoch": 0.01739225552395537, "grad_norm": 2.48380708694458, "learning_rate": 1.1566484517304191e-05, "loss": 0.645, "step": 636 }, { "epoch": 0.017419601837672282, "grad_norm": 1.6563588380813599, "learning_rate": 1.1584699453551913e-05, "loss": 0.598, "step": 637 }, { "epoch": 0.017446948151389193, "grad_norm": 1.9955216646194458, "learning_rate": 1.1602914389799638e-05, "loss": 0.6578, "step": 638 }, { "epoch": 0.017474294465106104, "grad_norm": 2.0572094917297363, "learning_rate": 1.162112932604736e-05, "loss": 0.5867, "step": 639 }, { "epoch": 0.017501640778823015, "grad_norm": 2.3901772499084473, "learning_rate": 1.1639344262295083e-05, "loss": 0.6267, "step": 640 }, { "epoch": 0.017528987092539926, "grad_norm": 1.962005376815796, "learning_rate": 1.1657559198542808e-05, "loss": 0.6444, "step": 641 }, { "epoch": 0.017556333406256838, "grad_norm": 2.6333959102630615, "learning_rate": 1.167577413479053e-05, "loss": 0.6501, "step": 642 }, { "epoch": 0.01758367971997375, "grad_norm": 2.874480724334717, "learning_rate": 1.1693989071038251e-05, "loss": 0.5908, "step": 643 }, { "epoch": 0.01761102603369066, "grad_norm": 1.9578572511672974, "learning_rate": 1.1712204007285975e-05, "loss": 0.6077, "step": 644 }, { "epoch": 0.01763837234740757, "grad_norm": 2.3199684619903564, "learning_rate": 1.17304189435337e-05, "loss": 0.6054, "step": 645 }, { "epoch": 0.017665718661124482, "grad_norm": 2.018078327178955, "learning_rate": 1.1748633879781421e-05, "loss": 0.5585, "step": 646 }, { "epoch": 0.017693064974841393, "grad_norm": 3.052854537963867, "learning_rate": 1.1766848816029144e-05, "loss": 0.6091, "step": 647 }, { "epoch": 0.0177204112885583, "grad_norm": 2.018533229827881, "learning_rate": 1.1785063752276868e-05, "loss": 0.594, "step": 648 }, { "epoch": 0.017747757602275212, "grad_norm": 2.08070969581604, "learning_rate": 1.1803278688524591e-05, "loss": 0.614, "step": 649 }, { "epoch": 0.017775103915992123, "grad_norm": 1.7265264987945557, "learning_rate": 1.1821493624772313e-05, "loss": 0.5995, "step": 650 }, { "epoch": 0.017802450229709034, "grad_norm": 1.9093682765960693, "learning_rate": 1.1839708561020038e-05, "loss": 0.6307, "step": 651 }, { "epoch": 0.017829796543425945, "grad_norm": 2.5704290866851807, "learning_rate": 1.1857923497267761e-05, "loss": 0.5774, "step": 652 }, { "epoch": 0.017857142857142856, "grad_norm": 1.924376368522644, "learning_rate": 1.1876138433515483e-05, "loss": 0.6249, "step": 653 }, { "epoch": 0.017884489170859767, "grad_norm": 6.6651082038879395, "learning_rate": 1.1894353369763208e-05, "loss": 0.6592, "step": 654 }, { "epoch": 0.01791183548457668, "grad_norm": 1.9564520120620728, "learning_rate": 1.191256830601093e-05, "loss": 0.608, "step": 655 }, { "epoch": 0.01793918179829359, "grad_norm": 2.1003777980804443, "learning_rate": 1.1930783242258653e-05, "loss": 0.6132, "step": 656 }, { "epoch": 0.0179665281120105, "grad_norm": 1.9932011365890503, "learning_rate": 1.1948998178506378e-05, "loss": 0.6464, "step": 657 }, { "epoch": 0.01799387442572741, "grad_norm": 2.0316479206085205, "learning_rate": 1.19672131147541e-05, "loss": 0.6252, "step": 658 }, { "epoch": 0.018021220739444323, "grad_norm": 2.3425216674804688, "learning_rate": 1.1985428051001821e-05, "loss": 0.706, "step": 659 }, { "epoch": 0.018048567053161234, "grad_norm": 6.1874613761901855, "learning_rate": 1.2003642987249546e-05, "loss": 0.6207, "step": 660 }, { "epoch": 0.018075913366878145, "grad_norm": 1.8809956312179565, "learning_rate": 1.202185792349727e-05, "loss": 0.6192, "step": 661 }, { "epoch": 0.018103259680595056, "grad_norm": 1.8651353120803833, "learning_rate": 1.2040072859744991e-05, "loss": 0.6023, "step": 662 }, { "epoch": 0.018130605994311967, "grad_norm": 2.2402663230895996, "learning_rate": 1.2058287795992716e-05, "loss": 0.5982, "step": 663 }, { "epoch": 0.018157952308028878, "grad_norm": 2.524927854537964, "learning_rate": 1.2076502732240438e-05, "loss": 0.5942, "step": 664 }, { "epoch": 0.01818529862174579, "grad_norm": 2.855581521987915, "learning_rate": 1.2094717668488161e-05, "loss": 0.6006, "step": 665 }, { "epoch": 0.0182126449354627, "grad_norm": 2.2706472873687744, "learning_rate": 1.2112932604735886e-05, "loss": 0.6204, "step": 666 }, { "epoch": 0.01823999124917961, "grad_norm": 2.05793833732605, "learning_rate": 1.2131147540983608e-05, "loss": 0.5643, "step": 667 }, { "epoch": 0.018267337562896523, "grad_norm": 1.7888554334640503, "learning_rate": 1.2149362477231331e-05, "loss": 0.6018, "step": 668 }, { "epoch": 0.018294683876613434, "grad_norm": 1.9128172397613525, "learning_rate": 1.2167577413479054e-05, "loss": 0.6183, "step": 669 }, { "epoch": 0.018322030190330345, "grad_norm": 3.017016887664795, "learning_rate": 1.2185792349726778e-05, "loss": 0.6329, "step": 670 }, { "epoch": 0.018349376504047256, "grad_norm": 2.2769956588745117, "learning_rate": 1.2204007285974499e-05, "loss": 0.6003, "step": 671 }, { "epoch": 0.018376722817764167, "grad_norm": 2.1467397212982178, "learning_rate": 1.2222222222222224e-05, "loss": 0.707, "step": 672 }, { "epoch": 0.018404069131481075, "grad_norm": 1.9048237800598145, "learning_rate": 1.2240437158469946e-05, "loss": 0.6044, "step": 673 }, { "epoch": 0.018431415445197986, "grad_norm": 5.28074836730957, "learning_rate": 1.2258652094717669e-05, "loss": 1.0346, "step": 674 }, { "epoch": 0.018458761758914897, "grad_norm": 1.8844783306121826, "learning_rate": 1.2276867030965394e-05, "loss": 0.6016, "step": 675 }, { "epoch": 0.018486108072631808, "grad_norm": 2.261075735092163, "learning_rate": 1.2295081967213116e-05, "loss": 0.6221, "step": 676 }, { "epoch": 0.01851345438634872, "grad_norm": 2.2418477535247803, "learning_rate": 1.2313296903460839e-05, "loss": 0.6233, "step": 677 }, { "epoch": 0.01854080070006563, "grad_norm": 2.342578172683716, "learning_rate": 1.2331511839708562e-05, "loss": 0.6124, "step": 678 }, { "epoch": 0.01856814701378254, "grad_norm": 3.123800277709961, "learning_rate": 1.2349726775956286e-05, "loss": 0.5657, "step": 679 }, { "epoch": 0.018595493327499452, "grad_norm": 1.7398744821548462, "learning_rate": 1.2367941712204007e-05, "loss": 0.608, "step": 680 }, { "epoch": 0.018622839641216363, "grad_norm": 2.8089585304260254, "learning_rate": 1.238615664845173e-05, "loss": 1.0058, "step": 681 }, { "epoch": 0.018650185954933274, "grad_norm": 2.8050825595855713, "learning_rate": 1.2404371584699456e-05, "loss": 0.6255, "step": 682 }, { "epoch": 0.018677532268650186, "grad_norm": 2.653200626373291, "learning_rate": 1.2422586520947177e-05, "loss": 0.6137, "step": 683 }, { "epoch": 0.018704878582367097, "grad_norm": 2.4131319522857666, "learning_rate": 1.24408014571949e-05, "loss": 0.6086, "step": 684 }, { "epoch": 0.018732224896084008, "grad_norm": 2.2902400493621826, "learning_rate": 1.2459016393442624e-05, "loss": 0.5928, "step": 685 }, { "epoch": 0.01875957120980092, "grad_norm": 2.6918628215789795, "learning_rate": 1.2477231329690347e-05, "loss": 0.9759, "step": 686 }, { "epoch": 0.01878691752351783, "grad_norm": 2.096309185028076, "learning_rate": 1.2495446265938069e-05, "loss": 0.5816, "step": 687 }, { "epoch": 0.01881426383723474, "grad_norm": 2.06282377243042, "learning_rate": 1.2513661202185794e-05, "loss": 0.607, "step": 688 }, { "epoch": 0.018841610150951652, "grad_norm": 2.086693525314331, "learning_rate": 1.2531876138433516e-05, "loss": 0.6952, "step": 689 }, { "epoch": 0.018868956464668563, "grad_norm": 1.9155107736587524, "learning_rate": 1.2550091074681239e-05, "loss": 0.6549, "step": 690 }, { "epoch": 0.018896302778385474, "grad_norm": 1.9128506183624268, "learning_rate": 1.2568306010928964e-05, "loss": 0.6225, "step": 691 }, { "epoch": 0.018923649092102385, "grad_norm": 2.0049164295196533, "learning_rate": 1.2586520947176686e-05, "loss": 0.6075, "step": 692 }, { "epoch": 0.018950995405819297, "grad_norm": 2.029005289077759, "learning_rate": 1.2604735883424409e-05, "loss": 0.6044, "step": 693 }, { "epoch": 0.018978341719536208, "grad_norm": 2.1728930473327637, "learning_rate": 1.2622950819672132e-05, "loss": 0.5822, "step": 694 }, { "epoch": 0.01900568803325312, "grad_norm": 1.808133840560913, "learning_rate": 1.2641165755919856e-05, "loss": 0.6086, "step": 695 }, { "epoch": 0.01903303434697003, "grad_norm": 3.413409471511841, "learning_rate": 1.2659380692167577e-05, "loss": 0.9886, "step": 696 }, { "epoch": 0.01906038066068694, "grad_norm": 1.922970175743103, "learning_rate": 1.2677595628415302e-05, "loss": 0.6031, "step": 697 }, { "epoch": 0.019087726974403852, "grad_norm": 2.1758005619049072, "learning_rate": 1.2695810564663025e-05, "loss": 0.5823, "step": 698 }, { "epoch": 0.01911507328812076, "grad_norm": 2.0447263717651367, "learning_rate": 1.2714025500910747e-05, "loss": 0.6211, "step": 699 }, { "epoch": 0.01914241960183767, "grad_norm": 2.2923266887664795, "learning_rate": 1.2732240437158472e-05, "loss": 0.6106, "step": 700 }, { "epoch": 0.019169765915554582, "grad_norm": 2.8853211402893066, "learning_rate": 1.2750455373406194e-05, "loss": 0.6094, "step": 701 }, { "epoch": 0.019197112229271493, "grad_norm": 2.4501802921295166, "learning_rate": 1.2768670309653917e-05, "loss": 0.6199, "step": 702 }, { "epoch": 0.019224458542988404, "grad_norm": 2.6691501140594482, "learning_rate": 1.2786885245901642e-05, "loss": 0.6426, "step": 703 }, { "epoch": 0.019251804856705315, "grad_norm": 2.173083543777466, "learning_rate": 1.2805100182149364e-05, "loss": 0.9819, "step": 704 }, { "epoch": 0.019279151170422226, "grad_norm": 1.7516953945159912, "learning_rate": 1.2823315118397085e-05, "loss": 0.5879, "step": 705 }, { "epoch": 0.019306497484139137, "grad_norm": 2.237264633178711, "learning_rate": 1.284153005464481e-05, "loss": 0.6401, "step": 706 }, { "epoch": 0.01933384379785605, "grad_norm": 3.3905370235443115, "learning_rate": 1.2859744990892534e-05, "loss": 0.6201, "step": 707 }, { "epoch": 0.01936119011157296, "grad_norm": 1.9078978300094604, "learning_rate": 1.2877959927140255e-05, "loss": 0.6162, "step": 708 }, { "epoch": 0.01938853642528987, "grad_norm": 2.295917510986328, "learning_rate": 1.289617486338798e-05, "loss": 0.5813, "step": 709 }, { "epoch": 0.019415882739006782, "grad_norm": 2.733377456665039, "learning_rate": 1.2914389799635702e-05, "loss": 0.5879, "step": 710 }, { "epoch": 0.019443229052723693, "grad_norm": 2.610808849334717, "learning_rate": 1.2932604735883425e-05, "loss": 0.6848, "step": 711 }, { "epoch": 0.019470575366440604, "grad_norm": 3.1215624809265137, "learning_rate": 1.295081967213115e-05, "loss": 0.6186, "step": 712 }, { "epoch": 0.019497921680157515, "grad_norm": 2.3301002979278564, "learning_rate": 1.2969034608378872e-05, "loss": 0.9768, "step": 713 }, { "epoch": 0.019525267993874426, "grad_norm": 2.2028896808624268, "learning_rate": 1.2987249544626595e-05, "loss": 0.5704, "step": 714 }, { "epoch": 0.019552614307591337, "grad_norm": 1.9322856664657593, "learning_rate": 1.3005464480874317e-05, "loss": 0.5966, "step": 715 }, { "epoch": 0.01957996062130825, "grad_norm": 1.8997365236282349, "learning_rate": 1.3023679417122042e-05, "loss": 0.6044, "step": 716 }, { "epoch": 0.01960730693502516, "grad_norm": 2.408196449279785, "learning_rate": 1.3041894353369764e-05, "loss": 0.6107, "step": 717 }, { "epoch": 0.01963465324874207, "grad_norm": 2.0984549522399902, "learning_rate": 1.3060109289617487e-05, "loss": 0.5856, "step": 718 }, { "epoch": 0.01966199956245898, "grad_norm": 2.1004817485809326, "learning_rate": 1.307832422586521e-05, "loss": 0.605, "step": 719 }, { "epoch": 0.019689345876175893, "grad_norm": 2.8340630531311035, "learning_rate": 1.3096539162112933e-05, "loss": 0.6442, "step": 720 }, { "epoch": 0.019716692189892804, "grad_norm": 2.0659008026123047, "learning_rate": 1.3114754098360655e-05, "loss": 0.5854, "step": 721 }, { "epoch": 0.019744038503609715, "grad_norm": 2.253695487976074, "learning_rate": 1.313296903460838e-05, "loss": 0.5914, "step": 722 }, { "epoch": 0.019771384817326626, "grad_norm": 2.8799474239349365, "learning_rate": 1.3151183970856103e-05, "loss": 0.6015, "step": 723 }, { "epoch": 0.019798731131043534, "grad_norm": 1.95985746383667, "learning_rate": 1.3169398907103825e-05, "loss": 0.5838, "step": 724 }, { "epoch": 0.019826077444760445, "grad_norm": 1.984761357307434, "learning_rate": 1.318761384335155e-05, "loss": 0.6135, "step": 725 }, { "epoch": 0.019853423758477356, "grad_norm": 4.415009021759033, "learning_rate": 1.3205828779599272e-05, "loss": 0.6383, "step": 726 }, { "epoch": 0.019880770072194267, "grad_norm": 2.112330198287964, "learning_rate": 1.3224043715846995e-05, "loss": 0.5972, "step": 727 }, { "epoch": 0.019908116385911178, "grad_norm": 2.1511101722717285, "learning_rate": 1.324225865209472e-05, "loss": 0.6029, "step": 728 }, { "epoch": 0.01993546269962809, "grad_norm": 2.0682151317596436, "learning_rate": 1.3260473588342442e-05, "loss": 0.6245, "step": 729 }, { "epoch": 0.019962809013345, "grad_norm": 2.3950326442718506, "learning_rate": 1.3278688524590165e-05, "loss": 0.6322, "step": 730 }, { "epoch": 0.01999015532706191, "grad_norm": 1.8907910585403442, "learning_rate": 1.3296903460837888e-05, "loss": 0.5984, "step": 731 }, { "epoch": 0.020017501640778822, "grad_norm": 2.313872814178467, "learning_rate": 1.3315118397085612e-05, "loss": 0.5896, "step": 732 }, { "epoch": 0.020044847954495734, "grad_norm": 3.3316855430603027, "learning_rate": 1.3333333333333333e-05, "loss": 0.6032, "step": 733 }, { "epoch": 0.020072194268212645, "grad_norm": 1.5228403806686401, "learning_rate": 1.3351548269581058e-05, "loss": 0.5219, "step": 734 }, { "epoch": 0.020099540581929556, "grad_norm": 2.17976975440979, "learning_rate": 1.336976320582878e-05, "loss": 0.6188, "step": 735 }, { "epoch": 0.020126886895646467, "grad_norm": 2.287205696105957, "learning_rate": 1.3387978142076503e-05, "loss": 0.5981, "step": 736 }, { "epoch": 0.020154233209363378, "grad_norm": 2.6967661380767822, "learning_rate": 1.3406193078324228e-05, "loss": 0.6282, "step": 737 }, { "epoch": 0.02018157952308029, "grad_norm": 1.52458918094635, "learning_rate": 1.342440801457195e-05, "loss": 0.5208, "step": 738 }, { "epoch": 0.0202089258367972, "grad_norm": 1.9100393056869507, "learning_rate": 1.3442622950819673e-05, "loss": 0.5466, "step": 739 }, { "epoch": 0.02023627215051411, "grad_norm": 2.6026194095611572, "learning_rate": 1.3460837887067397e-05, "loss": 0.5584, "step": 740 }, { "epoch": 0.020263618464231022, "grad_norm": 1.7991034984588623, "learning_rate": 1.347905282331512e-05, "loss": 0.6015, "step": 741 }, { "epoch": 0.020290964777947933, "grad_norm": 2.329939603805542, "learning_rate": 1.3497267759562842e-05, "loss": 0.6171, "step": 742 }, { "epoch": 0.020318311091664844, "grad_norm": 2.1953442096710205, "learning_rate": 1.3515482695810567e-05, "loss": 0.6206, "step": 743 }, { "epoch": 0.020345657405381756, "grad_norm": 1.933560848236084, "learning_rate": 1.353369763205829e-05, "loss": 0.6267, "step": 744 }, { "epoch": 0.020373003719098667, "grad_norm": 2.418384552001953, "learning_rate": 1.3551912568306011e-05, "loss": 0.6098, "step": 745 }, { "epoch": 0.020400350032815578, "grad_norm": 2.6884360313415527, "learning_rate": 1.3570127504553736e-05, "loss": 0.6251, "step": 746 }, { "epoch": 0.02042769634653249, "grad_norm": 2.2362523078918457, "learning_rate": 1.3588342440801458e-05, "loss": 0.7295, "step": 747 }, { "epoch": 0.0204550426602494, "grad_norm": 2.614567995071411, "learning_rate": 1.3606557377049181e-05, "loss": 0.6013, "step": 748 }, { "epoch": 0.02048238897396631, "grad_norm": 2.659759998321533, "learning_rate": 1.3624772313296906e-05, "loss": 0.5598, "step": 749 }, { "epoch": 0.02050973528768322, "grad_norm": 1.9166611433029175, "learning_rate": 1.3642987249544628e-05, "loss": 0.5896, "step": 750 }, { "epoch": 0.02053708160140013, "grad_norm": 2.643218994140625, "learning_rate": 1.366120218579235e-05, "loss": 0.6063, "step": 751 }, { "epoch": 0.02056442791511704, "grad_norm": 2.3151795864105225, "learning_rate": 1.3679417122040073e-05, "loss": 0.565, "step": 752 }, { "epoch": 0.020591774228833952, "grad_norm": 2.1157188415527344, "learning_rate": 1.3697632058287798e-05, "loss": 0.7136, "step": 753 }, { "epoch": 0.020619120542550863, "grad_norm": 2.205040693283081, "learning_rate": 1.371584699453552e-05, "loss": 0.6046, "step": 754 }, { "epoch": 0.020646466856267774, "grad_norm": 2.01261830329895, "learning_rate": 1.3734061930783243e-05, "loss": 0.6024, "step": 755 }, { "epoch": 0.020673813169984685, "grad_norm": 2.5772688388824463, "learning_rate": 1.3752276867030966e-05, "loss": 0.608, "step": 756 }, { "epoch": 0.020701159483701596, "grad_norm": 1.955924391746521, "learning_rate": 1.377049180327869e-05, "loss": 0.624, "step": 757 }, { "epoch": 0.020728505797418507, "grad_norm": 1.783018946647644, "learning_rate": 1.3788706739526411e-05, "loss": 0.5953, "step": 758 }, { "epoch": 0.02075585211113542, "grad_norm": 2.127243995666504, "learning_rate": 1.3806921675774136e-05, "loss": 0.6056, "step": 759 }, { "epoch": 0.02078319842485233, "grad_norm": 2.5283021926879883, "learning_rate": 1.382513661202186e-05, "loss": 0.5766, "step": 760 }, { "epoch": 0.02081054473856924, "grad_norm": 1.8073428869247437, "learning_rate": 1.3843351548269581e-05, "loss": 0.5862, "step": 761 }, { "epoch": 0.020837891052286152, "grad_norm": 2.13124418258667, "learning_rate": 1.3861566484517306e-05, "loss": 0.6449, "step": 762 }, { "epoch": 0.020865237366003063, "grad_norm": 2.2019197940826416, "learning_rate": 1.3879781420765028e-05, "loss": 0.5913, "step": 763 }, { "epoch": 0.020892583679719974, "grad_norm": 1.8440883159637451, "learning_rate": 1.3897996357012751e-05, "loss": 0.6269, "step": 764 }, { "epoch": 0.020919929993436885, "grad_norm": 1.7293838262557983, "learning_rate": 1.3916211293260475e-05, "loss": 0.588, "step": 765 }, { "epoch": 0.020947276307153796, "grad_norm": 2.2647831439971924, "learning_rate": 1.3934426229508198e-05, "loss": 0.5105, "step": 766 }, { "epoch": 0.020974622620870707, "grad_norm": 2.3797476291656494, "learning_rate": 1.395264116575592e-05, "loss": 0.9931, "step": 767 }, { "epoch": 0.02100196893458762, "grad_norm": 3.4771909713745117, "learning_rate": 1.3970856102003645e-05, "loss": 0.6115, "step": 768 }, { "epoch": 0.02102931524830453, "grad_norm": 1.4830482006072998, "learning_rate": 1.3989071038251368e-05, "loss": 0.4819, "step": 769 }, { "epoch": 0.02105666156202144, "grad_norm": 2.982450246810913, "learning_rate": 1.400728597449909e-05, "loss": 0.9774, "step": 770 }, { "epoch": 0.02108400787573835, "grad_norm": 3.1535327434539795, "learning_rate": 1.4025500910746814e-05, "loss": 0.5639, "step": 771 }, { "epoch": 0.021111354189455263, "grad_norm": 1.4175951480865479, "learning_rate": 1.4043715846994536e-05, "loss": 0.4796, "step": 772 }, { "epoch": 0.021138700503172174, "grad_norm": 1.9389561414718628, "learning_rate": 1.406193078324226e-05, "loss": 0.5989, "step": 773 }, { "epoch": 0.021166046816889085, "grad_norm": 1.820009708404541, "learning_rate": 1.4080145719489984e-05, "loss": 1.0057, "step": 774 }, { "epoch": 0.021193393130605993, "grad_norm": 1.4585529565811157, "learning_rate": 1.4098360655737706e-05, "loss": 0.5198, "step": 775 }, { "epoch": 0.021220739444322904, "grad_norm": 1.946606993675232, "learning_rate": 1.411657559198543e-05, "loss": 0.6026, "step": 776 }, { "epoch": 0.021248085758039815, "grad_norm": 1.890234112739563, "learning_rate": 1.4134790528233153e-05, "loss": 0.6052, "step": 777 }, { "epoch": 0.021275432071756726, "grad_norm": 1.6038342714309692, "learning_rate": 1.4153005464480876e-05, "loss": 0.4849, "step": 778 }, { "epoch": 0.021302778385473637, "grad_norm": 1.8604817390441895, "learning_rate": 1.4171220400728598e-05, "loss": 0.6037, "step": 779 }, { "epoch": 0.021330124699190548, "grad_norm": 1.8324781656265259, "learning_rate": 1.4189435336976323e-05, "loss": 0.9408, "step": 780 }, { "epoch": 0.02135747101290746, "grad_norm": 1.6236344575881958, "learning_rate": 1.4207650273224044e-05, "loss": 0.4507, "step": 781 }, { "epoch": 0.02138481732662437, "grad_norm": 2.1674373149871826, "learning_rate": 1.4225865209471768e-05, "loss": 0.6054, "step": 782 }, { "epoch": 0.02141216364034128, "grad_norm": 2.2227976322174072, "learning_rate": 1.4244080145719493e-05, "loss": 0.6148, "step": 783 }, { "epoch": 0.021439509954058193, "grad_norm": 2.0259435176849365, "learning_rate": 1.4262295081967214e-05, "loss": 0.6033, "step": 784 }, { "epoch": 0.021466856267775104, "grad_norm": 1.5778025388717651, "learning_rate": 1.4280510018214938e-05, "loss": 0.6008, "step": 785 }, { "epoch": 0.021494202581492015, "grad_norm": 2.5587103366851807, "learning_rate": 1.4298724954462661e-05, "loss": 0.9793, "step": 786 }, { "epoch": 0.021521548895208926, "grad_norm": 1.8492342233657837, "learning_rate": 1.4316939890710384e-05, "loss": 0.5708, "step": 787 }, { "epoch": 0.021548895208925837, "grad_norm": 2.265671730041504, "learning_rate": 1.4335154826958106e-05, "loss": 0.5957, "step": 788 }, { "epoch": 0.021576241522642748, "grad_norm": 1.939247488975525, "learning_rate": 1.435336976320583e-05, "loss": 0.6038, "step": 789 }, { "epoch": 0.02160358783635966, "grad_norm": 1.729156494140625, "learning_rate": 1.4371584699453554e-05, "loss": 0.9295, "step": 790 }, { "epoch": 0.02163093415007657, "grad_norm": 3.802741289138794, "learning_rate": 1.4389799635701276e-05, "loss": 0.5985, "step": 791 }, { "epoch": 0.02165828046379348, "grad_norm": 16.64296531677246, "learning_rate": 1.4408014571949e-05, "loss": 0.6082, "step": 792 }, { "epoch": 0.021685626777510392, "grad_norm": 1.9668521881103516, "learning_rate": 1.4426229508196722e-05, "loss": 0.6282, "step": 793 }, { "epoch": 0.021712973091227303, "grad_norm": 1.6584495306015015, "learning_rate": 1.4444444444444446e-05, "loss": 0.6226, "step": 794 }, { "epoch": 0.021740319404944215, "grad_norm": 1.7899410724639893, "learning_rate": 1.4462659380692167e-05, "loss": 0.6041, "step": 795 }, { "epoch": 0.021767665718661126, "grad_norm": 1.8607399463653564, "learning_rate": 1.4480874316939892e-05, "loss": 0.6119, "step": 796 }, { "epoch": 0.021795012032378037, "grad_norm": 3.074247360229492, "learning_rate": 1.4499089253187614e-05, "loss": 0.5826, "step": 797 }, { "epoch": 0.021822358346094948, "grad_norm": 2.636211395263672, "learning_rate": 1.4517304189435337e-05, "loss": 0.5786, "step": 798 }, { "epoch": 0.02184970465981186, "grad_norm": 2.3431272506713867, "learning_rate": 1.4535519125683062e-05, "loss": 0.5871, "step": 799 }, { "epoch": 0.021877050973528767, "grad_norm": 1.9718741178512573, "learning_rate": 1.4553734061930784e-05, "loss": 0.566, "step": 800 }, { "epoch": 0.021904397287245678, "grad_norm": 2.1456029415130615, "learning_rate": 1.4571948998178507e-05, "loss": 0.6104, "step": 801 }, { "epoch": 0.02193174360096259, "grad_norm": 2.3105056285858154, "learning_rate": 1.459016393442623e-05, "loss": 0.5951, "step": 802 }, { "epoch": 0.0219590899146795, "grad_norm": 2.0859594345092773, "learning_rate": 1.4608378870673954e-05, "loss": 0.5071, "step": 803 }, { "epoch": 0.02198643622839641, "grad_norm": 2.838866949081421, "learning_rate": 1.4626593806921676e-05, "loss": 0.5903, "step": 804 }, { "epoch": 0.022013782542113322, "grad_norm": 2.582949638366699, "learning_rate": 1.46448087431694e-05, "loss": 0.6216, "step": 805 }, { "epoch": 0.022041128855830233, "grad_norm": 1.7666900157928467, "learning_rate": 1.4663023679417124e-05, "loss": 0.6277, "step": 806 }, { "epoch": 0.022068475169547144, "grad_norm": 2.0517704486846924, "learning_rate": 1.4681238615664846e-05, "loss": 0.6219, "step": 807 }, { "epoch": 0.022095821483264055, "grad_norm": 2.494805097579956, "learning_rate": 1.469945355191257e-05, "loss": 0.5744, "step": 808 }, { "epoch": 0.022123167796980966, "grad_norm": 2.7770767211914062, "learning_rate": 1.4717668488160292e-05, "loss": 0.6165, "step": 809 }, { "epoch": 0.022150514110697878, "grad_norm": 2.17427659034729, "learning_rate": 1.4735883424408016e-05, "loss": 0.6269, "step": 810 }, { "epoch": 0.02217786042441479, "grad_norm": 2.272891044616699, "learning_rate": 1.4754098360655739e-05, "loss": 0.7321, "step": 811 }, { "epoch": 0.0222052067381317, "grad_norm": 1.8939536809921265, "learning_rate": 1.4772313296903462e-05, "loss": 0.5992, "step": 812 }, { "epoch": 0.02223255305184861, "grad_norm": 2.1866466999053955, "learning_rate": 1.4790528233151184e-05, "loss": 0.6316, "step": 813 }, { "epoch": 0.022259899365565522, "grad_norm": 2.9775118827819824, "learning_rate": 1.4808743169398909e-05, "loss": 0.5838, "step": 814 }, { "epoch": 0.022287245679282433, "grad_norm": 2.0916318893432617, "learning_rate": 1.4826958105646632e-05, "loss": 0.5933, "step": 815 }, { "epoch": 0.022314591992999344, "grad_norm": 2.7135958671569824, "learning_rate": 1.4845173041894354e-05, "loss": 0.6448, "step": 816 }, { "epoch": 0.022341938306716255, "grad_norm": 2.282111644744873, "learning_rate": 1.4863387978142079e-05, "loss": 0.6398, "step": 817 }, { "epoch": 0.022369284620433166, "grad_norm": 2.588315725326538, "learning_rate": 1.48816029143898e-05, "loss": 0.6523, "step": 818 }, { "epoch": 0.022396630934150077, "grad_norm": 2.2935612201690674, "learning_rate": 1.4899817850637524e-05, "loss": 0.6403, "step": 819 }, { "epoch": 0.02242397724786699, "grad_norm": 2.4839701652526855, "learning_rate": 1.4918032786885249e-05, "loss": 0.6039, "step": 820 }, { "epoch": 0.0224513235615839, "grad_norm": 2.006838321685791, "learning_rate": 1.493624772313297e-05, "loss": 0.6501, "step": 821 }, { "epoch": 0.02247866987530081, "grad_norm": 1.6952992677688599, "learning_rate": 1.4954462659380694e-05, "loss": 0.5734, "step": 822 }, { "epoch": 0.022506016189017722, "grad_norm": 2.8767142295837402, "learning_rate": 1.4972677595628417e-05, "loss": 0.6271, "step": 823 }, { "epoch": 0.022533362502734633, "grad_norm": 2.1849985122680664, "learning_rate": 1.499089253187614e-05, "loss": 0.6103, "step": 824 }, { "epoch": 0.022560708816451544, "grad_norm": 2.03631591796875, "learning_rate": 1.5009107468123862e-05, "loss": 1.0159, "step": 825 }, { "epoch": 0.02258805513016845, "grad_norm": 2.2608282566070557, "learning_rate": 1.5027322404371585e-05, "loss": 0.6349, "step": 826 }, { "epoch": 0.022615401443885363, "grad_norm": 2.3521127700805664, "learning_rate": 1.5045537340619309e-05, "loss": 0.6409, "step": 827 }, { "epoch": 0.022642747757602274, "grad_norm": 2.6099841594696045, "learning_rate": 1.5063752276867032e-05, "loss": 0.7165, "step": 828 }, { "epoch": 0.022670094071319185, "grad_norm": 2.510187864303589, "learning_rate": 1.5081967213114754e-05, "loss": 0.5819, "step": 829 }, { "epoch": 0.022697440385036096, "grad_norm": 2.2308528423309326, "learning_rate": 1.5100182149362479e-05, "loss": 0.9632, "step": 830 }, { "epoch": 0.022724786698753007, "grad_norm": 2.5697920322418213, "learning_rate": 1.5118397085610202e-05, "loss": 0.5652, "step": 831 }, { "epoch": 0.022752133012469918, "grad_norm": 3.1905550956726074, "learning_rate": 1.5136612021857924e-05, "loss": 0.4906, "step": 832 }, { "epoch": 0.02277947932618683, "grad_norm": 2.732550859451294, "learning_rate": 1.5154826958105649e-05, "loss": 0.6459, "step": 833 }, { "epoch": 0.02280682563990374, "grad_norm": 1.7247099876403809, "learning_rate": 1.517304189435337e-05, "loss": 0.617, "step": 834 }, { "epoch": 0.02283417195362065, "grad_norm": 1.7964725494384766, "learning_rate": 1.5191256830601094e-05, "loss": 0.6129, "step": 835 }, { "epoch": 0.022861518267337563, "grad_norm": 1.995444655418396, "learning_rate": 1.5209471766848819e-05, "loss": 0.6169, "step": 836 }, { "epoch": 0.022888864581054474, "grad_norm": 2.440012216567993, "learning_rate": 1.522768670309654e-05, "loss": 0.6261, "step": 837 }, { "epoch": 0.022916210894771385, "grad_norm": 2.1077799797058105, "learning_rate": 1.5245901639344264e-05, "loss": 0.4537, "step": 838 }, { "epoch": 0.022943557208488296, "grad_norm": 1.9953944683074951, "learning_rate": 1.526411657559199e-05, "loss": 0.5951, "step": 839 }, { "epoch": 0.022970903522205207, "grad_norm": 2.2284767627716064, "learning_rate": 1.528233151183971e-05, "loss": 0.5968, "step": 840 }, { "epoch": 0.022998249835922118, "grad_norm": 1.8409332036972046, "learning_rate": 1.5300546448087432e-05, "loss": 0.6204, "step": 841 }, { "epoch": 0.02302559614963903, "grad_norm": 2.9471988677978516, "learning_rate": 1.5318761384335155e-05, "loss": 0.6828, "step": 842 }, { "epoch": 0.02305294246335594, "grad_norm": 1.9066071510314941, "learning_rate": 1.533697632058288e-05, "loss": 0.6093, "step": 843 }, { "epoch": 0.02308028877707285, "grad_norm": 1.723958134651184, "learning_rate": 1.5355191256830602e-05, "loss": 0.6136, "step": 844 }, { "epoch": 0.023107635090789762, "grad_norm": 2.0756630897521973, "learning_rate": 1.5373406193078325e-05, "loss": 0.6043, "step": 845 }, { "epoch": 0.023134981404506674, "grad_norm": 2.39231276512146, "learning_rate": 1.539162112932605e-05, "loss": 0.6168, "step": 846 }, { "epoch": 0.023162327718223585, "grad_norm": 2.1782639026641846, "learning_rate": 1.5409836065573772e-05, "loss": 0.6213, "step": 847 }, { "epoch": 0.023189674031940496, "grad_norm": 1.864562749862671, "learning_rate": 1.5428051001821495e-05, "loss": 0.7501, "step": 848 }, { "epoch": 0.023217020345657407, "grad_norm": 9.691981315612793, "learning_rate": 1.544626593806922e-05, "loss": 0.7399, "step": 849 }, { "epoch": 0.023244366659374318, "grad_norm": 1.9507548809051514, "learning_rate": 1.5464480874316942e-05, "loss": 0.6072, "step": 850 }, { "epoch": 0.023271712973091226, "grad_norm": 2.7803540229797363, "learning_rate": 1.5482695810564665e-05, "loss": 0.6252, "step": 851 }, { "epoch": 0.023299059286808137, "grad_norm": 1.8897855281829834, "learning_rate": 1.550091074681239e-05, "loss": 0.6139, "step": 852 }, { "epoch": 0.023326405600525048, "grad_norm": 2.2064898014068604, "learning_rate": 1.551912568306011e-05, "loss": 0.9548, "step": 853 }, { "epoch": 0.02335375191424196, "grad_norm": 1.945495367050171, "learning_rate": 1.5537340619307835e-05, "loss": 0.5798, "step": 854 }, { "epoch": 0.02338109822795887, "grad_norm": 2.5857656002044678, "learning_rate": 1.555555555555556e-05, "loss": 0.5859, "step": 855 }, { "epoch": 0.02340844454167578, "grad_norm": 2.227231025695801, "learning_rate": 1.5573770491803278e-05, "loss": 0.6252, "step": 856 }, { "epoch": 0.023435790855392692, "grad_norm": 1.8886069059371948, "learning_rate": 1.5591985428051005e-05, "loss": 0.6166, "step": 857 }, { "epoch": 0.023463137169109603, "grad_norm": 1.8840330839157104, "learning_rate": 1.5610200364298725e-05, "loss": 0.5859, "step": 858 }, { "epoch": 0.023490483482826514, "grad_norm": 2.0723278522491455, "learning_rate": 1.5628415300546448e-05, "loss": 0.5034, "step": 859 }, { "epoch": 0.023517829796543425, "grad_norm": 2.444190502166748, "learning_rate": 1.5646630236794175e-05, "loss": 0.9908, "step": 860 }, { "epoch": 0.023545176110260337, "grad_norm": 2.7423503398895264, "learning_rate": 1.5664845173041895e-05, "loss": 0.6993, "step": 861 }, { "epoch": 0.023572522423977248, "grad_norm": 2.3134727478027344, "learning_rate": 1.5683060109289618e-05, "loss": 0.6179, "step": 862 }, { "epoch": 0.02359986873769416, "grad_norm": 1.8541259765625, "learning_rate": 1.570127504553734e-05, "loss": 0.6177, "step": 863 }, { "epoch": 0.02362721505141107, "grad_norm": 1.832478404045105, "learning_rate": 1.5719489981785065e-05, "loss": 0.6163, "step": 864 }, { "epoch": 0.02365456136512798, "grad_norm": 1.6354936361312866, "learning_rate": 1.5737704918032788e-05, "loss": 0.5987, "step": 865 }, { "epoch": 0.023681907678844892, "grad_norm": 2.2473912239074707, "learning_rate": 1.575591985428051e-05, "loss": 0.6257, "step": 866 }, { "epoch": 0.023709253992561803, "grad_norm": 1.7140382528305054, "learning_rate": 1.5774134790528235e-05, "loss": 0.6132, "step": 867 }, { "epoch": 0.023736600306278714, "grad_norm": 2.609890937805176, "learning_rate": 1.5792349726775958e-05, "loss": 0.6321, "step": 868 }, { "epoch": 0.023763946619995625, "grad_norm": 1.9204728603363037, "learning_rate": 1.5810564663023678e-05, "loss": 0.5883, "step": 869 }, { "epoch": 0.023791292933712536, "grad_norm": 2.2616066932678223, "learning_rate": 1.5828779599271405e-05, "loss": 0.6053, "step": 870 }, { "epoch": 0.023818639247429448, "grad_norm": 2.686023473739624, "learning_rate": 1.5846994535519128e-05, "loss": 0.606, "step": 871 }, { "epoch": 0.02384598556114636, "grad_norm": 2.743306875228882, "learning_rate": 1.5865209471766848e-05, "loss": 0.6289, "step": 872 }, { "epoch": 0.02387333187486327, "grad_norm": 1.782246470451355, "learning_rate": 1.5883424408014575e-05, "loss": 0.613, "step": 873 }, { "epoch": 0.02390067818858018, "grad_norm": 2.6625099182128906, "learning_rate": 1.5901639344262295e-05, "loss": 0.9638, "step": 874 }, { "epoch": 0.023928024502297092, "grad_norm": 2.1939284801483154, "learning_rate": 1.5919854280510018e-05, "loss": 0.6276, "step": 875 }, { "epoch": 0.023955370816014003, "grad_norm": 2.0515449047088623, "learning_rate": 1.5938069216757745e-05, "loss": 0.6057, "step": 876 }, { "epoch": 0.02398271712973091, "grad_norm": 1.6606110334396362, "learning_rate": 1.5956284153005465e-05, "loss": 0.6025, "step": 877 }, { "epoch": 0.024010063443447822, "grad_norm": 1.8441827297210693, "learning_rate": 1.5974499089253188e-05, "loss": 0.5898, "step": 878 }, { "epoch": 0.024037409757164733, "grad_norm": 1.7150300741195679, "learning_rate": 1.599271402550091e-05, "loss": 0.6126, "step": 879 }, { "epoch": 0.024064756070881644, "grad_norm": 1.9825652837753296, "learning_rate": 1.6010928961748635e-05, "loss": 0.5835, "step": 880 }, { "epoch": 0.024092102384598555, "grad_norm": 2.538553237915039, "learning_rate": 1.6029143897996358e-05, "loss": 0.6546, "step": 881 }, { "epoch": 0.024119448698315466, "grad_norm": 2.200329065322876, "learning_rate": 1.604735883424408e-05, "loss": 0.6356, "step": 882 }, { "epoch": 0.024146795012032377, "grad_norm": 2.5357351303100586, "learning_rate": 1.6065573770491805e-05, "loss": 0.5919, "step": 883 }, { "epoch": 0.02417414132574929, "grad_norm": 3.6311187744140625, "learning_rate": 1.6083788706739528e-05, "loss": 0.6281, "step": 884 }, { "epoch": 0.0242014876394662, "grad_norm": 2.2339351177215576, "learning_rate": 1.610200364298725e-05, "loss": 0.6287, "step": 885 }, { "epoch": 0.02422883395318311, "grad_norm": 1.967390775680542, "learning_rate": 1.6120218579234975e-05, "loss": 0.6274, "step": 886 }, { "epoch": 0.02425618026690002, "grad_norm": 2.119293451309204, "learning_rate": 1.6138433515482698e-05, "loss": 0.7291, "step": 887 }, { "epoch": 0.024283526580616933, "grad_norm": 2.4902446269989014, "learning_rate": 1.615664845173042e-05, "loss": 0.6182, "step": 888 }, { "epoch": 0.024310872894333844, "grad_norm": 2.0513367652893066, "learning_rate": 1.6174863387978145e-05, "loss": 0.5799, "step": 889 }, { "epoch": 0.024338219208050755, "grad_norm": 3.64435076713562, "learning_rate": 1.6193078324225864e-05, "loss": 0.584, "step": 890 }, { "epoch": 0.024365565521767666, "grad_norm": 2.6180977821350098, "learning_rate": 1.621129326047359e-05, "loss": 0.6225, "step": 891 }, { "epoch": 0.024392911835484577, "grad_norm": 1.6402833461761475, "learning_rate": 1.6229508196721314e-05, "loss": 0.5951, "step": 892 }, { "epoch": 0.024420258149201488, "grad_norm": 2.4001009464263916, "learning_rate": 1.6247723132969034e-05, "loss": 0.6935, "step": 893 }, { "epoch": 0.0244476044629184, "grad_norm": 2.1053826808929443, "learning_rate": 1.626593806921676e-05, "loss": 0.5914, "step": 894 }, { "epoch": 0.02447495077663531, "grad_norm": 2.122371196746826, "learning_rate": 1.628415300546448e-05, "loss": 0.614, "step": 895 }, { "epoch": 0.02450229709035222, "grad_norm": 3.266545295715332, "learning_rate": 1.6302367941712204e-05, "loss": 1.0153, "step": 896 }, { "epoch": 0.024529643404069133, "grad_norm": 2.7163097858428955, "learning_rate": 1.632058287795993e-05, "loss": 0.5895, "step": 897 }, { "epoch": 0.024556989717786044, "grad_norm": 1.9600543975830078, "learning_rate": 1.633879781420765e-05, "loss": 0.9339, "step": 898 }, { "epoch": 0.024584336031502955, "grad_norm": 2.8297016620635986, "learning_rate": 1.6357012750455374e-05, "loss": 0.6181, "step": 899 }, { "epoch": 0.024611682345219866, "grad_norm": 3.3258895874023438, "learning_rate": 1.6375227686703098e-05, "loss": 0.7492, "step": 900 }, { "epoch": 0.024639028658936777, "grad_norm": 2.211017608642578, "learning_rate": 1.639344262295082e-05, "loss": 0.6123, "step": 901 }, { "epoch": 0.024666374972653685, "grad_norm": 3.934109687805176, "learning_rate": 1.6411657559198544e-05, "loss": 0.6513, "step": 902 }, { "epoch": 0.024693721286370596, "grad_norm": 1.8724944591522217, "learning_rate": 1.6429872495446268e-05, "loss": 0.6027, "step": 903 }, { "epoch": 0.024721067600087507, "grad_norm": 5.497803211212158, "learning_rate": 1.644808743169399e-05, "loss": 1.0686, "step": 904 }, { "epoch": 0.024748413913804418, "grad_norm": 2.0418894290924072, "learning_rate": 1.6466302367941714e-05, "loss": 0.5041, "step": 905 }, { "epoch": 0.02477576022752133, "grad_norm": 2.184451103210449, "learning_rate": 1.6484517304189434e-05, "loss": 0.7006, "step": 906 }, { "epoch": 0.02480310654123824, "grad_norm": 3.0656208992004395, "learning_rate": 1.650273224043716e-05, "loss": 0.6746, "step": 907 }, { "epoch": 0.02483045285495515, "grad_norm": 1.9462394714355469, "learning_rate": 1.6520947176684884e-05, "loss": 0.5999, "step": 908 }, { "epoch": 0.024857799168672062, "grad_norm": 3.1927332878112793, "learning_rate": 1.6539162112932604e-05, "loss": 0.9806, "step": 909 }, { "epoch": 0.024885145482388973, "grad_norm": 1.9347680807113647, "learning_rate": 1.655737704918033e-05, "loss": 0.6142, "step": 910 }, { "epoch": 0.024912491796105884, "grad_norm": 1.6937693357467651, "learning_rate": 1.657559198542805e-05, "loss": 0.5886, "step": 911 }, { "epoch": 0.024939838109822796, "grad_norm": 1.690497636795044, "learning_rate": 1.6593806921675774e-05, "loss": 0.59, "step": 912 }, { "epoch": 0.024967184423539707, "grad_norm": 2.1115829944610596, "learning_rate": 1.66120218579235e-05, "loss": 0.6128, "step": 913 }, { "epoch": 0.024994530737256618, "grad_norm": 1.9000040292739868, "learning_rate": 1.663023679417122e-05, "loss": 0.5778, "step": 914 }, { "epoch": 0.02502187705097353, "grad_norm": 1.7268579006195068, "learning_rate": 1.6648451730418944e-05, "loss": 0.6184, "step": 915 }, { "epoch": 0.02504922336469044, "grad_norm": 3.189096689224243, "learning_rate": 1.6666666666666667e-05, "loss": 1.0078, "step": 916 }, { "epoch": 0.02507656967840735, "grad_norm": 2.7349607944488525, "learning_rate": 1.668488160291439e-05, "loss": 0.9588, "step": 917 }, { "epoch": 0.025103915992124262, "grad_norm": 2.443648099899292, "learning_rate": 1.6703096539162114e-05, "loss": 0.9511, "step": 918 }, { "epoch": 0.025131262305841173, "grad_norm": 2.2588465213775635, "learning_rate": 1.6721311475409837e-05, "loss": 0.5874, "step": 919 }, { "epoch": 0.025158608619558084, "grad_norm": 3.226416826248169, "learning_rate": 1.673952641165756e-05, "loss": 1.0123, "step": 920 }, { "epoch": 0.025185954933274995, "grad_norm": 3.90366792678833, "learning_rate": 1.6757741347905284e-05, "loss": 0.9923, "step": 921 }, { "epoch": 0.025213301246991907, "grad_norm": 1.8751673698425293, "learning_rate": 1.6775956284153007e-05, "loss": 0.632, "step": 922 }, { "epoch": 0.025240647560708818, "grad_norm": 2.181516170501709, "learning_rate": 1.679417122040073e-05, "loss": 0.6279, "step": 923 }, { "epoch": 0.02526799387442573, "grad_norm": 1.9445685148239136, "learning_rate": 1.6812386156648454e-05, "loss": 0.7104, "step": 924 }, { "epoch": 0.02529534018814264, "grad_norm": 1.735216736793518, "learning_rate": 1.6830601092896177e-05, "loss": 0.6057, "step": 925 }, { "epoch": 0.02532268650185955, "grad_norm": 1.767179012298584, "learning_rate": 1.68488160291439e-05, "loss": 0.5966, "step": 926 }, { "epoch": 0.02535003281557646, "grad_norm": 1.8844246864318848, "learning_rate": 1.686703096539162e-05, "loss": 0.5959, "step": 927 }, { "epoch": 0.02537737912929337, "grad_norm": 1.9964033365249634, "learning_rate": 1.6885245901639347e-05, "loss": 0.6268, "step": 928 }, { "epoch": 0.02540472544301028, "grad_norm": 2.2399539947509766, "learning_rate": 1.690346083788707e-05, "loss": 0.5023, "step": 929 }, { "epoch": 0.025432071756727192, "grad_norm": 1.8690341711044312, "learning_rate": 1.692167577413479e-05, "loss": 0.5918, "step": 930 }, { "epoch": 0.025459418070444103, "grad_norm": 1.7992552518844604, "learning_rate": 1.6939890710382517e-05, "loss": 0.6574, "step": 931 }, { "epoch": 0.025486764384161014, "grad_norm": 5.9816575050354, "learning_rate": 1.6958105646630237e-05, "loss": 1.0469, "step": 932 }, { "epoch": 0.025514110697877925, "grad_norm": 2.1651527881622314, "learning_rate": 1.697632058287796e-05, "loss": 0.586, "step": 933 }, { "epoch": 0.025541457011594836, "grad_norm": 2.299362897872925, "learning_rate": 1.6994535519125684e-05, "loss": 0.6386, "step": 934 }, { "epoch": 0.025568803325311747, "grad_norm": 2.4004974365234375, "learning_rate": 1.7012750455373407e-05, "loss": 1.0147, "step": 935 }, { "epoch": 0.02559614963902866, "grad_norm": 2.6891283988952637, "learning_rate": 1.703096539162113e-05, "loss": 0.5643, "step": 936 }, { "epoch": 0.02562349595274557, "grad_norm": 2.226119041442871, "learning_rate": 1.7049180327868854e-05, "loss": 0.6461, "step": 937 }, { "epoch": 0.02565084226646248, "grad_norm": 1.6189215183258057, "learning_rate": 1.7067395264116577e-05, "loss": 0.5981, "step": 938 }, { "epoch": 0.025678188580179392, "grad_norm": 1.9467583894729614, "learning_rate": 1.70856102003643e-05, "loss": 0.6012, "step": 939 }, { "epoch": 0.025705534893896303, "grad_norm": 3.763104200363159, "learning_rate": 1.7103825136612024e-05, "loss": 1.009, "step": 940 }, { "epoch": 0.025732881207613214, "grad_norm": 2.1349716186523438, "learning_rate": 1.7122040072859747e-05, "loss": 0.5752, "step": 941 }, { "epoch": 0.025760227521330125, "grad_norm": 1.62477707862854, "learning_rate": 1.714025500910747e-05, "loss": 0.6104, "step": 942 }, { "epoch": 0.025787573835047036, "grad_norm": 2.371427536010742, "learning_rate": 1.715846994535519e-05, "loss": 0.5908, "step": 943 }, { "epoch": 0.025814920148763947, "grad_norm": 2.3157248497009277, "learning_rate": 1.7176684881602917e-05, "loss": 0.5947, "step": 944 }, { "epoch": 0.02584226646248086, "grad_norm": 2.0326197147369385, "learning_rate": 1.719489981785064e-05, "loss": 0.6008, "step": 945 }, { "epoch": 0.02586961277619777, "grad_norm": 2.027019500732422, "learning_rate": 1.721311475409836e-05, "loss": 0.6176, "step": 946 }, { "epoch": 0.02589695908991468, "grad_norm": 1.9838992357254028, "learning_rate": 1.7231329690346087e-05, "loss": 0.6142, "step": 947 }, { "epoch": 0.02592430540363159, "grad_norm": 4.781270980834961, "learning_rate": 1.7249544626593807e-05, "loss": 1.0001, "step": 948 }, { "epoch": 0.025951651717348503, "grad_norm": 2.441922664642334, "learning_rate": 1.726775956284153e-05, "loss": 0.5967, "step": 949 }, { "epoch": 0.025978998031065414, "grad_norm": 2.2098143100738525, "learning_rate": 1.7285974499089254e-05, "loss": 0.5816, "step": 950 }, { "epoch": 0.026006344344782325, "grad_norm": 2.175997018814087, "learning_rate": 1.7304189435336977e-05, "loss": 0.6075, "step": 951 }, { "epoch": 0.026033690658499236, "grad_norm": 2.2587368488311768, "learning_rate": 1.73224043715847e-05, "loss": 0.6201, "step": 952 }, { "epoch": 0.026061036972216144, "grad_norm": 2.049931526184082, "learning_rate": 1.7340619307832424e-05, "loss": 0.61, "step": 953 }, { "epoch": 0.026088383285933055, "grad_norm": 2.142240524291992, "learning_rate": 1.7358834244080147e-05, "loss": 0.6116, "step": 954 }, { "epoch": 0.026115729599649966, "grad_norm": 2.5552048683166504, "learning_rate": 1.737704918032787e-05, "loss": 0.6184, "step": 955 }, { "epoch": 0.026143075913366877, "grad_norm": 1.8506050109863281, "learning_rate": 1.7395264116575594e-05, "loss": 0.5658, "step": 956 }, { "epoch": 0.026170422227083788, "grad_norm": 2.700942039489746, "learning_rate": 1.7413479052823317e-05, "loss": 0.9462, "step": 957 }, { "epoch": 0.0261977685408007, "grad_norm": 1.7063493728637695, "learning_rate": 1.743169398907104e-05, "loss": 0.5853, "step": 958 }, { "epoch": 0.02622511485451761, "grad_norm": 1.8105981349945068, "learning_rate": 1.7449908925318764e-05, "loss": 0.6231, "step": 959 }, { "epoch": 0.02625246116823452, "grad_norm": 2.1938140392303467, "learning_rate": 1.7468123861566487e-05, "loss": 0.577, "step": 960 }, { "epoch": 0.026279807481951432, "grad_norm": 1.8990042209625244, "learning_rate": 1.7486338797814207e-05, "loss": 0.616, "step": 961 }, { "epoch": 0.026307153795668343, "grad_norm": 2.104661703109741, "learning_rate": 1.7504553734061934e-05, "loss": 0.9924, "step": 962 }, { "epoch": 0.026334500109385255, "grad_norm": 2.2902863025665283, "learning_rate": 1.7522768670309657e-05, "loss": 0.6934, "step": 963 }, { "epoch": 0.026361846423102166, "grad_norm": 1.509122371673584, "learning_rate": 1.7540983606557377e-05, "loss": 0.571, "step": 964 }, { "epoch": 0.026389192736819077, "grad_norm": 1.897693157196045, "learning_rate": 1.7559198542805104e-05, "loss": 0.6412, "step": 965 }, { "epoch": 0.026416539050535988, "grad_norm": 1.598160982131958, "learning_rate": 1.7577413479052823e-05, "loss": 0.6922, "step": 966 }, { "epoch": 0.0264438853642529, "grad_norm": 1.8690717220306396, "learning_rate": 1.7595628415300547e-05, "loss": 0.6211, "step": 967 }, { "epoch": 0.02647123167796981, "grad_norm": 2.1757256984710693, "learning_rate": 1.7613843351548273e-05, "loss": 0.9748, "step": 968 }, { "epoch": 0.02649857799168672, "grad_norm": 2.1309378147125244, "learning_rate": 1.7632058287795993e-05, "loss": 0.5871, "step": 969 }, { "epoch": 0.026525924305403632, "grad_norm": 2.6452279090881348, "learning_rate": 1.7650273224043717e-05, "loss": 0.5697, "step": 970 }, { "epoch": 0.026553270619120543, "grad_norm": 1.693384051322937, "learning_rate": 1.766848816029144e-05, "loss": 1.0027, "step": 971 }, { "epoch": 0.026580616932837454, "grad_norm": 2.0945727825164795, "learning_rate": 1.7686703096539163e-05, "loss": 0.6087, "step": 972 }, { "epoch": 0.026607963246554366, "grad_norm": 3.81382155418396, "learning_rate": 1.7704918032786887e-05, "loss": 0.9973, "step": 973 }, { "epoch": 0.026635309560271277, "grad_norm": 2.104053497314453, "learning_rate": 1.772313296903461e-05, "loss": 0.5908, "step": 974 }, { "epoch": 0.026662655873988188, "grad_norm": 2.4553580284118652, "learning_rate": 1.7741347905282333e-05, "loss": 0.594, "step": 975 }, { "epoch": 0.0266900021877051, "grad_norm": 1.5294365882873535, "learning_rate": 1.7759562841530057e-05, "loss": 0.4564, "step": 976 }, { "epoch": 0.02671734850142201, "grad_norm": 2.1444714069366455, "learning_rate": 1.7777777777777777e-05, "loss": 0.5754, "step": 977 }, { "epoch": 0.026744694815138918, "grad_norm": 1.8347746133804321, "learning_rate": 1.7795992714025503e-05, "loss": 0.5854, "step": 978 }, { "epoch": 0.02677204112885583, "grad_norm": 1.8266481161117554, "learning_rate": 1.7814207650273227e-05, "loss": 0.5895, "step": 979 }, { "epoch": 0.02679938744257274, "grad_norm": 2.2701869010925293, "learning_rate": 1.7832422586520947e-05, "loss": 0.7525, "step": 980 }, { "epoch": 0.02682673375628965, "grad_norm": 1.795569658279419, "learning_rate": 1.7850637522768673e-05, "loss": 0.6029, "step": 981 }, { "epoch": 0.026854080070006562, "grad_norm": 2.1957027912139893, "learning_rate": 1.7868852459016393e-05, "loss": 0.6126, "step": 982 }, { "epoch": 0.026881426383723473, "grad_norm": 2.134031295776367, "learning_rate": 1.7887067395264117e-05, "loss": 0.5876, "step": 983 }, { "epoch": 0.026908772697440384, "grad_norm": 2.181424617767334, "learning_rate": 1.7905282331511843e-05, "loss": 0.6272, "step": 984 }, { "epoch": 0.026936119011157295, "grad_norm": 1.626671314239502, "learning_rate": 1.7923497267759563e-05, "loss": 0.6073, "step": 985 }, { "epoch": 0.026963465324874206, "grad_norm": 5.810871124267578, "learning_rate": 1.7941712204007287e-05, "loss": 0.4971, "step": 986 }, { "epoch": 0.026990811638591117, "grad_norm": 1.7379041910171509, "learning_rate": 1.795992714025501e-05, "loss": 0.5933, "step": 987 }, { "epoch": 0.02701815795230803, "grad_norm": 1.6173970699310303, "learning_rate": 1.7978142076502733e-05, "loss": 0.6398, "step": 988 }, { "epoch": 0.02704550426602494, "grad_norm": 1.9766162633895874, "learning_rate": 1.7996357012750456e-05, "loss": 0.6181, "step": 989 }, { "epoch": 0.02707285057974185, "grad_norm": 1.9844863414764404, "learning_rate": 1.801457194899818e-05, "loss": 0.5963, "step": 990 }, { "epoch": 0.027100196893458762, "grad_norm": 2.6717894077301025, "learning_rate": 1.8032786885245903e-05, "loss": 0.5768, "step": 991 }, { "epoch": 0.027127543207175673, "grad_norm": 1.9767725467681885, "learning_rate": 1.8051001821493626e-05, "loss": 0.7113, "step": 992 }, { "epoch": 0.027154889520892584, "grad_norm": 1.8144243955612183, "learning_rate": 1.806921675774135e-05, "loss": 0.6284, "step": 993 }, { "epoch": 0.027182235834609495, "grad_norm": 2.0615110397338867, "learning_rate": 1.8087431693989073e-05, "loss": 0.6097, "step": 994 }, { "epoch": 0.027209582148326406, "grad_norm": 1.8951078653335571, "learning_rate": 1.8105646630236796e-05, "loss": 0.697, "step": 995 }, { "epoch": 0.027236928462043317, "grad_norm": 1.7921066284179688, "learning_rate": 1.812386156648452e-05, "loss": 0.627, "step": 996 }, { "epoch": 0.02726427477576023, "grad_norm": 1.8612912893295288, "learning_rate": 1.8142076502732243e-05, "loss": 0.5917, "step": 997 }, { "epoch": 0.02729162108947714, "grad_norm": 2.376260995864868, "learning_rate": 1.8160291438979963e-05, "loss": 0.6751, "step": 998 }, { "epoch": 0.02731896740319405, "grad_norm": 2.0590157508850098, "learning_rate": 1.817850637522769e-05, "loss": 0.6899, "step": 999 }, { "epoch": 0.02734631371691096, "grad_norm": 2.064396381378174, "learning_rate": 1.8196721311475413e-05, "loss": 0.685, "step": 1000 }, { "epoch": 0.027373660030627873, "grad_norm": 2.011000394821167, "learning_rate": 1.8214936247723133e-05, "loss": 0.6009, "step": 1001 }, { "epoch": 0.027401006344344784, "grad_norm": 2.041912317276001, "learning_rate": 1.823315118397086e-05, "loss": 0.5733, "step": 1002 }, { "epoch": 0.027428352658061695, "grad_norm": 3.1553306579589844, "learning_rate": 1.825136612021858e-05, "loss": 0.6635, "step": 1003 }, { "epoch": 0.027455698971778603, "grad_norm": 2.119234085083008, "learning_rate": 1.8269581056466303e-05, "loss": 0.6173, "step": 1004 }, { "epoch": 0.027483045285495514, "grad_norm": 1.9128401279449463, "learning_rate": 1.828779599271403e-05, "loss": 0.5931, "step": 1005 }, { "epoch": 0.027510391599212425, "grad_norm": 1.9147330522537231, "learning_rate": 1.830601092896175e-05, "loss": 0.5808, "step": 1006 }, { "epoch": 0.027537737912929336, "grad_norm": 2.2580199241638184, "learning_rate": 1.8324225865209473e-05, "loss": 0.714, "step": 1007 }, { "epoch": 0.027565084226646247, "grad_norm": 2.6180999279022217, "learning_rate": 1.8342440801457196e-05, "loss": 0.6067, "step": 1008 }, { "epoch": 0.027592430540363158, "grad_norm": 2.0726518630981445, "learning_rate": 1.836065573770492e-05, "loss": 0.6174, "step": 1009 }, { "epoch": 0.02761977685408007, "grad_norm": 1.8091325759887695, "learning_rate": 1.8378870673952643e-05, "loss": 0.6007, "step": 1010 }, { "epoch": 0.02764712316779698, "grad_norm": 1.9387407302856445, "learning_rate": 1.8397085610200366e-05, "loss": 0.6162, "step": 1011 }, { "epoch": 0.02767446948151389, "grad_norm": 1.7116332054138184, "learning_rate": 1.841530054644809e-05, "loss": 0.5978, "step": 1012 }, { "epoch": 0.027701815795230803, "grad_norm": 3.2208902835845947, "learning_rate": 1.8433515482695813e-05, "loss": 0.627, "step": 1013 }, { "epoch": 0.027729162108947714, "grad_norm": 2.9537107944488525, "learning_rate": 1.8451730418943533e-05, "loss": 1.0068, "step": 1014 }, { "epoch": 0.027756508422664625, "grad_norm": 2.158679723739624, "learning_rate": 1.846994535519126e-05, "loss": 0.6207, "step": 1015 }, { "epoch": 0.027783854736381536, "grad_norm": 1.9719456434249878, "learning_rate": 1.8488160291438983e-05, "loss": 0.6076, "step": 1016 }, { "epoch": 0.027811201050098447, "grad_norm": 1.7702490091323853, "learning_rate": 1.8506375227686703e-05, "loss": 0.5927, "step": 1017 }, { "epoch": 0.027838547363815358, "grad_norm": 2.055365562438965, "learning_rate": 1.852459016393443e-05, "loss": 0.5241, "step": 1018 }, { "epoch": 0.02786589367753227, "grad_norm": 1.9542723894119263, "learning_rate": 1.854280510018215e-05, "loss": 0.6135, "step": 1019 }, { "epoch": 0.02789323999124918, "grad_norm": 1.7780815362930298, "learning_rate": 1.8561020036429873e-05, "loss": 0.5921, "step": 1020 }, { "epoch": 0.02792058630496609, "grad_norm": 2.2726428508758545, "learning_rate": 1.85792349726776e-05, "loss": 0.5955, "step": 1021 }, { "epoch": 0.027947932618683002, "grad_norm": 2.261950731277466, "learning_rate": 1.859744990892532e-05, "loss": 0.6189, "step": 1022 }, { "epoch": 0.027975278932399913, "grad_norm": 2.060117483139038, "learning_rate": 1.8615664845173043e-05, "loss": 0.6195, "step": 1023 }, { "epoch": 0.028002625246116825, "grad_norm": 1.501891851425171, "learning_rate": 1.8633879781420766e-05, "loss": 0.4641, "step": 1024 }, { "epoch": 0.028029971559833736, "grad_norm": 1.5801770687103271, "learning_rate": 1.865209471766849e-05, "loss": 0.5991, "step": 1025 }, { "epoch": 0.028057317873550647, "grad_norm": 2.019463539123535, "learning_rate": 1.8670309653916213e-05, "loss": 0.5996, "step": 1026 }, { "epoch": 0.028084664187267558, "grad_norm": 2.3901169300079346, "learning_rate": 1.8688524590163936e-05, "loss": 0.6778, "step": 1027 }, { "epoch": 0.02811201050098447, "grad_norm": 2.0005221366882324, "learning_rate": 1.870673952641166e-05, "loss": 0.6039, "step": 1028 }, { "epoch": 0.028139356814701377, "grad_norm": 1.8708477020263672, "learning_rate": 1.8724954462659383e-05, "loss": 0.6196, "step": 1029 }, { "epoch": 0.028166703128418288, "grad_norm": 3.7455413341522217, "learning_rate": 1.8743169398907106e-05, "loss": 0.579, "step": 1030 }, { "epoch": 0.0281940494421352, "grad_norm": 2.3542120456695557, "learning_rate": 1.876138433515483e-05, "loss": 0.652, "step": 1031 }, { "epoch": 0.02822139575585211, "grad_norm": 9.93237590789795, "learning_rate": 1.8779599271402553e-05, "loss": 0.9463, "step": 1032 }, { "epoch": 0.02824874206956902, "grad_norm": 1.6355689764022827, "learning_rate": 1.8797814207650276e-05, "loss": 0.5957, "step": 1033 }, { "epoch": 0.028276088383285932, "grad_norm": 2.2631332874298096, "learning_rate": 1.8816029143898e-05, "loss": 0.5927, "step": 1034 }, { "epoch": 0.028303434697002843, "grad_norm": 1.5724141597747803, "learning_rate": 1.883424408014572e-05, "loss": 0.6121, "step": 1035 }, { "epoch": 0.028330781010719754, "grad_norm": 1.827088713645935, "learning_rate": 1.8852459016393446e-05, "loss": 0.5858, "step": 1036 }, { "epoch": 0.028358127324436665, "grad_norm": 1.7858392000198364, "learning_rate": 1.887067395264117e-05, "loss": 0.6082, "step": 1037 }, { "epoch": 0.028385473638153576, "grad_norm": 1.7367181777954102, "learning_rate": 1.888888888888889e-05, "loss": 0.6143, "step": 1038 }, { "epoch": 0.028412819951870488, "grad_norm": 3.1564626693725586, "learning_rate": 1.8907103825136616e-05, "loss": 0.999, "step": 1039 }, { "epoch": 0.0284401662655874, "grad_norm": 1.7702547311782837, "learning_rate": 1.8925318761384336e-05, "loss": 0.6152, "step": 1040 }, { "epoch": 0.02846751257930431, "grad_norm": 2.4517126083374023, "learning_rate": 1.894353369763206e-05, "loss": 0.6033, "step": 1041 }, { "epoch": 0.02849485889302122, "grad_norm": 2.294590950012207, "learning_rate": 1.8961748633879782e-05, "loss": 0.6417, "step": 1042 }, { "epoch": 0.028522205206738132, "grad_norm": 2.272470474243164, "learning_rate": 1.8979963570127506e-05, "loss": 0.6099, "step": 1043 }, { "epoch": 0.028549551520455043, "grad_norm": 2.3939740657806396, "learning_rate": 1.899817850637523e-05, "loss": 0.7103, "step": 1044 }, { "epoch": 0.028576897834171954, "grad_norm": 1.9780972003936768, "learning_rate": 1.9016393442622952e-05, "loss": 0.605, "step": 1045 }, { "epoch": 0.028604244147888865, "grad_norm": 2.0694291591644287, "learning_rate": 1.9034608378870676e-05, "loss": 0.5929, "step": 1046 }, { "epoch": 0.028631590461605776, "grad_norm": 1.4858978986740112, "learning_rate": 1.90528233151184e-05, "loss": 0.4994, "step": 1047 }, { "epoch": 0.028658936775322687, "grad_norm": 2.127821445465088, "learning_rate": 1.9071038251366122e-05, "loss": 0.6051, "step": 1048 }, { "epoch": 0.0286862830890396, "grad_norm": 2.3363943099975586, "learning_rate": 1.9089253187613846e-05, "loss": 0.6075, "step": 1049 }, { "epoch": 0.02871362940275651, "grad_norm": 1.932637095451355, "learning_rate": 1.910746812386157e-05, "loss": 0.6091, "step": 1050 }, { "epoch": 0.02874097571647342, "grad_norm": 1.9420026540756226, "learning_rate": 1.912568306010929e-05, "loss": 0.6192, "step": 1051 }, { "epoch": 0.028768322030190332, "grad_norm": 2.15929913520813, "learning_rate": 1.9143897996357016e-05, "loss": 0.5801, "step": 1052 }, { "epoch": 0.028795668343907243, "grad_norm": 2.0868582725524902, "learning_rate": 1.9162112932604736e-05, "loss": 0.6087, "step": 1053 }, { "epoch": 0.02882301465762415, "grad_norm": 1.6379133462905884, "learning_rate": 1.918032786885246e-05, "loss": 0.6065, "step": 1054 }, { "epoch": 0.02885036097134106, "grad_norm": 2.116777181625366, "learning_rate": 1.9198542805100186e-05, "loss": 0.6459, "step": 1055 }, { "epoch": 0.028877707285057973, "grad_norm": 2.9598734378814697, "learning_rate": 1.9216757741347906e-05, "loss": 0.459, "step": 1056 }, { "epoch": 0.028905053598774884, "grad_norm": 2.1964783668518066, "learning_rate": 1.923497267759563e-05, "loss": 0.4672, "step": 1057 }, { "epoch": 0.028932399912491795, "grad_norm": 1.5482518672943115, "learning_rate": 1.9253187613843352e-05, "loss": 0.4326, "step": 1058 }, { "epoch": 0.028959746226208706, "grad_norm": 2.950873374938965, "learning_rate": 1.9271402550091076e-05, "loss": 0.6131, "step": 1059 }, { "epoch": 0.028987092539925617, "grad_norm": 1.869550347328186, "learning_rate": 1.92896174863388e-05, "loss": 0.5935, "step": 1060 }, { "epoch": 0.029014438853642528, "grad_norm": 2.058823823928833, "learning_rate": 1.9307832422586522e-05, "loss": 0.6131, "step": 1061 }, { "epoch": 0.02904178516735944, "grad_norm": 1.5940293073654175, "learning_rate": 1.9326047358834245e-05, "loss": 0.6087, "step": 1062 }, { "epoch": 0.02906913148107635, "grad_norm": 2.136528491973877, "learning_rate": 1.934426229508197e-05, "loss": 0.5887, "step": 1063 }, { "epoch": 0.02909647779479326, "grad_norm": 2.504146099090576, "learning_rate": 1.9362477231329692e-05, "loss": 0.7346, "step": 1064 }, { "epoch": 0.029123824108510173, "grad_norm": 2.191607713699341, "learning_rate": 1.9380692167577415e-05, "loss": 0.5917, "step": 1065 }, { "epoch": 0.029151170422227084, "grad_norm": 12.072402954101562, "learning_rate": 1.939890710382514e-05, "loss": 0.6014, "step": 1066 }, { "epoch": 0.029178516735943995, "grad_norm": 2.49924373626709, "learning_rate": 1.9417122040072862e-05, "loss": 0.4524, "step": 1067 }, { "epoch": 0.029205863049660906, "grad_norm": 2.9875969886779785, "learning_rate": 1.9435336976320585e-05, "loss": 0.5879, "step": 1068 }, { "epoch": 0.029233209363377817, "grad_norm": 1.8839664459228516, "learning_rate": 1.9453551912568305e-05, "loss": 0.6003, "step": 1069 }, { "epoch": 0.029260555677094728, "grad_norm": 2.4423928260803223, "learning_rate": 1.9471766848816032e-05, "loss": 0.5858, "step": 1070 }, { "epoch": 0.02928790199081164, "grad_norm": 1.7703739404678345, "learning_rate": 1.9489981785063755e-05, "loss": 0.616, "step": 1071 }, { "epoch": 0.02931524830452855, "grad_norm": 2.0154788494110107, "learning_rate": 1.9508196721311475e-05, "loss": 0.6168, "step": 1072 }, { "epoch": 0.02934259461824546, "grad_norm": 1.849987268447876, "learning_rate": 1.9526411657559202e-05, "loss": 0.5853, "step": 1073 }, { "epoch": 0.029369940931962372, "grad_norm": 2.158301830291748, "learning_rate": 1.9544626593806922e-05, "loss": 0.6254, "step": 1074 }, { "epoch": 0.029397287245679284, "grad_norm": 2.593883991241455, "learning_rate": 1.9562841530054645e-05, "loss": 0.7432, "step": 1075 }, { "epoch": 0.029424633559396195, "grad_norm": 1.8658642768859863, "learning_rate": 1.9581056466302372e-05, "loss": 0.6667, "step": 1076 }, { "epoch": 0.029451979873113106, "grad_norm": 1.9539698362350464, "learning_rate": 1.9599271402550092e-05, "loss": 0.5996, "step": 1077 }, { "epoch": 0.029479326186830017, "grad_norm": 2.3970704078674316, "learning_rate": 1.9617486338797815e-05, "loss": 0.5892, "step": 1078 }, { "epoch": 0.029506672500546928, "grad_norm": 2.8540961742401123, "learning_rate": 1.963570127504554e-05, "loss": 0.6904, "step": 1079 }, { "epoch": 0.029534018814263836, "grad_norm": 1.9753921031951904, "learning_rate": 1.9653916211293262e-05, "loss": 0.6054, "step": 1080 }, { "epoch": 0.029561365127980747, "grad_norm": 1.8292220830917358, "learning_rate": 1.9672131147540985e-05, "loss": 0.64, "step": 1081 }, { "epoch": 0.029588711441697658, "grad_norm": 2.3176794052124023, "learning_rate": 1.969034608378871e-05, "loss": 0.6189, "step": 1082 }, { "epoch": 0.02961605775541457, "grad_norm": 1.9011964797973633, "learning_rate": 1.9708561020036432e-05, "loss": 0.6239, "step": 1083 }, { "epoch": 0.02964340406913148, "grad_norm": 2.5074405670166016, "learning_rate": 1.9726775956284155e-05, "loss": 0.6137, "step": 1084 }, { "epoch": 0.02967075038284839, "grad_norm": 2.162652015686035, "learning_rate": 1.9744990892531875e-05, "loss": 0.4655, "step": 1085 }, { "epoch": 0.029698096696565302, "grad_norm": 11.832585334777832, "learning_rate": 1.9763205828779602e-05, "loss": 0.9525, "step": 1086 }, { "epoch": 0.029725443010282213, "grad_norm": 2.58150577545166, "learning_rate": 1.9781420765027325e-05, "loss": 0.6073, "step": 1087 }, { "epoch": 0.029752789323999124, "grad_norm": 2.4763078689575195, "learning_rate": 1.9799635701275045e-05, "loss": 0.4627, "step": 1088 }, { "epoch": 0.029780135637716035, "grad_norm": 2.8045010566711426, "learning_rate": 1.9817850637522772e-05, "loss": 0.6171, "step": 1089 }, { "epoch": 0.029807481951432947, "grad_norm": 2.419703245162964, "learning_rate": 1.9836065573770492e-05, "loss": 0.6412, "step": 1090 }, { "epoch": 0.029834828265149858, "grad_norm": 2.3987855911254883, "learning_rate": 1.9854280510018215e-05, "loss": 0.6221, "step": 1091 }, { "epoch": 0.02986217457886677, "grad_norm": 2.706223964691162, "learning_rate": 1.9872495446265942e-05, "loss": 0.9879, "step": 1092 }, { "epoch": 0.02988952089258368, "grad_norm": 1.6918236017227173, "learning_rate": 1.9890710382513662e-05, "loss": 0.6152, "step": 1093 }, { "epoch": 0.02991686720630059, "grad_norm": 2.1401331424713135, "learning_rate": 1.9908925318761385e-05, "loss": 0.7026, "step": 1094 }, { "epoch": 0.029944213520017502, "grad_norm": 1.9493910074234009, "learning_rate": 1.992714025500911e-05, "loss": 0.6187, "step": 1095 }, { "epoch": 0.029971559833734413, "grad_norm": 2.199352264404297, "learning_rate": 1.994535519125683e-05, "loss": 0.6831, "step": 1096 }, { "epoch": 0.029998906147451324, "grad_norm": 2.190056562423706, "learning_rate": 1.9963570127504555e-05, "loss": 0.6222, "step": 1097 }, { "epoch": 0.030026252461168235, "grad_norm": 2.6319692134857178, "learning_rate": 1.998178506375228e-05, "loss": 0.6161, "step": 1098 }, { "epoch": 0.030053598774885146, "grad_norm": 2.2094900608062744, "learning_rate": 2e-05, "loss": 0.5873, "step": 1099 }, { "epoch": 0.030080945088602058, "grad_norm": 2.5929574966430664, "learning_rate": 1.999999996077641e-05, "loss": 0.9163, "step": 1100 }, { "epoch": 0.03010829140231897, "grad_norm": 2.8930909633636475, "learning_rate": 1.999999984310563e-05, "loss": 0.7002, "step": 1101 }, { "epoch": 0.03013563771603588, "grad_norm": 2.4024298191070557, "learning_rate": 1.9999999646987667e-05, "loss": 0.6463, "step": 1102 }, { "epoch": 0.03016298402975279, "grad_norm": 3.047121524810791, "learning_rate": 1.9999999372422524e-05, "loss": 0.6067, "step": 1103 }, { "epoch": 0.030190330343469702, "grad_norm": 2.6690292358398438, "learning_rate": 1.9999999019410196e-05, "loss": 1.0129, "step": 1104 }, { "epoch": 0.03021767665718661, "grad_norm": 2.0207509994506836, "learning_rate": 1.999999858795069e-05, "loss": 0.5987, "step": 1105 }, { "epoch": 0.03024502297090352, "grad_norm": 3.8987014293670654, "learning_rate": 1.9999998078044013e-05, "loss": 0.6505, "step": 1106 }, { "epoch": 0.030272369284620432, "grad_norm": 3.7501206398010254, "learning_rate": 1.999999748969016e-05, "loss": 0.5734, "step": 1107 }, { "epoch": 0.030299715598337343, "grad_norm": 2.179427146911621, "learning_rate": 1.9999996822889145e-05, "loss": 0.6114, "step": 1108 }, { "epoch": 0.030327061912054254, "grad_norm": 1.8503589630126953, "learning_rate": 1.999999607764097e-05, "loss": 0.9687, "step": 1109 }, { "epoch": 0.030354408225771165, "grad_norm": 2.8906569480895996, "learning_rate": 1.9999995253945636e-05, "loss": 0.9855, "step": 1110 }, { "epoch": 0.030381754539488076, "grad_norm": 2.807373523712158, "learning_rate": 1.9999994351803153e-05, "loss": 0.6389, "step": 1111 }, { "epoch": 0.030409100853204987, "grad_norm": 2.094871997833252, "learning_rate": 1.9999993371213533e-05, "loss": 0.638, "step": 1112 }, { "epoch": 0.0304364471669219, "grad_norm": 2.775904893875122, "learning_rate": 1.9999992312176777e-05, "loss": 0.618, "step": 1113 }, { "epoch": 0.03046379348063881, "grad_norm": 2.1516919136047363, "learning_rate": 1.9999991174692895e-05, "loss": 0.6359, "step": 1114 }, { "epoch": 0.03049113979435572, "grad_norm": 3.746502161026001, "learning_rate": 1.9999989958761897e-05, "loss": 0.6204, "step": 1115 }, { "epoch": 0.03051848610807263, "grad_norm": 2.7674472332000732, "learning_rate": 1.9999988664383793e-05, "loss": 0.6214, "step": 1116 }, { "epoch": 0.030545832421789543, "grad_norm": 2.1855854988098145, "learning_rate": 1.9999987291558593e-05, "loss": 0.7114, "step": 1117 }, { "epoch": 0.030573178735506454, "grad_norm": 5.125770092010498, "learning_rate": 1.9999985840286303e-05, "loss": 0.5932, "step": 1118 }, { "epoch": 0.030600525049223365, "grad_norm": 2.7748379707336426, "learning_rate": 1.9999984310566942e-05, "loss": 0.6045, "step": 1119 }, { "epoch": 0.030627871362940276, "grad_norm": 2.778254747390747, "learning_rate": 1.999998270240052e-05, "loss": 0.7209, "step": 1120 }, { "epoch": 0.030655217676657187, "grad_norm": 2.4376840591430664, "learning_rate": 1.9999981015787044e-05, "loss": 0.6297, "step": 1121 }, { "epoch": 0.030682563990374098, "grad_norm": 2.0758848190307617, "learning_rate": 1.999997925072653e-05, "loss": 0.6101, "step": 1122 }, { "epoch": 0.03070991030409101, "grad_norm": 2.070631265640259, "learning_rate": 1.9999977407218996e-05, "loss": 0.4624, "step": 1123 }, { "epoch": 0.03073725661780792, "grad_norm": 2.0056087970733643, "learning_rate": 1.9999975485264454e-05, "loss": 0.5676, "step": 1124 }, { "epoch": 0.03076460293152483, "grad_norm": 3.0265560150146484, "learning_rate": 1.9999973484862917e-05, "loss": 0.6255, "step": 1125 }, { "epoch": 0.030791949245241743, "grad_norm": 2.1191625595092773, "learning_rate": 1.99999714060144e-05, "loss": 0.6351, "step": 1126 }, { "epoch": 0.030819295558958654, "grad_norm": 2.502545118331909, "learning_rate": 1.9999969248718926e-05, "loss": 0.6181, "step": 1127 }, { "epoch": 0.030846641872675565, "grad_norm": 2.0921642780303955, "learning_rate": 1.9999967012976505e-05, "loss": 0.5873, "step": 1128 }, { "epoch": 0.030873988186392476, "grad_norm": 2.404646873474121, "learning_rate": 1.9999964698787158e-05, "loss": 0.6144, "step": 1129 }, { "epoch": 0.030901334500109387, "grad_norm": 1.8417408466339111, "learning_rate": 1.99999623061509e-05, "loss": 0.6277, "step": 1130 }, { "epoch": 0.030928680813826295, "grad_norm": 2.2807183265686035, "learning_rate": 1.9999959835067756e-05, "loss": 0.9592, "step": 1131 }, { "epoch": 0.030956027127543206, "grad_norm": 1.8966830968856812, "learning_rate": 1.9999957285537738e-05, "loss": 0.6699, "step": 1132 }, { "epoch": 0.030983373441260117, "grad_norm": 2.6494052410125732, "learning_rate": 1.9999954657560868e-05, "loss": 0.6111, "step": 1133 }, { "epoch": 0.031010719754977028, "grad_norm": 2.6547300815582275, "learning_rate": 1.9999951951137173e-05, "loss": 0.5772, "step": 1134 }, { "epoch": 0.03103806606869394, "grad_norm": 22.460844039916992, "learning_rate": 1.9999949166266664e-05, "loss": 0.9552, "step": 1135 }, { "epoch": 0.03106541238241085, "grad_norm": 1.7447447776794434, "learning_rate": 1.9999946302949374e-05, "loss": 0.607, "step": 1136 }, { "epoch": 0.03109275869612776, "grad_norm": 2.819598913192749, "learning_rate": 1.9999943361185314e-05, "loss": 0.6219, "step": 1137 }, { "epoch": 0.031120105009844672, "grad_norm": 2.10145902633667, "learning_rate": 1.9999940340974516e-05, "loss": 0.5338, "step": 1138 }, { "epoch": 0.031147451323561583, "grad_norm": 1.9022135734558105, "learning_rate": 1.9999937242317e-05, "loss": 0.5902, "step": 1139 }, { "epoch": 0.031174797637278494, "grad_norm": 1.758895993232727, "learning_rate": 1.9999934065212786e-05, "loss": 0.5966, "step": 1140 }, { "epoch": 0.031202143950995406, "grad_norm": 2.257765531539917, "learning_rate": 1.9999930809661908e-05, "loss": 0.5987, "step": 1141 }, { "epoch": 0.031229490264712317, "grad_norm": 1.8210638761520386, "learning_rate": 1.9999927475664384e-05, "loss": 0.6202, "step": 1142 }, { "epoch": 0.031256836578429224, "grad_norm": 3.043221950531006, "learning_rate": 1.9999924063220246e-05, "loss": 0.6254, "step": 1143 }, { "epoch": 0.031284182892146135, "grad_norm": 2.507526397705078, "learning_rate": 1.9999920572329516e-05, "loss": 0.6038, "step": 1144 }, { "epoch": 0.031311529205863046, "grad_norm": 2.3316495418548584, "learning_rate": 1.9999917002992223e-05, "loss": 0.6006, "step": 1145 }, { "epoch": 0.03133887551957996, "grad_norm": 2.224640130996704, "learning_rate": 1.9999913355208396e-05, "loss": 0.705, "step": 1146 }, { "epoch": 0.03136622183329687, "grad_norm": 2.6352145671844482, "learning_rate": 1.9999909628978067e-05, "loss": 0.6241, "step": 1147 }, { "epoch": 0.03139356814701378, "grad_norm": 2.83691143989563, "learning_rate": 1.9999905824301258e-05, "loss": 1.0004, "step": 1148 }, { "epoch": 0.03142091446073069, "grad_norm": 2.877469539642334, "learning_rate": 1.9999901941178e-05, "loss": 0.6143, "step": 1149 }, { "epoch": 0.0314482607744476, "grad_norm": 2.4141786098480225, "learning_rate": 1.9999897979608328e-05, "loss": 0.6071, "step": 1150 }, { "epoch": 0.03147560708816451, "grad_norm": 2.401146173477173, "learning_rate": 1.9999893939592268e-05, "loss": 0.6303, "step": 1151 }, { "epoch": 0.031502953401881424, "grad_norm": 2.052335500717163, "learning_rate": 1.9999889821129858e-05, "loss": 1.034, "step": 1152 }, { "epoch": 0.031530299715598335, "grad_norm": 4.368825435638428, "learning_rate": 1.9999885624221124e-05, "loss": 0.4954, "step": 1153 }, { "epoch": 0.031557646029315246, "grad_norm": 2.7196133136749268, "learning_rate": 1.99998813488661e-05, "loss": 0.6507, "step": 1154 }, { "epoch": 0.03158499234303216, "grad_norm": 3.0386338233947754, "learning_rate": 1.9999876995064825e-05, "loss": 0.6226, "step": 1155 }, { "epoch": 0.03161233865674907, "grad_norm": 2.681586742401123, "learning_rate": 1.9999872562817328e-05, "loss": 0.6348, "step": 1156 }, { "epoch": 0.03163968497046598, "grad_norm": 2.621523141860962, "learning_rate": 1.9999868052123643e-05, "loss": 0.6659, "step": 1157 }, { "epoch": 0.03166703128418289, "grad_norm": 2.5888988971710205, "learning_rate": 1.999986346298381e-05, "loss": 0.6391, "step": 1158 }, { "epoch": 0.0316943775978998, "grad_norm": 2.6357548236846924, "learning_rate": 1.9999858795397863e-05, "loss": 0.6235, "step": 1159 }, { "epoch": 0.03172172391161671, "grad_norm": 2.040771722793579, "learning_rate": 1.9999854049365835e-05, "loss": 0.6392, "step": 1160 }, { "epoch": 0.031749070225333624, "grad_norm": 2.337376594543457, "learning_rate": 1.9999849224887768e-05, "loss": 0.617, "step": 1161 }, { "epoch": 0.031776416539050535, "grad_norm": 2.0596697330474854, "learning_rate": 1.9999844321963696e-05, "loss": 0.6408, "step": 1162 }, { "epoch": 0.031803762852767446, "grad_norm": 3.754276752471924, "learning_rate": 1.9999839340593657e-05, "loss": 1.0444, "step": 1163 }, { "epoch": 0.03183110916648436, "grad_norm": 2.082094192504883, "learning_rate": 1.9999834280777698e-05, "loss": 0.6502, "step": 1164 }, { "epoch": 0.03185845548020127, "grad_norm": 1.7790615558624268, "learning_rate": 1.9999829142515855e-05, "loss": 0.653, "step": 1165 }, { "epoch": 0.03188580179391818, "grad_norm": 2.6540544033050537, "learning_rate": 1.999982392580816e-05, "loss": 0.5919, "step": 1166 }, { "epoch": 0.03191314810763509, "grad_norm": 2.1991608142852783, "learning_rate": 1.9999818630654665e-05, "loss": 0.5998, "step": 1167 }, { "epoch": 0.031940494421352, "grad_norm": 1.660484790802002, "learning_rate": 1.9999813257055404e-05, "loss": 0.6057, "step": 1168 }, { "epoch": 0.03196784073506891, "grad_norm": 2.053617000579834, "learning_rate": 1.9999807805010423e-05, "loss": 0.5852, "step": 1169 }, { "epoch": 0.031995187048785824, "grad_norm": 2.0058934688568115, "learning_rate": 1.9999802274519766e-05, "loss": 0.6102, "step": 1170 }, { "epoch": 0.032022533362502735, "grad_norm": 2.237889528274536, "learning_rate": 1.9999796665583474e-05, "loss": 0.6047, "step": 1171 }, { "epoch": 0.032049879676219646, "grad_norm": 3.8663220405578613, "learning_rate": 1.999979097820159e-05, "loss": 0.9885, "step": 1172 }, { "epoch": 0.03207722598993656, "grad_norm": 1.7085000276565552, "learning_rate": 1.9999785212374162e-05, "loss": 0.6116, "step": 1173 }, { "epoch": 0.03210457230365347, "grad_norm": 2.2249326705932617, "learning_rate": 1.9999779368101227e-05, "loss": 0.5969, "step": 1174 }, { "epoch": 0.03213191861737038, "grad_norm": 1.9968713521957397, "learning_rate": 1.9999773445382843e-05, "loss": 0.9952, "step": 1175 }, { "epoch": 0.03215926493108729, "grad_norm": 2.240849018096924, "learning_rate": 1.999976744421905e-05, "loss": 0.6167, "step": 1176 }, { "epoch": 0.0321866112448042, "grad_norm": 2.670912027359009, "learning_rate": 1.9999761364609894e-05, "loss": 0.6095, "step": 1177 }, { "epoch": 0.03221395755852111, "grad_norm": 1.9797792434692383, "learning_rate": 1.9999755206555423e-05, "loss": 0.6955, "step": 1178 }, { "epoch": 0.032241303872238024, "grad_norm": 1.9224458932876587, "learning_rate": 1.999974897005569e-05, "loss": 0.5577, "step": 1179 }, { "epoch": 0.032268650185954935, "grad_norm": 1.8700296878814697, "learning_rate": 1.999974265511074e-05, "loss": 0.5998, "step": 1180 }, { "epoch": 0.032295996499671846, "grad_norm": 2.442335367202759, "learning_rate": 1.999973626172062e-05, "loss": 0.5994, "step": 1181 }, { "epoch": 0.03232334281338876, "grad_norm": 1.9528487920761108, "learning_rate": 1.9999729789885386e-05, "loss": 0.6042, "step": 1182 }, { "epoch": 0.03235068912710567, "grad_norm": 2.5303666591644287, "learning_rate": 1.9999723239605086e-05, "loss": 0.6345, "step": 1183 }, { "epoch": 0.03237803544082258, "grad_norm": 1.7512245178222656, "learning_rate": 1.999971661087977e-05, "loss": 0.6068, "step": 1184 }, { "epoch": 0.03240538175453949, "grad_norm": 1.7955373525619507, "learning_rate": 1.9999709903709493e-05, "loss": 0.5834, "step": 1185 }, { "epoch": 0.0324327280682564, "grad_norm": 2.0394599437713623, "learning_rate": 1.9999703118094305e-05, "loss": 0.603, "step": 1186 }, { "epoch": 0.03246007438197331, "grad_norm": 1.926244854927063, "learning_rate": 1.999969625403426e-05, "loss": 0.6112, "step": 1187 }, { "epoch": 0.032487420695690224, "grad_norm": 3.0625784397125244, "learning_rate": 1.999968931152941e-05, "loss": 0.5995, "step": 1188 }, { "epoch": 0.032514767009407135, "grad_norm": 2.2061519622802734, "learning_rate": 1.9999682290579814e-05, "loss": 0.5906, "step": 1189 }, { "epoch": 0.032542113323124046, "grad_norm": 1.6792640686035156, "learning_rate": 1.9999675191185527e-05, "loss": 0.6171, "step": 1190 }, { "epoch": 0.03256945963684096, "grad_norm": 1.723919153213501, "learning_rate": 1.9999668013346597e-05, "loss": 0.6075, "step": 1191 }, { "epoch": 0.03259680595055787, "grad_norm": 1.904536247253418, "learning_rate": 1.999966075706309e-05, "loss": 0.6037, "step": 1192 }, { "epoch": 0.03262415226427478, "grad_norm": 1.621700644493103, "learning_rate": 1.9999653422335058e-05, "loss": 0.5978, "step": 1193 }, { "epoch": 0.03265149857799168, "grad_norm": 1.8186917304992676, "learning_rate": 1.9999646009162555e-05, "loss": 0.6164, "step": 1194 }, { "epoch": 0.032678844891708594, "grad_norm": 2.930023431777954, "learning_rate": 1.9999638517545647e-05, "loss": 0.6094, "step": 1195 }, { "epoch": 0.032706191205425506, "grad_norm": 2.211873769760132, "learning_rate": 1.9999630947484386e-05, "loss": 0.5941, "step": 1196 }, { "epoch": 0.03273353751914242, "grad_norm": 1.600006341934204, "learning_rate": 1.999962329897884e-05, "loss": 0.6016, "step": 1197 }, { "epoch": 0.03276088383285933, "grad_norm": 6.135520935058594, "learning_rate": 1.9999615572029056e-05, "loss": 1.1187, "step": 1198 }, { "epoch": 0.03278823014657624, "grad_norm": 1.6231162548065186, "learning_rate": 1.9999607766635107e-05, "loss": 0.4567, "step": 1199 }, { "epoch": 0.03281557646029315, "grad_norm": 1.7135003805160522, "learning_rate": 1.9999599882797048e-05, "loss": 0.6292, "step": 1200 }, { "epoch": 0.03284292277401006, "grad_norm": 2.0147111415863037, "learning_rate": 1.999959192051494e-05, "loss": 0.6006, "step": 1201 }, { "epoch": 0.03287026908772697, "grad_norm": 3.165583372116089, "learning_rate": 1.999958387978885e-05, "loss": 1.0311, "step": 1202 }, { "epoch": 0.03289761540144388, "grad_norm": 3.9794371128082275, "learning_rate": 1.9999575760618838e-05, "loss": 0.5937, "step": 1203 }, { "epoch": 0.032924961715160794, "grad_norm": 1.858689785003662, "learning_rate": 1.9999567563004965e-05, "loss": 0.5622, "step": 1204 }, { "epoch": 0.032952308028877705, "grad_norm": 5.5388264656066895, "learning_rate": 1.9999559286947307e-05, "loss": 0.6056, "step": 1205 }, { "epoch": 0.032979654342594616, "grad_norm": 1.6350784301757812, "learning_rate": 1.9999550932445914e-05, "loss": 0.6057, "step": 1206 }, { "epoch": 0.03300700065631153, "grad_norm": 1.6081074476242065, "learning_rate": 1.999954249950086e-05, "loss": 0.5824, "step": 1207 }, { "epoch": 0.03303434697002844, "grad_norm": 1.9806653261184692, "learning_rate": 1.9999533988112208e-05, "loss": 0.6011, "step": 1208 }, { "epoch": 0.03306169328374535, "grad_norm": 1.9275338649749756, "learning_rate": 1.9999525398280024e-05, "loss": 0.4991, "step": 1209 }, { "epoch": 0.03308903959746226, "grad_norm": 2.0906217098236084, "learning_rate": 1.999951673000438e-05, "loss": 0.6334, "step": 1210 }, { "epoch": 0.03311638591117917, "grad_norm": 1.92037832736969, "learning_rate": 1.9999507983285342e-05, "loss": 0.5907, "step": 1211 }, { "epoch": 0.03314373222489608, "grad_norm": 3.672605037689209, "learning_rate": 1.9999499158122977e-05, "loss": 0.5807, "step": 1212 }, { "epoch": 0.033171078538612994, "grad_norm": 2.027892827987671, "learning_rate": 1.9999490254517354e-05, "loss": 0.5993, "step": 1213 }, { "epoch": 0.033198424852329905, "grad_norm": 5.132105827331543, "learning_rate": 1.999948127246855e-05, "loss": 0.4866, "step": 1214 }, { "epoch": 0.033225771166046816, "grad_norm": 1.9135774374008179, "learning_rate": 1.9999472211976623e-05, "loss": 0.6126, "step": 1215 }, { "epoch": 0.03325311747976373, "grad_norm": 2.513354539871216, "learning_rate": 1.9999463073041653e-05, "loss": 0.4867, "step": 1216 }, { "epoch": 0.03328046379348064, "grad_norm": 2.0724170207977295, "learning_rate": 1.999945385566371e-05, "loss": 0.6249, "step": 1217 }, { "epoch": 0.03330781010719755, "grad_norm": 1.3322556018829346, "learning_rate": 1.9999444559842863e-05, "loss": 0.4355, "step": 1218 }, { "epoch": 0.03333515642091446, "grad_norm": 2.177367687225342, "learning_rate": 1.999943518557919e-05, "loss": 0.6677, "step": 1219 }, { "epoch": 0.03336250273463137, "grad_norm": 2.583306312561035, "learning_rate": 1.999942573287276e-05, "loss": 0.4701, "step": 1220 }, { "epoch": 0.03338984904834828, "grad_norm": 2.655719757080078, "learning_rate": 1.999941620172365e-05, "loss": 0.6221, "step": 1221 }, { "epoch": 0.033417195362065194, "grad_norm": 1.9218450784683228, "learning_rate": 1.9999406592131934e-05, "loss": 0.6199, "step": 1222 }, { "epoch": 0.033444541675782105, "grad_norm": 2.594761848449707, "learning_rate": 1.9999396904097685e-05, "loss": 0.6398, "step": 1223 }, { "epoch": 0.033471887989499016, "grad_norm": 2.0720701217651367, "learning_rate": 1.999938713762099e-05, "loss": 0.5896, "step": 1224 }, { "epoch": 0.03349923430321593, "grad_norm": 1.7237162590026855, "learning_rate": 1.9999377292701905e-05, "loss": 0.632, "step": 1225 }, { "epoch": 0.03352658061693284, "grad_norm": 2.0349104404449463, "learning_rate": 1.9999367369340524e-05, "loss": 0.559, "step": 1226 }, { "epoch": 0.03355392693064975, "grad_norm": 2.3583571910858154, "learning_rate": 1.9999357367536923e-05, "loss": 0.6913, "step": 1227 }, { "epoch": 0.03358127324436666, "grad_norm": 4.434088230133057, "learning_rate": 1.999934728729118e-05, "loss": 1.0471, "step": 1228 }, { "epoch": 0.03360861955808357, "grad_norm": 2.414854049682617, "learning_rate": 1.9999337128603364e-05, "loss": 0.6357, "step": 1229 }, { "epoch": 0.03363596587180048, "grad_norm": 1.9669491052627563, "learning_rate": 1.9999326891473565e-05, "loss": 0.6229, "step": 1230 }, { "epoch": 0.033663312185517394, "grad_norm": 2.1938462257385254, "learning_rate": 1.9999316575901862e-05, "loss": 0.6425, "step": 1231 }, { "epoch": 0.033690658499234305, "grad_norm": 2.0173685550689697, "learning_rate": 1.999930618188833e-05, "loss": 0.6344, "step": 1232 }, { "epoch": 0.033718004812951216, "grad_norm": 2.507181167602539, "learning_rate": 1.999929570943306e-05, "loss": 0.6442, "step": 1233 }, { "epoch": 0.03374535112666813, "grad_norm": 1.9520057439804077, "learning_rate": 1.999928515853613e-05, "loss": 0.593, "step": 1234 }, { "epoch": 0.03377269744038504, "grad_norm": 2.51605224609375, "learning_rate": 1.999927452919762e-05, "loss": 0.6014, "step": 1235 }, { "epoch": 0.03380004375410195, "grad_norm": 2.9785091876983643, "learning_rate": 1.9999263821417613e-05, "loss": 0.6217, "step": 1236 }, { "epoch": 0.03382739006781886, "grad_norm": 1.8703824281692505, "learning_rate": 1.99992530351962e-05, "loss": 0.5818, "step": 1237 }, { "epoch": 0.03385473638153577, "grad_norm": 2.7978150844573975, "learning_rate": 1.9999242170533456e-05, "loss": 0.6273, "step": 1238 }, { "epoch": 0.03388208269525268, "grad_norm": 1.861634373664856, "learning_rate": 1.9999231227429473e-05, "loss": 0.6309, "step": 1239 }, { "epoch": 0.033909429008969594, "grad_norm": 2.471458911895752, "learning_rate": 1.999922020588434e-05, "loss": 0.6209, "step": 1240 }, { "epoch": 0.033936775322686505, "grad_norm": 2.373098611831665, "learning_rate": 1.9999209105898134e-05, "loss": 0.6268, "step": 1241 }, { "epoch": 0.033964121636403416, "grad_norm": 2.06469988822937, "learning_rate": 1.9999197927470945e-05, "loss": 0.6094, "step": 1242 }, { "epoch": 0.03399146795012033, "grad_norm": 2.3198623657226562, "learning_rate": 1.9999186670602864e-05, "loss": 0.6148, "step": 1243 }, { "epoch": 0.03401881426383724, "grad_norm": 2.1020922660827637, "learning_rate": 1.999917533529398e-05, "loss": 0.5995, "step": 1244 }, { "epoch": 0.03404616057755414, "grad_norm": 2.3280858993530273, "learning_rate": 1.9999163921544374e-05, "loss": 0.5939, "step": 1245 }, { "epoch": 0.03407350689127105, "grad_norm": 2.1733458042144775, "learning_rate": 1.999915242935415e-05, "loss": 0.7061, "step": 1246 }, { "epoch": 0.034100853204987965, "grad_norm": 4.347835063934326, "learning_rate": 1.9999140858723377e-05, "loss": 1.1074, "step": 1247 }, { "epoch": 0.034128199518704876, "grad_norm": 3.0232555866241455, "learning_rate": 1.999912920965217e-05, "loss": 0.6294, "step": 1248 }, { "epoch": 0.03415554583242179, "grad_norm": 2.4148619174957275, "learning_rate": 1.99991174821406e-05, "loss": 0.5965, "step": 1249 }, { "epoch": 0.0341828921461387, "grad_norm": 2.240074872970581, "learning_rate": 1.999910567618877e-05, "loss": 0.6072, "step": 1250 }, { "epoch": 0.03421023845985561, "grad_norm": 1.7382780313491821, "learning_rate": 1.9999093791796767e-05, "loss": 0.5995, "step": 1251 }, { "epoch": 0.03423758477357252, "grad_norm": 2.3267407417297363, "learning_rate": 1.9999081828964692e-05, "loss": 0.6547, "step": 1252 }, { "epoch": 0.03426493108728943, "grad_norm": 2.0465481281280518, "learning_rate": 1.9999069787692633e-05, "loss": 0.5792, "step": 1253 }, { "epoch": 0.03429227740100634, "grad_norm": 1.9065337181091309, "learning_rate": 1.9999057667980687e-05, "loss": 0.5734, "step": 1254 }, { "epoch": 0.03431962371472325, "grad_norm": 124.56659698486328, "learning_rate": 1.9999045469828945e-05, "loss": 0.6227, "step": 1255 }, { "epoch": 0.034346970028440164, "grad_norm": 2.787778854370117, "learning_rate": 1.9999033193237505e-05, "loss": 1.0292, "step": 1256 }, { "epoch": 0.034374316342157075, "grad_norm": 1.8153808116912842, "learning_rate": 1.999902083820646e-05, "loss": 0.6017, "step": 1257 }, { "epoch": 0.03440166265587399, "grad_norm": 3.464770555496216, "learning_rate": 1.999900840473592e-05, "loss": 0.5811, "step": 1258 }, { "epoch": 0.0344290089695909, "grad_norm": 1.8965548276901245, "learning_rate": 1.9998995892825967e-05, "loss": 0.6132, "step": 1259 }, { "epoch": 0.03445635528330781, "grad_norm": 2.0361101627349854, "learning_rate": 1.9998983302476705e-05, "loss": 0.613, "step": 1260 }, { "epoch": 0.03448370159702472, "grad_norm": 1.4666273593902588, "learning_rate": 1.9998970633688235e-05, "loss": 0.5464, "step": 1261 }, { "epoch": 0.03451104791074163, "grad_norm": 2.6026804447174072, "learning_rate": 1.9998957886460652e-05, "loss": 0.6087, "step": 1262 }, { "epoch": 0.03453839422445854, "grad_norm": 1.3677630424499512, "learning_rate": 1.999894506079406e-05, "loss": 0.6205, "step": 1263 }, { "epoch": 0.03456574053817545, "grad_norm": 2.0354268550872803, "learning_rate": 1.9998932156688556e-05, "loss": 0.5228, "step": 1264 }, { "epoch": 0.034593086851892364, "grad_norm": 2.0559258460998535, "learning_rate": 1.9998919174144246e-05, "loss": 0.7321, "step": 1265 }, { "epoch": 0.034620433165609275, "grad_norm": 1.8032472133636475, "learning_rate": 1.9998906113161226e-05, "loss": 0.6021, "step": 1266 }, { "epoch": 0.034647779479326186, "grad_norm": 2.29101824760437, "learning_rate": 1.9998892973739607e-05, "loss": 0.6428, "step": 1267 }, { "epoch": 0.0346751257930431, "grad_norm": 1.697793960571289, "learning_rate": 1.999887975587948e-05, "loss": 0.6196, "step": 1268 }, { "epoch": 0.03470247210676001, "grad_norm": 1.7429783344268799, "learning_rate": 1.9998866459580958e-05, "loss": 0.6261, "step": 1269 }, { "epoch": 0.03472981842047692, "grad_norm": 2.0412490367889404, "learning_rate": 1.9998853084844143e-05, "loss": 0.6035, "step": 1270 }, { "epoch": 0.03475716473419383, "grad_norm": 1.4923290014266968, "learning_rate": 1.999883963166914e-05, "loss": 0.6181, "step": 1271 }, { "epoch": 0.03478451104791074, "grad_norm": 1.923958659172058, "learning_rate": 1.999882610005605e-05, "loss": 0.671, "step": 1272 }, { "epoch": 0.03481185736162765, "grad_norm": 1.789735198020935, "learning_rate": 1.9998812490004987e-05, "loss": 0.6152, "step": 1273 }, { "epoch": 0.034839203675344564, "grad_norm": 1.696592926979065, "learning_rate": 1.9998798801516057e-05, "loss": 0.6163, "step": 1274 }, { "epoch": 0.034866549989061475, "grad_norm": 1.8438444137573242, "learning_rate": 1.9998785034589357e-05, "loss": 0.6222, "step": 1275 }, { "epoch": 0.034893896302778386, "grad_norm": 4.896993637084961, "learning_rate": 1.9998771189225005e-05, "loss": 1.0903, "step": 1276 }, { "epoch": 0.0349212426164953, "grad_norm": 1.8362442255020142, "learning_rate": 1.999875726542311e-05, "loss": 0.5837, "step": 1277 }, { "epoch": 0.03494858893021221, "grad_norm": 2.194810390472412, "learning_rate": 1.9998743263183775e-05, "loss": 0.6941, "step": 1278 }, { "epoch": 0.03497593524392912, "grad_norm": 2.4662973880767822, "learning_rate": 1.9998729182507114e-05, "loss": 1.0375, "step": 1279 }, { "epoch": 0.03500328155764603, "grad_norm": 1.8469204902648926, "learning_rate": 1.9998715023393237e-05, "loss": 0.6938, "step": 1280 }, { "epoch": 0.03503062787136294, "grad_norm": 1.8020896911621094, "learning_rate": 1.9998700785842253e-05, "loss": 0.6129, "step": 1281 }, { "epoch": 0.03505797418507985, "grad_norm": 1.7673604488372803, "learning_rate": 1.9998686469854272e-05, "loss": 0.6455, "step": 1282 }, { "epoch": 0.035085320498796764, "grad_norm": 1.9859694242477417, "learning_rate": 1.9998672075429418e-05, "loss": 0.682, "step": 1283 }, { "epoch": 0.035112666812513675, "grad_norm": 1.5891019105911255, "learning_rate": 1.9998657602567788e-05, "loss": 0.6521, "step": 1284 }, { "epoch": 0.035140013126230586, "grad_norm": 1.6152212619781494, "learning_rate": 1.9998643051269505e-05, "loss": 0.6216, "step": 1285 }, { "epoch": 0.0351673594399475, "grad_norm": 1.8887802362442017, "learning_rate": 1.9998628421534686e-05, "loss": 0.5852, "step": 1286 }, { "epoch": 0.03519470575366441, "grad_norm": 2.1390364170074463, "learning_rate": 1.9998613713363434e-05, "loss": 0.5322, "step": 1287 }, { "epoch": 0.03522205206738132, "grad_norm": 2.1192803382873535, "learning_rate": 1.9998598926755878e-05, "loss": 0.6159, "step": 1288 }, { "epoch": 0.03524939838109823, "grad_norm": 2.3353254795074463, "learning_rate": 1.9998584061712122e-05, "loss": 0.7158, "step": 1289 }, { "epoch": 0.03527674469481514, "grad_norm": 2.4667248725891113, "learning_rate": 1.9998569118232288e-05, "loss": 0.6851, "step": 1290 }, { "epoch": 0.03530409100853205, "grad_norm": 1.7443101406097412, "learning_rate": 1.9998554096316495e-05, "loss": 0.5902, "step": 1291 }, { "epoch": 0.035331437322248964, "grad_norm": 2.4916326999664307, "learning_rate": 1.999853899596486e-05, "loss": 0.5908, "step": 1292 }, { "epoch": 0.035358783635965875, "grad_norm": 1.9666202068328857, "learning_rate": 1.9998523817177497e-05, "loss": 0.6161, "step": 1293 }, { "epoch": 0.035386129949682786, "grad_norm": 1.9865373373031616, "learning_rate": 1.9998508559954532e-05, "loss": 0.4963, "step": 1294 }, { "epoch": 0.0354134762633997, "grad_norm": 2.142983913421631, "learning_rate": 1.999849322429608e-05, "loss": 0.5974, "step": 1295 }, { "epoch": 0.0354408225771166, "grad_norm": 2.0142362117767334, "learning_rate": 1.999847781020226e-05, "loss": 0.6214, "step": 1296 }, { "epoch": 0.03546816889083351, "grad_norm": 2.0387985706329346, "learning_rate": 1.9998462317673196e-05, "loss": 0.6222, "step": 1297 }, { "epoch": 0.035495515204550424, "grad_norm": 1.7268060445785522, "learning_rate": 1.999844674670901e-05, "loss": 0.5902, "step": 1298 }, { "epoch": 0.035522861518267335, "grad_norm": 2.295837163925171, "learning_rate": 1.9998431097309825e-05, "loss": 0.6592, "step": 1299 }, { "epoch": 0.035550207831984246, "grad_norm": 2.555525541305542, "learning_rate": 1.9998415369475757e-05, "loss": 0.7033, "step": 1300 }, { "epoch": 0.03557755414570116, "grad_norm": 2.1757612228393555, "learning_rate": 1.999839956320694e-05, "loss": 0.6416, "step": 1301 }, { "epoch": 0.03560490045941807, "grad_norm": 2.3112950325012207, "learning_rate": 1.999838367850349e-05, "loss": 0.6268, "step": 1302 }, { "epoch": 0.03563224677313498, "grad_norm": 4.102482318878174, "learning_rate": 1.9998367715365533e-05, "loss": 0.6527, "step": 1303 }, { "epoch": 0.03565959308685189, "grad_norm": 2.375716209411621, "learning_rate": 1.9998351673793196e-05, "loss": 0.6164, "step": 1304 }, { "epoch": 0.0356869394005688, "grad_norm": 2.05938982963562, "learning_rate": 1.9998335553786604e-05, "loss": 0.6309, "step": 1305 }, { "epoch": 0.03571428571428571, "grad_norm": 1.9791643619537354, "learning_rate": 1.999831935534588e-05, "loss": 0.6264, "step": 1306 }, { "epoch": 0.03574163202800262, "grad_norm": 2.5085959434509277, "learning_rate": 1.9998303078471158e-05, "loss": 0.6214, "step": 1307 }, { "epoch": 0.035768978341719534, "grad_norm": 1.849109172821045, "learning_rate": 1.999828672316256e-05, "loss": 0.4482, "step": 1308 }, { "epoch": 0.035796324655436446, "grad_norm": 1.8022122383117676, "learning_rate": 1.999827028942022e-05, "loss": 0.6276, "step": 1309 }, { "epoch": 0.03582367096915336, "grad_norm": 1.6323843002319336, "learning_rate": 1.9998253777244262e-05, "loss": 0.6071, "step": 1310 }, { "epoch": 0.03585101728287027, "grad_norm": 1.5401172637939453, "learning_rate": 1.9998237186634815e-05, "loss": 0.6025, "step": 1311 }, { "epoch": 0.03587836359658718, "grad_norm": 4.07182502746582, "learning_rate": 1.999822051759201e-05, "loss": 1.028, "step": 1312 }, { "epoch": 0.03590570991030409, "grad_norm": 1.9665287733078003, "learning_rate": 1.9998203770115982e-05, "loss": 0.5951, "step": 1313 }, { "epoch": 0.035933056224021, "grad_norm": 2.7596144676208496, "learning_rate": 1.999818694420686e-05, "loss": 0.6477, "step": 1314 }, { "epoch": 0.03596040253773791, "grad_norm": 2.0260236263275146, "learning_rate": 1.999817003986477e-05, "loss": 0.7343, "step": 1315 }, { "epoch": 0.03598774885145482, "grad_norm": 1.5618665218353271, "learning_rate": 1.9998153057089854e-05, "loss": 0.6039, "step": 1316 }, { "epoch": 0.036015095165171734, "grad_norm": 1.9052553176879883, "learning_rate": 1.999813599588224e-05, "loss": 0.5967, "step": 1317 }, { "epoch": 0.036042441478888645, "grad_norm": 1.8329479694366455, "learning_rate": 1.9998118856242062e-05, "loss": 0.6205, "step": 1318 }, { "epoch": 0.03606978779260556, "grad_norm": 1.8654738664627075, "learning_rate": 1.9998101638169456e-05, "loss": 0.6081, "step": 1319 }, { "epoch": 0.03609713410632247, "grad_norm": 2.4742043018341064, "learning_rate": 1.9998084341664556e-05, "loss": 0.6939, "step": 1320 }, { "epoch": 0.03612448042003938, "grad_norm": 2.286031484603882, "learning_rate": 1.99980669667275e-05, "loss": 0.4804, "step": 1321 }, { "epoch": 0.03615182673375629, "grad_norm": 2.3656861782073975, "learning_rate": 1.999804951335842e-05, "loss": 0.5766, "step": 1322 }, { "epoch": 0.0361791730474732, "grad_norm": 2.4664926528930664, "learning_rate": 1.9998031981557455e-05, "loss": 0.6122, "step": 1323 }, { "epoch": 0.03620651936119011, "grad_norm": 2.0041611194610596, "learning_rate": 1.9998014371324742e-05, "loss": 0.5983, "step": 1324 }, { "epoch": 0.03623386567490702, "grad_norm": 1.8820898532867432, "learning_rate": 1.9997996682660423e-05, "loss": 0.6044, "step": 1325 }, { "epoch": 0.036261211988623934, "grad_norm": 2.162357807159424, "learning_rate": 1.9997978915564632e-05, "loss": 0.4853, "step": 1326 }, { "epoch": 0.036288558302340845, "grad_norm": 1.8041400909423828, "learning_rate": 1.9997961070037513e-05, "loss": 0.6156, "step": 1327 }, { "epoch": 0.036315904616057756, "grad_norm": 2.214384078979492, "learning_rate": 1.99979431460792e-05, "loss": 0.6099, "step": 1328 }, { "epoch": 0.03634325092977467, "grad_norm": 2.4967451095581055, "learning_rate": 1.9997925143689834e-05, "loss": 0.9701, "step": 1329 }, { "epoch": 0.03637059724349158, "grad_norm": 2.3639509677886963, "learning_rate": 1.9997907062869563e-05, "loss": 0.5697, "step": 1330 }, { "epoch": 0.03639794355720849, "grad_norm": 1.8785690069198608, "learning_rate": 1.9997888903618527e-05, "loss": 0.6133, "step": 1331 }, { "epoch": 0.0364252898709254, "grad_norm": 2.0768704414367676, "learning_rate": 1.999787066593686e-05, "loss": 0.6133, "step": 1332 }, { "epoch": 0.03645263618464231, "grad_norm": 1.7520291805267334, "learning_rate": 1.9997852349824715e-05, "loss": 0.6106, "step": 1333 }, { "epoch": 0.03647998249835922, "grad_norm": 2.2584750652313232, "learning_rate": 1.999783395528223e-05, "loss": 0.6012, "step": 1334 }, { "epoch": 0.036507328812076134, "grad_norm": 1.69122314453125, "learning_rate": 1.9997815482309554e-05, "loss": 0.9814, "step": 1335 }, { "epoch": 0.036534675125793045, "grad_norm": 1.439465045928955, "learning_rate": 1.9997796930906827e-05, "loss": 0.5988, "step": 1336 }, { "epoch": 0.036562021439509956, "grad_norm": 2.049760103225708, "learning_rate": 1.99977783010742e-05, "loss": 0.5964, "step": 1337 }, { "epoch": 0.03658936775322687, "grad_norm": 1.8244874477386475, "learning_rate": 1.999775959281181e-05, "loss": 0.6104, "step": 1338 }, { "epoch": 0.03661671406694378, "grad_norm": 1.9844456911087036, "learning_rate": 1.9997740806119815e-05, "loss": 0.6317, "step": 1339 }, { "epoch": 0.03664406038066069, "grad_norm": 1.5369877815246582, "learning_rate": 1.9997721940998355e-05, "loss": 0.6024, "step": 1340 }, { "epoch": 0.0366714066943776, "grad_norm": 1.9824445247650146, "learning_rate": 1.999770299744758e-05, "loss": 0.6022, "step": 1341 }, { "epoch": 0.03669875300809451, "grad_norm": 1.5184571743011475, "learning_rate": 1.9997683975467635e-05, "loss": 0.6091, "step": 1342 }, { "epoch": 0.03672609932181142, "grad_norm": 1.6243776082992554, "learning_rate": 1.9997664875058677e-05, "loss": 0.5102, "step": 1343 }, { "epoch": 0.036753445635528334, "grad_norm": 1.4912240505218506, "learning_rate": 1.999764569622085e-05, "loss": 0.6054, "step": 1344 }, { "epoch": 0.036780791949245245, "grad_norm": 2.0541460514068604, "learning_rate": 1.999762643895431e-05, "loss": 0.9999, "step": 1345 }, { "epoch": 0.03680813826296215, "grad_norm": 1.6329630613327026, "learning_rate": 1.99976071032592e-05, "loss": 0.4255, "step": 1346 }, { "epoch": 0.03683548457667906, "grad_norm": 1.5651172399520874, "learning_rate": 1.9997587689135676e-05, "loss": 0.6318, "step": 1347 }, { "epoch": 0.03686283089039597, "grad_norm": 1.612257480621338, "learning_rate": 1.999756819658389e-05, "loss": 0.6035, "step": 1348 }, { "epoch": 0.03689017720411288, "grad_norm": 1.5766521692276, "learning_rate": 1.9997548625604e-05, "loss": 0.6034, "step": 1349 }, { "epoch": 0.036917523517829794, "grad_norm": 2.5508310794830322, "learning_rate": 1.9997528976196147e-05, "loss": 0.6097, "step": 1350 }, { "epoch": 0.036944869831546705, "grad_norm": 1.749396800994873, "learning_rate": 1.99975092483605e-05, "loss": 0.5906, "step": 1351 }, { "epoch": 0.036972216145263616, "grad_norm": 1.7338882684707642, "learning_rate": 1.99974894420972e-05, "loss": 0.4786, "step": 1352 }, { "epoch": 0.03699956245898053, "grad_norm": 1.6439670324325562, "learning_rate": 1.9997469557406414e-05, "loss": 0.6248, "step": 1353 }, { "epoch": 0.03702690877269744, "grad_norm": 1.7850199937820435, "learning_rate": 1.999744959428829e-05, "loss": 0.6091, "step": 1354 }, { "epoch": 0.03705425508641435, "grad_norm": 1.8305140733718872, "learning_rate": 1.999742955274299e-05, "loss": 0.5737, "step": 1355 }, { "epoch": 0.03708160140013126, "grad_norm": 1.8251615762710571, "learning_rate": 1.9997409432770665e-05, "loss": 0.6438, "step": 1356 }, { "epoch": 0.03710894771384817, "grad_norm": 1.5061845779418945, "learning_rate": 1.9997389234371478e-05, "loss": 0.5924, "step": 1357 }, { "epoch": 0.03713629402756508, "grad_norm": 1.817406415939331, "learning_rate": 1.9997368957545585e-05, "loss": 0.6089, "step": 1358 }, { "epoch": 0.037163640341281994, "grad_norm": 2.3548123836517334, "learning_rate": 1.999734860229315e-05, "loss": 0.5665, "step": 1359 }, { "epoch": 0.037190986654998905, "grad_norm": 1.7581907510757446, "learning_rate": 1.999732816861432e-05, "loss": 0.5837, "step": 1360 }, { "epoch": 0.037218332968715816, "grad_norm": 2.302380323410034, "learning_rate": 1.9997307656509272e-05, "loss": 0.5903, "step": 1361 }, { "epoch": 0.03724567928243273, "grad_norm": 1.6331754922866821, "learning_rate": 1.9997287065978158e-05, "loss": 0.6222, "step": 1362 }, { "epoch": 0.03727302559614964, "grad_norm": 1.9653255939483643, "learning_rate": 1.999726639702114e-05, "loss": 0.6166, "step": 1363 }, { "epoch": 0.03730037190986655, "grad_norm": 1.7345852851867676, "learning_rate": 1.9997245649638382e-05, "loss": 0.5905, "step": 1364 }, { "epoch": 0.03732771822358346, "grad_norm": 2.004995822906494, "learning_rate": 1.9997224823830043e-05, "loss": 0.611, "step": 1365 }, { "epoch": 0.03735506453730037, "grad_norm": 1.6202975511550903, "learning_rate": 1.999720391959629e-05, "loss": 0.6174, "step": 1366 }, { "epoch": 0.03738241085101728, "grad_norm": 1.8087737560272217, "learning_rate": 1.999718293693728e-05, "loss": 0.6887, "step": 1367 }, { "epoch": 0.03740975716473419, "grad_norm": 1.592071533203125, "learning_rate": 1.999716187585319e-05, "loss": 0.6273, "step": 1368 }, { "epoch": 0.037437103478451104, "grad_norm": 1.717786192893982, "learning_rate": 1.9997140736344177e-05, "loss": 0.5965, "step": 1369 }, { "epoch": 0.037464449792168016, "grad_norm": 1.9504475593566895, "learning_rate": 1.999711951841041e-05, "loss": 0.6877, "step": 1370 }, { "epoch": 0.03749179610588493, "grad_norm": 1.9084962606430054, "learning_rate": 1.9997098222052052e-05, "loss": 0.6021, "step": 1371 }, { "epoch": 0.03751914241960184, "grad_norm": 2.0037050247192383, "learning_rate": 1.999707684726927e-05, "loss": 0.6004, "step": 1372 }, { "epoch": 0.03754648873331875, "grad_norm": 1.657381296157837, "learning_rate": 1.9997055394062233e-05, "loss": 0.5749, "step": 1373 }, { "epoch": 0.03757383504703566, "grad_norm": 2.013369083404541, "learning_rate": 1.9997033862431114e-05, "loss": 0.6121, "step": 1374 }, { "epoch": 0.03760118136075257, "grad_norm": 2.236711263656616, "learning_rate": 1.9997012252376077e-05, "loss": 0.629, "step": 1375 }, { "epoch": 0.03762852767446948, "grad_norm": 1.6355955600738525, "learning_rate": 1.9996990563897288e-05, "loss": 0.6105, "step": 1376 }, { "epoch": 0.03765587398818639, "grad_norm": 1.939768671989441, "learning_rate": 1.9996968796994927e-05, "loss": 0.5988, "step": 1377 }, { "epoch": 0.037683220301903304, "grad_norm": 1.9265884160995483, "learning_rate": 1.9996946951669157e-05, "loss": 0.6044, "step": 1378 }, { "epoch": 0.037710566615620215, "grad_norm": 1.6464289426803589, "learning_rate": 1.9996925027920147e-05, "loss": 0.5905, "step": 1379 }, { "epoch": 0.03773791292933713, "grad_norm": 3.0992259979248047, "learning_rate": 1.9996903025748084e-05, "loss": 0.574, "step": 1380 }, { "epoch": 0.03776525924305404, "grad_norm": 2.2680771350860596, "learning_rate": 1.9996880945153122e-05, "loss": 0.6099, "step": 1381 }, { "epoch": 0.03779260555677095, "grad_norm": 2.0342798233032227, "learning_rate": 1.9996858786135442e-05, "loss": 0.6154, "step": 1382 }, { "epoch": 0.03781995187048786, "grad_norm": 1.8283506631851196, "learning_rate": 1.999683654869522e-05, "loss": 0.5899, "step": 1383 }, { "epoch": 0.03784729818420477, "grad_norm": 1.7892718315124512, "learning_rate": 1.999681423283263e-05, "loss": 0.6356, "step": 1384 }, { "epoch": 0.03787464449792168, "grad_norm": 1.7135838270187378, "learning_rate": 1.9996791838547846e-05, "loss": 0.599, "step": 1385 }, { "epoch": 0.03790199081163859, "grad_norm": 1.884841799736023, "learning_rate": 1.999676936584104e-05, "loss": 0.6507, "step": 1386 }, { "epoch": 0.037929337125355504, "grad_norm": 1.651294469833374, "learning_rate": 1.9996746814712396e-05, "loss": 0.6105, "step": 1387 }, { "epoch": 0.037956683439072415, "grad_norm": 2.32928204536438, "learning_rate": 1.9996724185162083e-05, "loss": 0.47, "step": 1388 }, { "epoch": 0.037984029752789326, "grad_norm": 2.9021060466766357, "learning_rate": 1.9996701477190282e-05, "loss": 0.9717, "step": 1389 }, { "epoch": 0.03801137606650624, "grad_norm": 2.36102032661438, "learning_rate": 1.9996678690797172e-05, "loss": 0.5681, "step": 1390 }, { "epoch": 0.03803872238022315, "grad_norm": 2.7139880657196045, "learning_rate": 1.9996655825982936e-05, "loss": 0.6074, "step": 1391 }, { "epoch": 0.03806606869394006, "grad_norm": 2.57096529006958, "learning_rate": 1.9996632882747744e-05, "loss": 0.6031, "step": 1392 }, { "epoch": 0.03809341500765697, "grad_norm": 1.9065165519714355, "learning_rate": 1.999660986109178e-05, "loss": 0.6142, "step": 1393 }, { "epoch": 0.03812076132137388, "grad_norm": 1.778965950012207, "learning_rate": 1.9996586761015223e-05, "loss": 0.5749, "step": 1394 }, { "epoch": 0.03814810763509079, "grad_norm": 2.4224448204040527, "learning_rate": 1.999656358251826e-05, "loss": 0.6057, "step": 1395 }, { "epoch": 0.038175453948807704, "grad_norm": 2.8222076892852783, "learning_rate": 1.9996540325601068e-05, "loss": 1.0015, "step": 1396 }, { "epoch": 0.03820280026252461, "grad_norm": 1.8307281732559204, "learning_rate": 1.9996516990263828e-05, "loss": 0.6165, "step": 1397 }, { "epoch": 0.03823014657624152, "grad_norm": 2.157228946685791, "learning_rate": 1.999649357650673e-05, "loss": 0.9767, "step": 1398 }, { "epoch": 0.03825749288995843, "grad_norm": 2.026045560836792, "learning_rate": 1.999647008432995e-05, "loss": 0.5173, "step": 1399 }, { "epoch": 0.03828483920367534, "grad_norm": 2.2979979515075684, "learning_rate": 1.9996446513733675e-05, "loss": 0.6348, "step": 1400 }, { "epoch": 0.03831218551739225, "grad_norm": 3.0321671962738037, "learning_rate": 1.999642286471809e-05, "loss": 0.6877, "step": 1401 }, { "epoch": 0.038339531831109164, "grad_norm": 1.8509465456008911, "learning_rate": 1.999639913728338e-05, "loss": 0.6396, "step": 1402 }, { "epoch": 0.038366878144826075, "grad_norm": 1.9200292825698853, "learning_rate": 1.9996375331429737e-05, "loss": 0.586, "step": 1403 }, { "epoch": 0.038394224458542986, "grad_norm": 1.9788405895233154, "learning_rate": 1.999635144715734e-05, "loss": 0.6237, "step": 1404 }, { "epoch": 0.0384215707722599, "grad_norm": 2.9008686542510986, "learning_rate": 1.9996327484466377e-05, "loss": 0.5919, "step": 1405 }, { "epoch": 0.03844891708597681, "grad_norm": 2.213331460952759, "learning_rate": 1.999630344335704e-05, "loss": 0.6187, "step": 1406 }, { "epoch": 0.03847626339969372, "grad_norm": 3.4506449699401855, "learning_rate": 1.9996279323829518e-05, "loss": 1.0427, "step": 1407 }, { "epoch": 0.03850360971341063, "grad_norm": 1.7467620372772217, "learning_rate": 1.9996255125883994e-05, "loss": 0.6116, "step": 1408 }, { "epoch": 0.03853095602712754, "grad_norm": 1.7438790798187256, "learning_rate": 1.9996230849520665e-05, "loss": 0.6089, "step": 1409 }, { "epoch": 0.03855830234084445, "grad_norm": 1.4933110475540161, "learning_rate": 1.9996206494739713e-05, "loss": 0.6076, "step": 1410 }, { "epoch": 0.038585648654561364, "grad_norm": 1.8952839374542236, "learning_rate": 1.999618206154134e-05, "loss": 0.6171, "step": 1411 }, { "epoch": 0.038612994968278275, "grad_norm": 1.825415849685669, "learning_rate": 1.999615754992573e-05, "loss": 1.0218, "step": 1412 }, { "epoch": 0.038640341281995186, "grad_norm": 1.5688011646270752, "learning_rate": 1.9996132959893075e-05, "loss": 0.6229, "step": 1413 }, { "epoch": 0.0386676875957121, "grad_norm": 1.4556899070739746, "learning_rate": 1.9996108291443575e-05, "loss": 0.5888, "step": 1414 }, { "epoch": 0.03869503390942901, "grad_norm": 1.7854892015457153, "learning_rate": 1.9996083544577414e-05, "loss": 0.5699, "step": 1415 }, { "epoch": 0.03872238022314592, "grad_norm": 1.571449875831604, "learning_rate": 1.9996058719294793e-05, "loss": 0.5966, "step": 1416 }, { "epoch": 0.03874972653686283, "grad_norm": 1.3697693347930908, "learning_rate": 1.9996033815595902e-05, "loss": 0.5783, "step": 1417 }, { "epoch": 0.03877707285057974, "grad_norm": 1.7968066930770874, "learning_rate": 1.999600883348094e-05, "loss": 0.595, "step": 1418 }, { "epoch": 0.03880441916429665, "grad_norm": 1.5982794761657715, "learning_rate": 1.9995983772950104e-05, "loss": 0.6219, "step": 1419 }, { "epoch": 0.038831765478013563, "grad_norm": 1.5723958015441895, "learning_rate": 1.9995958634003586e-05, "loss": 0.587, "step": 1420 }, { "epoch": 0.038859111791730475, "grad_norm": 1.9182838201522827, "learning_rate": 1.9995933416641588e-05, "loss": 0.5616, "step": 1421 }, { "epoch": 0.038886458105447386, "grad_norm": 1.7298510074615479, "learning_rate": 1.99959081208643e-05, "loss": 0.6275, "step": 1422 }, { "epoch": 0.0389138044191643, "grad_norm": 3.176466703414917, "learning_rate": 1.9995882746671934e-05, "loss": 1.0099, "step": 1423 }, { "epoch": 0.03894115073288121, "grad_norm": 1.537397861480713, "learning_rate": 1.9995857294064674e-05, "loss": 0.5911, "step": 1424 }, { "epoch": 0.03896849704659812, "grad_norm": 2.0489249229431152, "learning_rate": 1.999583176304273e-05, "loss": 0.601, "step": 1425 }, { "epoch": 0.03899584336031503, "grad_norm": 1.683374285697937, "learning_rate": 1.99958061536063e-05, "loss": 0.7265, "step": 1426 }, { "epoch": 0.03902318967403194, "grad_norm": 2.8059608936309814, "learning_rate": 1.999578046575558e-05, "loss": 0.6599, "step": 1427 }, { "epoch": 0.03905053598774885, "grad_norm": 1.7368265390396118, "learning_rate": 1.9995754699490775e-05, "loss": 0.6109, "step": 1428 }, { "epoch": 0.03907788230146576, "grad_norm": 1.3671413660049438, "learning_rate": 1.999572885481209e-05, "loss": 0.465, "step": 1429 }, { "epoch": 0.039105228615182674, "grad_norm": 1.4773664474487305, "learning_rate": 1.9995702931719726e-05, "loss": 0.407, "step": 1430 }, { "epoch": 0.039132574928899586, "grad_norm": 1.656914472579956, "learning_rate": 1.9995676930213887e-05, "loss": 0.5461, "step": 1431 }, { "epoch": 0.0391599212426165, "grad_norm": 1.6920661926269531, "learning_rate": 1.999565085029477e-05, "loss": 0.5823, "step": 1432 }, { "epoch": 0.03918726755633341, "grad_norm": 1.7129236459732056, "learning_rate": 1.9995624691962588e-05, "loss": 0.6308, "step": 1433 }, { "epoch": 0.03921461387005032, "grad_norm": 1.4364666938781738, "learning_rate": 1.9995598455217546e-05, "loss": 0.6145, "step": 1434 }, { "epoch": 0.03924196018376723, "grad_norm": 3.0133798122406006, "learning_rate": 1.999557214005984e-05, "loss": 0.9629, "step": 1435 }, { "epoch": 0.03926930649748414, "grad_norm": 1.6172981262207031, "learning_rate": 1.999554574648969e-05, "loss": 0.6206, "step": 1436 }, { "epoch": 0.03929665281120105, "grad_norm": 3.44270920753479, "learning_rate": 1.9995519274507295e-05, "loss": 0.5866, "step": 1437 }, { "epoch": 0.03932399912491796, "grad_norm": 1.7356152534484863, "learning_rate": 1.9995492724112868e-05, "loss": 0.6048, "step": 1438 }, { "epoch": 0.039351345438634874, "grad_norm": 1.5805608034133911, "learning_rate": 1.9995466095306605e-05, "loss": 0.4715, "step": 1439 }, { "epoch": 0.039378691752351785, "grad_norm": 2.248250961303711, "learning_rate": 1.999543938808873e-05, "loss": 0.4837, "step": 1440 }, { "epoch": 0.039406038066068697, "grad_norm": 1.9940921068191528, "learning_rate": 1.9995412602459442e-05, "loss": 0.6093, "step": 1441 }, { "epoch": 0.03943338437978561, "grad_norm": 1.8813462257385254, "learning_rate": 1.9995385738418957e-05, "loss": 0.6282, "step": 1442 }, { "epoch": 0.03946073069350252, "grad_norm": 1.988182544708252, "learning_rate": 1.9995358795967482e-05, "loss": 0.6285, "step": 1443 }, { "epoch": 0.03948807700721943, "grad_norm": 1.710924506187439, "learning_rate": 1.9995331775105233e-05, "loss": 0.6508, "step": 1444 }, { "epoch": 0.03951542332093634, "grad_norm": 1.772693157196045, "learning_rate": 1.999530467583242e-05, "loss": 0.5995, "step": 1445 }, { "epoch": 0.03954276963465325, "grad_norm": 1.3653359413146973, "learning_rate": 1.9995277498149253e-05, "loss": 0.6082, "step": 1446 }, { "epoch": 0.03957011594837016, "grad_norm": 1.9670161008834839, "learning_rate": 1.9995250242055946e-05, "loss": 0.5995, "step": 1447 }, { "epoch": 0.03959746226208707, "grad_norm": 1.5452549457550049, "learning_rate": 1.9995222907552714e-05, "loss": 0.609, "step": 1448 }, { "epoch": 0.03962480857580398, "grad_norm": 1.8149734735488892, "learning_rate": 1.999519549463977e-05, "loss": 0.6223, "step": 1449 }, { "epoch": 0.03965215488952089, "grad_norm": 1.8498483896255493, "learning_rate": 1.9995168003317334e-05, "loss": 0.5953, "step": 1450 }, { "epoch": 0.0396795012032378, "grad_norm": 1.6717605590820312, "learning_rate": 1.9995140433585616e-05, "loss": 0.5041, "step": 1451 }, { "epoch": 0.03970684751695471, "grad_norm": 1.652207612991333, "learning_rate": 1.9995112785444833e-05, "loss": 0.6059, "step": 1452 }, { "epoch": 0.03973419383067162, "grad_norm": 2.9811041355133057, "learning_rate": 1.9995085058895204e-05, "loss": 0.5987, "step": 1453 }, { "epoch": 0.039761540144388534, "grad_norm": 1.6047264337539673, "learning_rate": 1.9995057253936947e-05, "loss": 0.586, "step": 1454 }, { "epoch": 0.039788886458105445, "grad_norm": 1.9889222383499146, "learning_rate": 1.9995029370570277e-05, "loss": 0.61, "step": 1455 }, { "epoch": 0.039816232771822356, "grad_norm": 1.7410131692886353, "learning_rate": 1.9995001408795414e-05, "loss": 0.5999, "step": 1456 }, { "epoch": 0.03984357908553927, "grad_norm": 1.64464271068573, "learning_rate": 1.999497336861258e-05, "loss": 0.5854, "step": 1457 }, { "epoch": 0.03987092539925618, "grad_norm": 2.650987386703491, "learning_rate": 1.9994945250021992e-05, "loss": 0.491, "step": 1458 }, { "epoch": 0.03989827171297309, "grad_norm": 1.9809576272964478, "learning_rate": 1.9994917053023867e-05, "loss": 0.6178, "step": 1459 }, { "epoch": 0.03992561802669, "grad_norm": 2.3598105907440186, "learning_rate": 1.999488877761844e-05, "loss": 0.5724, "step": 1460 }, { "epoch": 0.03995296434040691, "grad_norm": 1.9064797163009644, "learning_rate": 1.9994860423805913e-05, "loss": 0.6076, "step": 1461 }, { "epoch": 0.03998031065412382, "grad_norm": 1.9399744272232056, "learning_rate": 1.9994831991586525e-05, "loss": 0.9415, "step": 1462 }, { "epoch": 0.040007656967840734, "grad_norm": 1.956225037574768, "learning_rate": 1.999480348096049e-05, "loss": 0.6179, "step": 1463 }, { "epoch": 0.040035003281557645, "grad_norm": 1.8930108547210693, "learning_rate": 1.9994774891928034e-05, "loss": 0.6275, "step": 1464 }, { "epoch": 0.040062349595274556, "grad_norm": 1.5863279104232788, "learning_rate": 1.9994746224489385e-05, "loss": 0.5969, "step": 1465 }, { "epoch": 0.04008969590899147, "grad_norm": 1.7395637035369873, "learning_rate": 1.9994717478644763e-05, "loss": 0.5946, "step": 1466 }, { "epoch": 0.04011704222270838, "grad_norm": 2.4694528579711914, "learning_rate": 1.9994688654394396e-05, "loss": 0.5987, "step": 1467 }, { "epoch": 0.04014438853642529, "grad_norm": 1.8286311626434326, "learning_rate": 1.9994659751738506e-05, "loss": 0.6749, "step": 1468 }, { "epoch": 0.0401717348501422, "grad_norm": 1.7777960300445557, "learning_rate": 1.9994630770677327e-05, "loss": 0.6479, "step": 1469 }, { "epoch": 0.04019908116385911, "grad_norm": 2.266313076019287, "learning_rate": 1.999460171121108e-05, "loss": 1.0094, "step": 1470 }, { "epoch": 0.04022642747757602, "grad_norm": 2.2281782627105713, "learning_rate": 1.9994572573339997e-05, "loss": 0.5932, "step": 1471 }, { "epoch": 0.040253773791292934, "grad_norm": 1.9031078815460205, "learning_rate": 1.9994543357064304e-05, "loss": 0.6299, "step": 1472 }, { "epoch": 0.040281120105009845, "grad_norm": 2.4916248321533203, "learning_rate": 1.9994514062384232e-05, "loss": 0.5967, "step": 1473 }, { "epoch": 0.040308466418726756, "grad_norm": 1.672569990158081, "learning_rate": 1.999448468930001e-05, "loss": 0.5873, "step": 1474 }, { "epoch": 0.04033581273244367, "grad_norm": 2.2463955879211426, "learning_rate": 1.9994455237811866e-05, "loss": 1.001, "step": 1475 }, { "epoch": 0.04036315904616058, "grad_norm": 1.9477605819702148, "learning_rate": 1.999442570792003e-05, "loss": 0.6403, "step": 1476 }, { "epoch": 0.04039050535987749, "grad_norm": 2.0147135257720947, "learning_rate": 1.9994396099624744e-05, "loss": 0.5845, "step": 1477 }, { "epoch": 0.0404178516735944, "grad_norm": 1.9465829133987427, "learning_rate": 1.999436641292623e-05, "loss": 0.605, "step": 1478 }, { "epoch": 0.04044519798731131, "grad_norm": 1.6015543937683105, "learning_rate": 1.9994336647824724e-05, "loss": 0.6223, "step": 1479 }, { "epoch": 0.04047254430102822, "grad_norm": 2.1190648078918457, "learning_rate": 1.9994306804320463e-05, "loss": 0.5642, "step": 1480 }, { "epoch": 0.04049989061474513, "grad_norm": 1.5229594707489014, "learning_rate": 1.9994276882413673e-05, "loss": 0.6028, "step": 1481 }, { "epoch": 0.040527236928462045, "grad_norm": 2.479036569595337, "learning_rate": 1.99942468821046e-05, "loss": 0.9651, "step": 1482 }, { "epoch": 0.040554583242178956, "grad_norm": 2.0886824131011963, "learning_rate": 1.9994216803393467e-05, "loss": 0.4497, "step": 1483 }, { "epoch": 0.04058192955589587, "grad_norm": 1.593528151512146, "learning_rate": 1.999418664628052e-05, "loss": 0.6232, "step": 1484 }, { "epoch": 0.04060927586961278, "grad_norm": 1.6002737283706665, "learning_rate": 1.9994156410765987e-05, "loss": 0.4396, "step": 1485 }, { "epoch": 0.04063662218332969, "grad_norm": 2.712975263595581, "learning_rate": 1.999412609685011e-05, "loss": 0.5788, "step": 1486 }, { "epoch": 0.0406639684970466, "grad_norm": 2.1744332313537598, "learning_rate": 1.9994095704533127e-05, "loss": 0.6159, "step": 1487 }, { "epoch": 0.04069131481076351, "grad_norm": 1.7625929117202759, "learning_rate": 1.9994065233815276e-05, "loss": 0.6099, "step": 1488 }, { "epoch": 0.04071866112448042, "grad_norm": 2.2321698665618896, "learning_rate": 1.9994034684696798e-05, "loss": 0.6353, "step": 1489 }, { "epoch": 0.04074600743819733, "grad_norm": 2.2072970867156982, "learning_rate": 1.999400405717793e-05, "loss": 0.6498, "step": 1490 }, { "epoch": 0.040773353751914244, "grad_norm": 2.178502321243286, "learning_rate": 1.999397335125891e-05, "loss": 0.6247, "step": 1491 }, { "epoch": 0.040800700065631156, "grad_norm": 1.6252387762069702, "learning_rate": 1.9993942566939982e-05, "loss": 0.5361, "step": 1492 }, { "epoch": 0.04082804637934807, "grad_norm": 1.959162950515747, "learning_rate": 1.999391170422139e-05, "loss": 0.6043, "step": 1493 }, { "epoch": 0.04085539269306498, "grad_norm": 1.4926490783691406, "learning_rate": 1.999388076310337e-05, "loss": 0.6195, "step": 1494 }, { "epoch": 0.04088273900678189, "grad_norm": 1.9002118110656738, "learning_rate": 1.999384974358617e-05, "loss": 0.5679, "step": 1495 }, { "epoch": 0.0409100853204988, "grad_norm": 1.7714351415634155, "learning_rate": 1.999381864567003e-05, "loss": 0.6568, "step": 1496 }, { "epoch": 0.04093743163421571, "grad_norm": 1.8689433336257935, "learning_rate": 1.9993787469355196e-05, "loss": 0.6068, "step": 1497 }, { "epoch": 0.04096477794793262, "grad_norm": 2.117915153503418, "learning_rate": 1.999375621464191e-05, "loss": 0.6942, "step": 1498 }, { "epoch": 0.040992124261649526, "grad_norm": 1.668264627456665, "learning_rate": 1.9993724881530424e-05, "loss": 0.5924, "step": 1499 }, { "epoch": 0.04101947057536644, "grad_norm": 1.914258360862732, "learning_rate": 1.9993693470020973e-05, "loss": 0.6042, "step": 1500 }, { "epoch": 0.04104681688908335, "grad_norm": 1.8441126346588135, "learning_rate": 1.9993661980113812e-05, "loss": 0.6045, "step": 1501 }, { "epoch": 0.04107416320280026, "grad_norm": 16.081140518188477, "learning_rate": 1.9993630411809187e-05, "loss": 0.7049, "step": 1502 }, { "epoch": 0.04110150951651717, "grad_norm": 1.744462490081787, "learning_rate": 1.999359876510734e-05, "loss": 0.6042, "step": 1503 }, { "epoch": 0.04112885583023408, "grad_norm": 1.814051866531372, "learning_rate": 1.9993567040008525e-05, "loss": 0.5874, "step": 1504 }, { "epoch": 0.04115620214395099, "grad_norm": 1.8171072006225586, "learning_rate": 1.9993535236512988e-05, "loss": 0.5641, "step": 1505 }, { "epoch": 0.041183548457667904, "grad_norm": 1.8117619752883911, "learning_rate": 1.999350335462098e-05, "loss": 0.5625, "step": 1506 }, { "epoch": 0.041210894771384815, "grad_norm": 2.9547464847564697, "learning_rate": 1.999347139433275e-05, "loss": 0.9828, "step": 1507 }, { "epoch": 0.041238241085101726, "grad_norm": 1.9232970476150513, "learning_rate": 1.9993439355648552e-05, "loss": 0.5982, "step": 1508 }, { "epoch": 0.04126558739881864, "grad_norm": 1.8293732404708862, "learning_rate": 1.9993407238568632e-05, "loss": 0.5951, "step": 1509 }, { "epoch": 0.04129293371253555, "grad_norm": 1.5403646230697632, "learning_rate": 1.9993375043093247e-05, "loss": 0.5744, "step": 1510 }, { "epoch": 0.04132028002625246, "grad_norm": 2.0414211750030518, "learning_rate": 1.9993342769222646e-05, "loss": 0.5952, "step": 1511 }, { "epoch": 0.04134762633996937, "grad_norm": 1.842059850692749, "learning_rate": 1.9993310416957082e-05, "loss": 0.611, "step": 1512 }, { "epoch": 0.04137497265368628, "grad_norm": 2.0153543949127197, "learning_rate": 1.999327798629681e-05, "loss": 0.6219, "step": 1513 }, { "epoch": 0.04140231896740319, "grad_norm": 1.9224830865859985, "learning_rate": 1.9993245477242086e-05, "loss": 0.6944, "step": 1514 }, { "epoch": 0.041429665281120104, "grad_norm": 1.8854475021362305, "learning_rate": 1.9993212889793162e-05, "loss": 0.5851, "step": 1515 }, { "epoch": 0.041457011594837015, "grad_norm": 1.8491733074188232, "learning_rate": 1.9993180223950296e-05, "loss": 0.5968, "step": 1516 }, { "epoch": 0.041484357908553926, "grad_norm": 2.4660120010375977, "learning_rate": 1.9993147479713747e-05, "loss": 0.6229, "step": 1517 }, { "epoch": 0.04151170422227084, "grad_norm": 2.061343193054199, "learning_rate": 1.9993114657083764e-05, "loss": 0.6309, "step": 1518 }, { "epoch": 0.04153905053598775, "grad_norm": 1.8705015182495117, "learning_rate": 1.999308175606061e-05, "loss": 0.5916, "step": 1519 }, { "epoch": 0.04156639684970466, "grad_norm": 1.879679560661316, "learning_rate": 1.999304877664454e-05, "loss": 0.6004, "step": 1520 }, { "epoch": 0.04159374316342157, "grad_norm": 2.0685062408447266, "learning_rate": 1.999301571883582e-05, "loss": 0.63, "step": 1521 }, { "epoch": 0.04162108947713848, "grad_norm": 1.9392900466918945, "learning_rate": 1.99929825826347e-05, "loss": 0.5875, "step": 1522 }, { "epoch": 0.04164843579085539, "grad_norm": 2.007283926010132, "learning_rate": 1.9992949368041447e-05, "loss": 0.6077, "step": 1523 }, { "epoch": 0.041675782104572304, "grad_norm": 1.82052481174469, "learning_rate": 1.999291607505631e-05, "loss": 0.9683, "step": 1524 }, { "epoch": 0.041703128418289215, "grad_norm": 2.664423942565918, "learning_rate": 1.9992882703679567e-05, "loss": 0.6387, "step": 1525 }, { "epoch": 0.041730474732006126, "grad_norm": 2.115212917327881, "learning_rate": 1.9992849253911472e-05, "loss": 0.5612, "step": 1526 }, { "epoch": 0.04175782104572304, "grad_norm": 2.6617486476898193, "learning_rate": 1.999281572575228e-05, "loss": 0.6204, "step": 1527 }, { "epoch": 0.04178516735943995, "grad_norm": 2.5340795516967773, "learning_rate": 1.999278211920227e-05, "loss": 0.5966, "step": 1528 }, { "epoch": 0.04181251367315686, "grad_norm": 2.3196635246276855, "learning_rate": 1.9992748434261687e-05, "loss": 0.592, "step": 1529 }, { "epoch": 0.04183985998687377, "grad_norm": 1.8710099458694458, "learning_rate": 1.999271467093081e-05, "loss": 0.629, "step": 1530 }, { "epoch": 0.04186720630059068, "grad_norm": 2.285266637802124, "learning_rate": 1.9992680829209895e-05, "loss": 0.6175, "step": 1531 }, { "epoch": 0.04189455261430759, "grad_norm": 2.619488000869751, "learning_rate": 1.9992646909099217e-05, "loss": 0.577, "step": 1532 }, { "epoch": 0.041921898928024504, "grad_norm": 2.2163655757904053, "learning_rate": 1.999261291059903e-05, "loss": 0.5529, "step": 1533 }, { "epoch": 0.041949245241741415, "grad_norm": 2.253380060195923, "learning_rate": 1.999257883370961e-05, "loss": 1.0097, "step": 1534 }, { "epoch": 0.041976591555458326, "grad_norm": 1.8091695308685303, "learning_rate": 1.999254467843122e-05, "loss": 0.6123, "step": 1535 }, { "epoch": 0.04200393786917524, "grad_norm": 1.7109302282333374, "learning_rate": 1.9992510444764132e-05, "loss": 0.6367, "step": 1536 }, { "epoch": 0.04203128418289215, "grad_norm": 1.7169487476348877, "learning_rate": 1.999247613270861e-05, "loss": 0.6251, "step": 1537 }, { "epoch": 0.04205863049660906, "grad_norm": 84.40033721923828, "learning_rate": 1.9992441742264927e-05, "loss": 0.4954, "step": 1538 }, { "epoch": 0.04208597681032597, "grad_norm": 2.025352954864502, "learning_rate": 1.9992407273433346e-05, "loss": 0.5857, "step": 1539 }, { "epoch": 0.04211332312404288, "grad_norm": 1.9588024616241455, "learning_rate": 1.9992372726214145e-05, "loss": 0.6656, "step": 1540 }, { "epoch": 0.04214066943775979, "grad_norm": 2.3055338859558105, "learning_rate": 1.999233810060759e-05, "loss": 0.7054, "step": 1541 }, { "epoch": 0.0421680157514767, "grad_norm": 1.9333912134170532, "learning_rate": 1.999230339661396e-05, "loss": 0.6901, "step": 1542 }, { "epoch": 0.042195362065193615, "grad_norm": 4.746925354003906, "learning_rate": 1.9992268614233515e-05, "loss": 0.557, "step": 1543 }, { "epoch": 0.042222708378910526, "grad_norm": 1.7047368288040161, "learning_rate": 1.9992233753466535e-05, "loss": 0.6, "step": 1544 }, { "epoch": 0.04225005469262744, "grad_norm": 2.3145995140075684, "learning_rate": 1.99921988143133e-05, "loss": 0.6848, "step": 1545 }, { "epoch": 0.04227740100634435, "grad_norm": 1.7111756801605225, "learning_rate": 1.999216379677407e-05, "loss": 0.6007, "step": 1546 }, { "epoch": 0.04230474732006126, "grad_norm": 2.0559158325195312, "learning_rate": 1.9992128700849133e-05, "loss": 0.5983, "step": 1547 }, { "epoch": 0.04233209363377817, "grad_norm": 1.9414749145507812, "learning_rate": 1.9992093526538755e-05, "loss": 0.6403, "step": 1548 }, { "epoch": 0.04235943994749508, "grad_norm": 1.499971866607666, "learning_rate": 1.9992058273843217e-05, "loss": 0.6256, "step": 1549 }, { "epoch": 0.042386786261211985, "grad_norm": 1.5530426502227783, "learning_rate": 1.9992022942762793e-05, "loss": 0.6086, "step": 1550 }, { "epoch": 0.042414132574928896, "grad_norm": 1.9327439069747925, "learning_rate": 1.999198753329776e-05, "loss": 0.6013, "step": 1551 }, { "epoch": 0.04244147888864581, "grad_norm": 2.588085174560547, "learning_rate": 1.9991952045448396e-05, "loss": 1.0118, "step": 1552 }, { "epoch": 0.04246882520236272, "grad_norm": 1.8624202013015747, "learning_rate": 1.9991916479214986e-05, "loss": 1.0481, "step": 1553 }, { "epoch": 0.04249617151607963, "grad_norm": 1.4136385917663574, "learning_rate": 1.9991880834597798e-05, "loss": 0.9768, "step": 1554 }, { "epoch": 0.04252351782979654, "grad_norm": 1.96840500831604, "learning_rate": 1.9991845111597116e-05, "loss": 0.6447, "step": 1555 }, { "epoch": 0.04255086414351345, "grad_norm": 1.8267744779586792, "learning_rate": 1.9991809310213225e-05, "loss": 0.9943, "step": 1556 }, { "epoch": 0.04257821045723036, "grad_norm": 2.02467942237854, "learning_rate": 1.99917734304464e-05, "loss": 1.0381, "step": 1557 }, { "epoch": 0.042605556770947274, "grad_norm": 2.0192525386810303, "learning_rate": 1.9991737472296923e-05, "loss": 0.4639, "step": 1558 }, { "epoch": 0.042632903084664185, "grad_norm": 2.482914447784424, "learning_rate": 1.999170143576508e-05, "loss": 0.646, "step": 1559 }, { "epoch": 0.042660249398381096, "grad_norm": 2.036449432373047, "learning_rate": 1.999166532085115e-05, "loss": 0.6256, "step": 1560 }, { "epoch": 0.04268759571209801, "grad_norm": 1.8956654071807861, "learning_rate": 1.9991629127555416e-05, "loss": 0.6373, "step": 1561 }, { "epoch": 0.04271494202581492, "grad_norm": 2.126199722290039, "learning_rate": 1.9991592855878165e-05, "loss": 0.6455, "step": 1562 }, { "epoch": 0.04274228833953183, "grad_norm": 2.093827724456787, "learning_rate": 1.999155650581968e-05, "loss": 0.6272, "step": 1563 }, { "epoch": 0.04276963465324874, "grad_norm": 2.446965456008911, "learning_rate": 1.9991520077380246e-05, "loss": 0.7086, "step": 1564 }, { "epoch": 0.04279698096696565, "grad_norm": 1.833694577217102, "learning_rate": 1.999148357056015e-05, "loss": 0.6514, "step": 1565 }, { "epoch": 0.04282432728068256, "grad_norm": 2.280637502670288, "learning_rate": 1.9991446985359674e-05, "loss": 0.5785, "step": 1566 }, { "epoch": 0.042851673594399474, "grad_norm": 1.9236878156661987, "learning_rate": 1.999141032177911e-05, "loss": 0.6766, "step": 1567 }, { "epoch": 0.042879019908116385, "grad_norm": 1.818860411643982, "learning_rate": 1.999137357981874e-05, "loss": 0.6059, "step": 1568 }, { "epoch": 0.042906366221833296, "grad_norm": 1.248408555984497, "learning_rate": 1.9991336759478863e-05, "loss": 0.5349, "step": 1569 }, { "epoch": 0.04293371253555021, "grad_norm": 2.5584611892700195, "learning_rate": 1.999129986075976e-05, "loss": 0.6448, "step": 1570 }, { "epoch": 0.04296105884926712, "grad_norm": 1.8888285160064697, "learning_rate": 1.999126288366172e-05, "loss": 0.6375, "step": 1571 }, { "epoch": 0.04298840516298403, "grad_norm": 1.6573688983917236, "learning_rate": 1.9991225828185032e-05, "loss": 0.6186, "step": 1572 }, { "epoch": 0.04301575147670094, "grad_norm": 2.470515012741089, "learning_rate": 1.999118869432999e-05, "loss": 1.0588, "step": 1573 }, { "epoch": 0.04304309779041785, "grad_norm": 1.9302163124084473, "learning_rate": 1.9991151482096885e-05, "loss": 0.6114, "step": 1574 }, { "epoch": 0.04307044410413476, "grad_norm": 1.6579742431640625, "learning_rate": 1.9991114191486008e-05, "loss": 1.0293, "step": 1575 }, { "epoch": 0.043097790417851674, "grad_norm": 2.3116700649261475, "learning_rate": 1.9991076822497655e-05, "loss": 0.618, "step": 1576 }, { "epoch": 0.043125136731568585, "grad_norm": 2.0729894638061523, "learning_rate": 1.9991039375132114e-05, "loss": 0.6181, "step": 1577 }, { "epoch": 0.043152483045285496, "grad_norm": 1.7712888717651367, "learning_rate": 1.999100184938968e-05, "loss": 0.6817, "step": 1578 }, { "epoch": 0.04317982935900241, "grad_norm": 1.6412205696105957, "learning_rate": 1.999096424527065e-05, "loss": 0.6317, "step": 1579 }, { "epoch": 0.04320717567271932, "grad_norm": 1.6757391691207886, "learning_rate": 1.9990926562775314e-05, "loss": 0.5781, "step": 1580 }, { "epoch": 0.04323452198643623, "grad_norm": 1.566792607307434, "learning_rate": 1.9990888801903973e-05, "loss": 0.6098, "step": 1581 }, { "epoch": 0.04326186830015314, "grad_norm": 2.300654172897339, "learning_rate": 1.999085096265692e-05, "loss": 0.6315, "step": 1582 }, { "epoch": 0.04328921461387005, "grad_norm": 2.264331579208374, "learning_rate": 1.9990813045034455e-05, "loss": 0.6148, "step": 1583 }, { "epoch": 0.04331656092758696, "grad_norm": 1.6016638278961182, "learning_rate": 1.9990775049036872e-05, "loss": 0.5912, "step": 1584 }, { "epoch": 0.043343907241303874, "grad_norm": 1.446449875831604, "learning_rate": 1.999073697466447e-05, "loss": 0.6156, "step": 1585 }, { "epoch": 0.043371253555020785, "grad_norm": 1.466484785079956, "learning_rate": 1.999069882191755e-05, "loss": 0.6057, "step": 1586 }, { "epoch": 0.043398599868737696, "grad_norm": 1.5410627126693726, "learning_rate": 1.9990660590796406e-05, "loss": 0.5998, "step": 1587 }, { "epoch": 0.04342594618245461, "grad_norm": 5.045588970184326, "learning_rate": 1.9990622281301344e-05, "loss": 1.1153, "step": 1588 }, { "epoch": 0.04345329249617152, "grad_norm": 2.2116875648498535, "learning_rate": 1.999058389343266e-05, "loss": 0.6077, "step": 1589 }, { "epoch": 0.04348063880988843, "grad_norm": 1.755283236503601, "learning_rate": 1.9990545427190656e-05, "loss": 0.6119, "step": 1590 }, { "epoch": 0.04350798512360534, "grad_norm": 2.099571943283081, "learning_rate": 1.9990506882575636e-05, "loss": 0.6841, "step": 1591 }, { "epoch": 0.04353533143732225, "grad_norm": 1.5976083278656006, "learning_rate": 1.9990468259587904e-05, "loss": 0.6034, "step": 1592 }, { "epoch": 0.04356267775103916, "grad_norm": 1.464820384979248, "learning_rate": 1.9990429558227757e-05, "loss": 0.6294, "step": 1593 }, { "epoch": 0.043590024064756074, "grad_norm": 1.7212716341018677, "learning_rate": 1.9990390778495504e-05, "loss": 0.5893, "step": 1594 }, { "epoch": 0.043617370378472985, "grad_norm": 2.7735941410064697, "learning_rate": 1.9990351920391445e-05, "loss": 0.5873, "step": 1595 }, { "epoch": 0.043644716692189896, "grad_norm": 2.19877552986145, "learning_rate": 1.9990312983915884e-05, "loss": 0.6134, "step": 1596 }, { "epoch": 0.04367206300590681, "grad_norm": 1.5709123611450195, "learning_rate": 1.9990273969069135e-05, "loss": 0.6385, "step": 1597 }, { "epoch": 0.04369940931962372, "grad_norm": 1.469606876373291, "learning_rate": 1.9990234875851495e-05, "loss": 0.6112, "step": 1598 }, { "epoch": 0.04372675563334063, "grad_norm": 1.39963960647583, "learning_rate": 1.9990195704263274e-05, "loss": 0.5854, "step": 1599 }, { "epoch": 0.04375410194705753, "grad_norm": 1.9865632057189941, "learning_rate": 1.999015645430478e-05, "loss": 0.612, "step": 1600 }, { "epoch": 0.043781448260774444, "grad_norm": 2.608267068862915, "learning_rate": 1.9990117125976317e-05, "loss": 0.4963, "step": 1601 }, { "epoch": 0.043808794574491355, "grad_norm": 2.107034683227539, "learning_rate": 1.99900777192782e-05, "loss": 0.6909, "step": 1602 }, { "epoch": 0.043836140888208266, "grad_norm": 3.1187539100646973, "learning_rate": 1.9990038234210733e-05, "loss": 0.5031, "step": 1603 }, { "epoch": 0.04386348720192518, "grad_norm": 1.8726565837860107, "learning_rate": 1.998999867077423e-05, "loss": 0.603, "step": 1604 }, { "epoch": 0.04389083351564209, "grad_norm": 2.19805645942688, "learning_rate": 1.9989959028968996e-05, "loss": 0.6136, "step": 1605 }, { "epoch": 0.043918179829359, "grad_norm": 4.527102470397949, "learning_rate": 1.9989919308795346e-05, "loss": 1.1203, "step": 1606 }, { "epoch": 0.04394552614307591, "grad_norm": 1.5001906156539917, "learning_rate": 1.9989879510253592e-05, "loss": 0.5959, "step": 1607 }, { "epoch": 0.04397287245679282, "grad_norm": 1.6302945613861084, "learning_rate": 1.998983963334404e-05, "loss": 0.6195, "step": 1608 }, { "epoch": 0.04400021877050973, "grad_norm": 2.030433177947998, "learning_rate": 1.998979967806701e-05, "loss": 0.6081, "step": 1609 }, { "epoch": 0.044027565084226644, "grad_norm": 1.849224328994751, "learning_rate": 1.9989759644422816e-05, "loss": 1.0248, "step": 1610 }, { "epoch": 0.044054911397943555, "grad_norm": 1.7401455640792847, "learning_rate": 1.9989719532411766e-05, "loss": 0.6409, "step": 1611 }, { "epoch": 0.044082257711660466, "grad_norm": 1.8052542209625244, "learning_rate": 1.998967934203418e-05, "loss": 0.5691, "step": 1612 }, { "epoch": 0.04410960402537738, "grad_norm": 1.5961410999298096, "learning_rate": 1.998963907329037e-05, "loss": 0.6106, "step": 1613 }, { "epoch": 0.04413695033909429, "grad_norm": 1.8774555921554565, "learning_rate": 1.9989598726180648e-05, "loss": 0.5954, "step": 1614 }, { "epoch": 0.0441642966528112, "grad_norm": 2.775282382965088, "learning_rate": 1.9989558300705337e-05, "loss": 0.5162, "step": 1615 }, { "epoch": 0.04419164296652811, "grad_norm": 1.845774531364441, "learning_rate": 1.9989517796864757e-05, "loss": 0.5992, "step": 1616 }, { "epoch": 0.04421898928024502, "grad_norm": 1.7831732034683228, "learning_rate": 1.998947721465922e-05, "loss": 0.6136, "step": 1617 }, { "epoch": 0.04424633559396193, "grad_norm": 3.6389477252960205, "learning_rate": 1.9989436554089042e-05, "loss": 1.0833, "step": 1618 }, { "epoch": 0.044273681907678844, "grad_norm": 1.665675163269043, "learning_rate": 1.998939581515455e-05, "loss": 0.612, "step": 1619 }, { "epoch": 0.044301028221395755, "grad_norm": 1.6469945907592773, "learning_rate": 1.9989354997856054e-05, "loss": 0.6123, "step": 1620 }, { "epoch": 0.044328374535112666, "grad_norm": 1.5092718601226807, "learning_rate": 1.998931410219388e-05, "loss": 0.6544, "step": 1621 }, { "epoch": 0.04435572084882958, "grad_norm": 1.6113718748092651, "learning_rate": 1.9989273128168355e-05, "loss": 0.6757, "step": 1622 }, { "epoch": 0.04438306716254649, "grad_norm": 1.333516001701355, "learning_rate": 1.9989232075779785e-05, "loss": 0.6191, "step": 1623 }, { "epoch": 0.0444104134762634, "grad_norm": 1.5614075660705566, "learning_rate": 1.9989190945028504e-05, "loss": 0.6046, "step": 1624 }, { "epoch": 0.04443775978998031, "grad_norm": 1.7909200191497803, "learning_rate": 1.9989149735914834e-05, "loss": 0.6141, "step": 1625 }, { "epoch": 0.04446510610369722, "grad_norm": 1.8302215337753296, "learning_rate": 1.9989108448439093e-05, "loss": 0.6159, "step": 1626 }, { "epoch": 0.04449245241741413, "grad_norm": 2.0866684913635254, "learning_rate": 1.9989067082601607e-05, "loss": 0.5805, "step": 1627 }, { "epoch": 0.044519798731131044, "grad_norm": 2.1102054119110107, "learning_rate": 1.99890256384027e-05, "loss": 0.5639, "step": 1628 }, { "epoch": 0.044547145044847955, "grad_norm": 1.542061686515808, "learning_rate": 1.99889841158427e-05, "loss": 0.6243, "step": 1629 }, { "epoch": 0.044574491358564866, "grad_norm": 1.6025985479354858, "learning_rate": 1.998894251492193e-05, "loss": 0.6297, "step": 1630 }, { "epoch": 0.04460183767228178, "grad_norm": 3.394202709197998, "learning_rate": 1.9988900835640718e-05, "loss": 1.0306, "step": 1631 }, { "epoch": 0.04462918398599869, "grad_norm": 1.7990257740020752, "learning_rate": 1.998885907799939e-05, "loss": 0.5893, "step": 1632 }, { "epoch": 0.0446565302997156, "grad_norm": 2.0963950157165527, "learning_rate": 1.9988817241998273e-05, "loss": 0.5857, "step": 1633 }, { "epoch": 0.04468387661343251, "grad_norm": 2.0615925788879395, "learning_rate": 1.9988775327637696e-05, "loss": 0.6122, "step": 1634 }, { "epoch": 0.04471122292714942, "grad_norm": 1.989486575126648, "learning_rate": 1.9988733334917984e-05, "loss": 0.6225, "step": 1635 }, { "epoch": 0.04473856924086633, "grad_norm": 1.9093624353408813, "learning_rate": 1.9988691263839476e-05, "loss": 0.5802, "step": 1636 }, { "epoch": 0.044765915554583244, "grad_norm": 1.6266143321990967, "learning_rate": 1.9988649114402494e-05, "loss": 0.9915, "step": 1637 }, { "epoch": 0.044793261868300155, "grad_norm": 1.8065909147262573, "learning_rate": 1.998860688660737e-05, "loss": 0.5816, "step": 1638 }, { "epoch": 0.044820608182017066, "grad_norm": 2.1451809406280518, "learning_rate": 1.9988564580454435e-05, "loss": 0.6186, "step": 1639 }, { "epoch": 0.04484795449573398, "grad_norm": 2.343372106552124, "learning_rate": 1.9988522195944024e-05, "loss": 0.5871, "step": 1640 }, { "epoch": 0.04487530080945089, "grad_norm": 1.889890432357788, "learning_rate": 1.9988479733076468e-05, "loss": 0.4564, "step": 1641 }, { "epoch": 0.0449026471231678, "grad_norm": 1.7114129066467285, "learning_rate": 1.9988437191852096e-05, "loss": 0.6127, "step": 1642 }, { "epoch": 0.04492999343688471, "grad_norm": 1.6578247547149658, "learning_rate": 1.9988394572271245e-05, "loss": 0.6219, "step": 1643 }, { "epoch": 0.04495733975060162, "grad_norm": 1.8205509185791016, "learning_rate": 1.9988351874334255e-05, "loss": 0.5763, "step": 1644 }, { "epoch": 0.04498468606431853, "grad_norm": 1.4956793785095215, "learning_rate": 1.9988309098041452e-05, "loss": 0.5745, "step": 1645 }, { "epoch": 0.045012032378035444, "grad_norm": 1.6935855150222778, "learning_rate": 1.9988266243393176e-05, "loss": 0.5943, "step": 1646 }, { "epoch": 0.045039378691752355, "grad_norm": 1.5142531394958496, "learning_rate": 1.998822331038976e-05, "loss": 0.5902, "step": 1647 }, { "epoch": 0.045066725005469266, "grad_norm": 1.7456393241882324, "learning_rate": 1.9988180299031546e-05, "loss": 0.6161, "step": 1648 }, { "epoch": 0.04509407131918618, "grad_norm": 1.4927746057510376, "learning_rate": 1.9988137209318864e-05, "loss": 0.5972, "step": 1649 }, { "epoch": 0.04512141763290309, "grad_norm": 1.4311257600784302, "learning_rate": 1.9988094041252063e-05, "loss": 0.4375, "step": 1650 }, { "epoch": 0.04514876394661999, "grad_norm": 3.0928900241851807, "learning_rate": 1.998805079483147e-05, "loss": 1.0419, "step": 1651 }, { "epoch": 0.0451761102603369, "grad_norm": 1.6315159797668457, "learning_rate": 1.9988007470057432e-05, "loss": 0.6656, "step": 1652 }, { "epoch": 0.045203456574053814, "grad_norm": 1.7857120037078857, "learning_rate": 1.9987964066930284e-05, "loss": 0.6012, "step": 1653 }, { "epoch": 0.045230802887770725, "grad_norm": 1.5564378499984741, "learning_rate": 1.9987920585450373e-05, "loss": 0.5796, "step": 1654 }, { "epoch": 0.04525814920148764, "grad_norm": 1.6589025259017944, "learning_rate": 1.9987877025618033e-05, "loss": 0.6076, "step": 1655 }, { "epoch": 0.04528549551520455, "grad_norm": 1.6330140829086304, "learning_rate": 1.9987833387433608e-05, "loss": 0.482, "step": 1656 }, { "epoch": 0.04531284182892146, "grad_norm": 1.7354742288589478, "learning_rate": 1.9987789670897442e-05, "loss": 0.5846, "step": 1657 }, { "epoch": 0.04534018814263837, "grad_norm": 1.5563334226608276, "learning_rate": 1.9987745876009875e-05, "loss": 0.6133, "step": 1658 }, { "epoch": 0.04536753445635528, "grad_norm": 1.8585046529769897, "learning_rate": 1.9987702002771255e-05, "loss": 0.6075, "step": 1659 }, { "epoch": 0.04539488077007219, "grad_norm": 1.9890702962875366, "learning_rate": 1.9987658051181922e-05, "loss": 0.6149, "step": 1660 }, { "epoch": 0.0454222270837891, "grad_norm": 2.09173321723938, "learning_rate": 1.9987614021242224e-05, "loss": 0.5832, "step": 1661 }, { "epoch": 0.045449573397506014, "grad_norm": 2.498049259185791, "learning_rate": 1.9987569912952504e-05, "loss": 0.6109, "step": 1662 }, { "epoch": 0.045476919711222925, "grad_norm": 2.830547571182251, "learning_rate": 1.9987525726313107e-05, "loss": 0.634, "step": 1663 }, { "epoch": 0.045504266024939836, "grad_norm": 1.927467942237854, "learning_rate": 1.998748146132438e-05, "loss": 0.6169, "step": 1664 }, { "epoch": 0.04553161233865675, "grad_norm": 1.6525120735168457, "learning_rate": 1.9987437117986677e-05, "loss": 0.5806, "step": 1665 }, { "epoch": 0.04555895865237366, "grad_norm": 1.8387497663497925, "learning_rate": 1.9987392696300337e-05, "loss": 0.6205, "step": 1666 }, { "epoch": 0.04558630496609057, "grad_norm": 1.9000444412231445, "learning_rate": 1.9987348196265715e-05, "loss": 0.6058, "step": 1667 }, { "epoch": 0.04561365127980748, "grad_norm": 2.0728490352630615, "learning_rate": 1.998730361788315e-05, "loss": 0.5994, "step": 1668 }, { "epoch": 0.04564099759352439, "grad_norm": 1.747223138809204, "learning_rate": 1.9987258961153008e-05, "loss": 0.6163, "step": 1669 }, { "epoch": 0.0456683439072413, "grad_norm": 1.5957376956939697, "learning_rate": 1.9987214226075623e-05, "loss": 0.5597, "step": 1670 }, { "epoch": 0.045695690220958214, "grad_norm": 1.878993034362793, "learning_rate": 1.9987169412651358e-05, "loss": 0.5797, "step": 1671 }, { "epoch": 0.045723036534675125, "grad_norm": 1.6833680868148804, "learning_rate": 1.9987124520880556e-05, "loss": 0.6025, "step": 1672 }, { "epoch": 0.045750382848392036, "grad_norm": 1.5886021852493286, "learning_rate": 1.9987079550763577e-05, "loss": 0.6191, "step": 1673 }, { "epoch": 0.04577772916210895, "grad_norm": 2.511230945587158, "learning_rate": 1.9987034502300763e-05, "loss": 0.6018, "step": 1674 }, { "epoch": 0.04580507547582586, "grad_norm": 2.4074859619140625, "learning_rate": 1.9986989375492477e-05, "loss": 0.5894, "step": 1675 }, { "epoch": 0.04583242178954277, "grad_norm": 1.8198505640029907, "learning_rate": 1.9986944170339075e-05, "loss": 0.6213, "step": 1676 }, { "epoch": 0.04585976810325968, "grad_norm": 1.8983943462371826, "learning_rate": 1.99868988868409e-05, "loss": 0.5584, "step": 1677 }, { "epoch": 0.04588711441697659, "grad_norm": 1.6410080194473267, "learning_rate": 1.9986853524998317e-05, "loss": 0.5997, "step": 1678 }, { "epoch": 0.0459144607306935, "grad_norm": 1.6661478281021118, "learning_rate": 1.9986808084811676e-05, "loss": 0.6141, "step": 1679 }, { "epoch": 0.045941807044410414, "grad_norm": 2.476811408996582, "learning_rate": 1.998676256628134e-05, "loss": 0.5935, "step": 1680 }, { "epoch": 0.045969153358127325, "grad_norm": 2.0743472576141357, "learning_rate": 1.9986716969407656e-05, "loss": 0.6043, "step": 1681 }, { "epoch": 0.045996499671844236, "grad_norm": 2.266399621963501, "learning_rate": 1.9986671294190992e-05, "loss": 0.7029, "step": 1682 }, { "epoch": 0.04602384598556115, "grad_norm": 1.6898061037063599, "learning_rate": 1.9986625540631705e-05, "loss": 0.5984, "step": 1683 }, { "epoch": 0.04605119229927806, "grad_norm": 1.7989271879196167, "learning_rate": 1.9986579708730148e-05, "loss": 0.6186, "step": 1684 }, { "epoch": 0.04607853861299497, "grad_norm": 3.1718690395355225, "learning_rate": 1.9986533798486683e-05, "loss": 1.0164, "step": 1685 }, { "epoch": 0.04610588492671188, "grad_norm": 1.713274359703064, "learning_rate": 1.9986487809901673e-05, "loss": 0.5736, "step": 1686 }, { "epoch": 0.04613323124042879, "grad_norm": 2.303694725036621, "learning_rate": 1.9986441742975473e-05, "loss": 0.6121, "step": 1687 }, { "epoch": 0.0461605775541457, "grad_norm": 1.9526236057281494, "learning_rate": 1.998639559770845e-05, "loss": 0.5987, "step": 1688 }, { "epoch": 0.046187923867862614, "grad_norm": 1.704246997833252, "learning_rate": 1.9986349374100964e-05, "loss": 0.5832, "step": 1689 }, { "epoch": 0.046215270181579525, "grad_norm": 3.0928211212158203, "learning_rate": 1.9986303072153377e-05, "loss": 0.6056, "step": 1690 }, { "epoch": 0.046242616495296436, "grad_norm": 2.2314453125, "learning_rate": 1.9986256691866054e-05, "loss": 0.6944, "step": 1691 }, { "epoch": 0.04626996280901335, "grad_norm": 1.5492115020751953, "learning_rate": 1.9986210233239357e-05, "loss": 0.5947, "step": 1692 }, { "epoch": 0.04629730912273026, "grad_norm": 1.5146156549453735, "learning_rate": 1.998616369627365e-05, "loss": 0.6264, "step": 1693 }, { "epoch": 0.04632465543644717, "grad_norm": 1.5320405960083008, "learning_rate": 1.99861170809693e-05, "loss": 0.4785, "step": 1694 }, { "epoch": 0.04635200175016408, "grad_norm": 1.8022761344909668, "learning_rate": 1.9986070387326672e-05, "loss": 0.594, "step": 1695 }, { "epoch": 0.04637934806388099, "grad_norm": 1.6129578351974487, "learning_rate": 1.9986023615346134e-05, "loss": 0.5874, "step": 1696 }, { "epoch": 0.0464066943775979, "grad_norm": 2.6660890579223633, "learning_rate": 1.9985976765028048e-05, "loss": 0.5952, "step": 1697 }, { "epoch": 0.046434040691314814, "grad_norm": 1.9269590377807617, "learning_rate": 1.9985929836372787e-05, "loss": 0.6474, "step": 1698 }, { "epoch": 0.046461387005031725, "grad_norm": 3.336430072784424, "learning_rate": 1.9985882829380715e-05, "loss": 0.5862, "step": 1699 }, { "epoch": 0.046488733318748636, "grad_norm": 1.96220064163208, "learning_rate": 1.9985835744052203e-05, "loss": 0.5889, "step": 1700 }, { "epoch": 0.04651607963246555, "grad_norm": 3.228081703186035, "learning_rate": 1.998578858038762e-05, "loss": 1.0354, "step": 1701 }, { "epoch": 0.04654342594618245, "grad_norm": 1.6518570184707642, "learning_rate": 1.9985741338387338e-05, "loss": 0.5906, "step": 1702 }, { "epoch": 0.04657077225989936, "grad_norm": 2.1239209175109863, "learning_rate": 1.9985694018051723e-05, "loss": 0.5619, "step": 1703 }, { "epoch": 0.04659811857361627, "grad_norm": 1.8576397895812988, "learning_rate": 1.9985646619381147e-05, "loss": 0.5837, "step": 1704 }, { "epoch": 0.046625464887333185, "grad_norm": 1.937888741493225, "learning_rate": 1.9985599142375984e-05, "loss": 0.6168, "step": 1705 }, { "epoch": 0.046652811201050096, "grad_norm": 1.7511662244796753, "learning_rate": 1.9985551587036607e-05, "loss": 0.615, "step": 1706 }, { "epoch": 0.04668015751476701, "grad_norm": 1.5429006814956665, "learning_rate": 1.9985503953363387e-05, "loss": 0.5966, "step": 1707 }, { "epoch": 0.04670750382848392, "grad_norm": 1.7067357301712036, "learning_rate": 1.9985456241356702e-05, "loss": 0.6434, "step": 1708 }, { "epoch": 0.04673485014220083, "grad_norm": 2.0957555770874023, "learning_rate": 1.998540845101692e-05, "loss": 0.5939, "step": 1709 }, { "epoch": 0.04676219645591774, "grad_norm": 1.404502511024475, "learning_rate": 1.998536058234442e-05, "loss": 0.4467, "step": 1710 }, { "epoch": 0.04678954276963465, "grad_norm": 2.9454329013824463, "learning_rate": 1.9985312635339576e-05, "loss": 0.589, "step": 1711 }, { "epoch": 0.04681688908335156, "grad_norm": 1.6612635850906372, "learning_rate": 1.9985264610002762e-05, "loss": 0.6213, "step": 1712 }, { "epoch": 0.04684423539706847, "grad_norm": 1.8690378665924072, "learning_rate": 1.998521650633436e-05, "loss": 0.4518, "step": 1713 }, { "epoch": 0.046871581710785384, "grad_norm": 1.7839676141738892, "learning_rate": 1.998516832433475e-05, "loss": 0.4539, "step": 1714 }, { "epoch": 0.046898928024502295, "grad_norm": 2.326364040374756, "learning_rate": 1.9985120064004294e-05, "loss": 1.0311, "step": 1715 }, { "epoch": 0.04692627433821921, "grad_norm": 2.274733781814575, "learning_rate": 1.9985071725343386e-05, "loss": 0.6086, "step": 1716 }, { "epoch": 0.04695362065193612, "grad_norm": 1.915050983428955, "learning_rate": 1.99850233083524e-05, "loss": 0.6158, "step": 1717 }, { "epoch": 0.04698096696565303, "grad_norm": 2.479396104812622, "learning_rate": 1.9984974813031718e-05, "loss": 0.5889, "step": 1718 }, { "epoch": 0.04700831327936994, "grad_norm": 1.6215546131134033, "learning_rate": 1.9984926239381716e-05, "loss": 0.565, "step": 1719 }, { "epoch": 0.04703565959308685, "grad_norm": 2.579634428024292, "learning_rate": 1.998487758740278e-05, "loss": 0.5825, "step": 1720 }, { "epoch": 0.04706300590680376, "grad_norm": 1.5050522089004517, "learning_rate": 1.9984828857095287e-05, "loss": 0.5781, "step": 1721 }, { "epoch": 0.04709035222052067, "grad_norm": 2.2629477977752686, "learning_rate": 1.9984780048459623e-05, "loss": 0.9768, "step": 1722 }, { "epoch": 0.047117698534237584, "grad_norm": 1.9294188022613525, "learning_rate": 1.998473116149617e-05, "loss": 0.5893, "step": 1723 }, { "epoch": 0.047145044847954495, "grad_norm": 2.429086685180664, "learning_rate": 1.998468219620531e-05, "loss": 0.4912, "step": 1724 }, { "epoch": 0.047172391161671406, "grad_norm": 1.758101224899292, "learning_rate": 1.9984633152587428e-05, "loss": 0.6046, "step": 1725 }, { "epoch": 0.04719973747538832, "grad_norm": 1.579136848449707, "learning_rate": 1.998458403064291e-05, "loss": 0.5769, "step": 1726 }, { "epoch": 0.04722708378910523, "grad_norm": 1.8802202939987183, "learning_rate": 1.998453483037214e-05, "loss": 0.5755, "step": 1727 }, { "epoch": 0.04725443010282214, "grad_norm": 1.5253684520721436, "learning_rate": 1.99844855517755e-05, "loss": 0.5724, "step": 1728 }, { "epoch": 0.04728177641653905, "grad_norm": 1.8046420812606812, "learning_rate": 1.9984436194853383e-05, "loss": 0.5908, "step": 1729 }, { "epoch": 0.04730912273025596, "grad_norm": 1.4523016214370728, "learning_rate": 1.9984386759606177e-05, "loss": 0.5793, "step": 1730 }, { "epoch": 0.04733646904397287, "grad_norm": 1.803388237953186, "learning_rate": 1.9984337246034264e-05, "loss": 0.9594, "step": 1731 }, { "epoch": 0.047363815357689784, "grad_norm": 1.910267949104309, "learning_rate": 1.9984287654138038e-05, "loss": 0.6007, "step": 1732 }, { "epoch": 0.047391161671406695, "grad_norm": 2.131321907043457, "learning_rate": 1.998423798391788e-05, "loss": 0.5925, "step": 1733 }, { "epoch": 0.047418507985123606, "grad_norm": 1.790452480316162, "learning_rate": 1.9984188235374188e-05, "loss": 0.558, "step": 1734 }, { "epoch": 0.04744585429884052, "grad_norm": 1.7581700086593628, "learning_rate": 1.998413840850735e-05, "loss": 0.595, "step": 1735 }, { "epoch": 0.04747320061255743, "grad_norm": 2.0366780757904053, "learning_rate": 1.9984088503317756e-05, "loss": 0.6301, "step": 1736 }, { "epoch": 0.04750054692627434, "grad_norm": 1.4844775199890137, "learning_rate": 1.9984038519805794e-05, "loss": 0.5782, "step": 1737 }, { "epoch": 0.04752789323999125, "grad_norm": 2.094937324523926, "learning_rate": 1.9983988457971864e-05, "loss": 0.5892, "step": 1738 }, { "epoch": 0.04755523955370816, "grad_norm": 1.7851011753082275, "learning_rate": 1.998393831781635e-05, "loss": 0.605, "step": 1739 }, { "epoch": 0.04758258586742507, "grad_norm": 2.1605985164642334, "learning_rate": 1.9983888099339652e-05, "loss": 0.4598, "step": 1740 }, { "epoch": 0.047609932181141984, "grad_norm": 1.696311593055725, "learning_rate": 1.9983837802542163e-05, "loss": 0.6018, "step": 1741 }, { "epoch": 0.047637278494858895, "grad_norm": 1.7763079404830933, "learning_rate": 1.9983787427424275e-05, "loss": 0.5662, "step": 1742 }, { "epoch": 0.047664624808575806, "grad_norm": 2.045797109603882, "learning_rate": 1.9983736973986383e-05, "loss": 0.7023, "step": 1743 }, { "epoch": 0.04769197112229272, "grad_norm": 2.2335028648376465, "learning_rate": 1.9983686442228886e-05, "loss": 0.6112, "step": 1744 }, { "epoch": 0.04771931743600963, "grad_norm": 1.9004532098770142, "learning_rate": 1.9983635832152176e-05, "loss": 0.9548, "step": 1745 }, { "epoch": 0.04774666374972654, "grad_norm": 8.978667259216309, "learning_rate": 1.9983585143756655e-05, "loss": 0.9813, "step": 1746 }, { "epoch": 0.04777401006344345, "grad_norm": 1.6669765710830688, "learning_rate": 1.9983534377042716e-05, "loss": 0.4982, "step": 1747 }, { "epoch": 0.04780135637716036, "grad_norm": 2.7440147399902344, "learning_rate": 1.9983483532010762e-05, "loss": 0.9961, "step": 1748 }, { "epoch": 0.04782870269087727, "grad_norm": 2.450319766998291, "learning_rate": 1.998343260866119e-05, "loss": 0.646, "step": 1749 }, { "epoch": 0.047856049004594184, "grad_norm": 1.7088338136672974, "learning_rate": 1.9983381606994395e-05, "loss": 0.6244, "step": 1750 }, { "epoch": 0.047883395318311095, "grad_norm": 2.080808401107788, "learning_rate": 1.9983330527010782e-05, "loss": 0.6137, "step": 1751 }, { "epoch": 0.047910741632028006, "grad_norm": 2.325593948364258, "learning_rate": 1.998327936871075e-05, "loss": 1.0015, "step": 1752 }, { "epoch": 0.04793808794574491, "grad_norm": 1.650421142578125, "learning_rate": 1.9983228132094702e-05, "loss": 0.5833, "step": 1753 }, { "epoch": 0.04796543425946182, "grad_norm": 2.0781500339508057, "learning_rate": 1.9983176817163038e-05, "loss": 0.6034, "step": 1754 }, { "epoch": 0.04799278057317873, "grad_norm": 3.1131975650787354, "learning_rate": 1.998312542391616e-05, "loss": 0.4276, "step": 1755 }, { "epoch": 0.048020126886895644, "grad_norm": 2.102055549621582, "learning_rate": 1.9983073952354475e-05, "loss": 0.5905, "step": 1756 }, { "epoch": 0.048047473200612555, "grad_norm": 2.3550078868865967, "learning_rate": 1.9983022402478386e-05, "loss": 0.5954, "step": 1757 }, { "epoch": 0.048074819514329466, "grad_norm": 2.0099685192108154, "learning_rate": 1.9982970774288294e-05, "loss": 0.6236, "step": 1758 }, { "epoch": 0.04810216582804638, "grad_norm": 3.04976487159729, "learning_rate": 1.9982919067784604e-05, "loss": 0.6392, "step": 1759 }, { "epoch": 0.04812951214176329, "grad_norm": 1.9452180862426758, "learning_rate": 1.9982867282967725e-05, "loss": 0.6011, "step": 1760 }, { "epoch": 0.0481568584554802, "grad_norm": 2.376264810562134, "learning_rate": 1.9982815419838062e-05, "loss": 0.5824, "step": 1761 }, { "epoch": 0.04818420476919711, "grad_norm": 4.7361555099487305, "learning_rate": 1.9982763478396023e-05, "loss": 0.6235, "step": 1762 }, { "epoch": 0.04821155108291402, "grad_norm": 1.7627490758895874, "learning_rate": 1.9982711458642013e-05, "loss": 0.5782, "step": 1763 }, { "epoch": 0.04823889739663093, "grad_norm": 1.616388201713562, "learning_rate": 1.998265936057644e-05, "loss": 0.5804, "step": 1764 }, { "epoch": 0.04826624371034784, "grad_norm": 3.803400754928589, "learning_rate": 1.998260718419971e-05, "loss": 0.623, "step": 1765 }, { "epoch": 0.048293590024064754, "grad_norm": 2.5606675148010254, "learning_rate": 1.998255492951224e-05, "loss": 0.5764, "step": 1766 }, { "epoch": 0.048320936337781666, "grad_norm": 2.5203793048858643, "learning_rate": 1.9982502596514438e-05, "loss": 0.7168, "step": 1767 }, { "epoch": 0.04834828265149858, "grad_norm": 2.3719635009765625, "learning_rate": 1.998245018520671e-05, "loss": 0.6011, "step": 1768 }, { "epoch": 0.04837562896521549, "grad_norm": 1.9709211587905884, "learning_rate": 1.998239769558947e-05, "loss": 1.0468, "step": 1769 }, { "epoch": 0.0484029752789324, "grad_norm": 2.6311495304107666, "learning_rate": 1.998234512766313e-05, "loss": 0.9812, "step": 1770 }, { "epoch": 0.04843032159264931, "grad_norm": 1.896500825881958, "learning_rate": 1.9982292481428097e-05, "loss": 0.6122, "step": 1771 }, { "epoch": 0.04845766790636622, "grad_norm": 1.924055814743042, "learning_rate": 1.9982239756884792e-05, "loss": 0.6271, "step": 1772 }, { "epoch": 0.04848501422008313, "grad_norm": 2.2387161254882812, "learning_rate": 1.9982186954033627e-05, "loss": 0.7173, "step": 1773 }, { "epoch": 0.04851236053380004, "grad_norm": 1.7055317163467407, "learning_rate": 1.998213407287501e-05, "loss": 0.6815, "step": 1774 }, { "epoch": 0.048539706847516954, "grad_norm": 2.6905875205993652, "learning_rate": 1.9982081113409368e-05, "loss": 0.4501, "step": 1775 }, { "epoch": 0.048567053161233865, "grad_norm": 2.0690834522247314, "learning_rate": 1.9982028075637104e-05, "loss": 0.6376, "step": 1776 }, { "epoch": 0.04859439947495078, "grad_norm": 2.5221593379974365, "learning_rate": 1.998197495955864e-05, "loss": 0.6823, "step": 1777 }, { "epoch": 0.04862174578866769, "grad_norm": 1.84353506565094, "learning_rate": 1.998192176517439e-05, "loss": 0.5933, "step": 1778 }, { "epoch": 0.0486490921023846, "grad_norm": 1.5318210124969482, "learning_rate": 1.998186849248477e-05, "loss": 0.6227, "step": 1779 }, { "epoch": 0.04867643841610151, "grad_norm": 1.855567216873169, "learning_rate": 1.9981815141490206e-05, "loss": 0.6039, "step": 1780 }, { "epoch": 0.04870378472981842, "grad_norm": 2.6318631172180176, "learning_rate": 1.998176171219111e-05, "loss": 0.5643, "step": 1781 }, { "epoch": 0.04873113104353533, "grad_norm": 1.7228405475616455, "learning_rate": 1.9981708204587904e-05, "loss": 0.5954, "step": 1782 }, { "epoch": 0.04875847735725224, "grad_norm": 1.4089887142181396, "learning_rate": 1.9981654618681006e-05, "loss": 0.4602, "step": 1783 }, { "epoch": 0.048785823670969154, "grad_norm": 2.7237205505371094, "learning_rate": 1.9981600954470832e-05, "loss": 0.6145, "step": 1784 }, { "epoch": 0.048813169984686065, "grad_norm": 1.7827321290969849, "learning_rate": 1.998154721195781e-05, "loss": 0.6488, "step": 1785 }, { "epoch": 0.048840516298402976, "grad_norm": 2.659916639328003, "learning_rate": 1.9981493391142362e-05, "loss": 0.614, "step": 1786 }, { "epoch": 0.04886786261211989, "grad_norm": 1.9503916501998901, "learning_rate": 1.9981439492024907e-05, "loss": 0.5993, "step": 1787 }, { "epoch": 0.0488952089258368, "grad_norm": 2.1078879833221436, "learning_rate": 1.9981385514605865e-05, "loss": 0.6703, "step": 1788 }, { "epoch": 0.04892255523955371, "grad_norm": 1.590466022491455, "learning_rate": 1.9981331458885665e-05, "loss": 0.5925, "step": 1789 }, { "epoch": 0.04894990155327062, "grad_norm": 1.8131225109100342, "learning_rate": 1.9981277324864727e-05, "loss": 0.5812, "step": 1790 }, { "epoch": 0.04897724786698753, "grad_norm": 2.4666574001312256, "learning_rate": 1.998122311254348e-05, "loss": 0.6142, "step": 1791 }, { "epoch": 0.04900459418070444, "grad_norm": 1.4019348621368408, "learning_rate": 1.9981168821922344e-05, "loss": 0.4799, "step": 1792 }, { "epoch": 0.049031940494421354, "grad_norm": 1.977381706237793, "learning_rate": 1.9981114453001747e-05, "loss": 0.5854, "step": 1793 }, { "epoch": 0.049059286808138265, "grad_norm": 3.5420126914978027, "learning_rate": 1.998106000578212e-05, "loss": 0.6092, "step": 1794 }, { "epoch": 0.049086633121855176, "grad_norm": 2.4080517292022705, "learning_rate": 1.9981005480263884e-05, "loss": 0.6098, "step": 1795 }, { "epoch": 0.04911397943557209, "grad_norm": 1.8138947486877441, "learning_rate": 1.9980950876447468e-05, "loss": 0.6088, "step": 1796 }, { "epoch": 0.049141325749289, "grad_norm": 1.7636312246322632, "learning_rate": 1.9980896194333305e-05, "loss": 0.5796, "step": 1797 }, { "epoch": 0.04916867206300591, "grad_norm": 1.7068918943405151, "learning_rate": 1.9980841433921815e-05, "loss": 0.5788, "step": 1798 }, { "epoch": 0.04919601837672282, "grad_norm": 3.1503050327301025, "learning_rate": 1.9980786595213435e-05, "loss": 0.984, "step": 1799 }, { "epoch": 0.04922336469043973, "grad_norm": 6.104787826538086, "learning_rate": 1.9980731678208593e-05, "loss": 0.641, "step": 1800 }, { "epoch": 0.04925071100415664, "grad_norm": 2.2942278385162354, "learning_rate": 1.998067668290772e-05, "loss": 0.5934, "step": 1801 }, { "epoch": 0.049278057317873554, "grad_norm": 1.6399619579315186, "learning_rate": 1.9980621609311247e-05, "loss": 0.6125, "step": 1802 }, { "epoch": 0.049305403631590465, "grad_norm": 1.9578533172607422, "learning_rate": 1.9980566457419604e-05, "loss": 0.5769, "step": 1803 }, { "epoch": 0.04933274994530737, "grad_norm": 2.13598895072937, "learning_rate": 1.998051122723323e-05, "loss": 0.5788, "step": 1804 }, { "epoch": 0.04936009625902428, "grad_norm": 1.930821180343628, "learning_rate": 1.998045591875255e-05, "loss": 0.5863, "step": 1805 }, { "epoch": 0.04938744257274119, "grad_norm": 2.2889115810394287, "learning_rate": 1.9980400531978005e-05, "loss": 0.9673, "step": 1806 }, { "epoch": 0.0494147888864581, "grad_norm": 1.7581497430801392, "learning_rate": 1.9980345066910028e-05, "loss": 0.6189, "step": 1807 }, { "epoch": 0.049442135200175014, "grad_norm": 1.5353814363479614, "learning_rate": 1.998028952354905e-05, "loss": 0.485, "step": 1808 }, { "epoch": 0.049469481513891925, "grad_norm": 2.030235767364502, "learning_rate": 1.998023390189551e-05, "loss": 0.6237, "step": 1809 }, { "epoch": 0.049496827827608836, "grad_norm": 1.8363360166549683, "learning_rate": 1.9980178201949845e-05, "loss": 0.9789, "step": 1810 }, { "epoch": 0.04952417414132575, "grad_norm": 1.8233877420425415, "learning_rate": 1.9980122423712486e-05, "loss": 0.6137, "step": 1811 }, { "epoch": 0.04955152045504266, "grad_norm": 1.7665640115737915, "learning_rate": 1.998006656718388e-05, "loss": 0.5903, "step": 1812 }, { "epoch": 0.04957886676875957, "grad_norm": 1.692097544670105, "learning_rate": 1.998001063236446e-05, "loss": 0.5847, "step": 1813 }, { "epoch": 0.04960621308247648, "grad_norm": 1.6127454042434692, "learning_rate": 1.9979954619254664e-05, "loss": 0.6076, "step": 1814 }, { "epoch": 0.04963355939619339, "grad_norm": 1.8518692255020142, "learning_rate": 1.9979898527854933e-05, "loss": 0.9862, "step": 1815 }, { "epoch": 0.0496609057099103, "grad_norm": 1.8047504425048828, "learning_rate": 1.9979842358165706e-05, "loss": 0.5935, "step": 1816 }, { "epoch": 0.049688252023627213, "grad_norm": 1.9076924324035645, "learning_rate": 1.9979786110187426e-05, "loss": 0.6181, "step": 1817 }, { "epoch": 0.049715598337344125, "grad_norm": 1.8992681503295898, "learning_rate": 1.9979729783920533e-05, "loss": 0.9689, "step": 1818 }, { "epoch": 0.049742944651061036, "grad_norm": 2.11845326423645, "learning_rate": 1.9979673379365465e-05, "loss": 0.6964, "step": 1819 }, { "epoch": 0.04977029096477795, "grad_norm": 3.68982195854187, "learning_rate": 1.9979616896522668e-05, "loss": 0.614, "step": 1820 }, { "epoch": 0.04979763727849486, "grad_norm": 5.82841682434082, "learning_rate": 1.997956033539259e-05, "loss": 0.4562, "step": 1821 }, { "epoch": 0.04982498359221177, "grad_norm": 1.8461295366287231, "learning_rate": 1.9979503695975665e-05, "loss": 0.6005, "step": 1822 }, { "epoch": 0.04985232990592868, "grad_norm": 1.927324891090393, "learning_rate": 1.9979446978272344e-05, "loss": 0.6309, "step": 1823 }, { "epoch": 0.04987967621964559, "grad_norm": 1.6528602838516235, "learning_rate": 1.9979390182283066e-05, "loss": 0.5943, "step": 1824 }, { "epoch": 0.0499070225333625, "grad_norm": 1.5472265481948853, "learning_rate": 1.9979333308008285e-05, "loss": 0.5953, "step": 1825 }, { "epoch": 0.04993436884707941, "grad_norm": 1.6765954494476318, "learning_rate": 1.9979276355448443e-05, "loss": 0.61, "step": 1826 }, { "epoch": 0.049961715160796324, "grad_norm": 2.572028636932373, "learning_rate": 1.9979219324603983e-05, "loss": 0.9708, "step": 1827 }, { "epoch": 0.049989061474513236, "grad_norm": 1.5636299848556519, "learning_rate": 1.9979162215475356e-05, "loss": 0.4309, "step": 1828 }, { "epoch": 0.05001640778823015, "grad_norm": 1.524622917175293, "learning_rate": 1.997910502806301e-05, "loss": 0.585, "step": 1829 }, { "epoch": 0.05004375410194706, "grad_norm": 2.8414101600646973, "learning_rate": 1.9979047762367394e-05, "loss": 0.6518, "step": 1830 }, { "epoch": 0.05007110041566397, "grad_norm": 2.1039745807647705, "learning_rate": 1.997899041838896e-05, "loss": 0.6419, "step": 1831 }, { "epoch": 0.05009844672938088, "grad_norm": 1.9364582300186157, "learning_rate": 1.997893299612815e-05, "loss": 0.6023, "step": 1832 }, { "epoch": 0.05012579304309779, "grad_norm": 2.0150654315948486, "learning_rate": 1.9978875495585423e-05, "loss": 0.6485, "step": 1833 }, { "epoch": 0.0501531393568147, "grad_norm": 1.5401861667633057, "learning_rate": 1.997881791676122e-05, "loss": 0.6012, "step": 1834 }, { "epoch": 0.05018048567053161, "grad_norm": 1.4786295890808105, "learning_rate": 1.997876025965601e-05, "loss": 0.5795, "step": 1835 }, { "epoch": 0.050207831984248524, "grad_norm": 1.5357158184051514, "learning_rate": 1.997870252427022e-05, "loss": 0.607, "step": 1836 }, { "epoch": 0.050235178297965435, "grad_norm": 1.6097861528396606, "learning_rate": 1.9978644710604324e-05, "loss": 0.5966, "step": 1837 }, { "epoch": 0.05026252461168235, "grad_norm": 1.5783346891403198, "learning_rate": 1.997858681865877e-05, "loss": 0.6303, "step": 1838 }, { "epoch": 0.05028987092539926, "grad_norm": 2.152548313140869, "learning_rate": 1.997852884843401e-05, "loss": 0.9891, "step": 1839 }, { "epoch": 0.05031721723911617, "grad_norm": 1.716815710067749, "learning_rate": 1.9978470799930495e-05, "loss": 0.9732, "step": 1840 }, { "epoch": 0.05034456355283308, "grad_norm": 1.507604956626892, "learning_rate": 1.997841267314869e-05, "loss": 0.5815, "step": 1841 }, { "epoch": 0.05037190986654999, "grad_norm": 2.2236719131469727, "learning_rate": 1.997835446808904e-05, "loss": 0.5854, "step": 1842 }, { "epoch": 0.0503992561802669, "grad_norm": 1.915806770324707, "learning_rate": 1.997829618475201e-05, "loss": 0.6134, "step": 1843 }, { "epoch": 0.05042660249398381, "grad_norm": 1.619472861289978, "learning_rate": 1.997823782313806e-05, "loss": 1.0104, "step": 1844 }, { "epoch": 0.050453948807700724, "grad_norm": 1.9694496393203735, "learning_rate": 1.9978179383247637e-05, "loss": 0.683, "step": 1845 }, { "epoch": 0.050481295121417635, "grad_norm": 2.043196201324463, "learning_rate": 1.9978120865081206e-05, "loss": 0.5941, "step": 1846 }, { "epoch": 0.050508641435134546, "grad_norm": 1.8862241506576538, "learning_rate": 1.9978062268639224e-05, "loss": 0.5863, "step": 1847 }, { "epoch": 0.05053598774885146, "grad_norm": 1.6862454414367676, "learning_rate": 1.997800359392215e-05, "loss": 0.5802, "step": 1848 }, { "epoch": 0.05056333406256837, "grad_norm": 2.883915662765503, "learning_rate": 1.997794484093045e-05, "loss": 0.609, "step": 1849 }, { "epoch": 0.05059068037628528, "grad_norm": 1.9812734127044678, "learning_rate": 1.9977886009664577e-05, "loss": 0.6226, "step": 1850 }, { "epoch": 0.05061802669000219, "grad_norm": 1.737937092781067, "learning_rate": 1.9977827100124998e-05, "loss": 0.6027, "step": 1851 }, { "epoch": 0.0506453730037191, "grad_norm": 2.502096176147461, "learning_rate": 1.9977768112312174e-05, "loss": 0.5006, "step": 1852 }, { "epoch": 0.05067271931743601, "grad_norm": 1.8058849573135376, "learning_rate": 1.9977709046226567e-05, "loss": 0.9826, "step": 1853 }, { "epoch": 0.05070006563115292, "grad_norm": 1.7929489612579346, "learning_rate": 1.9977649901868638e-05, "loss": 0.5831, "step": 1854 }, { "epoch": 0.05072741194486983, "grad_norm": 1.8520002365112305, "learning_rate": 1.9977590679238857e-05, "loss": 0.6757, "step": 1855 }, { "epoch": 0.05075475825858674, "grad_norm": 1.7155015468597412, "learning_rate": 1.9977531378337686e-05, "loss": 0.5954, "step": 1856 }, { "epoch": 0.05078210457230365, "grad_norm": 2.1675686836242676, "learning_rate": 1.9977471999165584e-05, "loss": 0.588, "step": 1857 }, { "epoch": 0.05080945088602056, "grad_norm": 2.116769313812256, "learning_rate": 1.9977412541723024e-05, "loss": 0.6018, "step": 1858 }, { "epoch": 0.05083679719973747, "grad_norm": 7.17722749710083, "learning_rate": 1.9977353006010472e-05, "loss": 0.446, "step": 1859 }, { "epoch": 0.050864143513454384, "grad_norm": 1.8485883474349976, "learning_rate": 1.9977293392028393e-05, "loss": 0.6367, "step": 1860 }, { "epoch": 0.050891489827171295, "grad_norm": 1.9779373407363892, "learning_rate": 1.9977233699777258e-05, "loss": 0.6075, "step": 1861 }, { "epoch": 0.050918836140888206, "grad_norm": 2.02382493019104, "learning_rate": 1.9977173929257527e-05, "loss": 0.5788, "step": 1862 }, { "epoch": 0.05094618245460512, "grad_norm": 2.0150012969970703, "learning_rate": 1.997711408046968e-05, "loss": 0.5714, "step": 1863 }, { "epoch": 0.05097352876832203, "grad_norm": 1.8478178977966309, "learning_rate": 1.9977054153414173e-05, "loss": 0.5861, "step": 1864 }, { "epoch": 0.05100087508203894, "grad_norm": 2.244401454925537, "learning_rate": 1.997699414809149e-05, "loss": 0.9721, "step": 1865 }, { "epoch": 0.05102822139575585, "grad_norm": 2.3421003818511963, "learning_rate": 1.9976934064502096e-05, "loss": 0.6253, "step": 1866 }, { "epoch": 0.05105556770947276, "grad_norm": 1.6765282154083252, "learning_rate": 1.997687390264646e-05, "loss": 0.6234, "step": 1867 }, { "epoch": 0.05108291402318967, "grad_norm": 2.055258274078369, "learning_rate": 1.9976813662525055e-05, "loss": 0.5987, "step": 1868 }, { "epoch": 0.051110260336906584, "grad_norm": 2.168473482131958, "learning_rate": 1.9976753344138356e-05, "loss": 0.5971, "step": 1869 }, { "epoch": 0.051137606650623495, "grad_norm": 2.1661322116851807, "learning_rate": 1.9976692947486836e-05, "loss": 0.6129, "step": 1870 }, { "epoch": 0.051164952964340406, "grad_norm": 1.7544158697128296, "learning_rate": 1.9976632472570966e-05, "loss": 0.6173, "step": 1871 }, { "epoch": 0.05119229927805732, "grad_norm": 1.4767100811004639, "learning_rate": 1.997657191939122e-05, "loss": 0.4817, "step": 1872 }, { "epoch": 0.05121964559177423, "grad_norm": 3.1603691577911377, "learning_rate": 1.9976511287948076e-05, "loss": 0.6071, "step": 1873 }, { "epoch": 0.05124699190549114, "grad_norm": 1.6861438751220703, "learning_rate": 1.997645057824201e-05, "loss": 0.6331, "step": 1874 }, { "epoch": 0.05127433821920805, "grad_norm": 1.7872103452682495, "learning_rate": 1.9976389790273497e-05, "loss": 0.5994, "step": 1875 }, { "epoch": 0.05130168453292496, "grad_norm": 1.7544746398925781, "learning_rate": 1.9976328924043015e-05, "loss": 0.5784, "step": 1876 }, { "epoch": 0.05132903084664187, "grad_norm": 1.6926915645599365, "learning_rate": 1.997626797955104e-05, "loss": 0.7131, "step": 1877 }, { "epoch": 0.051356377160358783, "grad_norm": 1.8506240844726562, "learning_rate": 1.9976206956798046e-05, "loss": 0.9807, "step": 1878 }, { "epoch": 0.051383723474075695, "grad_norm": 1.7382533550262451, "learning_rate": 1.9976145855784515e-05, "loss": 0.9496, "step": 1879 }, { "epoch": 0.051411069787792606, "grad_norm": 1.5801666975021362, "learning_rate": 1.9976084676510934e-05, "loss": 0.5991, "step": 1880 }, { "epoch": 0.05143841610150952, "grad_norm": 1.5756808519363403, "learning_rate": 1.9976023418977772e-05, "loss": 0.5955, "step": 1881 }, { "epoch": 0.05146576241522643, "grad_norm": 2.0408523082733154, "learning_rate": 1.9975962083185516e-05, "loss": 0.5957, "step": 1882 }, { "epoch": 0.05149310872894334, "grad_norm": 1.421202301979065, "learning_rate": 1.9975900669134645e-05, "loss": 0.5905, "step": 1883 }, { "epoch": 0.05152045504266025, "grad_norm": 1.720019817352295, "learning_rate": 1.997583917682564e-05, "loss": 0.6384, "step": 1884 }, { "epoch": 0.05154780135637716, "grad_norm": 1.7534040212631226, "learning_rate": 1.997577760625898e-05, "loss": 0.6168, "step": 1885 }, { "epoch": 0.05157514767009407, "grad_norm": 2.946444511413574, "learning_rate": 1.9975715957435157e-05, "loss": 0.5743, "step": 1886 }, { "epoch": 0.05160249398381098, "grad_norm": 1.8989158868789673, "learning_rate": 1.997565423035465e-05, "loss": 0.6302, "step": 1887 }, { "epoch": 0.051629840297527894, "grad_norm": 1.8343806266784668, "learning_rate": 1.997559242501794e-05, "loss": 0.612, "step": 1888 }, { "epoch": 0.051657186611244806, "grad_norm": 1.7008631229400635, "learning_rate": 1.9975530541425514e-05, "loss": 0.5843, "step": 1889 }, { "epoch": 0.05168453292496172, "grad_norm": 2.8611905574798584, "learning_rate": 1.997546857957786e-05, "loss": 1.0084, "step": 1890 }, { "epoch": 0.05171187923867863, "grad_norm": 2.7315587997436523, "learning_rate": 1.9975406539475465e-05, "loss": 0.6208, "step": 1891 }, { "epoch": 0.05173922555239554, "grad_norm": 1.901859998703003, "learning_rate": 1.997534442111881e-05, "loss": 0.5826, "step": 1892 }, { "epoch": 0.05176657186611245, "grad_norm": 2.209918260574341, "learning_rate": 1.997528222450839e-05, "loss": 0.586, "step": 1893 }, { "epoch": 0.05179391817982936, "grad_norm": 2.3090710639953613, "learning_rate": 1.9975219949644684e-05, "loss": 0.5909, "step": 1894 }, { "epoch": 0.05182126449354627, "grad_norm": 1.864741325378418, "learning_rate": 1.997515759652818e-05, "loss": 0.6134, "step": 1895 }, { "epoch": 0.05184861080726318, "grad_norm": 1.842880129814148, "learning_rate": 1.997509516515938e-05, "loss": 0.5938, "step": 1896 }, { "epoch": 0.051875957120980094, "grad_norm": 1.8274931907653809, "learning_rate": 1.9975032655538763e-05, "loss": 0.5766, "step": 1897 }, { "epoch": 0.051903303434697005, "grad_norm": 1.6915277242660522, "learning_rate": 1.9974970067666822e-05, "loss": 0.6016, "step": 1898 }, { "epoch": 0.051930649748413917, "grad_norm": 2.070038318634033, "learning_rate": 1.9974907401544046e-05, "loss": 0.5755, "step": 1899 }, { "epoch": 0.05195799606213083, "grad_norm": 4.129176139831543, "learning_rate": 1.997484465717093e-05, "loss": 0.429, "step": 1900 }, { "epoch": 0.05198534237584774, "grad_norm": 3.548649311065674, "learning_rate": 1.9974781834547965e-05, "loss": 0.5994, "step": 1901 }, { "epoch": 0.05201268868956465, "grad_norm": 2.2591068744659424, "learning_rate": 1.9974718933675645e-05, "loss": 0.611, "step": 1902 }, { "epoch": 0.05204003500328156, "grad_norm": 1.8427397012710571, "learning_rate": 1.997465595455446e-05, "loss": 0.5747, "step": 1903 }, { "epoch": 0.05206738131699847, "grad_norm": 1.8989096879959106, "learning_rate": 1.997459289718491e-05, "loss": 0.6407, "step": 1904 }, { "epoch": 0.052094727630715376, "grad_norm": 2.14471435546875, "learning_rate": 1.997452976156748e-05, "loss": 0.5895, "step": 1905 }, { "epoch": 0.05212207394443229, "grad_norm": 1.5316381454467773, "learning_rate": 1.9974466547702675e-05, "loss": 0.6068, "step": 1906 }, { "epoch": 0.0521494202581492, "grad_norm": 2.8813834190368652, "learning_rate": 1.9974403255590987e-05, "loss": 1.0332, "step": 1907 }, { "epoch": 0.05217676657186611, "grad_norm": 1.7905644178390503, "learning_rate": 1.997433988523291e-05, "loss": 0.5718, "step": 1908 }, { "epoch": 0.05220411288558302, "grad_norm": 1.586449146270752, "learning_rate": 1.9974276436628944e-05, "loss": 0.5871, "step": 1909 }, { "epoch": 0.05223145919929993, "grad_norm": 1.9934967756271362, "learning_rate": 1.9974212909779588e-05, "loss": 0.6275, "step": 1910 }, { "epoch": 0.05225880551301684, "grad_norm": 2.0972979068756104, "learning_rate": 1.9974149304685337e-05, "loss": 0.5948, "step": 1911 }, { "epoch": 0.052286151826733754, "grad_norm": 2.053895950317383, "learning_rate": 1.997408562134669e-05, "loss": 0.6077, "step": 1912 }, { "epoch": 0.052313498140450665, "grad_norm": 1.7206919193267822, "learning_rate": 1.9974021859764154e-05, "loss": 0.6317, "step": 1913 }, { "epoch": 0.052340844454167576, "grad_norm": 2.141199827194214, "learning_rate": 1.997395801993822e-05, "loss": 0.5861, "step": 1914 }, { "epoch": 0.05236819076788449, "grad_norm": 1.9893276691436768, "learning_rate": 1.9973894101869392e-05, "loss": 0.9936, "step": 1915 }, { "epoch": 0.0523955370816014, "grad_norm": 4.480663299560547, "learning_rate": 1.9973830105558175e-05, "loss": 0.5824, "step": 1916 }, { "epoch": 0.05242288339531831, "grad_norm": 1.9148906469345093, "learning_rate": 1.9973766031005064e-05, "loss": 0.6221, "step": 1917 }, { "epoch": 0.05245022970903522, "grad_norm": 1.734224796295166, "learning_rate": 1.9973701878210567e-05, "loss": 0.5736, "step": 1918 }, { "epoch": 0.05247757602275213, "grad_norm": 2.5038416385650635, "learning_rate": 1.9973637647175184e-05, "loss": 0.6419, "step": 1919 }, { "epoch": 0.05250492233646904, "grad_norm": 3.4853744506835938, "learning_rate": 1.997357333789942e-05, "loss": 0.595, "step": 1920 }, { "epoch": 0.052532268650185954, "grad_norm": 1.8928724527359009, "learning_rate": 1.997350895038378e-05, "loss": 0.9698, "step": 1921 }, { "epoch": 0.052559614963902865, "grad_norm": 1.9206584692001343, "learning_rate": 1.9973444484628774e-05, "loss": 0.585, "step": 1922 }, { "epoch": 0.052586961277619776, "grad_norm": 2.218620777130127, "learning_rate": 1.99733799406349e-05, "loss": 0.688, "step": 1923 }, { "epoch": 0.05261430759133669, "grad_norm": 1.3209792375564575, "learning_rate": 1.9973315318402665e-05, "loss": 0.5859, "step": 1924 }, { "epoch": 0.0526416539050536, "grad_norm": 1.5793591737747192, "learning_rate": 1.997325061793258e-05, "loss": 0.591, "step": 1925 }, { "epoch": 0.05266900021877051, "grad_norm": 1.7063425779342651, "learning_rate": 1.997318583922515e-05, "loss": 0.6064, "step": 1926 }, { "epoch": 0.05269634653248742, "grad_norm": 2.6510202884674072, "learning_rate": 1.9973120982280885e-05, "loss": 0.5968, "step": 1927 }, { "epoch": 0.05272369284620433, "grad_norm": 2.8311781883239746, "learning_rate": 1.9973056047100294e-05, "loss": 0.5882, "step": 1928 }, { "epoch": 0.05275103915992124, "grad_norm": 1.9243274927139282, "learning_rate": 1.997299103368388e-05, "loss": 0.5789, "step": 1929 }, { "epoch": 0.052778385473638154, "grad_norm": 1.9479005336761475, "learning_rate": 1.997292594203216e-05, "loss": 0.5901, "step": 1930 }, { "epoch": 0.052805731787355065, "grad_norm": 1.9075686931610107, "learning_rate": 1.9972860772145643e-05, "loss": 0.595, "step": 1931 }, { "epoch": 0.052833078101071976, "grad_norm": 1.5910801887512207, "learning_rate": 1.9972795524024843e-05, "loss": 0.5777, "step": 1932 }, { "epoch": 0.05286042441478889, "grad_norm": 2.16184401512146, "learning_rate": 1.997273019767026e-05, "loss": 0.5954, "step": 1933 }, { "epoch": 0.0528877707285058, "grad_norm": 1.8839870691299438, "learning_rate": 1.9972664793082425e-05, "loss": 0.5742, "step": 1934 }, { "epoch": 0.05291511704222271, "grad_norm": 1.744797945022583, "learning_rate": 1.9972599310261836e-05, "loss": 0.5673, "step": 1935 }, { "epoch": 0.05294246335593962, "grad_norm": 1.8157504796981812, "learning_rate": 1.9972533749209013e-05, "loss": 0.6107, "step": 1936 }, { "epoch": 0.05296980966965653, "grad_norm": 1.7771775722503662, "learning_rate": 1.9972468109924468e-05, "loss": 0.617, "step": 1937 }, { "epoch": 0.05299715598337344, "grad_norm": 1.8705317974090576, "learning_rate": 1.997240239240872e-05, "loss": 0.584, "step": 1938 }, { "epoch": 0.05302450229709035, "grad_norm": 2.0082874298095703, "learning_rate": 1.997233659666228e-05, "loss": 0.6759, "step": 1939 }, { "epoch": 0.053051848610807265, "grad_norm": 1.6396441459655762, "learning_rate": 1.9972270722685664e-05, "loss": 0.6412, "step": 1940 }, { "epoch": 0.053079194924524176, "grad_norm": 1.9405971765518188, "learning_rate": 1.9972204770479396e-05, "loss": 0.5804, "step": 1941 }, { "epoch": 0.05310654123824109, "grad_norm": 1.8289659023284912, "learning_rate": 1.9972138740043984e-05, "loss": 0.5811, "step": 1942 }, { "epoch": 0.053133887551958, "grad_norm": 2.2533812522888184, "learning_rate": 1.997207263137995e-05, "loss": 0.579, "step": 1943 }, { "epoch": 0.05316123386567491, "grad_norm": 2.1630024909973145, "learning_rate": 1.997200644448781e-05, "loss": 0.604, "step": 1944 }, { "epoch": 0.05318858017939182, "grad_norm": 1.7869882583618164, "learning_rate": 1.997194017936809e-05, "loss": 0.5885, "step": 1945 }, { "epoch": 0.05321592649310873, "grad_norm": 2.586867094039917, "learning_rate": 1.9971873836021306e-05, "loss": 0.9795, "step": 1946 }, { "epoch": 0.05324327280682564, "grad_norm": 2.2491297721862793, "learning_rate": 1.9971807414447976e-05, "loss": 0.5758, "step": 1947 }, { "epoch": 0.05327061912054255, "grad_norm": 2.5930817127227783, "learning_rate": 1.9971740914648623e-05, "loss": 0.6021, "step": 1948 }, { "epoch": 0.053297965434259464, "grad_norm": 2.1712615489959717, "learning_rate": 1.997167433662377e-05, "loss": 0.6172, "step": 1949 }, { "epoch": 0.053325311747976376, "grad_norm": 1.8600090742111206, "learning_rate": 1.997160768037394e-05, "loss": 0.6529, "step": 1950 }, { "epoch": 0.05335265806169329, "grad_norm": 1.8501932621002197, "learning_rate": 1.997154094589965e-05, "loss": 0.6025, "step": 1951 }, { "epoch": 0.0533800043754102, "grad_norm": 3.685637950897217, "learning_rate": 1.9971474133201432e-05, "loss": 0.6786, "step": 1952 }, { "epoch": 0.05340735068912711, "grad_norm": 1.7635765075683594, "learning_rate": 1.99714072422798e-05, "loss": 0.5655, "step": 1953 }, { "epoch": 0.05343469700284402, "grad_norm": 2.7595553398132324, "learning_rate": 1.997134027313529e-05, "loss": 0.6321, "step": 1954 }, { "epoch": 0.05346204331656093, "grad_norm": 1.867368221282959, "learning_rate": 1.997127322576842e-05, "loss": 0.6149, "step": 1955 }, { "epoch": 0.053489389630277835, "grad_norm": 1.7911723852157593, "learning_rate": 1.9971206100179714e-05, "loss": 0.5853, "step": 1956 }, { "epoch": 0.053516735943994746, "grad_norm": 1.8097888231277466, "learning_rate": 1.9971138896369706e-05, "loss": 0.6071, "step": 1957 }, { "epoch": 0.05354408225771166, "grad_norm": 1.841992735862732, "learning_rate": 1.9971071614338916e-05, "loss": 0.5836, "step": 1958 }, { "epoch": 0.05357142857142857, "grad_norm": 2.2064075469970703, "learning_rate": 1.997100425408788e-05, "loss": 0.5802, "step": 1959 }, { "epoch": 0.05359877488514548, "grad_norm": 2.186521291732788, "learning_rate": 1.997093681561712e-05, "loss": 0.6168, "step": 1960 }, { "epoch": 0.05362612119886239, "grad_norm": 3.527749538421631, "learning_rate": 1.9970869298927167e-05, "loss": 0.6041, "step": 1961 }, { "epoch": 0.0536534675125793, "grad_norm": 2.165024995803833, "learning_rate": 1.9970801704018547e-05, "loss": 0.6807, "step": 1962 }, { "epoch": 0.05368081382629621, "grad_norm": 2.084573984146118, "learning_rate": 1.9970734030891798e-05, "loss": 0.578, "step": 1963 }, { "epoch": 0.053708160140013124, "grad_norm": 2.7088701725006104, "learning_rate": 1.9970666279547444e-05, "loss": 0.9766, "step": 1964 }, { "epoch": 0.053735506453730035, "grad_norm": 2.589478015899658, "learning_rate": 1.9970598449986022e-05, "loss": 0.5818, "step": 1965 }, { "epoch": 0.053762852767446946, "grad_norm": 1.9902423620224, "learning_rate": 1.9970530542208056e-05, "loss": 0.5972, "step": 1966 }, { "epoch": 0.05379019908116386, "grad_norm": 2.7008652687072754, "learning_rate": 1.997046255621409e-05, "loss": 0.6195, "step": 1967 }, { "epoch": 0.05381754539488077, "grad_norm": 2.2347259521484375, "learning_rate": 1.9970394492004645e-05, "loss": 0.5955, "step": 1968 }, { "epoch": 0.05384489170859768, "grad_norm": 2.162600517272949, "learning_rate": 1.9970326349580264e-05, "loss": 0.5931, "step": 1969 }, { "epoch": 0.05387223802231459, "grad_norm": 2.036839723587036, "learning_rate": 1.997025812894148e-05, "loss": 0.6633, "step": 1970 }, { "epoch": 0.0538995843360315, "grad_norm": 2.1048338413238525, "learning_rate": 1.9970189830088826e-05, "loss": 0.5908, "step": 1971 }, { "epoch": 0.05392693064974841, "grad_norm": 1.687822937965393, "learning_rate": 1.997012145302284e-05, "loss": 0.6018, "step": 1972 }, { "epoch": 0.053954276963465324, "grad_norm": 1.585188388824463, "learning_rate": 1.997005299774405e-05, "loss": 0.4514, "step": 1973 }, { "epoch": 0.053981623277182235, "grad_norm": 2.1700375080108643, "learning_rate": 1.996998446425301e-05, "loss": 0.6055, "step": 1974 }, { "epoch": 0.054008969590899146, "grad_norm": 2.268239974975586, "learning_rate": 1.996991585255024e-05, "loss": 0.545, "step": 1975 }, { "epoch": 0.05403631590461606, "grad_norm": 2.031273603439331, "learning_rate": 1.9969847162636287e-05, "loss": 0.6076, "step": 1976 }, { "epoch": 0.05406366221833297, "grad_norm": 1.9709964990615845, "learning_rate": 1.9969778394511693e-05, "loss": 0.6756, "step": 1977 }, { "epoch": 0.05409100853204988, "grad_norm": 1.8436609506607056, "learning_rate": 1.9969709548176993e-05, "loss": 0.4346, "step": 1978 }, { "epoch": 0.05411835484576679, "grad_norm": 2.0209670066833496, "learning_rate": 1.9969640623632726e-05, "loss": 0.581, "step": 1979 }, { "epoch": 0.0541457011594837, "grad_norm": 1.7305793762207031, "learning_rate": 1.9969571620879433e-05, "loss": 0.6195, "step": 1980 }, { "epoch": 0.05417304747320061, "grad_norm": 1.5462805032730103, "learning_rate": 1.9969502539917657e-05, "loss": 0.422, "step": 1981 }, { "epoch": 0.054200393786917524, "grad_norm": 2.9143290519714355, "learning_rate": 1.9969433380747936e-05, "loss": 0.9839, "step": 1982 }, { "epoch": 0.054227740100634435, "grad_norm": 1.7321771383285522, "learning_rate": 1.996936414337082e-05, "loss": 0.6056, "step": 1983 }, { "epoch": 0.054255086414351346, "grad_norm": 1.997519612312317, "learning_rate": 1.9969294827786847e-05, "loss": 0.965, "step": 1984 }, { "epoch": 0.05428243272806826, "grad_norm": 1.6217972040176392, "learning_rate": 1.9969225433996562e-05, "loss": 0.4764, "step": 1985 }, { "epoch": 0.05430977904178517, "grad_norm": 2.637962818145752, "learning_rate": 1.9969155962000508e-05, "loss": 0.4356, "step": 1986 }, { "epoch": 0.05433712535550208, "grad_norm": 1.949913501739502, "learning_rate": 1.996908641179923e-05, "loss": 0.6263, "step": 1987 }, { "epoch": 0.05436447166921899, "grad_norm": 2.5507681369781494, "learning_rate": 1.9969016783393277e-05, "loss": 0.5967, "step": 1988 }, { "epoch": 0.0543918179829359, "grad_norm": 2.3320295810699463, "learning_rate": 1.996894707678319e-05, "loss": 0.6129, "step": 1989 }, { "epoch": 0.05441916429665281, "grad_norm": 1.9835087060928345, "learning_rate": 1.996887729196952e-05, "loss": 0.5979, "step": 1990 }, { "epoch": 0.054446510610369724, "grad_norm": 1.5213145017623901, "learning_rate": 1.9968807428952816e-05, "loss": 0.5843, "step": 1991 }, { "epoch": 0.054473856924086635, "grad_norm": 1.677582859992981, "learning_rate": 1.996873748773362e-05, "loss": 0.568, "step": 1992 }, { "epoch": 0.054501203237803546, "grad_norm": 1.9570139646530151, "learning_rate": 1.9968667468312482e-05, "loss": 0.5942, "step": 1993 }, { "epoch": 0.05452854955152046, "grad_norm": 1.6472848653793335, "learning_rate": 1.996859737068996e-05, "loss": 0.5995, "step": 1994 }, { "epoch": 0.05455589586523737, "grad_norm": 2.2520618438720703, "learning_rate": 1.996852719486659e-05, "loss": 0.5939, "step": 1995 }, { "epoch": 0.05458324217895428, "grad_norm": 3.523588180541992, "learning_rate": 1.9968456940842934e-05, "loss": 0.5999, "step": 1996 }, { "epoch": 0.05461058849267119, "grad_norm": 2.020789384841919, "learning_rate": 1.9968386608619537e-05, "loss": 0.5811, "step": 1997 }, { "epoch": 0.0546379348063881, "grad_norm": 1.8895412683486938, "learning_rate": 1.996831619819695e-05, "loss": 0.4719, "step": 1998 }, { "epoch": 0.05466528112010501, "grad_norm": 1.971163272857666, "learning_rate": 1.9968245709575732e-05, "loss": 0.5761, "step": 1999 }, { "epoch": 0.05469262743382192, "grad_norm": 2.2809789180755615, "learning_rate": 1.996817514275643e-05, "loss": 0.5933, "step": 2000 }, { "epoch": 0.054719973747538835, "grad_norm": 2.0291361808776855, "learning_rate": 1.99681044977396e-05, "loss": 0.6945, "step": 2001 }, { "epoch": 0.054747320061255746, "grad_norm": 1.797637939453125, "learning_rate": 1.9968033774525795e-05, "loss": 0.678, "step": 2002 }, { "epoch": 0.05477466637497266, "grad_norm": 2.1658213138580322, "learning_rate": 1.996796297311557e-05, "loss": 0.4533, "step": 2003 }, { "epoch": 0.05480201268868957, "grad_norm": 2.383692979812622, "learning_rate": 1.996789209350948e-05, "loss": 0.5909, "step": 2004 }, { "epoch": 0.05482935900240648, "grad_norm": 3.6119589805603027, "learning_rate": 1.996782113570808e-05, "loss": 0.5794, "step": 2005 }, { "epoch": 0.05485670531612339, "grad_norm": 1.9152640104293823, "learning_rate": 1.9967750099711935e-05, "loss": 0.5981, "step": 2006 }, { "epoch": 0.054884051629840294, "grad_norm": 1.9125094413757324, "learning_rate": 1.996767898552159e-05, "loss": 0.6457, "step": 2007 }, { "epoch": 0.054911397943557205, "grad_norm": 1.5220651626586914, "learning_rate": 1.996760779313761e-05, "loss": 0.5802, "step": 2008 }, { "epoch": 0.054938744257274116, "grad_norm": 1.843684434890747, "learning_rate": 1.996753652256055e-05, "loss": 0.5828, "step": 2009 }, { "epoch": 0.05496609057099103, "grad_norm": 2.0274860858917236, "learning_rate": 1.9967465173790973e-05, "loss": 0.6201, "step": 2010 }, { "epoch": 0.05499343688470794, "grad_norm": 2.756265878677368, "learning_rate": 1.9967393746829436e-05, "loss": 0.6072, "step": 2011 }, { "epoch": 0.05502078319842485, "grad_norm": 1.8983186483383179, "learning_rate": 1.9967322241676502e-05, "loss": 0.5948, "step": 2012 }, { "epoch": 0.05504812951214176, "grad_norm": 1.4323254823684692, "learning_rate": 1.996725065833273e-05, "loss": 0.5707, "step": 2013 }, { "epoch": 0.05507547582585867, "grad_norm": 3.1668546199798584, "learning_rate": 1.996717899679868e-05, "loss": 1.0201, "step": 2014 }, { "epoch": 0.05510282213957558, "grad_norm": 1.8121503591537476, "learning_rate": 1.996710725707492e-05, "loss": 0.6772, "step": 2015 }, { "epoch": 0.055130168453292494, "grad_norm": 1.8854460716247559, "learning_rate": 1.9967035439162005e-05, "loss": 0.6671, "step": 2016 }, { "epoch": 0.055157514767009405, "grad_norm": 1.9500701427459717, "learning_rate": 1.99669635430605e-05, "loss": 0.5805, "step": 2017 }, { "epoch": 0.055184861080726316, "grad_norm": 1.6610023975372314, "learning_rate": 1.996689156877097e-05, "loss": 0.6138, "step": 2018 }, { "epoch": 0.05521220739444323, "grad_norm": 1.426914095878601, "learning_rate": 1.9966819516293987e-05, "loss": 0.6381, "step": 2019 }, { "epoch": 0.05523955370816014, "grad_norm": 1.5070427656173706, "learning_rate": 1.9966747385630104e-05, "loss": 0.6337, "step": 2020 }, { "epoch": 0.05526690002187705, "grad_norm": 1.710109829902649, "learning_rate": 1.9966675176779894e-05, "loss": 0.5884, "step": 2021 }, { "epoch": 0.05529424633559396, "grad_norm": 1.578298568725586, "learning_rate": 1.9966602889743925e-05, "loss": 0.6036, "step": 2022 }, { "epoch": 0.05532159264931087, "grad_norm": 1.371516466140747, "learning_rate": 1.996653052452276e-05, "loss": 0.6277, "step": 2023 }, { "epoch": 0.05534893896302778, "grad_norm": 1.8689854145050049, "learning_rate": 1.9966458081116965e-05, "loss": 0.6109, "step": 2024 }, { "epoch": 0.055376285276744694, "grad_norm": 1.486509084701538, "learning_rate": 1.9966385559527113e-05, "loss": 0.5902, "step": 2025 }, { "epoch": 0.055403631590461605, "grad_norm": 1.454769253730774, "learning_rate": 1.996631295975377e-05, "loss": 0.6014, "step": 2026 }, { "epoch": 0.055430977904178516, "grad_norm": 2.285015821456909, "learning_rate": 1.996624028179751e-05, "loss": 0.5794, "step": 2027 }, { "epoch": 0.05545832421789543, "grad_norm": 1.8089312314987183, "learning_rate": 1.9966167525658898e-05, "loss": 0.6216, "step": 2028 }, { "epoch": 0.05548567053161234, "grad_norm": 2.12811017036438, "learning_rate": 1.9966094691338505e-05, "loss": 0.5979, "step": 2029 }, { "epoch": 0.05551301684532925, "grad_norm": 1.7486728429794312, "learning_rate": 1.9966021778836904e-05, "loss": 0.579, "step": 2030 }, { "epoch": 0.05554036315904616, "grad_norm": 1.8622175455093384, "learning_rate": 1.9965948788154667e-05, "loss": 0.6805, "step": 2031 }, { "epoch": 0.05556770947276307, "grad_norm": 1.7377442121505737, "learning_rate": 1.996587571929237e-05, "loss": 0.6689, "step": 2032 }, { "epoch": 0.05559505578647998, "grad_norm": 1.9790329933166504, "learning_rate": 1.996580257225058e-05, "loss": 0.6, "step": 2033 }, { "epoch": 0.055622402100196894, "grad_norm": 1.645075798034668, "learning_rate": 1.9965729347029873e-05, "loss": 0.6162, "step": 2034 }, { "epoch": 0.055649748413913805, "grad_norm": 1.883785605430603, "learning_rate": 1.9965656043630824e-05, "loss": 0.6039, "step": 2035 }, { "epoch": 0.055677094727630716, "grad_norm": 2.133984327316284, "learning_rate": 1.9965582662054008e-05, "loss": 0.6242, "step": 2036 }, { "epoch": 0.05570444104134763, "grad_norm": 2.279827356338501, "learning_rate": 1.99655092023e-05, "loss": 0.4784, "step": 2037 }, { "epoch": 0.05573178735506454, "grad_norm": 1.5048643350601196, "learning_rate": 1.9965435664369377e-05, "loss": 0.6016, "step": 2038 }, { "epoch": 0.05575913366878145, "grad_norm": 3.555955410003662, "learning_rate": 1.9965362048262714e-05, "loss": 1.0026, "step": 2039 }, { "epoch": 0.05578647998249836, "grad_norm": 1.9933035373687744, "learning_rate": 1.9965288353980595e-05, "loss": 0.5565, "step": 2040 }, { "epoch": 0.05581382629621527, "grad_norm": 1.7770010232925415, "learning_rate": 1.996521458152359e-05, "loss": 0.6156, "step": 2041 }, { "epoch": 0.05584117260993218, "grad_norm": 18.83879280090332, "learning_rate": 1.996514073089228e-05, "loss": 1.0001, "step": 2042 }, { "epoch": 0.055868518923649094, "grad_norm": 1.594638705253601, "learning_rate": 1.9965066802087248e-05, "loss": 0.5917, "step": 2043 }, { "epoch": 0.055895865237366005, "grad_norm": 1.402978777885437, "learning_rate": 1.996499279510907e-05, "loss": 0.439, "step": 2044 }, { "epoch": 0.055923211551082916, "grad_norm": 1.9824223518371582, "learning_rate": 1.996491870995833e-05, "loss": 0.5859, "step": 2045 }, { "epoch": 0.05595055786479983, "grad_norm": 1.8482972383499146, "learning_rate": 1.9964844546635605e-05, "loss": 0.6015, "step": 2046 }, { "epoch": 0.05597790417851674, "grad_norm": 1.6261488199234009, "learning_rate": 1.9964770305141478e-05, "loss": 0.5715, "step": 2047 }, { "epoch": 0.05600525049223365, "grad_norm": 2.044555902481079, "learning_rate": 1.9964695985476537e-05, "loss": 0.5832, "step": 2048 }, { "epoch": 0.05603259680595056, "grad_norm": 1.451370120048523, "learning_rate": 1.9964621587641355e-05, "loss": 0.5824, "step": 2049 }, { "epoch": 0.05605994311966747, "grad_norm": 1.7740880250930786, "learning_rate": 1.9964547111636524e-05, "loss": 0.5723, "step": 2050 }, { "epoch": 0.05608728943338438, "grad_norm": 1.5226547718048096, "learning_rate": 1.9964472557462623e-05, "loss": 0.611, "step": 2051 }, { "epoch": 0.056114635747101294, "grad_norm": 1.9409703016281128, "learning_rate": 1.996439792512024e-05, "loss": 0.5608, "step": 2052 }, { "epoch": 0.056141982060818205, "grad_norm": 4.343430519104004, "learning_rate": 1.996432321460996e-05, "loss": 1.058, "step": 2053 }, { "epoch": 0.056169328374535116, "grad_norm": 2.3308160305023193, "learning_rate": 1.996424842593237e-05, "loss": 0.5711, "step": 2054 }, { "epoch": 0.05619667468825203, "grad_norm": 2.2356510162353516, "learning_rate": 1.996417355908805e-05, "loss": 0.5533, "step": 2055 }, { "epoch": 0.05622402100196894, "grad_norm": 3.3581151962280273, "learning_rate": 1.9964098614077598e-05, "loss": 0.4706, "step": 2056 }, { "epoch": 0.05625136731568584, "grad_norm": 2.3250086307525635, "learning_rate": 1.9964023590901592e-05, "loss": 0.5885, "step": 2057 }, { "epoch": 0.05627871362940275, "grad_norm": 1.776869773864746, "learning_rate": 1.996394848956063e-05, "loss": 0.5661, "step": 2058 }, { "epoch": 0.056306059943119664, "grad_norm": 1.6892894506454468, "learning_rate": 1.9963873310055292e-05, "loss": 0.5749, "step": 2059 }, { "epoch": 0.056333406256836575, "grad_norm": 2.5765974521636963, "learning_rate": 1.9963798052386177e-05, "loss": 0.9664, "step": 2060 }, { "epoch": 0.056360752570553486, "grad_norm": 1.3856191635131836, "learning_rate": 1.9963722716553864e-05, "loss": 0.6013, "step": 2061 }, { "epoch": 0.0563880988842704, "grad_norm": 1.8923393487930298, "learning_rate": 1.9963647302558954e-05, "loss": 0.6181, "step": 2062 }, { "epoch": 0.05641544519798731, "grad_norm": 1.5141136646270752, "learning_rate": 1.9963571810402035e-05, "loss": 0.5819, "step": 2063 }, { "epoch": 0.05644279151170422, "grad_norm": 1.3553152084350586, "learning_rate": 1.9963496240083698e-05, "loss": 0.5923, "step": 2064 }, { "epoch": 0.05647013782542113, "grad_norm": 1.975037932395935, "learning_rate": 1.996342059160454e-05, "loss": 0.5953, "step": 2065 }, { "epoch": 0.05649748413913804, "grad_norm": 1.3897053003311157, "learning_rate": 1.996334486496515e-05, "loss": 0.5077, "step": 2066 }, { "epoch": 0.05652483045285495, "grad_norm": 1.9917281866073608, "learning_rate": 1.996326906016612e-05, "loss": 0.625, "step": 2067 }, { "epoch": 0.056552176766571864, "grad_norm": 2.3950583934783936, "learning_rate": 1.996319317720805e-05, "loss": 0.5976, "step": 2068 }, { "epoch": 0.056579523080288775, "grad_norm": 1.537669062614441, "learning_rate": 1.9963117216091534e-05, "loss": 0.421, "step": 2069 }, { "epoch": 0.056606869394005686, "grad_norm": 1.3822641372680664, "learning_rate": 1.996304117681717e-05, "loss": 0.4485, "step": 2070 }, { "epoch": 0.0566342157077226, "grad_norm": 1.6663966178894043, "learning_rate": 1.9962965059385547e-05, "loss": 0.5571, "step": 2071 }, { "epoch": 0.05666156202143951, "grad_norm": 2.3355579376220703, "learning_rate": 1.996288886379727e-05, "loss": 0.6828, "step": 2072 }, { "epoch": 0.05668890833515642, "grad_norm": 3.368081569671631, "learning_rate": 1.996281259005293e-05, "loss": 0.6305, "step": 2073 }, { "epoch": 0.05671625464887333, "grad_norm": 1.873714804649353, "learning_rate": 1.996273623815313e-05, "loss": 0.5606, "step": 2074 }, { "epoch": 0.05674360096259024, "grad_norm": 1.3613159656524658, "learning_rate": 1.9962659808098474e-05, "loss": 0.5994, "step": 2075 }, { "epoch": 0.05677094727630715, "grad_norm": 1.8301875591278076, "learning_rate": 1.996258329988955e-05, "loss": 0.5971, "step": 2076 }, { "epoch": 0.056798293590024064, "grad_norm": 3.1407864093780518, "learning_rate": 1.9962506713526965e-05, "loss": 0.9899, "step": 2077 }, { "epoch": 0.056825639903740975, "grad_norm": 2.1175801753997803, "learning_rate": 1.9962430049011318e-05, "loss": 0.6069, "step": 2078 }, { "epoch": 0.056852986217457886, "grad_norm": 2.1322567462921143, "learning_rate": 1.996235330634321e-05, "loss": 0.5966, "step": 2079 }, { "epoch": 0.0568803325311748, "grad_norm": 1.8825480937957764, "learning_rate": 1.9962276485523247e-05, "loss": 0.5553, "step": 2080 }, { "epoch": 0.05690767884489171, "grad_norm": 1.86295747756958, "learning_rate": 1.9962199586552024e-05, "loss": 0.6931, "step": 2081 }, { "epoch": 0.05693502515860862, "grad_norm": 3.132462978363037, "learning_rate": 1.9962122609430153e-05, "loss": 0.6378, "step": 2082 }, { "epoch": 0.05696237147232553, "grad_norm": 1.9410065412521362, "learning_rate": 1.9962045554158234e-05, "loss": 0.5999, "step": 2083 }, { "epoch": 0.05698971778604244, "grad_norm": 2.6183812618255615, "learning_rate": 1.996196842073687e-05, "loss": 0.6441, "step": 2084 }, { "epoch": 0.05701706409975935, "grad_norm": 4.671468734741211, "learning_rate": 1.9961891209166666e-05, "loss": 0.4769, "step": 2085 }, { "epoch": 0.057044410413476264, "grad_norm": 2.0833373069763184, "learning_rate": 1.9961813919448228e-05, "loss": 0.643, "step": 2086 }, { "epoch": 0.057071756727193175, "grad_norm": 2.109419107437134, "learning_rate": 1.9961736551582167e-05, "loss": 0.6141, "step": 2087 }, { "epoch": 0.057099103040910086, "grad_norm": 1.597983717918396, "learning_rate": 1.9961659105569084e-05, "loss": 0.4343, "step": 2088 }, { "epoch": 0.057126449354627, "grad_norm": 1.7428233623504639, "learning_rate": 1.996158158140959e-05, "loss": 0.6038, "step": 2089 }, { "epoch": 0.05715379566834391, "grad_norm": 1.7089468240737915, "learning_rate": 1.996150397910429e-05, "loss": 0.6269, "step": 2090 }, { "epoch": 0.05718114198206082, "grad_norm": 2.0774102210998535, "learning_rate": 1.9961426298653798e-05, "loss": 0.404, "step": 2091 }, { "epoch": 0.05720848829577773, "grad_norm": 1.6769336462020874, "learning_rate": 1.9961348540058716e-05, "loss": 0.5733, "step": 2092 }, { "epoch": 0.05723583460949464, "grad_norm": 1.900349736213684, "learning_rate": 1.9961270703319658e-05, "loss": 0.5766, "step": 2093 }, { "epoch": 0.05726318092321155, "grad_norm": 1.6928483247756958, "learning_rate": 1.9961192788437237e-05, "loss": 0.5734, "step": 2094 }, { "epoch": 0.057290527236928464, "grad_norm": 1.897495985031128, "learning_rate": 1.9961114795412065e-05, "loss": 0.9293, "step": 2095 }, { "epoch": 0.057317873550645375, "grad_norm": 1.5556317567825317, "learning_rate": 1.9961036724244747e-05, "loss": 0.5795, "step": 2096 }, { "epoch": 0.057345219864362286, "grad_norm": 1.723861813545227, "learning_rate": 1.9960958574935898e-05, "loss": 0.6178, "step": 2097 }, { "epoch": 0.0573725661780792, "grad_norm": 1.6283040046691895, "learning_rate": 1.9960880347486135e-05, "loss": 0.5579, "step": 2098 }, { "epoch": 0.05739991249179611, "grad_norm": 1.9482886791229248, "learning_rate": 1.9960802041896065e-05, "loss": 0.5854, "step": 2099 }, { "epoch": 0.05742725880551302, "grad_norm": 1.709329605102539, "learning_rate": 1.996072365816631e-05, "loss": 0.9822, "step": 2100 }, { "epoch": 0.05745460511922993, "grad_norm": 1.8074928522109985, "learning_rate": 1.996064519629748e-05, "loss": 0.6106, "step": 2101 }, { "epoch": 0.05748195143294684, "grad_norm": 2.0039689540863037, "learning_rate": 1.996056665629019e-05, "loss": 0.6067, "step": 2102 }, { "epoch": 0.05750929774666375, "grad_norm": 1.843922734260559, "learning_rate": 1.9960488038145062e-05, "loss": 0.6272, "step": 2103 }, { "epoch": 0.057536644060380664, "grad_norm": 1.838263750076294, "learning_rate": 1.99604093418627e-05, "loss": 0.6857, "step": 2104 }, { "epoch": 0.057563990374097575, "grad_norm": 2.0833003520965576, "learning_rate": 1.996033056744374e-05, "loss": 0.6201, "step": 2105 }, { "epoch": 0.057591336687814486, "grad_norm": 1.718459129333496, "learning_rate": 1.9960251714888782e-05, "loss": 0.6048, "step": 2106 }, { "epoch": 0.0576186830015314, "grad_norm": 2.764772653579712, "learning_rate": 1.9960172784198457e-05, "loss": 0.6394, "step": 2107 }, { "epoch": 0.0576460293152483, "grad_norm": 1.9971022605895996, "learning_rate": 1.996009377537338e-05, "loss": 0.6041, "step": 2108 }, { "epoch": 0.05767337562896521, "grad_norm": 2.396843671798706, "learning_rate": 1.9960014688414166e-05, "loss": 0.5897, "step": 2109 }, { "epoch": 0.05770072194268212, "grad_norm": 3.761075735092163, "learning_rate": 1.9959935523321445e-05, "loss": 0.949, "step": 2110 }, { "epoch": 0.057728068256399034, "grad_norm": 1.833544135093689, "learning_rate": 1.995985628009583e-05, "loss": 0.6064, "step": 2111 }, { "epoch": 0.057755414570115945, "grad_norm": 2.3288979530334473, "learning_rate": 1.9959776958737944e-05, "loss": 0.5669, "step": 2112 }, { "epoch": 0.05778276088383286, "grad_norm": 1.6998504400253296, "learning_rate": 1.995969755924841e-05, "loss": 0.6031, "step": 2113 }, { "epoch": 0.05781010719754977, "grad_norm": 2.649247884750366, "learning_rate": 1.9959618081627857e-05, "loss": 0.5844, "step": 2114 }, { "epoch": 0.05783745351126668, "grad_norm": 1.6325833797454834, "learning_rate": 1.99595385258769e-05, "loss": 0.6129, "step": 2115 }, { "epoch": 0.05786479982498359, "grad_norm": 1.7450674772262573, "learning_rate": 1.9959458891996166e-05, "loss": 0.6652, "step": 2116 }, { "epoch": 0.0578921461387005, "grad_norm": 1.7615605592727661, "learning_rate": 1.9959379179986278e-05, "loss": 0.6068, "step": 2117 }, { "epoch": 0.05791949245241741, "grad_norm": 4.233437538146973, "learning_rate": 1.9959299389847864e-05, "loss": 0.9407, "step": 2118 }, { "epoch": 0.05794683876613432, "grad_norm": 1.9699187278747559, "learning_rate": 1.9959219521581553e-05, "loss": 0.6259, "step": 2119 }, { "epoch": 0.057974185079851234, "grad_norm": 1.6703619956970215, "learning_rate": 1.9959139575187962e-05, "loss": 1.0101, "step": 2120 }, { "epoch": 0.058001531393568145, "grad_norm": 2.0226168632507324, "learning_rate": 1.9959059550667725e-05, "loss": 0.6287, "step": 2121 }, { "epoch": 0.058028877707285056, "grad_norm": 2.2701144218444824, "learning_rate": 1.995897944802147e-05, "loss": 0.5914, "step": 2122 }, { "epoch": 0.05805622402100197, "grad_norm": 1.653259515762329, "learning_rate": 1.9958899267249825e-05, "loss": 0.575, "step": 2123 }, { "epoch": 0.05808357033471888, "grad_norm": 1.7463343143463135, "learning_rate": 1.995881900835342e-05, "loss": 0.6194, "step": 2124 }, { "epoch": 0.05811091664843579, "grad_norm": 1.5373784303665161, "learning_rate": 1.9958738671332876e-05, "loss": 0.6151, "step": 2125 }, { "epoch": 0.0581382629621527, "grad_norm": 1.6828744411468506, "learning_rate": 1.9958658256188833e-05, "loss": 0.4593, "step": 2126 }, { "epoch": 0.05816560927586961, "grad_norm": 2.592730760574341, "learning_rate": 1.995857776292192e-05, "loss": 0.9662, "step": 2127 }, { "epoch": 0.05819295558958652, "grad_norm": 1.7152187824249268, "learning_rate": 1.9958497191532764e-05, "loss": 0.5638, "step": 2128 }, { "epoch": 0.058220301903303434, "grad_norm": 1.9711252450942993, "learning_rate": 1.9958416542022e-05, "loss": 0.5785, "step": 2129 }, { "epoch": 0.058247648217020345, "grad_norm": 1.7336076498031616, "learning_rate": 1.9958335814390267e-05, "loss": 0.6067, "step": 2130 }, { "epoch": 0.058274994530737256, "grad_norm": 1.5591235160827637, "learning_rate": 1.9958255008638186e-05, "loss": 0.588, "step": 2131 }, { "epoch": 0.05830234084445417, "grad_norm": 1.8848121166229248, "learning_rate": 1.9958174124766398e-05, "loss": 0.4508, "step": 2132 }, { "epoch": 0.05832968715817108, "grad_norm": 1.9723000526428223, "learning_rate": 1.9958093162775537e-05, "loss": 0.5799, "step": 2133 }, { "epoch": 0.05835703347188799, "grad_norm": 1.651229977607727, "learning_rate": 1.9958012122666238e-05, "loss": 0.5902, "step": 2134 }, { "epoch": 0.0583843797856049, "grad_norm": 1.4329862594604492, "learning_rate": 1.9957931004439136e-05, "loss": 0.588, "step": 2135 }, { "epoch": 0.05841172609932181, "grad_norm": 3.0728719234466553, "learning_rate": 1.9957849808094868e-05, "loss": 0.5687, "step": 2136 }, { "epoch": 0.05843907241303872, "grad_norm": 1.5261825323104858, "learning_rate": 1.995776853363407e-05, "loss": 0.5856, "step": 2137 }, { "epoch": 0.058466418726755634, "grad_norm": 1.5087754726409912, "learning_rate": 1.995768718105738e-05, "loss": 0.5999, "step": 2138 }, { "epoch": 0.058493765040472545, "grad_norm": 2.042574882507324, "learning_rate": 1.9957605750365438e-05, "loss": 0.9666, "step": 2139 }, { "epoch": 0.058521111354189456, "grad_norm": 1.7440624237060547, "learning_rate": 1.9957524241558878e-05, "loss": 0.9953, "step": 2140 }, { "epoch": 0.05854845766790637, "grad_norm": 1.4445428848266602, "learning_rate": 1.9957442654638342e-05, "loss": 0.6212, "step": 2141 }, { "epoch": 0.05857580398162328, "grad_norm": 2.326165199279785, "learning_rate": 1.9957360989604476e-05, "loss": 0.9799, "step": 2142 }, { "epoch": 0.05860315029534019, "grad_norm": 1.6237778663635254, "learning_rate": 1.9957279246457913e-05, "loss": 0.5657, "step": 2143 }, { "epoch": 0.0586304966090571, "grad_norm": 1.6955405473709106, "learning_rate": 1.9957197425199295e-05, "loss": 0.5967, "step": 2144 }, { "epoch": 0.05865784292277401, "grad_norm": 1.4349387884140015, "learning_rate": 1.995711552582927e-05, "loss": 0.6165, "step": 2145 }, { "epoch": 0.05868518923649092, "grad_norm": 1.9969813823699951, "learning_rate": 1.995703354834847e-05, "loss": 0.5952, "step": 2146 }, { "epoch": 0.058712535550207834, "grad_norm": 1.8745992183685303, "learning_rate": 1.9956951492757546e-05, "loss": 0.5864, "step": 2147 }, { "epoch": 0.058739881863924745, "grad_norm": 1.721218228340149, "learning_rate": 1.9956869359057138e-05, "loss": 0.5649, "step": 2148 }, { "epoch": 0.058767228177641656, "grad_norm": 1.567185640335083, "learning_rate": 1.9956787147247895e-05, "loss": 0.6009, "step": 2149 }, { "epoch": 0.05879457449135857, "grad_norm": 5.647528171539307, "learning_rate": 1.9956704857330458e-05, "loss": 0.5929, "step": 2150 }, { "epoch": 0.05882192080507548, "grad_norm": 1.7954366207122803, "learning_rate": 1.9956622489305473e-05, "loss": 0.6293, "step": 2151 }, { "epoch": 0.05884926711879239, "grad_norm": 1.7296994924545288, "learning_rate": 1.995654004317359e-05, "loss": 0.6431, "step": 2152 }, { "epoch": 0.0588766134325093, "grad_norm": 1.7613303661346436, "learning_rate": 1.9956457518935447e-05, "loss": 0.543, "step": 2153 }, { "epoch": 0.05890395974622621, "grad_norm": 1.6458569765090942, "learning_rate": 1.99563749165917e-05, "loss": 0.6157, "step": 2154 }, { "epoch": 0.05893130605994312, "grad_norm": 2.3767035007476807, "learning_rate": 1.9956292236142993e-05, "loss": 1.0045, "step": 2155 }, { "epoch": 0.058958652373660034, "grad_norm": 1.605872392654419, "learning_rate": 1.9956209477589975e-05, "loss": 0.5469, "step": 2156 }, { "epoch": 0.058985998687376945, "grad_norm": 2.1112849712371826, "learning_rate": 1.9956126640933292e-05, "loss": 0.6192, "step": 2157 }, { "epoch": 0.059013345001093856, "grad_norm": 2.0879998207092285, "learning_rate": 1.9956043726173605e-05, "loss": 0.582, "step": 2158 }, { "epoch": 0.05904069131481076, "grad_norm": 1.9234201908111572, "learning_rate": 1.995596073331155e-05, "loss": 0.6007, "step": 2159 }, { "epoch": 0.05906803762852767, "grad_norm": 1.5326646566390991, "learning_rate": 1.9955877662347788e-05, "loss": 0.5909, "step": 2160 }, { "epoch": 0.05909538394224458, "grad_norm": 1.9617940187454224, "learning_rate": 1.995579451328297e-05, "loss": 0.6007, "step": 2161 }, { "epoch": 0.05912273025596149, "grad_norm": 2.234105110168457, "learning_rate": 1.9955711286117744e-05, "loss": 0.9803, "step": 2162 }, { "epoch": 0.059150076569678404, "grad_norm": 1.7309061288833618, "learning_rate": 1.9955627980852762e-05, "loss": 0.5788, "step": 2163 }, { "epoch": 0.059177422883395316, "grad_norm": 1.580890417098999, "learning_rate": 1.9955544597488682e-05, "loss": 0.5893, "step": 2164 }, { "epoch": 0.05920476919711223, "grad_norm": 2.2920918464660645, "learning_rate": 1.9955461136026155e-05, "loss": 0.602, "step": 2165 }, { "epoch": 0.05923211551082914, "grad_norm": 1.8262168169021606, "learning_rate": 1.995537759646584e-05, "loss": 0.4166, "step": 2166 }, { "epoch": 0.05925946182454605, "grad_norm": 2.458094358444214, "learning_rate": 1.9955293978808386e-05, "loss": 0.5908, "step": 2167 }, { "epoch": 0.05928680813826296, "grad_norm": 1.8325787782669067, "learning_rate": 1.9955210283054456e-05, "loss": 0.5975, "step": 2168 }, { "epoch": 0.05931415445197987, "grad_norm": 1.9864823818206787, "learning_rate": 1.9955126509204704e-05, "loss": 0.5801, "step": 2169 }, { "epoch": 0.05934150076569678, "grad_norm": 2.695436954498291, "learning_rate": 1.9955042657259782e-05, "loss": 0.5967, "step": 2170 }, { "epoch": 0.05936884707941369, "grad_norm": 2.0359833240509033, "learning_rate": 1.9954958727220353e-05, "loss": 0.6141, "step": 2171 }, { "epoch": 0.059396193393130604, "grad_norm": 2.017247200012207, "learning_rate": 1.9954874719087075e-05, "loss": 0.9405, "step": 2172 }, { "epoch": 0.059423539706847515, "grad_norm": 1.8048033714294434, "learning_rate": 1.9954790632860606e-05, "loss": 0.5539, "step": 2173 }, { "epoch": 0.05945088602056443, "grad_norm": 1.6294597387313843, "learning_rate": 1.9954706468541608e-05, "loss": 0.5929, "step": 2174 }, { "epoch": 0.05947823233428134, "grad_norm": 2.7485713958740234, "learning_rate": 1.9954622226130736e-05, "loss": 0.5928, "step": 2175 }, { "epoch": 0.05950557864799825, "grad_norm": 2.046109676361084, "learning_rate": 1.9954537905628655e-05, "loss": 0.4403, "step": 2176 }, { "epoch": 0.05953292496171516, "grad_norm": 2.1505017280578613, "learning_rate": 1.9954453507036028e-05, "loss": 0.9921, "step": 2177 }, { "epoch": 0.05956027127543207, "grad_norm": 2.033933401107788, "learning_rate": 1.9954369030353512e-05, "loss": 0.5941, "step": 2178 }, { "epoch": 0.05958761758914898, "grad_norm": 2.2097039222717285, "learning_rate": 1.9954284475581772e-05, "loss": 0.925, "step": 2179 }, { "epoch": 0.05961496390286589, "grad_norm": 1.8133461475372314, "learning_rate": 1.9954199842721475e-05, "loss": 0.602, "step": 2180 }, { "epoch": 0.059642310216582804, "grad_norm": 2.4332404136657715, "learning_rate": 1.995411513177328e-05, "loss": 0.6166, "step": 2181 }, { "epoch": 0.059669656530299715, "grad_norm": 2.0752487182617188, "learning_rate": 1.995403034273785e-05, "loss": 0.6831, "step": 2182 }, { "epoch": 0.059697002844016626, "grad_norm": 1.564119577407837, "learning_rate": 1.9953945475615856e-05, "loss": 0.5685, "step": 2183 }, { "epoch": 0.05972434915773354, "grad_norm": 1.4760173559188843, "learning_rate": 1.9953860530407962e-05, "loss": 0.5851, "step": 2184 }, { "epoch": 0.05975169547145045, "grad_norm": 2.221874952316284, "learning_rate": 1.995377550711483e-05, "loss": 0.448, "step": 2185 }, { "epoch": 0.05977904178516736, "grad_norm": 2.6624677181243896, "learning_rate": 1.9953690405737135e-05, "loss": 0.5702, "step": 2186 }, { "epoch": 0.05980638809888427, "grad_norm": 2.4200339317321777, "learning_rate": 1.9953605226275536e-05, "loss": 0.6405, "step": 2187 }, { "epoch": 0.05983373441260118, "grad_norm": 3.1260437965393066, "learning_rate": 1.9953519968730707e-05, "loss": 0.6293, "step": 2188 }, { "epoch": 0.05986108072631809, "grad_norm": 2.203667163848877, "learning_rate": 1.995343463310332e-05, "loss": 0.6024, "step": 2189 }, { "epoch": 0.059888427040035004, "grad_norm": 1.7802214622497559, "learning_rate": 1.995334921939403e-05, "loss": 0.5788, "step": 2190 }, { "epoch": 0.059915773353751915, "grad_norm": 2.550327777862549, "learning_rate": 1.9953263727603524e-05, "loss": 0.599, "step": 2191 }, { "epoch": 0.059943119667468826, "grad_norm": 2.183964729309082, "learning_rate": 1.995317815773246e-05, "loss": 0.6675, "step": 2192 }, { "epoch": 0.05997046598118574, "grad_norm": 1.765287160873413, "learning_rate": 1.9953092509781516e-05, "loss": 0.5712, "step": 2193 }, { "epoch": 0.05999781229490265, "grad_norm": 2.439188241958618, "learning_rate": 1.9953006783751363e-05, "loss": 0.5705, "step": 2194 }, { "epoch": 0.06002515860861956, "grad_norm": 1.6328057050704956, "learning_rate": 1.9952920979642672e-05, "loss": 0.5937, "step": 2195 }, { "epoch": 0.06005250492233647, "grad_norm": 1.9759292602539062, "learning_rate": 1.9952835097456117e-05, "loss": 0.595, "step": 2196 }, { "epoch": 0.06007985123605338, "grad_norm": 2.5372931957244873, "learning_rate": 1.995274913719237e-05, "loss": 0.5945, "step": 2197 }, { "epoch": 0.06010719754977029, "grad_norm": 3.2518386840820312, "learning_rate": 1.995266309885211e-05, "loss": 0.4413, "step": 2198 }, { "epoch": 0.060134543863487204, "grad_norm": 5.376275539398193, "learning_rate": 1.995257698243601e-05, "loss": 0.6125, "step": 2199 }, { "epoch": 0.060161890177204115, "grad_norm": 1.7358543872833252, "learning_rate": 1.9952490787944744e-05, "loss": 0.5712, "step": 2200 }, { "epoch": 0.060189236490921026, "grad_norm": 2.234575033187866, "learning_rate": 1.9952404515378986e-05, "loss": 0.596, "step": 2201 }, { "epoch": 0.06021658280463794, "grad_norm": 1.876882791519165, "learning_rate": 1.9952318164739415e-05, "loss": 0.9673, "step": 2202 }, { "epoch": 0.06024392911835485, "grad_norm": 2.751373767852783, "learning_rate": 1.995223173602671e-05, "loss": 0.5655, "step": 2203 }, { "epoch": 0.06027127543207176, "grad_norm": 2.315757989883423, "learning_rate": 1.995214522924155e-05, "loss": 0.6657, "step": 2204 }, { "epoch": 0.06029862174578867, "grad_norm": 2.3300085067749023, "learning_rate": 1.9952058644384607e-05, "loss": 0.6077, "step": 2205 }, { "epoch": 0.06032596805950558, "grad_norm": 2.3840832710266113, "learning_rate": 1.995197198145657e-05, "loss": 0.5912, "step": 2206 }, { "epoch": 0.06035331437322249, "grad_norm": 1.6725308895111084, "learning_rate": 1.9951885240458112e-05, "loss": 0.5937, "step": 2207 }, { "epoch": 0.060380660686939404, "grad_norm": 1.648084044456482, "learning_rate": 1.995179842138991e-05, "loss": 0.5782, "step": 2208 }, { "epoch": 0.060408007000656315, "grad_norm": 1.9369263648986816, "learning_rate": 1.9951711524252656e-05, "loss": 0.6017, "step": 2209 }, { "epoch": 0.06043535331437322, "grad_norm": 2.9237375259399414, "learning_rate": 1.995162454904702e-05, "loss": 0.5876, "step": 2210 }, { "epoch": 0.06046269962809013, "grad_norm": 1.9075852632522583, "learning_rate": 1.9951537495773695e-05, "loss": 0.5523, "step": 2211 }, { "epoch": 0.06049004594180704, "grad_norm": 2.278468132019043, "learning_rate": 1.995145036443336e-05, "loss": 0.5582, "step": 2212 }, { "epoch": 0.06051739225552395, "grad_norm": 1.8436356782913208, "learning_rate": 1.9951363155026696e-05, "loss": 0.603, "step": 2213 }, { "epoch": 0.060544738569240864, "grad_norm": 2.753657817840576, "learning_rate": 1.995127586755439e-05, "loss": 0.9392, "step": 2214 }, { "epoch": 0.060572084882957775, "grad_norm": 1.909407377243042, "learning_rate": 1.995118850201712e-05, "loss": 0.4595, "step": 2215 }, { "epoch": 0.060599431196674686, "grad_norm": 1.955099105834961, "learning_rate": 1.995110105841558e-05, "loss": 0.6347, "step": 2216 }, { "epoch": 0.0606267775103916, "grad_norm": 2.3159008026123047, "learning_rate": 1.995101353675045e-05, "loss": 0.6119, "step": 2217 }, { "epoch": 0.06065412382410851, "grad_norm": 1.9820082187652588, "learning_rate": 1.9950925937022423e-05, "loss": 0.5893, "step": 2218 }, { "epoch": 0.06068147013782542, "grad_norm": 1.8661959171295166, "learning_rate": 1.995083825923218e-05, "loss": 0.6563, "step": 2219 }, { "epoch": 0.06070881645154233, "grad_norm": 1.730963110923767, "learning_rate": 1.9950750503380414e-05, "loss": 0.6006, "step": 2220 }, { "epoch": 0.06073616276525924, "grad_norm": 1.5779792070388794, "learning_rate": 1.995066266946781e-05, "loss": 0.5859, "step": 2221 }, { "epoch": 0.06076350907897615, "grad_norm": 1.897138237953186, "learning_rate": 1.9950574757495053e-05, "loss": 0.6068, "step": 2222 }, { "epoch": 0.06079085539269306, "grad_norm": 1.9824557304382324, "learning_rate": 1.995048676746284e-05, "loss": 0.5812, "step": 2223 }, { "epoch": 0.060818201706409974, "grad_norm": 2.0239508152008057, "learning_rate": 1.995039869937186e-05, "loss": 0.5966, "step": 2224 }, { "epoch": 0.060845548020126886, "grad_norm": 1.863458514213562, "learning_rate": 1.9950310553222807e-05, "loss": 0.5824, "step": 2225 }, { "epoch": 0.0608728943338438, "grad_norm": 1.7851825952529907, "learning_rate": 1.995022232901636e-05, "loss": 0.5922, "step": 2226 }, { "epoch": 0.06090024064756071, "grad_norm": 2.5828540325164795, "learning_rate": 1.9950134026753224e-05, "loss": 0.5825, "step": 2227 }, { "epoch": 0.06092758696127762, "grad_norm": 1.8682993650436401, "learning_rate": 1.9950045646434085e-05, "loss": 0.5633, "step": 2228 }, { "epoch": 0.06095493327499453, "grad_norm": 2.3796956539154053, "learning_rate": 1.9949957188059637e-05, "loss": 0.6113, "step": 2229 }, { "epoch": 0.06098227958871144, "grad_norm": 1.6218938827514648, "learning_rate": 1.9949868651630577e-05, "loss": 0.6092, "step": 2230 }, { "epoch": 0.06100962590242835, "grad_norm": 2.1579387187957764, "learning_rate": 1.9949780037147597e-05, "loss": 0.5873, "step": 2231 }, { "epoch": 0.06103697221614526, "grad_norm": 1.8528305292129517, "learning_rate": 1.9949691344611392e-05, "loss": 0.9683, "step": 2232 }, { "epoch": 0.061064318529862174, "grad_norm": 2.090973377227783, "learning_rate": 1.994960257402266e-05, "loss": 0.6288, "step": 2233 }, { "epoch": 0.061091664843579085, "grad_norm": 1.7514069080352783, "learning_rate": 1.9949513725382094e-05, "loss": 0.5867, "step": 2234 }, { "epoch": 0.061119011157296, "grad_norm": 1.4794286489486694, "learning_rate": 1.9949424798690393e-05, "loss": 0.5797, "step": 2235 }, { "epoch": 0.06114635747101291, "grad_norm": 1.6305077075958252, "learning_rate": 1.9949335793948255e-05, "loss": 0.5884, "step": 2236 }, { "epoch": 0.06117370378472982, "grad_norm": 2.2497124671936035, "learning_rate": 1.994924671115638e-05, "loss": 0.6833, "step": 2237 }, { "epoch": 0.06120105009844673, "grad_norm": 2.052626371383667, "learning_rate": 1.994915755031546e-05, "loss": 0.5968, "step": 2238 }, { "epoch": 0.06122839641216364, "grad_norm": 1.9211559295654297, "learning_rate": 1.99490683114262e-05, "loss": 0.5806, "step": 2239 }, { "epoch": 0.06125574272588055, "grad_norm": 2.409464120864868, "learning_rate": 1.99489789944893e-05, "loss": 0.5964, "step": 2240 }, { "epoch": 0.06128308903959746, "grad_norm": 2.089390277862549, "learning_rate": 1.9948889599505462e-05, "loss": 0.5916, "step": 2241 }, { "epoch": 0.061310435353314374, "grad_norm": 1.929934024810791, "learning_rate": 1.9948800126475386e-05, "loss": 0.5747, "step": 2242 }, { "epoch": 0.061337781667031285, "grad_norm": 1.498630404472351, "learning_rate": 1.994871057539977e-05, "loss": 0.5831, "step": 2243 }, { "epoch": 0.061365127980748196, "grad_norm": 2.210200786590576, "learning_rate": 1.9948620946279315e-05, "loss": 0.6046, "step": 2244 }, { "epoch": 0.06139247429446511, "grad_norm": 1.9586992263793945, "learning_rate": 1.9948531239114733e-05, "loss": 0.5924, "step": 2245 }, { "epoch": 0.06141982060818202, "grad_norm": 2.3570761680603027, "learning_rate": 1.9948441453906724e-05, "loss": 0.4714, "step": 2246 }, { "epoch": 0.06144716692189893, "grad_norm": 1.7011739015579224, "learning_rate": 1.994835159065599e-05, "loss": 0.5957, "step": 2247 }, { "epoch": 0.06147451323561584, "grad_norm": 1.9714428186416626, "learning_rate": 1.9948261649363237e-05, "loss": 0.9675, "step": 2248 }, { "epoch": 0.06150185954933275, "grad_norm": 2.492319107055664, "learning_rate": 1.9948171630029175e-05, "loss": 0.5785, "step": 2249 }, { "epoch": 0.06152920586304966, "grad_norm": 2.084446907043457, "learning_rate": 1.99480815326545e-05, "loss": 0.5862, "step": 2250 }, { "epoch": 0.061556552176766574, "grad_norm": 4.009796619415283, "learning_rate": 1.9947991357239928e-05, "loss": 0.4197, "step": 2251 }, { "epoch": 0.061583898490483485, "grad_norm": 2.0255353450775146, "learning_rate": 1.9947901103786163e-05, "loss": 0.5977, "step": 2252 }, { "epoch": 0.061611244804200396, "grad_norm": 2.003047466278076, "learning_rate": 1.9947810772293914e-05, "loss": 0.6028, "step": 2253 }, { "epoch": 0.06163859111791731, "grad_norm": 1.5042366981506348, "learning_rate": 1.994772036276389e-05, "loss": 0.5779, "step": 2254 }, { "epoch": 0.06166593743163422, "grad_norm": 1.8093202114105225, "learning_rate": 1.9947629875196797e-05, "loss": 0.6272, "step": 2255 }, { "epoch": 0.06169328374535113, "grad_norm": 2.2013866901397705, "learning_rate": 1.994753930959335e-05, "loss": 0.5961, "step": 2256 }, { "epoch": 0.06172063005906804, "grad_norm": 2.5246620178222656, "learning_rate": 1.9947448665954253e-05, "loss": 0.6285, "step": 2257 }, { "epoch": 0.06174797637278495, "grad_norm": 1.9294180870056152, "learning_rate": 1.9947357944280224e-05, "loss": 0.6795, "step": 2258 }, { "epoch": 0.06177532268650186, "grad_norm": 1.621824860572815, "learning_rate": 1.994726714457197e-05, "loss": 0.6174, "step": 2259 }, { "epoch": 0.061802669000218774, "grad_norm": 1.6207376718521118, "learning_rate": 1.9947176266830204e-05, "loss": 0.5671, "step": 2260 }, { "epoch": 0.06183001531393568, "grad_norm": 1.963847041130066, "learning_rate": 1.994708531105564e-05, "loss": 0.6862, "step": 2261 }, { "epoch": 0.06185736162765259, "grad_norm": 2.326493263244629, "learning_rate": 1.994699427724899e-05, "loss": 0.605, "step": 2262 }, { "epoch": 0.0618847079413695, "grad_norm": 1.8877267837524414, "learning_rate": 1.994690316541097e-05, "loss": 0.6295, "step": 2263 }, { "epoch": 0.06191205425508641, "grad_norm": 1.9361796379089355, "learning_rate": 1.9946811975542296e-05, "loss": 1.0022, "step": 2264 }, { "epoch": 0.06193940056880332, "grad_norm": 3.4298110008239746, "learning_rate": 1.994672070764368e-05, "loss": 0.6326, "step": 2265 }, { "epoch": 0.061966746882520234, "grad_norm": 1.8335516452789307, "learning_rate": 1.9946629361715838e-05, "loss": 0.4663, "step": 2266 }, { "epoch": 0.061994093196237145, "grad_norm": 1.5941964387893677, "learning_rate": 1.994653793775949e-05, "loss": 0.5996, "step": 2267 }, { "epoch": 0.062021439509954056, "grad_norm": 2.243131160736084, "learning_rate": 1.994644643577535e-05, "loss": 0.586, "step": 2268 }, { "epoch": 0.06204878582367097, "grad_norm": 1.7279446125030518, "learning_rate": 1.9946354855764136e-05, "loss": 0.6093, "step": 2269 }, { "epoch": 0.06207613213738788, "grad_norm": 2.3055715560913086, "learning_rate": 1.9946263197726568e-05, "loss": 0.5963, "step": 2270 }, { "epoch": 0.06210347845110479, "grad_norm": 1.880074381828308, "learning_rate": 1.9946171461663368e-05, "loss": 0.6099, "step": 2271 }, { "epoch": 0.0621308247648217, "grad_norm": 2.081134557723999, "learning_rate": 1.9946079647575246e-05, "loss": 0.5686, "step": 2272 }, { "epoch": 0.06215817107853861, "grad_norm": 2.0734307765960693, "learning_rate": 1.994598775546293e-05, "loss": 0.9891, "step": 2273 }, { "epoch": 0.06218551739225552, "grad_norm": 1.8348898887634277, "learning_rate": 1.994589578532714e-05, "loss": 0.6603, "step": 2274 }, { "epoch": 0.062212863705972433, "grad_norm": 2.1179165840148926, "learning_rate": 1.99458037371686e-05, "loss": 0.6066, "step": 2275 }, { "epoch": 0.062240210019689345, "grad_norm": 2.1637723445892334, "learning_rate": 1.994571161098802e-05, "loss": 0.6043, "step": 2276 }, { "epoch": 0.062267556333406256, "grad_norm": 1.9449076652526855, "learning_rate": 1.994561940678614e-05, "loss": 0.6274, "step": 2277 }, { "epoch": 0.06229490264712317, "grad_norm": 1.824188470840454, "learning_rate": 1.994552712456367e-05, "loss": 0.6147, "step": 2278 }, { "epoch": 0.06232224896084008, "grad_norm": 1.7060378789901733, "learning_rate": 1.9945434764321342e-05, "loss": 0.5825, "step": 2279 }, { "epoch": 0.06234959527455699, "grad_norm": 1.8730394840240479, "learning_rate": 1.9945342326059873e-05, "loss": 0.6001, "step": 2280 }, { "epoch": 0.0623769415882739, "grad_norm": 1.938247561454773, "learning_rate": 1.9945249809779995e-05, "loss": 0.5927, "step": 2281 }, { "epoch": 0.06240428790199081, "grad_norm": 2.000727891921997, "learning_rate": 1.994515721548243e-05, "loss": 0.5971, "step": 2282 }, { "epoch": 0.06243163421570772, "grad_norm": 1.6171084642410278, "learning_rate": 1.9945064543167905e-05, "loss": 0.5641, "step": 2283 }, { "epoch": 0.06245898052942463, "grad_norm": 6.613129138946533, "learning_rate": 1.994497179283715e-05, "loss": 0.6059, "step": 2284 }, { "epoch": 0.062486326843141544, "grad_norm": 2.5887420177459717, "learning_rate": 1.9944878964490887e-05, "loss": 0.6663, "step": 2285 }, { "epoch": 0.06251367315685845, "grad_norm": 1.458509922027588, "learning_rate": 1.994478605812985e-05, "loss": 0.4307, "step": 2286 }, { "epoch": 0.06254101947057536, "grad_norm": 1.9490638971328735, "learning_rate": 1.994469307375476e-05, "loss": 0.9697, "step": 2287 }, { "epoch": 0.06256836578429227, "grad_norm": 2.38979172706604, "learning_rate": 1.9944600011366358e-05, "loss": 0.6015, "step": 2288 }, { "epoch": 0.06259571209800918, "grad_norm": 1.8794442415237427, "learning_rate": 1.9944506870965363e-05, "loss": 1.008, "step": 2289 }, { "epoch": 0.06262305841172609, "grad_norm": 1.9469795227050781, "learning_rate": 1.9944413652552513e-05, "loss": 0.5658, "step": 2290 }, { "epoch": 0.062650404725443, "grad_norm": 2.067715644836426, "learning_rate": 1.9944320356128534e-05, "loss": 0.5698, "step": 2291 }, { "epoch": 0.06267775103915992, "grad_norm": 1.7151196002960205, "learning_rate": 1.9944226981694163e-05, "loss": 0.5792, "step": 2292 }, { "epoch": 0.06270509735287683, "grad_norm": 1.8640625476837158, "learning_rate": 1.9944133529250127e-05, "loss": 0.5844, "step": 2293 }, { "epoch": 0.06273244366659374, "grad_norm": 2.1401209831237793, "learning_rate": 1.9944039998797166e-05, "loss": 0.6119, "step": 2294 }, { "epoch": 0.06275978998031065, "grad_norm": 1.8909573554992676, "learning_rate": 1.9943946390336007e-05, "loss": 0.5967, "step": 2295 }, { "epoch": 0.06278713629402756, "grad_norm": 3.8924481868743896, "learning_rate": 1.9943852703867387e-05, "loss": 0.5931, "step": 2296 }, { "epoch": 0.06281448260774447, "grad_norm": 1.6259264945983887, "learning_rate": 1.9943758939392043e-05, "loss": 0.5888, "step": 2297 }, { "epoch": 0.06284182892146138, "grad_norm": 1.4966825246810913, "learning_rate": 1.9943665096910704e-05, "loss": 0.5768, "step": 2298 }, { "epoch": 0.06286917523517829, "grad_norm": 1.8329211473464966, "learning_rate": 1.9943571176424116e-05, "loss": 0.565, "step": 2299 }, { "epoch": 0.0628965215488952, "grad_norm": 1.7653642892837524, "learning_rate": 1.994347717793301e-05, "loss": 0.5931, "step": 2300 }, { "epoch": 0.06292386786261212, "grad_norm": 4.099376201629639, "learning_rate": 1.9943383101438118e-05, "loss": 0.6303, "step": 2301 }, { "epoch": 0.06295121417632903, "grad_norm": 2.1999032497406006, "learning_rate": 1.994328894694019e-05, "loss": 0.566, "step": 2302 }, { "epoch": 0.06297856049004594, "grad_norm": 1.7710975408554077, "learning_rate": 1.9943194714439954e-05, "loss": 0.5693, "step": 2303 }, { "epoch": 0.06300590680376285, "grad_norm": 3.4861271381378174, "learning_rate": 1.9943100403938156e-05, "loss": 1.0346, "step": 2304 }, { "epoch": 0.06303325311747976, "grad_norm": 2.9374988079071045, "learning_rate": 1.9943006015435534e-05, "loss": 0.9426, "step": 2305 }, { "epoch": 0.06306059943119667, "grad_norm": 2.045135736465454, "learning_rate": 1.994291154893283e-05, "loss": 0.5771, "step": 2306 }, { "epoch": 0.06308794574491358, "grad_norm": 2.3461389541625977, "learning_rate": 1.994281700443078e-05, "loss": 0.6177, "step": 2307 }, { "epoch": 0.06311529205863049, "grad_norm": 1.6210318803787231, "learning_rate": 1.994272238193013e-05, "loss": 0.5799, "step": 2308 }, { "epoch": 0.0631426383723474, "grad_norm": 1.666815161705017, "learning_rate": 1.9942627681431618e-05, "loss": 0.5962, "step": 2309 }, { "epoch": 0.06316998468606431, "grad_norm": 3.8267550468444824, "learning_rate": 1.9942532902935995e-05, "loss": 0.9762, "step": 2310 }, { "epoch": 0.06319733099978123, "grad_norm": 1.99782395362854, "learning_rate": 1.9942438046443997e-05, "loss": 0.6107, "step": 2311 }, { "epoch": 0.06322467731349814, "grad_norm": 3.2915587425231934, "learning_rate": 1.994234311195637e-05, "loss": 0.9817, "step": 2312 }, { "epoch": 0.06325202362721505, "grad_norm": 2.4149296283721924, "learning_rate": 1.9942248099473857e-05, "loss": 0.5912, "step": 2313 }, { "epoch": 0.06327936994093196, "grad_norm": 1.533762812614441, "learning_rate": 1.994215300899721e-05, "loss": 0.5948, "step": 2314 }, { "epoch": 0.06330671625464887, "grad_norm": 2.6304869651794434, "learning_rate": 1.994205784052717e-05, "loss": 0.6349, "step": 2315 }, { "epoch": 0.06333406256836578, "grad_norm": 1.7231292724609375, "learning_rate": 1.994196259406448e-05, "loss": 0.5954, "step": 2316 }, { "epoch": 0.06336140888208269, "grad_norm": 2.3264544010162354, "learning_rate": 1.99418672696099e-05, "loss": 0.6757, "step": 2317 }, { "epoch": 0.0633887551957996, "grad_norm": 1.866062879562378, "learning_rate": 1.9941771867164163e-05, "loss": 0.5768, "step": 2318 }, { "epoch": 0.06341610150951651, "grad_norm": 2.0007481575012207, "learning_rate": 1.994167638672802e-05, "loss": 0.6043, "step": 2319 }, { "epoch": 0.06344344782323343, "grad_norm": 2.045902967453003, "learning_rate": 1.994158082830223e-05, "loss": 0.6073, "step": 2320 }, { "epoch": 0.06347079413695034, "grad_norm": 4.248052597045898, "learning_rate": 1.9941485191887536e-05, "loss": 0.9995, "step": 2321 }, { "epoch": 0.06349814045066725, "grad_norm": 1.8665863275527954, "learning_rate": 1.9941389477484685e-05, "loss": 0.6063, "step": 2322 }, { "epoch": 0.06352548676438416, "grad_norm": 2.1916210651397705, "learning_rate": 1.9941293685094433e-05, "loss": 0.5504, "step": 2323 }, { "epoch": 0.06355283307810107, "grad_norm": 1.8653196096420288, "learning_rate": 1.994119781471753e-05, "loss": 0.5663, "step": 2324 }, { "epoch": 0.06358017939181798, "grad_norm": 2.6714160442352295, "learning_rate": 1.9941101866354725e-05, "loss": 0.675, "step": 2325 }, { "epoch": 0.06360752570553489, "grad_norm": 1.8018089532852173, "learning_rate": 1.9941005840006776e-05, "loss": 0.6171, "step": 2326 }, { "epoch": 0.0636348720192518, "grad_norm": 1.6258968114852905, "learning_rate": 1.9940909735674436e-05, "loss": 0.6107, "step": 2327 }, { "epoch": 0.06366221833296871, "grad_norm": 1.936837077140808, "learning_rate": 1.9940813553358455e-05, "loss": 0.9777, "step": 2328 }, { "epoch": 0.06368956464668563, "grad_norm": 2.3541817665100098, "learning_rate": 1.994071729305959e-05, "loss": 0.5831, "step": 2329 }, { "epoch": 0.06371691096040254, "grad_norm": 1.8644485473632812, "learning_rate": 1.9940620954778592e-05, "loss": 0.6154, "step": 2330 }, { "epoch": 0.06374425727411945, "grad_norm": 4.121081352233887, "learning_rate": 1.994052453851622e-05, "loss": 0.6153, "step": 2331 }, { "epoch": 0.06377160358783636, "grad_norm": 1.9665967226028442, "learning_rate": 1.9940428044273236e-05, "loss": 0.5813, "step": 2332 }, { "epoch": 0.06379894990155327, "grad_norm": 1.6926517486572266, "learning_rate": 1.994033147205039e-05, "loss": 0.5989, "step": 2333 }, { "epoch": 0.06382629621527018, "grad_norm": 1.8869736194610596, "learning_rate": 1.9940234821848437e-05, "loss": 0.6013, "step": 2334 }, { "epoch": 0.06385364252898709, "grad_norm": 2.1215054988861084, "learning_rate": 1.994013809366814e-05, "loss": 0.5858, "step": 2335 }, { "epoch": 0.063880988842704, "grad_norm": 2.5665807723999023, "learning_rate": 1.9940041287510258e-05, "loss": 0.6203, "step": 2336 }, { "epoch": 0.06390833515642091, "grad_norm": 2.186030149459839, "learning_rate": 1.9939944403375554e-05, "loss": 0.6321, "step": 2337 }, { "epoch": 0.06393568147013783, "grad_norm": 1.6316423416137695, "learning_rate": 1.9939847441264775e-05, "loss": 0.5754, "step": 2338 }, { "epoch": 0.06396302778385474, "grad_norm": 2.241652727127075, "learning_rate": 1.9939750401178695e-05, "loss": 0.5957, "step": 2339 }, { "epoch": 0.06399037409757165, "grad_norm": 2.03511905670166, "learning_rate": 1.9939653283118072e-05, "loss": 0.5856, "step": 2340 }, { "epoch": 0.06401772041128856, "grad_norm": 1.8046588897705078, "learning_rate": 1.9939556087083665e-05, "loss": 0.5445, "step": 2341 }, { "epoch": 0.06404506672500547, "grad_norm": 2.0315871238708496, "learning_rate": 1.9939458813076236e-05, "loss": 0.5746, "step": 2342 }, { "epoch": 0.06407241303872238, "grad_norm": 1.7037612199783325, "learning_rate": 1.993936146109655e-05, "loss": 0.5985, "step": 2343 }, { "epoch": 0.06409975935243929, "grad_norm": 5.351289749145508, "learning_rate": 1.993926403114537e-05, "loss": 0.5803, "step": 2344 }, { "epoch": 0.0641271056661562, "grad_norm": 1.6062958240509033, "learning_rate": 1.9939166523223467e-05, "loss": 0.5947, "step": 2345 }, { "epoch": 0.06415445197987311, "grad_norm": 1.805482268333435, "learning_rate": 1.9939068937331592e-05, "loss": 0.5815, "step": 2346 }, { "epoch": 0.06418179829359003, "grad_norm": 1.4971731901168823, "learning_rate": 1.9938971273470524e-05, "loss": 0.5699, "step": 2347 }, { "epoch": 0.06420914460730694, "grad_norm": 2.133462429046631, "learning_rate": 1.9938873531641018e-05, "loss": 0.5924, "step": 2348 }, { "epoch": 0.06423649092102385, "grad_norm": 2.3050003051757812, "learning_rate": 1.993877571184385e-05, "loss": 0.5872, "step": 2349 }, { "epoch": 0.06426383723474076, "grad_norm": 1.5211453437805176, "learning_rate": 1.9938677814079784e-05, "loss": 0.6106, "step": 2350 }, { "epoch": 0.06429118354845767, "grad_norm": 3.3889219760894775, "learning_rate": 1.9938579838349584e-05, "loss": 0.5767, "step": 2351 }, { "epoch": 0.06431852986217458, "grad_norm": 2.2055981159210205, "learning_rate": 1.9938481784654026e-05, "loss": 0.4472, "step": 2352 }, { "epoch": 0.06434587617589149, "grad_norm": 3.3165647983551025, "learning_rate": 1.9938383652993873e-05, "loss": 0.612, "step": 2353 }, { "epoch": 0.0643732224896084, "grad_norm": 1.854717493057251, "learning_rate": 1.9938285443369897e-05, "loss": 0.5914, "step": 2354 }, { "epoch": 0.06440056880332531, "grad_norm": 1.7535479068756104, "learning_rate": 1.993818715578287e-05, "loss": 0.6142, "step": 2355 }, { "epoch": 0.06442791511704223, "grad_norm": 1.8468152284622192, "learning_rate": 1.993808879023356e-05, "loss": 0.667, "step": 2356 }, { "epoch": 0.06445526143075914, "grad_norm": 1.898388147354126, "learning_rate": 1.9937990346722743e-05, "loss": 0.6173, "step": 2357 }, { "epoch": 0.06448260774447605, "grad_norm": 1.887538194656372, "learning_rate": 1.9937891825251183e-05, "loss": 0.5817, "step": 2358 }, { "epoch": 0.06450995405819296, "grad_norm": 1.8412506580352783, "learning_rate": 1.9937793225819663e-05, "loss": 0.6144, "step": 2359 }, { "epoch": 0.06453730037190987, "grad_norm": 2.001424789428711, "learning_rate": 1.993769454842895e-05, "loss": 0.5822, "step": 2360 }, { "epoch": 0.06456464668562678, "grad_norm": 2.445891857147217, "learning_rate": 1.993759579307982e-05, "loss": 0.5612, "step": 2361 }, { "epoch": 0.06459199299934369, "grad_norm": 1.6545881032943726, "learning_rate": 1.9937496959773052e-05, "loss": 0.6023, "step": 2362 }, { "epoch": 0.0646193393130606, "grad_norm": 2.1890954971313477, "learning_rate": 1.9937398048509412e-05, "loss": 0.5978, "step": 2363 }, { "epoch": 0.06464668562677751, "grad_norm": 1.9484424591064453, "learning_rate": 1.993729905928968e-05, "loss": 0.6264, "step": 2364 }, { "epoch": 0.06467403194049443, "grad_norm": 1.8423802852630615, "learning_rate": 1.993719999211464e-05, "loss": 0.591, "step": 2365 }, { "epoch": 0.06470137825421134, "grad_norm": 2.5233895778656006, "learning_rate": 1.993710084698506e-05, "loss": 0.5641, "step": 2366 }, { "epoch": 0.06472872456792825, "grad_norm": 1.8475364446640015, "learning_rate": 1.9937001623901716e-05, "loss": 0.6211, "step": 2367 }, { "epoch": 0.06475607088164516, "grad_norm": 1.8975731134414673, "learning_rate": 1.9936902322865396e-05, "loss": 0.5937, "step": 2368 }, { "epoch": 0.06478341719536207, "grad_norm": 1.6295251846313477, "learning_rate": 1.9936802943876868e-05, "loss": 0.5981, "step": 2369 }, { "epoch": 0.06481076350907898, "grad_norm": 2.615741729736328, "learning_rate": 1.993670348693692e-05, "loss": 0.5714, "step": 2370 }, { "epoch": 0.06483810982279589, "grad_norm": 1.9748954772949219, "learning_rate": 1.9936603952046333e-05, "loss": 0.5665, "step": 2371 }, { "epoch": 0.0648654561365128, "grad_norm": 1.8604099750518799, "learning_rate": 1.9936504339205883e-05, "loss": 0.5942, "step": 2372 }, { "epoch": 0.06489280245022971, "grad_norm": 2.3221383094787598, "learning_rate": 1.993640464841635e-05, "loss": 1.018, "step": 2373 }, { "epoch": 0.06492014876394663, "grad_norm": 5.512157917022705, "learning_rate": 1.993630487967852e-05, "loss": 0.6879, "step": 2374 }, { "epoch": 0.06494749507766354, "grad_norm": 2.691453456878662, "learning_rate": 1.9936205032993174e-05, "loss": 0.5674, "step": 2375 }, { "epoch": 0.06497484139138045, "grad_norm": 3.145071029663086, "learning_rate": 1.99361051083611e-05, "loss": 0.9806, "step": 2376 }, { "epoch": 0.06500218770509736, "grad_norm": 2.766765832901001, "learning_rate": 1.9936005105783072e-05, "loss": 0.591, "step": 2377 }, { "epoch": 0.06502953401881427, "grad_norm": 1.6880794763565063, "learning_rate": 1.993590502525988e-05, "loss": 0.593, "step": 2378 }, { "epoch": 0.06505688033253118, "grad_norm": 2.117734670639038, "learning_rate": 1.9935804866792316e-05, "loss": 0.6065, "step": 2379 }, { "epoch": 0.06508422664624809, "grad_norm": 2.81819486618042, "learning_rate": 1.9935704630381154e-05, "loss": 0.5632, "step": 2380 }, { "epoch": 0.065111572959965, "grad_norm": 2.6490561962127686, "learning_rate": 1.9935604316027187e-05, "loss": 0.6085, "step": 2381 }, { "epoch": 0.06513891927368191, "grad_norm": 2.263092517852783, "learning_rate": 1.99355039237312e-05, "loss": 0.4453, "step": 2382 }, { "epoch": 0.06516626558739883, "grad_norm": 1.8370400667190552, "learning_rate": 1.993540345349398e-05, "loss": 0.5898, "step": 2383 }, { "epoch": 0.06519361190111574, "grad_norm": 2.3197879791259766, "learning_rate": 1.9935302905316316e-05, "loss": 0.6916, "step": 2384 }, { "epoch": 0.06522095821483265, "grad_norm": 1.9123995304107666, "learning_rate": 1.9935202279198997e-05, "loss": 0.5976, "step": 2385 }, { "epoch": 0.06524830452854956, "grad_norm": 1.9293626546859741, "learning_rate": 1.9935101575142816e-05, "loss": 0.6148, "step": 2386 }, { "epoch": 0.06527565084226647, "grad_norm": 1.5079143047332764, "learning_rate": 1.9935000793148555e-05, "loss": 0.5692, "step": 2387 }, { "epoch": 0.06530299715598337, "grad_norm": 2.2180535793304443, "learning_rate": 1.9934899933217006e-05, "loss": 0.9632, "step": 2388 }, { "epoch": 0.06533034346970028, "grad_norm": 2.594661235809326, "learning_rate": 1.9934798995348967e-05, "loss": 0.5699, "step": 2389 }, { "epoch": 0.06535768978341719, "grad_norm": 1.7381131649017334, "learning_rate": 1.9934697979545224e-05, "loss": 0.6405, "step": 2390 }, { "epoch": 0.0653850360971341, "grad_norm": 1.60940420627594, "learning_rate": 1.993459688580657e-05, "loss": 0.5904, "step": 2391 }, { "epoch": 0.06541238241085101, "grad_norm": 1.8803529739379883, "learning_rate": 1.9934495714133803e-05, "loss": 0.5688, "step": 2392 }, { "epoch": 0.06543972872456792, "grad_norm": 3.6425790786743164, "learning_rate": 1.9934394464527712e-05, "loss": 0.5605, "step": 2393 }, { "epoch": 0.06546707503828483, "grad_norm": 1.9512780904769897, "learning_rate": 1.993429313698909e-05, "loss": 0.5869, "step": 2394 }, { "epoch": 0.06549442135200174, "grad_norm": 1.6933820247650146, "learning_rate": 1.9934191731518736e-05, "loss": 0.5692, "step": 2395 }, { "epoch": 0.06552176766571866, "grad_norm": 1.6868927478790283, "learning_rate": 1.9934090248117443e-05, "loss": 0.5866, "step": 2396 }, { "epoch": 0.06554911397943557, "grad_norm": 1.8749029636383057, "learning_rate": 1.993398868678601e-05, "loss": 0.597, "step": 2397 }, { "epoch": 0.06557646029315248, "grad_norm": 1.7639843225479126, "learning_rate": 1.9933887047525226e-05, "loss": 0.6543, "step": 2398 }, { "epoch": 0.06560380660686939, "grad_norm": 2.4061901569366455, "learning_rate": 1.9933785330335898e-05, "loss": 0.6162, "step": 2399 }, { "epoch": 0.0656311529205863, "grad_norm": 1.744725227355957, "learning_rate": 1.993368353521882e-05, "loss": 0.5589, "step": 2400 }, { "epoch": 0.06565849923430321, "grad_norm": 2.1120622158050537, "learning_rate": 1.993358166217479e-05, "loss": 0.5995, "step": 2401 }, { "epoch": 0.06568584554802012, "grad_norm": 1.8740061521530151, "learning_rate": 1.9933479711204608e-05, "loss": 0.625, "step": 2402 }, { "epoch": 0.06571319186173703, "grad_norm": 2.319143295288086, "learning_rate": 1.993337768230907e-05, "loss": 0.9673, "step": 2403 }, { "epoch": 0.06574053817545394, "grad_norm": 2.4471781253814697, "learning_rate": 1.993327557548898e-05, "loss": 0.6504, "step": 2404 }, { "epoch": 0.06576788448917086, "grad_norm": 3.410419225692749, "learning_rate": 1.993317339074514e-05, "loss": 0.5941, "step": 2405 }, { "epoch": 0.06579523080288777, "grad_norm": 1.6708530187606812, "learning_rate": 1.993307112807835e-05, "loss": 0.59, "step": 2406 }, { "epoch": 0.06582257711660468, "grad_norm": 2.1846847534179688, "learning_rate": 1.9932968787489414e-05, "loss": 0.6085, "step": 2407 }, { "epoch": 0.06584992343032159, "grad_norm": 1.9815024137496948, "learning_rate": 1.993286636897913e-05, "loss": 0.5858, "step": 2408 }, { "epoch": 0.0658772697440385, "grad_norm": 1.5609391927719116, "learning_rate": 1.9932763872548306e-05, "loss": 0.9388, "step": 2409 }, { "epoch": 0.06590461605775541, "grad_norm": 3.035815954208374, "learning_rate": 1.9932661298197746e-05, "loss": 0.5816, "step": 2410 }, { "epoch": 0.06593196237147232, "grad_norm": 2.7739548683166504, "learning_rate": 1.9932558645928252e-05, "loss": 0.5973, "step": 2411 }, { "epoch": 0.06595930868518923, "grad_norm": 1.9441591501235962, "learning_rate": 1.993245591574063e-05, "loss": 0.5536, "step": 2412 }, { "epoch": 0.06598665499890614, "grad_norm": 1.6077126264572144, "learning_rate": 1.9932353107635686e-05, "loss": 1.0032, "step": 2413 }, { "epoch": 0.06601400131262306, "grad_norm": 2.291722297668457, "learning_rate": 1.993225022161423e-05, "loss": 0.5844, "step": 2414 }, { "epoch": 0.06604134762633997, "grad_norm": 2.1827809810638428, "learning_rate": 1.9932147257677066e-05, "loss": 0.4718, "step": 2415 }, { "epoch": 0.06606869394005688, "grad_norm": 1.7162561416625977, "learning_rate": 1.9932044215825003e-05, "loss": 0.6644, "step": 2416 }, { "epoch": 0.06609604025377379, "grad_norm": 1.8545663356781006, "learning_rate": 1.9931941096058846e-05, "loss": 0.5713, "step": 2417 }, { "epoch": 0.0661233865674907, "grad_norm": 1.6699979305267334, "learning_rate": 1.9931837898379403e-05, "loss": 0.6138, "step": 2418 }, { "epoch": 0.06615073288120761, "grad_norm": 1.5114362239837646, "learning_rate": 1.993173462278749e-05, "loss": 0.5826, "step": 2419 }, { "epoch": 0.06617807919492452, "grad_norm": 1.8156288862228394, "learning_rate": 1.9931631269283916e-05, "loss": 0.5898, "step": 2420 }, { "epoch": 0.06620542550864143, "grad_norm": 1.699734091758728, "learning_rate": 1.9931527837869485e-05, "loss": 0.5699, "step": 2421 }, { "epoch": 0.06623277182235834, "grad_norm": 2.6002960205078125, "learning_rate": 1.9931424328545018e-05, "loss": 0.6014, "step": 2422 }, { "epoch": 0.06626011813607526, "grad_norm": 1.716444492340088, "learning_rate": 1.9931320741311317e-05, "loss": 0.6127, "step": 2423 }, { "epoch": 0.06628746444979217, "grad_norm": 2.5601730346679688, "learning_rate": 1.9931217076169204e-05, "loss": 0.5705, "step": 2424 }, { "epoch": 0.06631481076350908, "grad_norm": 2.24489688873291, "learning_rate": 1.9931113333119486e-05, "loss": 0.5798, "step": 2425 }, { "epoch": 0.06634215707722599, "grad_norm": 2.671926498413086, "learning_rate": 1.993100951216298e-05, "loss": 0.5499, "step": 2426 }, { "epoch": 0.0663695033909429, "grad_norm": 1.7897909879684448, "learning_rate": 1.9930905613300497e-05, "loss": 0.6729, "step": 2427 }, { "epoch": 0.06639684970465981, "grad_norm": 1.437692642211914, "learning_rate": 1.9930801636532858e-05, "loss": 0.4168, "step": 2428 }, { "epoch": 0.06642419601837672, "grad_norm": 1.6278250217437744, "learning_rate": 1.993069758186087e-05, "loss": 0.5968, "step": 2429 }, { "epoch": 0.06645154233209363, "grad_norm": 1.9640543460845947, "learning_rate": 1.9930593449285357e-05, "loss": 0.5789, "step": 2430 }, { "epoch": 0.06647888864581054, "grad_norm": 3.5315866470336914, "learning_rate": 1.9930489238807134e-05, "loss": 0.9597, "step": 2431 }, { "epoch": 0.06650623495952745, "grad_norm": 2.8293986320495605, "learning_rate": 1.9930384950427014e-05, "loss": 0.5845, "step": 2432 }, { "epoch": 0.06653358127324437, "grad_norm": 1.5886706113815308, "learning_rate": 1.9930280584145826e-05, "loss": 0.5738, "step": 2433 }, { "epoch": 0.06656092758696128, "grad_norm": 1.883467197418213, "learning_rate": 1.9930176139964373e-05, "loss": 0.6194, "step": 2434 }, { "epoch": 0.06658827390067819, "grad_norm": 1.943069338798523, "learning_rate": 1.9930071617883486e-05, "loss": 0.6704, "step": 2435 }, { "epoch": 0.0666156202143951, "grad_norm": 1.6350644826889038, "learning_rate": 1.9929967017903983e-05, "loss": 0.6071, "step": 2436 }, { "epoch": 0.06664296652811201, "grad_norm": 1.798638939857483, "learning_rate": 1.9929862340026682e-05, "loss": 0.9591, "step": 2437 }, { "epoch": 0.06667031284182892, "grad_norm": 1.7307655811309814, "learning_rate": 1.9929757584252406e-05, "loss": 0.642, "step": 2438 }, { "epoch": 0.06669765915554583, "grad_norm": 1.981839895248413, "learning_rate": 1.9929652750581975e-05, "loss": 0.597, "step": 2439 }, { "epoch": 0.06672500546926274, "grad_norm": 1.3671904802322388, "learning_rate": 1.9929547839016216e-05, "loss": 0.605, "step": 2440 }, { "epoch": 0.06675235178297965, "grad_norm": 1.4419668912887573, "learning_rate": 1.9929442849555948e-05, "loss": 0.6774, "step": 2441 }, { "epoch": 0.06677969809669657, "grad_norm": 1.8378641605377197, "learning_rate": 1.992933778220199e-05, "loss": 0.6114, "step": 2442 }, { "epoch": 0.06680704441041348, "grad_norm": 1.4613298177719116, "learning_rate": 1.992923263695518e-05, "loss": 0.5782, "step": 2443 }, { "epoch": 0.06683439072413039, "grad_norm": 1.8053762912750244, "learning_rate": 1.9929127413816326e-05, "loss": 0.3912, "step": 2444 }, { "epoch": 0.0668617370378473, "grad_norm": 1.9983569383621216, "learning_rate": 1.9929022112786264e-05, "loss": 0.5963, "step": 2445 }, { "epoch": 0.06688908335156421, "grad_norm": 1.815406084060669, "learning_rate": 1.992891673386582e-05, "loss": 0.5509, "step": 2446 }, { "epoch": 0.06691642966528112, "grad_norm": 2.0388236045837402, "learning_rate": 1.992881127705582e-05, "loss": 0.6097, "step": 2447 }, { "epoch": 0.06694377597899803, "grad_norm": 1.8550901412963867, "learning_rate": 1.9928705742357085e-05, "loss": 0.6215, "step": 2448 }, { "epoch": 0.06697112229271494, "grad_norm": 1.6416592597961426, "learning_rate": 1.992860012977045e-05, "loss": 0.5908, "step": 2449 }, { "epoch": 0.06699846860643185, "grad_norm": 1.697306752204895, "learning_rate": 1.992849443929674e-05, "loss": 0.5809, "step": 2450 }, { "epoch": 0.06702581492014877, "grad_norm": 1.4291486740112305, "learning_rate": 1.9928388670936787e-05, "loss": 0.5858, "step": 2451 }, { "epoch": 0.06705316123386568, "grad_norm": 2.3501014709472656, "learning_rate": 1.9928282824691418e-05, "loss": 0.6484, "step": 2452 }, { "epoch": 0.06708050754758259, "grad_norm": 2.074422836303711, "learning_rate": 1.9928176900561463e-05, "loss": 0.6047, "step": 2453 }, { "epoch": 0.0671078538612995, "grad_norm": 1.8136276006698608, "learning_rate": 1.9928070898547754e-05, "loss": 0.6162, "step": 2454 }, { "epoch": 0.06713520017501641, "grad_norm": 7.211296081542969, "learning_rate": 1.9927964818651127e-05, "loss": 0.61, "step": 2455 }, { "epoch": 0.06716254648873332, "grad_norm": 1.6384586095809937, "learning_rate": 1.9927858660872406e-05, "loss": 0.5653, "step": 2456 }, { "epoch": 0.06718989280245023, "grad_norm": 1.5115705728530884, "learning_rate": 1.992775242521243e-05, "loss": 0.5836, "step": 2457 }, { "epoch": 0.06721723911616714, "grad_norm": 1.5793278217315674, "learning_rate": 1.992764611167203e-05, "loss": 0.5804, "step": 2458 }, { "epoch": 0.06724458542988405, "grad_norm": 1.6337124109268188, "learning_rate": 1.992753972025204e-05, "loss": 0.5903, "step": 2459 }, { "epoch": 0.06727193174360097, "grad_norm": 1.7690051794052124, "learning_rate": 1.9927433250953293e-05, "loss": 0.6138, "step": 2460 }, { "epoch": 0.06729927805731788, "grad_norm": 2.164428949356079, "learning_rate": 1.9927326703776626e-05, "loss": 0.6651, "step": 2461 }, { "epoch": 0.06732662437103479, "grad_norm": 1.4492294788360596, "learning_rate": 1.9927220078722875e-05, "loss": 0.577, "step": 2462 }, { "epoch": 0.0673539706847517, "grad_norm": 2.230654716491699, "learning_rate": 1.9927113375792875e-05, "loss": 0.6281, "step": 2463 }, { "epoch": 0.06738131699846861, "grad_norm": 1.3535428047180176, "learning_rate": 1.992700659498747e-05, "loss": 0.5894, "step": 2464 }, { "epoch": 0.06740866331218552, "grad_norm": 1.8637372255325317, "learning_rate": 1.9926899736307487e-05, "loss": 0.9962, "step": 2465 }, { "epoch": 0.06743600962590243, "grad_norm": 1.5607514381408691, "learning_rate": 1.9926792799753772e-05, "loss": 0.4574, "step": 2466 }, { "epoch": 0.06746335593961934, "grad_norm": 1.8517671823501587, "learning_rate": 1.992668578532716e-05, "loss": 0.5918, "step": 2467 }, { "epoch": 0.06749070225333625, "grad_norm": 2.100417137145996, "learning_rate": 1.992657869302849e-05, "loss": 0.5645, "step": 2468 }, { "epoch": 0.06751804856705317, "grad_norm": 1.4829347133636475, "learning_rate": 1.9926471522858605e-05, "loss": 0.5992, "step": 2469 }, { "epoch": 0.06754539488077008, "grad_norm": 1.575649619102478, "learning_rate": 1.9926364274818347e-05, "loss": 0.5994, "step": 2470 }, { "epoch": 0.06757274119448699, "grad_norm": 2.0520288944244385, "learning_rate": 1.992625694890855e-05, "loss": 0.5927, "step": 2471 }, { "epoch": 0.0676000875082039, "grad_norm": 1.9723529815673828, "learning_rate": 1.9926149545130062e-05, "loss": 0.9743, "step": 2472 }, { "epoch": 0.06762743382192081, "grad_norm": 1.7935608625411987, "learning_rate": 1.9926042063483727e-05, "loss": 0.6035, "step": 2473 }, { "epoch": 0.06765478013563772, "grad_norm": 1.9068937301635742, "learning_rate": 1.992593450397038e-05, "loss": 0.6782, "step": 2474 }, { "epoch": 0.06768212644935463, "grad_norm": 1.807439923286438, "learning_rate": 1.992582686659088e-05, "loss": 0.5529, "step": 2475 }, { "epoch": 0.06770947276307154, "grad_norm": 1.8920780420303345, "learning_rate": 1.9925719151346055e-05, "loss": 0.6943, "step": 2476 }, { "epoch": 0.06773681907678845, "grad_norm": 2.166917562484741, "learning_rate": 1.9925611358236757e-05, "loss": 0.6506, "step": 2477 }, { "epoch": 0.06776416539050537, "grad_norm": 1.7351990938186646, "learning_rate": 1.992550348726383e-05, "loss": 0.6112, "step": 2478 }, { "epoch": 0.06779151170422228, "grad_norm": 1.8381500244140625, "learning_rate": 1.992539553842812e-05, "loss": 0.6154, "step": 2479 }, { "epoch": 0.06781885801793919, "grad_norm": 1.8343536853790283, "learning_rate": 1.992528751173048e-05, "loss": 0.6067, "step": 2480 }, { "epoch": 0.0678462043316561, "grad_norm": 1.7367559671401978, "learning_rate": 1.9925179407171753e-05, "loss": 0.4454, "step": 2481 }, { "epoch": 0.06787355064537301, "grad_norm": 1.8216407299041748, "learning_rate": 1.9925071224752783e-05, "loss": 0.5685, "step": 2482 }, { "epoch": 0.06790089695908992, "grad_norm": 1.444439172744751, "learning_rate": 1.9924962964474424e-05, "loss": 0.5937, "step": 2483 }, { "epoch": 0.06792824327280683, "grad_norm": 1.3521519899368286, "learning_rate": 1.9924854626337524e-05, "loss": 0.6084, "step": 2484 }, { "epoch": 0.06795558958652374, "grad_norm": 1.647044062614441, "learning_rate": 1.992474621034293e-05, "loss": 0.6283, "step": 2485 }, { "epoch": 0.06798293590024065, "grad_norm": 1.5197231769561768, "learning_rate": 1.99246377164915e-05, "loss": 0.5962, "step": 2486 }, { "epoch": 0.06801028221395757, "grad_norm": 2.0711653232574463, "learning_rate": 1.9924529144784075e-05, "loss": 0.6827, "step": 2487 }, { "epoch": 0.06803762852767448, "grad_norm": 1.7374125719070435, "learning_rate": 1.9924420495221516e-05, "loss": 0.6279, "step": 2488 }, { "epoch": 0.06806497484139137, "grad_norm": 1.7000012397766113, "learning_rate": 1.992431176780467e-05, "loss": 0.5972, "step": 2489 }, { "epoch": 0.06809232115510828, "grad_norm": 1.5048543214797974, "learning_rate": 1.992420296253439e-05, "loss": 0.5809, "step": 2490 }, { "epoch": 0.0681196674688252, "grad_norm": 1.9758706092834473, "learning_rate": 1.9924094079411532e-05, "loss": 0.5603, "step": 2491 }, { "epoch": 0.0681470137825421, "grad_norm": 3.439842700958252, "learning_rate": 1.9923985118436948e-05, "loss": 0.5747, "step": 2492 }, { "epoch": 0.06817436009625902, "grad_norm": 3.6703073978424072, "learning_rate": 1.9923876079611494e-05, "loss": 0.9736, "step": 2493 }, { "epoch": 0.06820170640997593, "grad_norm": 2.6106653213500977, "learning_rate": 1.992376696293602e-05, "loss": 0.5708, "step": 2494 }, { "epoch": 0.06822905272369284, "grad_norm": 1.9127862453460693, "learning_rate": 1.9923657768411396e-05, "loss": 0.5927, "step": 2495 }, { "epoch": 0.06825639903740975, "grad_norm": 1.941595435142517, "learning_rate": 1.9923548496038463e-05, "loss": 0.4751, "step": 2496 }, { "epoch": 0.06828374535112666, "grad_norm": 2.188429832458496, "learning_rate": 1.9923439145818087e-05, "loss": 0.5688, "step": 2497 }, { "epoch": 0.06831109166484357, "grad_norm": 1.7264149188995361, "learning_rate": 1.9923329717751124e-05, "loss": 0.5863, "step": 2498 }, { "epoch": 0.06833843797856048, "grad_norm": 1.7117791175842285, "learning_rate": 1.992322021183843e-05, "loss": 0.6164, "step": 2499 }, { "epoch": 0.0683657842922774, "grad_norm": 1.8452725410461426, "learning_rate": 1.992311062808087e-05, "loss": 0.62, "step": 2500 }, { "epoch": 0.0683931306059943, "grad_norm": 1.6791726350784302, "learning_rate": 1.9923000966479296e-05, "loss": 0.5987, "step": 2501 }, { "epoch": 0.06842047691971122, "grad_norm": 1.9744893312454224, "learning_rate": 1.992289122703457e-05, "loss": 0.547, "step": 2502 }, { "epoch": 0.06844782323342813, "grad_norm": 1.7803813219070435, "learning_rate": 1.9922781409747563e-05, "loss": 0.9518, "step": 2503 }, { "epoch": 0.06847516954714504, "grad_norm": 1.5341132879257202, "learning_rate": 1.992267151461912e-05, "loss": 0.605, "step": 2504 }, { "epoch": 0.06850251586086195, "grad_norm": 1.6655796766281128, "learning_rate": 1.9922561541650117e-05, "loss": 0.6048, "step": 2505 }, { "epoch": 0.06852986217457886, "grad_norm": 1.9369988441467285, "learning_rate": 1.9922451490841407e-05, "loss": 0.5729, "step": 2506 }, { "epoch": 0.06855720848829577, "grad_norm": 1.9090110063552856, "learning_rate": 1.9922341362193863e-05, "loss": 0.6894, "step": 2507 }, { "epoch": 0.06858455480201268, "grad_norm": 1.553381323814392, "learning_rate": 1.992223115570834e-05, "loss": 0.5765, "step": 2508 }, { "epoch": 0.0686119011157296, "grad_norm": 1.842016577720642, "learning_rate": 1.9922120871385705e-05, "loss": 0.5986, "step": 2509 }, { "epoch": 0.0686392474294465, "grad_norm": 1.4213453531265259, "learning_rate": 1.9922010509226823e-05, "loss": 0.5893, "step": 2510 }, { "epoch": 0.06866659374316342, "grad_norm": 1.6543549299240112, "learning_rate": 1.9921900069232566e-05, "loss": 0.6346, "step": 2511 }, { "epoch": 0.06869394005688033, "grad_norm": 1.928092360496521, "learning_rate": 1.992178955140379e-05, "loss": 0.6252, "step": 2512 }, { "epoch": 0.06872128637059724, "grad_norm": 1.4670472145080566, "learning_rate": 1.9921678955741372e-05, "loss": 0.5792, "step": 2513 }, { "epoch": 0.06874863268431415, "grad_norm": 1.6215925216674805, "learning_rate": 1.9921568282246172e-05, "loss": 0.576, "step": 2514 }, { "epoch": 0.06877597899803106, "grad_norm": 1.490285873413086, "learning_rate": 1.992145753091906e-05, "loss": 0.5914, "step": 2515 }, { "epoch": 0.06880332531174797, "grad_norm": 1.9202287197113037, "learning_rate": 1.9921346701760906e-05, "loss": 0.4454, "step": 2516 }, { "epoch": 0.06883067162546488, "grad_norm": 1.9550306797027588, "learning_rate": 1.992123579477258e-05, "loss": 0.9178, "step": 2517 }, { "epoch": 0.0688580179391818, "grad_norm": 1.811346411705017, "learning_rate": 1.992112480995495e-05, "loss": 0.6005, "step": 2518 }, { "epoch": 0.0688853642528987, "grad_norm": 2.8009555339813232, "learning_rate": 1.9921013747308892e-05, "loss": 0.6032, "step": 2519 }, { "epoch": 0.06891271056661562, "grad_norm": 1.9770500659942627, "learning_rate": 1.992090260683527e-05, "loss": 0.5927, "step": 2520 }, { "epoch": 0.06894005688033253, "grad_norm": 1.6719143390655518, "learning_rate": 1.9920791388534958e-05, "loss": 0.604, "step": 2521 }, { "epoch": 0.06896740319404944, "grad_norm": 2.0439748764038086, "learning_rate": 1.992068009240883e-05, "loss": 0.4424, "step": 2522 }, { "epoch": 0.06899474950776635, "grad_norm": 1.8950843811035156, "learning_rate": 1.992056871845776e-05, "loss": 0.6594, "step": 2523 }, { "epoch": 0.06902209582148326, "grad_norm": 2.1756482124328613, "learning_rate": 1.9920457266682624e-05, "loss": 0.5486, "step": 2524 }, { "epoch": 0.06904944213520017, "grad_norm": 1.741563320159912, "learning_rate": 1.9920345737084286e-05, "loss": 0.601, "step": 2525 }, { "epoch": 0.06907678844891708, "grad_norm": 1.754878044128418, "learning_rate": 1.992023412966363e-05, "loss": 0.5888, "step": 2526 }, { "epoch": 0.069104134762634, "grad_norm": 1.3791605234146118, "learning_rate": 1.9920122444421527e-05, "loss": 0.5958, "step": 2527 }, { "epoch": 0.0691314810763509, "grad_norm": 2.0242512226104736, "learning_rate": 1.9920010681358862e-05, "loss": 0.6028, "step": 2528 }, { "epoch": 0.06915882739006782, "grad_norm": 2.202732563018799, "learning_rate": 1.9919898840476496e-05, "loss": 0.5674, "step": 2529 }, { "epoch": 0.06918617370378473, "grad_norm": 1.9720216989517212, "learning_rate": 1.991978692177532e-05, "loss": 0.5891, "step": 2530 }, { "epoch": 0.06921352001750164, "grad_norm": 2.3348705768585205, "learning_rate": 1.9919674925256207e-05, "loss": 0.6778, "step": 2531 }, { "epoch": 0.06924086633121855, "grad_norm": 2.1428630352020264, "learning_rate": 1.9919562850920037e-05, "loss": 0.5587, "step": 2532 }, { "epoch": 0.06926821264493546, "grad_norm": 1.8159700632095337, "learning_rate": 1.9919450698767685e-05, "loss": 0.5817, "step": 2533 }, { "epoch": 0.06929555895865237, "grad_norm": 1.7390737533569336, "learning_rate": 1.9919338468800036e-05, "loss": 0.488, "step": 2534 }, { "epoch": 0.06932290527236928, "grad_norm": 2.3118176460266113, "learning_rate": 1.9919226161017967e-05, "loss": 0.6056, "step": 2535 }, { "epoch": 0.0693502515860862, "grad_norm": 2.789140224456787, "learning_rate": 1.9919113775422362e-05, "loss": 0.6163, "step": 2536 }, { "epoch": 0.0693775978998031, "grad_norm": 1.6371712684631348, "learning_rate": 1.99190013120141e-05, "loss": 0.5989, "step": 2537 }, { "epoch": 0.06940494421352002, "grad_norm": 2.0270731449127197, "learning_rate": 1.9918888770794063e-05, "loss": 0.5986, "step": 2538 }, { "epoch": 0.06943229052723693, "grad_norm": 2.3425843715667725, "learning_rate": 1.9918776151763138e-05, "loss": 0.5768, "step": 2539 }, { "epoch": 0.06945963684095384, "grad_norm": 1.9410966634750366, "learning_rate": 1.9918663454922204e-05, "loss": 0.5843, "step": 2540 }, { "epoch": 0.06948698315467075, "grad_norm": 6.708958148956299, "learning_rate": 1.9918550680272142e-05, "loss": 0.6131, "step": 2541 }, { "epoch": 0.06951432946838766, "grad_norm": 1.7526733875274658, "learning_rate": 1.991843782781385e-05, "loss": 0.6051, "step": 2542 }, { "epoch": 0.06954167578210457, "grad_norm": 1.7853145599365234, "learning_rate": 1.99183248975482e-05, "loss": 0.5893, "step": 2543 }, { "epoch": 0.06956902209582148, "grad_norm": 2.242347478866577, "learning_rate": 1.9918211889476083e-05, "loss": 0.6279, "step": 2544 }, { "epoch": 0.0695963684095384, "grad_norm": 2.118454933166504, "learning_rate": 1.991809880359838e-05, "loss": 0.633, "step": 2545 }, { "epoch": 0.0696237147232553, "grad_norm": 2.652407646179199, "learning_rate": 1.991798563991599e-05, "loss": 0.9639, "step": 2546 }, { "epoch": 0.06965106103697222, "grad_norm": 2.4439876079559326, "learning_rate": 1.9917872398429788e-05, "loss": 0.5867, "step": 2547 }, { "epoch": 0.06967840735068913, "grad_norm": 2.476719856262207, "learning_rate": 1.9917759079140676e-05, "loss": 0.5859, "step": 2548 }, { "epoch": 0.06970575366440604, "grad_norm": 2.1005775928497314, "learning_rate": 1.991764568204953e-05, "loss": 0.5976, "step": 2549 }, { "epoch": 0.06973309997812295, "grad_norm": 1.9030897617340088, "learning_rate": 1.9917532207157243e-05, "loss": 0.6119, "step": 2550 }, { "epoch": 0.06976044629183986, "grad_norm": 1.830122470855713, "learning_rate": 1.991741865446471e-05, "loss": 0.5993, "step": 2551 }, { "epoch": 0.06978779260555677, "grad_norm": 2.36659836769104, "learning_rate": 1.9917305023972818e-05, "loss": 0.5785, "step": 2552 }, { "epoch": 0.06981513891927368, "grad_norm": 2.166792154312134, "learning_rate": 1.991719131568246e-05, "loss": 0.5642, "step": 2553 }, { "epoch": 0.0698424852329906, "grad_norm": 2.0337398052215576, "learning_rate": 1.9917077529594523e-05, "loss": 0.6053, "step": 2554 }, { "epoch": 0.0698698315467075, "grad_norm": 2.4360480308532715, "learning_rate": 1.9916963665709907e-05, "loss": 0.5701, "step": 2555 }, { "epoch": 0.06989717786042442, "grad_norm": 1.7022554874420166, "learning_rate": 1.9916849724029502e-05, "loss": 0.5914, "step": 2556 }, { "epoch": 0.06992452417414133, "grad_norm": 2.7227916717529297, "learning_rate": 1.9916735704554204e-05, "loss": 0.586, "step": 2557 }, { "epoch": 0.06995187048785824, "grad_norm": 2.7468960285186768, "learning_rate": 1.9916621607284902e-05, "loss": 0.5996, "step": 2558 }, { "epoch": 0.06997921680157515, "grad_norm": 2.3852999210357666, "learning_rate": 1.9916507432222492e-05, "loss": 0.6072, "step": 2559 }, { "epoch": 0.07000656311529206, "grad_norm": 1.9496203660964966, "learning_rate": 1.9916393179367875e-05, "loss": 0.6354, "step": 2560 }, { "epoch": 0.07003390942900897, "grad_norm": 3.3750500679016113, "learning_rate": 1.9916278848721945e-05, "loss": 0.6486, "step": 2561 }, { "epoch": 0.07006125574272588, "grad_norm": 1.4795563220977783, "learning_rate": 1.9916164440285594e-05, "loss": 0.5729, "step": 2562 }, { "epoch": 0.0700886020564428, "grad_norm": 2.4223740100860596, "learning_rate": 1.991604995405973e-05, "loss": 0.5595, "step": 2563 }, { "epoch": 0.0701159483701597, "grad_norm": 1.879715085029602, "learning_rate": 1.991593539004524e-05, "loss": 0.5818, "step": 2564 }, { "epoch": 0.07014329468387662, "grad_norm": 1.5255595445632935, "learning_rate": 1.991582074824303e-05, "loss": 0.6111, "step": 2565 }, { "epoch": 0.07017064099759353, "grad_norm": 2.1174933910369873, "learning_rate": 1.9915706028653992e-05, "loss": 0.6619, "step": 2566 }, { "epoch": 0.07019798731131044, "grad_norm": 1.8571876287460327, "learning_rate": 1.9915591231279038e-05, "loss": 0.9787, "step": 2567 }, { "epoch": 0.07022533362502735, "grad_norm": 1.4271522760391235, "learning_rate": 1.9915476356119055e-05, "loss": 0.5561, "step": 2568 }, { "epoch": 0.07025267993874426, "grad_norm": 1.7148971557617188, "learning_rate": 1.9915361403174956e-05, "loss": 0.6014, "step": 2569 }, { "epoch": 0.07028002625246117, "grad_norm": 1.3645843267440796, "learning_rate": 1.9915246372447632e-05, "loss": 0.9322, "step": 2570 }, { "epoch": 0.07030737256617808, "grad_norm": 1.7302931547164917, "learning_rate": 1.9915131263937994e-05, "loss": 0.6213, "step": 2571 }, { "epoch": 0.070334718879895, "grad_norm": 1.8180747032165527, "learning_rate": 1.991501607764694e-05, "loss": 0.5833, "step": 2572 }, { "epoch": 0.0703620651936119, "grad_norm": 1.9760671854019165, "learning_rate": 1.9914900813575374e-05, "loss": 0.6681, "step": 2573 }, { "epoch": 0.07038941150732882, "grad_norm": 1.7386330366134644, "learning_rate": 1.9914785471724202e-05, "loss": 0.5976, "step": 2574 }, { "epoch": 0.07041675782104573, "grad_norm": 2.361781597137451, "learning_rate": 1.991467005209433e-05, "loss": 0.5678, "step": 2575 }, { "epoch": 0.07044410413476264, "grad_norm": 1.707586646080017, "learning_rate": 1.9914554554686662e-05, "loss": 0.6713, "step": 2576 }, { "epoch": 0.07047145044847955, "grad_norm": 1.5522595643997192, "learning_rate": 1.99144389795021e-05, "loss": 0.6035, "step": 2577 }, { "epoch": 0.07049879676219646, "grad_norm": 2.9389126300811768, "learning_rate": 1.9914323326541562e-05, "loss": 0.4411, "step": 2578 }, { "epoch": 0.07052614307591337, "grad_norm": 1.4780620336532593, "learning_rate": 1.991420759580594e-05, "loss": 0.5544, "step": 2579 }, { "epoch": 0.07055348938963028, "grad_norm": 1.413217306137085, "learning_rate": 1.9914091787296154e-05, "loss": 0.6024, "step": 2580 }, { "epoch": 0.0705808357033472, "grad_norm": 2.8959298133850098, "learning_rate": 1.9913975901013104e-05, "loss": 0.4263, "step": 2581 }, { "epoch": 0.0706081820170641, "grad_norm": 1.701891303062439, "learning_rate": 1.9913859936957706e-05, "loss": 0.5791, "step": 2582 }, { "epoch": 0.07063552833078102, "grad_norm": 1.5183972120285034, "learning_rate": 1.9913743895130866e-05, "loss": 0.6021, "step": 2583 }, { "epoch": 0.07066287464449793, "grad_norm": 1.8149348497390747, "learning_rate": 1.9913627775533496e-05, "loss": 0.5797, "step": 2584 }, { "epoch": 0.07069022095821484, "grad_norm": 1.457509994506836, "learning_rate": 1.9913511578166504e-05, "loss": 0.6222, "step": 2585 }, { "epoch": 0.07071756727193175, "grad_norm": 1.587174892425537, "learning_rate": 1.9913395303030805e-05, "loss": 0.5909, "step": 2586 }, { "epoch": 0.07074491358564866, "grad_norm": 1.5564641952514648, "learning_rate": 1.9913278950127307e-05, "loss": 0.5968, "step": 2587 }, { "epoch": 0.07077225989936557, "grad_norm": 2.1640045642852783, "learning_rate": 1.991316251945693e-05, "loss": 0.6425, "step": 2588 }, { "epoch": 0.07079960621308248, "grad_norm": 1.436119794845581, "learning_rate": 1.991304601102058e-05, "loss": 0.6489, "step": 2589 }, { "epoch": 0.0708269525267994, "grad_norm": 1.4235053062438965, "learning_rate": 1.9912929424819176e-05, "loss": 0.5825, "step": 2590 }, { "epoch": 0.07085429884051629, "grad_norm": 1.6849395036697388, "learning_rate": 1.991281276085363e-05, "loss": 0.5715, "step": 2591 }, { "epoch": 0.0708816451542332, "grad_norm": 2.1837871074676514, "learning_rate": 1.9912696019124854e-05, "loss": 0.6264, "step": 2592 }, { "epoch": 0.07090899146795011, "grad_norm": 1.9674431085586548, "learning_rate": 1.9912579199633772e-05, "loss": 0.5835, "step": 2593 }, { "epoch": 0.07093633778166702, "grad_norm": 1.9709504842758179, "learning_rate": 1.9912462302381295e-05, "loss": 0.6113, "step": 2594 }, { "epoch": 0.07096368409538394, "grad_norm": 1.9309600591659546, "learning_rate": 1.991234532736834e-05, "loss": 0.612, "step": 2595 }, { "epoch": 0.07099103040910085, "grad_norm": 1.6389696598052979, "learning_rate": 1.9912228274595825e-05, "loss": 0.5924, "step": 2596 }, { "epoch": 0.07101837672281776, "grad_norm": 1.559814453125, "learning_rate": 1.9912111144064667e-05, "loss": 0.5607, "step": 2597 }, { "epoch": 0.07104572303653467, "grad_norm": 3.1220619678497314, "learning_rate": 1.991199393577579e-05, "loss": 0.5672, "step": 2598 }, { "epoch": 0.07107306935025158, "grad_norm": 2.115337371826172, "learning_rate": 1.9911876649730106e-05, "loss": 0.5892, "step": 2599 }, { "epoch": 0.07110041566396849, "grad_norm": 1.7164306640625, "learning_rate": 1.9911759285928544e-05, "loss": 0.5686, "step": 2600 }, { "epoch": 0.0711277619776854, "grad_norm": 1.873070240020752, "learning_rate": 1.9911641844372017e-05, "loss": 0.5571, "step": 2601 }, { "epoch": 0.07115510829140231, "grad_norm": 1.6943436861038208, "learning_rate": 1.991152432506145e-05, "loss": 0.5993, "step": 2602 }, { "epoch": 0.07118245460511922, "grad_norm": 1.5592979192733765, "learning_rate": 1.991140672799776e-05, "loss": 0.5883, "step": 2603 }, { "epoch": 0.07120980091883614, "grad_norm": 1.7407357692718506, "learning_rate": 1.991128905318188e-05, "loss": 0.5974, "step": 2604 }, { "epoch": 0.07123714723255305, "grad_norm": 1.787922978401184, "learning_rate": 1.9911171300614725e-05, "loss": 0.5711, "step": 2605 }, { "epoch": 0.07126449354626996, "grad_norm": 2.0298550128936768, "learning_rate": 1.9911053470297217e-05, "loss": 0.5814, "step": 2606 }, { "epoch": 0.07129183985998687, "grad_norm": 1.678636074066162, "learning_rate": 1.9910935562230284e-05, "loss": 0.5722, "step": 2607 }, { "epoch": 0.07131918617370378, "grad_norm": 1.9429080486297607, "learning_rate": 1.9910817576414858e-05, "loss": 0.5894, "step": 2608 }, { "epoch": 0.07134653248742069, "grad_norm": 1.5649585723876953, "learning_rate": 1.991069951285185e-05, "loss": 0.5886, "step": 2609 }, { "epoch": 0.0713738788011376, "grad_norm": 1.7249304056167603, "learning_rate": 1.9910581371542193e-05, "loss": 0.626, "step": 2610 }, { "epoch": 0.07140122511485451, "grad_norm": 1.58961021900177, "learning_rate": 1.991046315248682e-05, "loss": 0.5924, "step": 2611 }, { "epoch": 0.07142857142857142, "grad_norm": 2.474977731704712, "learning_rate": 1.9910344855686644e-05, "loss": 0.9877, "step": 2612 }, { "epoch": 0.07145591774228834, "grad_norm": 1.8941489458084106, "learning_rate": 1.991022648114261e-05, "loss": 0.9831, "step": 2613 }, { "epoch": 0.07148326405600525, "grad_norm": 1.4884793758392334, "learning_rate": 1.9910108028855634e-05, "loss": 0.587, "step": 2614 }, { "epoch": 0.07151061036972216, "grad_norm": 1.3792351484298706, "learning_rate": 1.9909989498826648e-05, "loss": 0.617, "step": 2615 }, { "epoch": 0.07153795668343907, "grad_norm": 2.108686923980713, "learning_rate": 1.990987089105659e-05, "loss": 0.6051, "step": 2616 }, { "epoch": 0.07156530299715598, "grad_norm": 1.6647738218307495, "learning_rate": 1.9909752205546377e-05, "loss": 0.5877, "step": 2617 }, { "epoch": 0.07159264931087289, "grad_norm": 1.447206974029541, "learning_rate": 1.990963344229695e-05, "loss": 0.5597, "step": 2618 }, { "epoch": 0.0716199956245898, "grad_norm": 2.5006954669952393, "learning_rate": 1.9909514601309236e-05, "loss": 1.0091, "step": 2619 }, { "epoch": 0.07164734193830671, "grad_norm": 2.92594575881958, "learning_rate": 1.990939568258417e-05, "loss": 0.6003, "step": 2620 }, { "epoch": 0.07167468825202362, "grad_norm": 1.6874648332595825, "learning_rate": 1.990927668612268e-05, "loss": 0.5998, "step": 2621 }, { "epoch": 0.07170203456574054, "grad_norm": 1.4484392404556274, "learning_rate": 1.990915761192571e-05, "loss": 0.6069, "step": 2622 }, { "epoch": 0.07172938087945745, "grad_norm": 1.703843355178833, "learning_rate": 1.990903845999418e-05, "loss": 0.5632, "step": 2623 }, { "epoch": 0.07175672719317436, "grad_norm": 1.5087037086486816, "learning_rate": 1.9908919230329038e-05, "loss": 0.5923, "step": 2624 }, { "epoch": 0.07178407350689127, "grad_norm": 1.8008917570114136, "learning_rate": 1.990879992293121e-05, "loss": 0.608, "step": 2625 }, { "epoch": 0.07181141982060818, "grad_norm": 1.9491608142852783, "learning_rate": 1.9908680537801635e-05, "loss": 0.6108, "step": 2626 }, { "epoch": 0.07183876613432509, "grad_norm": 1.820299744606018, "learning_rate": 1.990856107494125e-05, "loss": 0.5998, "step": 2627 }, { "epoch": 0.071866112448042, "grad_norm": 2.0186190605163574, "learning_rate": 1.9908441534350995e-05, "loss": 0.9748, "step": 2628 }, { "epoch": 0.07189345876175891, "grad_norm": 2.2538399696350098, "learning_rate": 1.9908321916031802e-05, "loss": 0.6043, "step": 2629 }, { "epoch": 0.07192080507547582, "grad_norm": 2.2017600536346436, "learning_rate": 1.990820221998461e-05, "loss": 0.6039, "step": 2630 }, { "epoch": 0.07194815138919274, "grad_norm": 1.7306724786758423, "learning_rate": 1.9908082446210364e-05, "loss": 0.6069, "step": 2631 }, { "epoch": 0.07197549770290965, "grad_norm": 1.79444420337677, "learning_rate": 1.9907962594709997e-05, "loss": 0.6145, "step": 2632 }, { "epoch": 0.07200284401662656, "grad_norm": 2.908839702606201, "learning_rate": 1.990784266548445e-05, "loss": 0.4761, "step": 2633 }, { "epoch": 0.07203019033034347, "grad_norm": 5.904958248138428, "learning_rate": 1.9907722658534668e-05, "loss": 0.5863, "step": 2634 }, { "epoch": 0.07205753664406038, "grad_norm": 2.3696844577789307, "learning_rate": 1.990760257386159e-05, "loss": 0.6128, "step": 2635 }, { "epoch": 0.07208488295777729, "grad_norm": 2.203866481781006, "learning_rate": 1.990748241146616e-05, "loss": 0.6005, "step": 2636 }, { "epoch": 0.0721122292714942, "grad_norm": 16.603696823120117, "learning_rate": 1.9907362171349315e-05, "loss": 0.5504, "step": 2637 }, { "epoch": 0.07213957558521111, "grad_norm": 2.021347761154175, "learning_rate": 1.9907241853512004e-05, "loss": 0.6021, "step": 2638 }, { "epoch": 0.07216692189892802, "grad_norm": 1.7544080018997192, "learning_rate": 1.990712145795517e-05, "loss": 0.5895, "step": 2639 }, { "epoch": 0.07219426821264494, "grad_norm": 1.7012171745300293, "learning_rate": 1.9907000984679753e-05, "loss": 0.5505, "step": 2640 }, { "epoch": 0.07222161452636185, "grad_norm": 1.6922856569290161, "learning_rate": 1.9906880433686703e-05, "loss": 0.5923, "step": 2641 }, { "epoch": 0.07224896084007876, "grad_norm": 2.093616247177124, "learning_rate": 1.9906759804976964e-05, "loss": 0.9642, "step": 2642 }, { "epoch": 0.07227630715379567, "grad_norm": 1.7691423892974854, "learning_rate": 1.9906639098551484e-05, "loss": 0.6091, "step": 2643 }, { "epoch": 0.07230365346751258, "grad_norm": 2.0212881565093994, "learning_rate": 1.9906518314411206e-05, "loss": 0.6282, "step": 2644 }, { "epoch": 0.07233099978122949, "grad_norm": 1.7437306642532349, "learning_rate": 1.990639745255708e-05, "loss": 0.6539, "step": 2645 }, { "epoch": 0.0723583460949464, "grad_norm": 1.42036771774292, "learning_rate": 1.9906276512990055e-05, "loss": 0.5891, "step": 2646 }, { "epoch": 0.07238569240866331, "grad_norm": 1.3509690761566162, "learning_rate": 1.990615549571108e-05, "loss": 0.5922, "step": 2647 }, { "epoch": 0.07241303872238022, "grad_norm": 2.2314743995666504, "learning_rate": 1.9906034400721098e-05, "loss": 0.6016, "step": 2648 }, { "epoch": 0.07244038503609714, "grad_norm": 1.661273717880249, "learning_rate": 1.9905913228021067e-05, "loss": 0.5665, "step": 2649 }, { "epoch": 0.07246773134981405, "grad_norm": 1.4019149541854858, "learning_rate": 1.9905791977611933e-05, "loss": 0.5673, "step": 2650 }, { "epoch": 0.07249507766353096, "grad_norm": 18.15854263305664, "learning_rate": 1.990567064949465e-05, "loss": 1.4634, "step": 2651 }, { "epoch": 0.07252242397724787, "grad_norm": 1.3706574440002441, "learning_rate": 1.990554924367017e-05, "loss": 0.5872, "step": 2652 }, { "epoch": 0.07254977029096478, "grad_norm": 5.930802822113037, "learning_rate": 1.9905427760139444e-05, "loss": 0.9727, "step": 2653 }, { "epoch": 0.07257711660468169, "grad_norm": 3.076056480407715, "learning_rate": 1.990530619890342e-05, "loss": 0.6048, "step": 2654 }, { "epoch": 0.0726044629183986, "grad_norm": 2.247512102127075, "learning_rate": 1.990518455996306e-05, "loss": 0.5632, "step": 2655 }, { "epoch": 0.07263180923211551, "grad_norm": 1.418829083442688, "learning_rate": 1.9905062843319317e-05, "loss": 0.5988, "step": 2656 }, { "epoch": 0.07265915554583242, "grad_norm": 3.202862024307251, "learning_rate": 1.990494104897314e-05, "loss": 0.5953, "step": 2657 }, { "epoch": 0.07268650185954934, "grad_norm": 1.8247791528701782, "learning_rate": 1.9904819176925486e-05, "loss": 0.6291, "step": 2658 }, { "epoch": 0.07271384817326625, "grad_norm": 1.7155115604400635, "learning_rate": 1.9904697227177317e-05, "loss": 0.6377, "step": 2659 }, { "epoch": 0.07274119448698316, "grad_norm": 1.728583574295044, "learning_rate": 1.9904575199729587e-05, "loss": 0.5704, "step": 2660 }, { "epoch": 0.07276854080070007, "grad_norm": 2.3236913681030273, "learning_rate": 1.9904453094583247e-05, "loss": 0.6332, "step": 2661 }, { "epoch": 0.07279588711441698, "grad_norm": 16.72493553161621, "learning_rate": 1.9904330911739262e-05, "loss": 1.4841, "step": 2662 }, { "epoch": 0.07282323342813389, "grad_norm": 1.7446393966674805, "learning_rate": 1.9904208651198588e-05, "loss": 0.5857, "step": 2663 }, { "epoch": 0.0728505797418508, "grad_norm": 14.561307907104492, "learning_rate": 1.9904086312962184e-05, "loss": 1.2664, "step": 2664 }, { "epoch": 0.07287792605556771, "grad_norm": 1.757386565208435, "learning_rate": 1.9903963897031012e-05, "loss": 0.6057, "step": 2665 }, { "epoch": 0.07290527236928462, "grad_norm": 2.530860662460327, "learning_rate": 1.990384140340603e-05, "loss": 0.6224, "step": 2666 }, { "epoch": 0.07293261868300153, "grad_norm": 3.661113977432251, "learning_rate": 1.9903718832088203e-05, "loss": 0.6492, "step": 2667 }, { "epoch": 0.07295996499671845, "grad_norm": 1.909155249595642, "learning_rate": 1.9903596183078483e-05, "loss": 0.586, "step": 2668 }, { "epoch": 0.07298731131043536, "grad_norm": 2.2839863300323486, "learning_rate": 1.9903473456377838e-05, "loss": 0.6411, "step": 2669 }, { "epoch": 0.07301465762415227, "grad_norm": 1.826264500617981, "learning_rate": 1.9903350651987233e-05, "loss": 0.6088, "step": 2670 }, { "epoch": 0.07304200393786918, "grad_norm": 1.92747163772583, "learning_rate": 1.990322776990763e-05, "loss": 0.6065, "step": 2671 }, { "epoch": 0.07306935025158609, "grad_norm": 1.843799114227295, "learning_rate": 1.990310481013999e-05, "loss": 0.6412, "step": 2672 }, { "epoch": 0.073096696565303, "grad_norm": 2.6375956535339355, "learning_rate": 1.990298177268528e-05, "loss": 0.5918, "step": 2673 }, { "epoch": 0.07312404287901991, "grad_norm": 1.8862704038619995, "learning_rate": 1.9902858657544468e-05, "loss": 0.5639, "step": 2674 }, { "epoch": 0.07315138919273682, "grad_norm": 108.79391479492188, "learning_rate": 1.9902735464718512e-05, "loss": 1.2092, "step": 2675 }, { "epoch": 0.07317873550645373, "grad_norm": 2.2645628452301025, "learning_rate": 1.990261219420839e-05, "loss": 0.6352, "step": 2676 }, { "epoch": 0.07320608182017065, "grad_norm": 3.331368923187256, "learning_rate": 1.9902488846015053e-05, "loss": 0.6316, "step": 2677 }, { "epoch": 0.07323342813388756, "grad_norm": 34.14759063720703, "learning_rate": 1.9902365420139484e-05, "loss": 1.9896, "step": 2678 }, { "epoch": 0.07326077444760447, "grad_norm": 104.00918579101562, "learning_rate": 1.9902241916582646e-05, "loss": 0.7232, "step": 2679 }, { "epoch": 0.07328812076132138, "grad_norm": 282.831298828125, "learning_rate": 1.99021183353455e-05, "loss": 7.6344, "step": 2680 }, { "epoch": 0.07331546707503829, "grad_norm": 29.994050979614258, "learning_rate": 1.990199467642903e-05, "loss": 1.104, "step": 2681 }, { "epoch": 0.0733428133887552, "grad_norm": 11.91300106048584, "learning_rate": 1.9901870939834197e-05, "loss": 0.7902, "step": 2682 }, { "epoch": 0.07337015970247211, "grad_norm": 2.897505283355713, "learning_rate": 1.990174712556197e-05, "loss": 0.7064, "step": 2683 }, { "epoch": 0.07339750601618902, "grad_norm": 22.987457275390625, "learning_rate": 1.9901623233613322e-05, "loss": 1.3947, "step": 2684 }, { "epoch": 0.07342485232990593, "grad_norm": 56.98953628540039, "learning_rate": 1.990149926398923e-05, "loss": 0.6779, "step": 2685 }, { "epoch": 0.07345219864362285, "grad_norm": 8.443214416503906, "learning_rate": 1.9901375216690663e-05, "loss": 0.6823, "step": 2686 }, { "epoch": 0.07347954495733976, "grad_norm": 24.040382385253906, "learning_rate": 1.9901251091718592e-05, "loss": 1.8883, "step": 2687 }, { "epoch": 0.07350689127105667, "grad_norm": 3.8283193111419678, "learning_rate": 1.9901126889073994e-05, "loss": 0.6922, "step": 2688 }, { "epoch": 0.07353423758477358, "grad_norm": 5.360132217407227, "learning_rate": 1.9901002608757843e-05, "loss": 0.7262, "step": 2689 }, { "epoch": 0.07356158389849049, "grad_norm": 4.244453430175781, "learning_rate": 1.9900878250771108e-05, "loss": 0.785, "step": 2690 }, { "epoch": 0.0735889302122074, "grad_norm": 8.675984382629395, "learning_rate": 1.9900753815114773e-05, "loss": 0.6512, "step": 2691 }, { "epoch": 0.0736162765259243, "grad_norm": 5.501805305480957, "learning_rate": 1.9900629301789812e-05, "loss": 0.7119, "step": 2692 }, { "epoch": 0.07364362283964121, "grad_norm": 3.3503146171569824, "learning_rate": 1.99005047107972e-05, "loss": 0.6864, "step": 2693 }, { "epoch": 0.07367096915335812, "grad_norm": 2.9448180198669434, "learning_rate": 1.9900380042137913e-05, "loss": 0.6724, "step": 2694 }, { "epoch": 0.07369831546707503, "grad_norm": 2.8291239738464355, "learning_rate": 1.9900255295812928e-05, "loss": 0.6666, "step": 2695 }, { "epoch": 0.07372566178079194, "grad_norm": 4.017736434936523, "learning_rate": 1.9900130471823235e-05, "loss": 0.5901, "step": 2696 }, { "epoch": 0.07375300809450885, "grad_norm": 3.7621359825134277, "learning_rate": 1.9900005570169796e-05, "loss": 0.6475, "step": 2697 }, { "epoch": 0.07378035440822577, "grad_norm": 2.710268974304199, "learning_rate": 1.9899880590853603e-05, "loss": 0.5992, "step": 2698 }, { "epoch": 0.07380770072194268, "grad_norm": 4.174410820007324, "learning_rate": 1.9899755533875633e-05, "loss": 0.7167, "step": 2699 }, { "epoch": 0.07383504703565959, "grad_norm": 48.5873908996582, "learning_rate": 1.9899630399236868e-05, "loss": 3.0839, "step": 2700 }, { "epoch": 0.0738623933493765, "grad_norm": 2.525167942047119, "learning_rate": 1.9899505186938283e-05, "loss": 0.6176, "step": 2701 }, { "epoch": 0.07388973966309341, "grad_norm": 2.8244616985321045, "learning_rate": 1.989937989698087e-05, "loss": 0.6711, "step": 2702 }, { "epoch": 0.07391708597681032, "grad_norm": 1.9389874935150146, "learning_rate": 1.989925452936561e-05, "loss": 0.6444, "step": 2703 }, { "epoch": 0.07394443229052723, "grad_norm": 3.559176206588745, "learning_rate": 1.9899129084093482e-05, "loss": 0.6929, "step": 2704 }, { "epoch": 0.07397177860424414, "grad_norm": 2.24802565574646, "learning_rate": 1.9899003561165472e-05, "loss": 0.6633, "step": 2705 }, { "epoch": 0.07399912491796105, "grad_norm": 22.615312576293945, "learning_rate": 1.9898877960582563e-05, "loss": 1.9564, "step": 2706 }, { "epoch": 0.07402647123167796, "grad_norm": 2.5357422828674316, "learning_rate": 1.9898752282345743e-05, "loss": 0.6354, "step": 2707 }, { "epoch": 0.07405381754539488, "grad_norm": 2.4386820793151855, "learning_rate": 1.9898626526456e-05, "loss": 0.6404, "step": 2708 }, { "epoch": 0.07408116385911179, "grad_norm": 4.223243713378906, "learning_rate": 1.9898500692914316e-05, "loss": 0.7213, "step": 2709 }, { "epoch": 0.0741085101728287, "grad_norm": 2.485560894012451, "learning_rate": 1.989837478172168e-05, "loss": 0.6345, "step": 2710 }, { "epoch": 0.07413585648654561, "grad_norm": 5.925610542297363, "learning_rate": 1.9898248792879075e-05, "loss": 0.6447, "step": 2711 }, { "epoch": 0.07416320280026252, "grad_norm": 11.09791374206543, "learning_rate": 1.98981227263875e-05, "loss": 1.9037, "step": 2712 }, { "epoch": 0.07419054911397943, "grad_norm": 3.7603156566619873, "learning_rate": 1.9897996582247935e-05, "loss": 0.6576, "step": 2713 }, { "epoch": 0.07421789542769634, "grad_norm": 2.1506550312042236, "learning_rate": 1.9897870360461372e-05, "loss": 0.5866, "step": 2714 }, { "epoch": 0.07424524174141325, "grad_norm": 2.0934741497039795, "learning_rate": 1.98977440610288e-05, "loss": 0.684, "step": 2715 }, { "epoch": 0.07427258805513016, "grad_norm": 2.5404281616210938, "learning_rate": 1.9897617683951214e-05, "loss": 0.6077, "step": 2716 }, { "epoch": 0.07429993436884708, "grad_norm": 1.6182739734649658, "learning_rate": 1.9897491229229602e-05, "loss": 0.6128, "step": 2717 }, { "epoch": 0.07432728068256399, "grad_norm": 5.750521183013916, "learning_rate": 1.9897364696864958e-05, "loss": 1.5308, "step": 2718 }, { "epoch": 0.0743546269962809, "grad_norm": 16.03487205505371, "learning_rate": 1.9897238086858272e-05, "loss": 1.7572, "step": 2719 }, { "epoch": 0.07438197330999781, "grad_norm": 2.0916409492492676, "learning_rate": 1.9897111399210538e-05, "loss": 0.5834, "step": 2720 }, { "epoch": 0.07440931962371472, "grad_norm": 3.1144158840179443, "learning_rate": 1.989698463392275e-05, "loss": 0.6222, "step": 2721 }, { "epoch": 0.07443666593743163, "grad_norm": 2.2177650928497314, "learning_rate": 1.9896857790995905e-05, "loss": 0.6174, "step": 2722 }, { "epoch": 0.07446401225114854, "grad_norm": 2.083477020263672, "learning_rate": 1.9896730870430997e-05, "loss": 0.6199, "step": 2723 }, { "epoch": 0.07449135856486545, "grad_norm": 2.3720767498016357, "learning_rate": 1.9896603872229016e-05, "loss": 0.6251, "step": 2724 }, { "epoch": 0.07451870487858236, "grad_norm": 1.7440314292907715, "learning_rate": 1.9896476796390964e-05, "loss": 0.6552, "step": 2725 }, { "epoch": 0.07454605119229928, "grad_norm": 2.4181907176971436, "learning_rate": 1.989634964291784e-05, "loss": 0.5997, "step": 2726 }, { "epoch": 0.07457339750601619, "grad_norm": 2.2736165523529053, "learning_rate": 1.9896222411810635e-05, "loss": 0.6264, "step": 2727 }, { "epoch": 0.0746007438197331, "grad_norm": 1.9188024997711182, "learning_rate": 1.989609510307035e-05, "loss": 0.6314, "step": 2728 }, { "epoch": 0.07462809013345001, "grad_norm": 6.116729736328125, "learning_rate": 1.9895967716697986e-05, "loss": 0.6571, "step": 2729 }, { "epoch": 0.07465543644716692, "grad_norm": 2.3106942176818848, "learning_rate": 1.989584025269454e-05, "loss": 0.6718, "step": 2730 }, { "epoch": 0.07468278276088383, "grad_norm": 1.9026319980621338, "learning_rate": 1.989571271106101e-05, "loss": 0.6631, "step": 2731 }, { "epoch": 0.07471012907460074, "grad_norm": 2.407736301422119, "learning_rate": 1.98955850917984e-05, "loss": 0.6115, "step": 2732 }, { "epoch": 0.07473747538831765, "grad_norm": 1.998255729675293, "learning_rate": 1.989545739490771e-05, "loss": 0.6069, "step": 2733 }, { "epoch": 0.07476482170203456, "grad_norm": 2.9833738803863525, "learning_rate": 1.989532962038994e-05, "loss": 0.5571, "step": 2734 }, { "epoch": 0.07479216801575148, "grad_norm": 2.362621307373047, "learning_rate": 1.9895201768246095e-05, "loss": 0.6671, "step": 2735 }, { "epoch": 0.07481951432946839, "grad_norm": 1.955010175704956, "learning_rate": 1.989507383847718e-05, "loss": 0.5828, "step": 2736 }, { "epoch": 0.0748468606431853, "grad_norm": 4.398645877838135, "learning_rate": 1.9894945831084192e-05, "loss": 0.6109, "step": 2737 }, { "epoch": 0.07487420695690221, "grad_norm": 1.771446704864502, "learning_rate": 1.989481774606814e-05, "loss": 0.5816, "step": 2738 }, { "epoch": 0.07490155327061912, "grad_norm": 1.9209119081497192, "learning_rate": 1.9894689583430024e-05, "loss": 0.5669, "step": 2739 }, { "epoch": 0.07492889958433603, "grad_norm": 2.774460554122925, "learning_rate": 1.9894561343170856e-05, "loss": 1.2669, "step": 2740 }, { "epoch": 0.07495624589805294, "grad_norm": 2.0172057151794434, "learning_rate": 1.989443302529164e-05, "loss": 0.6086, "step": 2741 }, { "epoch": 0.07498359221176985, "grad_norm": 1.8179476261138916, "learning_rate": 1.989430462979338e-05, "loss": 0.609, "step": 2742 }, { "epoch": 0.07501093852548676, "grad_norm": 2.1027190685272217, "learning_rate": 1.9894176156677085e-05, "loss": 0.6138, "step": 2743 }, { "epoch": 0.07503828483920368, "grad_norm": 5.193620204925537, "learning_rate": 1.989404760594376e-05, "loss": 0.6251, "step": 2744 }, { "epoch": 0.07506563115292059, "grad_norm": 4.59944486618042, "learning_rate": 1.9893918977594418e-05, "loss": 0.555, "step": 2745 }, { "epoch": 0.0750929774666375, "grad_norm": 2.4264321327209473, "learning_rate": 1.9893790271630068e-05, "loss": 0.6442, "step": 2746 }, { "epoch": 0.07512032378035441, "grad_norm": 2.2150330543518066, "learning_rate": 1.9893661488051713e-05, "loss": 0.5881, "step": 2747 }, { "epoch": 0.07514767009407132, "grad_norm": 3.147871732711792, "learning_rate": 1.989353262686037e-05, "loss": 0.6085, "step": 2748 }, { "epoch": 0.07517501640778823, "grad_norm": 13.84599494934082, "learning_rate": 1.989340368805705e-05, "loss": 1.4531, "step": 2749 }, { "epoch": 0.07520236272150514, "grad_norm": 2.000675916671753, "learning_rate": 1.9893274671642758e-05, "loss": 0.6262, "step": 2750 }, { "epoch": 0.07522970903522205, "grad_norm": 4.939501762390137, "learning_rate": 1.9893145577618513e-05, "loss": 0.9521, "step": 2751 }, { "epoch": 0.07525705534893896, "grad_norm": 1.8429499864578247, "learning_rate": 1.989301640598533e-05, "loss": 0.5989, "step": 2752 }, { "epoch": 0.07528440166265588, "grad_norm": 3.5340020656585693, "learning_rate": 1.989288715674421e-05, "loss": 0.6121, "step": 2753 }, { "epoch": 0.07531174797637279, "grad_norm": 3.2876195907592773, "learning_rate": 1.9892757829896176e-05, "loss": 0.6281, "step": 2754 }, { "epoch": 0.0753390942900897, "grad_norm": 1.8418315649032593, "learning_rate": 1.9892628425442244e-05, "loss": 0.5449, "step": 2755 }, { "epoch": 0.07536644060380661, "grad_norm": 4.010681629180908, "learning_rate": 1.9892498943383424e-05, "loss": 0.6128, "step": 2756 }, { "epoch": 0.07539378691752352, "grad_norm": 2.5685086250305176, "learning_rate": 1.9892369383720733e-05, "loss": 0.6134, "step": 2757 }, { "epoch": 0.07542113323124043, "grad_norm": 4.238722324371338, "learning_rate": 1.9892239746455187e-05, "loss": 0.5869, "step": 2758 }, { "epoch": 0.07544847954495734, "grad_norm": 9.994478225708008, "learning_rate": 1.9892110031587806e-05, "loss": 1.1852, "step": 2759 }, { "epoch": 0.07547582585867425, "grad_norm": 2.6263461112976074, "learning_rate": 1.9891980239119607e-05, "loss": 0.6277, "step": 2760 }, { "epoch": 0.07550317217239116, "grad_norm": 2.5476887226104736, "learning_rate": 1.9891850369051603e-05, "loss": 0.5907, "step": 2761 }, { "epoch": 0.07553051848610808, "grad_norm": 7.533618450164795, "learning_rate": 1.9891720421384823e-05, "loss": 1.1355, "step": 2762 }, { "epoch": 0.07555786479982499, "grad_norm": 2.2091126441955566, "learning_rate": 1.9891590396120274e-05, "loss": 0.6259, "step": 2763 }, { "epoch": 0.0755852111135419, "grad_norm": 3.165642499923706, "learning_rate": 1.9891460293258983e-05, "loss": 0.6206, "step": 2764 }, { "epoch": 0.07561255742725881, "grad_norm": 16.154325485229492, "learning_rate": 1.9891330112801972e-05, "loss": 0.5989, "step": 2765 }, { "epoch": 0.07563990374097572, "grad_norm": 2.287421703338623, "learning_rate": 1.989119985475026e-05, "loss": 0.5857, "step": 2766 }, { "epoch": 0.07566725005469263, "grad_norm": 3.208341121673584, "learning_rate": 1.9891069519104865e-05, "loss": 0.5478, "step": 2767 }, { "epoch": 0.07569459636840954, "grad_norm": 3.3155033588409424, "learning_rate": 1.989093910586682e-05, "loss": 0.8438, "step": 2768 }, { "epoch": 0.07572194268212645, "grad_norm": 3.1769444942474365, "learning_rate": 1.9890808615037133e-05, "loss": 0.5901, "step": 2769 }, { "epoch": 0.07574928899584336, "grad_norm": 2.6278345584869385, "learning_rate": 1.989067804661684e-05, "loss": 0.7027, "step": 2770 }, { "epoch": 0.07577663530956028, "grad_norm": 2.05370831489563, "learning_rate": 1.9890547400606964e-05, "loss": 0.6, "step": 2771 }, { "epoch": 0.07580398162327719, "grad_norm": 2.3587522506713867, "learning_rate": 1.9890416677008524e-05, "loss": 0.5827, "step": 2772 }, { "epoch": 0.0758313279369941, "grad_norm": 2.9078025817871094, "learning_rate": 1.9890285875822547e-05, "loss": 0.6239, "step": 2773 }, { "epoch": 0.07585867425071101, "grad_norm": 2.7768406867980957, "learning_rate": 1.9890154997050064e-05, "loss": 0.6002, "step": 2774 }, { "epoch": 0.07588602056442792, "grad_norm": 2.327995777130127, "learning_rate": 1.9890024040692096e-05, "loss": 0.6012, "step": 2775 }, { "epoch": 0.07591336687814483, "grad_norm": 2.2216737270355225, "learning_rate": 1.9889893006749675e-05, "loss": 0.5961, "step": 2776 }, { "epoch": 0.07594071319186174, "grad_norm": 3.6543853282928467, "learning_rate": 1.9889761895223823e-05, "loss": 0.5975, "step": 2777 }, { "epoch": 0.07596805950557865, "grad_norm": 4.930671215057373, "learning_rate": 1.9889630706115578e-05, "loss": 0.7955, "step": 2778 }, { "epoch": 0.07599540581929556, "grad_norm": 2.9731953144073486, "learning_rate": 1.9889499439425955e-05, "loss": 0.5989, "step": 2779 }, { "epoch": 0.07602275213301248, "grad_norm": 5.205527305603027, "learning_rate": 1.9889368095156e-05, "loss": 0.6017, "step": 2780 }, { "epoch": 0.07605009844672939, "grad_norm": 2.769500732421875, "learning_rate": 1.9889236673306735e-05, "loss": 0.6241, "step": 2781 }, { "epoch": 0.0760774447604463, "grad_norm": 3.256805419921875, "learning_rate": 1.9889105173879187e-05, "loss": 0.5974, "step": 2782 }, { "epoch": 0.07610479107416321, "grad_norm": 3.9481706619262695, "learning_rate": 1.9888973596874392e-05, "loss": 0.6006, "step": 2783 }, { "epoch": 0.07613213738788012, "grad_norm": 2.514157295227051, "learning_rate": 1.9888841942293383e-05, "loss": 0.6114, "step": 2784 }, { "epoch": 0.07615948370159703, "grad_norm": 7.759187698364258, "learning_rate": 1.9888710210137194e-05, "loss": 0.6955, "step": 2785 }, { "epoch": 0.07618683001531394, "grad_norm": 2.093888521194458, "learning_rate": 1.9888578400406855e-05, "loss": 0.6226, "step": 2786 }, { "epoch": 0.07621417632903085, "grad_norm": 2.67421555519104, "learning_rate": 1.9888446513103398e-05, "loss": 0.5907, "step": 2787 }, { "epoch": 0.07624152264274776, "grad_norm": 2.279006004333496, "learning_rate": 1.9888314548227864e-05, "loss": 0.6166, "step": 2788 }, { "epoch": 0.07626886895646467, "grad_norm": 2.028067111968994, "learning_rate": 1.9888182505781287e-05, "loss": 0.6458, "step": 2789 }, { "epoch": 0.07629621527018159, "grad_norm": 2.726773738861084, "learning_rate": 1.98880503857647e-05, "loss": 0.6391, "step": 2790 }, { "epoch": 0.0763235615838985, "grad_norm": 2.559718132019043, "learning_rate": 1.988791818817914e-05, "loss": 0.611, "step": 2791 }, { "epoch": 0.07635090789761541, "grad_norm": 2.4565165042877197, "learning_rate": 1.9887785913025642e-05, "loss": 0.6082, "step": 2792 }, { "epoch": 0.07637825421133232, "grad_norm": 2.628906726837158, "learning_rate": 1.988765356030525e-05, "loss": 0.5881, "step": 2793 }, { "epoch": 0.07640560052504922, "grad_norm": 2.1624155044555664, "learning_rate": 1.9887521130018998e-05, "loss": 0.6389, "step": 2794 }, { "epoch": 0.07643294683876613, "grad_norm": 2.5044877529144287, "learning_rate": 1.9887388622167925e-05, "loss": 0.5727, "step": 2795 }, { "epoch": 0.07646029315248304, "grad_norm": 2.407353401184082, "learning_rate": 1.988725603675307e-05, "loss": 0.5836, "step": 2796 }, { "epoch": 0.07648763946619995, "grad_norm": 2.606987953186035, "learning_rate": 1.988712337377547e-05, "loss": 0.6084, "step": 2797 }, { "epoch": 0.07651498577991686, "grad_norm": 1.9819226264953613, "learning_rate": 1.9886990633236177e-05, "loss": 1.1392, "step": 2798 }, { "epoch": 0.07654233209363377, "grad_norm": 2.4270243644714355, "learning_rate": 1.988685781513622e-05, "loss": 0.6031, "step": 2799 }, { "epoch": 0.07656967840735068, "grad_norm": 1.4090678691864014, "learning_rate": 1.988672491947665e-05, "loss": 1.1502, "step": 2800 }, { "epoch": 0.0765970247210676, "grad_norm": 2.4359512329101562, "learning_rate": 1.98865919462585e-05, "loss": 0.5809, "step": 2801 }, { "epoch": 0.0766243710347845, "grad_norm": 2.4507415294647217, "learning_rate": 1.9886458895482824e-05, "loss": 0.5603, "step": 2802 }, { "epoch": 0.07665171734850142, "grad_norm": 2.2971444129943848, "learning_rate": 1.9886325767150657e-05, "loss": 0.6068, "step": 2803 }, { "epoch": 0.07667906366221833, "grad_norm": 2.145643711090088, "learning_rate": 1.9886192561263046e-05, "loss": 0.6285, "step": 2804 }, { "epoch": 0.07670640997593524, "grad_norm": 2.1423916816711426, "learning_rate": 1.9886059277821037e-05, "loss": 0.5779, "step": 2805 }, { "epoch": 0.07673375628965215, "grad_norm": 2.0853302478790283, "learning_rate": 1.9885925916825678e-05, "loss": 0.5444, "step": 2806 }, { "epoch": 0.07676110260336906, "grad_norm": 1.7189017534255981, "learning_rate": 1.9885792478278008e-05, "loss": 0.5817, "step": 2807 }, { "epoch": 0.07678844891708597, "grad_norm": 5.230403423309326, "learning_rate": 1.988565896217908e-05, "loss": 0.5962, "step": 2808 }, { "epoch": 0.07681579523080288, "grad_norm": 2.333343505859375, "learning_rate": 1.988552536852994e-05, "loss": 0.6031, "step": 2809 }, { "epoch": 0.0768431415445198, "grad_norm": 2.2918765544891357, "learning_rate": 1.9885391697331634e-05, "loss": 1.0952, "step": 2810 }, { "epoch": 0.0768704878582367, "grad_norm": 5.306827545166016, "learning_rate": 1.9885257948585215e-05, "loss": 0.5878, "step": 2811 }, { "epoch": 0.07689783417195362, "grad_norm": 2.5375654697418213, "learning_rate": 1.988512412229173e-05, "loss": 1.1054, "step": 2812 }, { "epoch": 0.07692518048567053, "grad_norm": 5.709288597106934, "learning_rate": 1.9884990218452222e-05, "loss": 0.6101, "step": 2813 }, { "epoch": 0.07695252679938744, "grad_norm": 2.388273000717163, "learning_rate": 1.9884856237067754e-05, "loss": 0.6003, "step": 2814 }, { "epoch": 0.07697987311310435, "grad_norm": 2.6031486988067627, "learning_rate": 1.9884722178139367e-05, "loss": 0.5742, "step": 2815 }, { "epoch": 0.07700721942682126, "grad_norm": 3.0436313152313232, "learning_rate": 1.988458804166812e-05, "loss": 0.627, "step": 2816 }, { "epoch": 0.07703456574053817, "grad_norm": 2.912384510040283, "learning_rate": 1.9884453827655057e-05, "loss": 0.6066, "step": 2817 }, { "epoch": 0.07706191205425508, "grad_norm": 2.0983526706695557, "learning_rate": 1.9884319536101237e-05, "loss": 0.6156, "step": 2818 }, { "epoch": 0.077089258367972, "grad_norm": 2.4292471408843994, "learning_rate": 1.988418516700771e-05, "loss": 0.5527, "step": 2819 }, { "epoch": 0.0771166046816889, "grad_norm": 2.0878865718841553, "learning_rate": 1.9884050720375536e-05, "loss": 0.5781, "step": 2820 }, { "epoch": 0.07714395099540582, "grad_norm": 1.8172855377197266, "learning_rate": 1.9883916196205765e-05, "loss": 0.5423, "step": 2821 }, { "epoch": 0.07717129730912273, "grad_norm": 1.9253532886505127, "learning_rate": 1.9883781594499455e-05, "loss": 0.6024, "step": 2822 }, { "epoch": 0.07719864362283964, "grad_norm": 2.8865966796875, "learning_rate": 1.9883646915257656e-05, "loss": 0.5792, "step": 2823 }, { "epoch": 0.07722598993655655, "grad_norm": 2.010413646697998, "learning_rate": 1.988351215848143e-05, "loss": 0.5698, "step": 2824 }, { "epoch": 0.07725333625027346, "grad_norm": 3.0209624767303467, "learning_rate": 1.9883377324171832e-05, "loss": 1.0636, "step": 2825 }, { "epoch": 0.07728068256399037, "grad_norm": 3.4687554836273193, "learning_rate": 1.988324241232992e-05, "loss": 0.6754, "step": 2826 }, { "epoch": 0.07730802887770728, "grad_norm": 2.7356560230255127, "learning_rate": 1.9883107422956753e-05, "loss": 0.5673, "step": 2827 }, { "epoch": 0.0773353751914242, "grad_norm": 2.1165080070495605, "learning_rate": 1.988297235605339e-05, "loss": 0.5948, "step": 2828 }, { "epoch": 0.0773627215051411, "grad_norm": 8.826492309570312, "learning_rate": 1.988283721162089e-05, "loss": 0.6311, "step": 2829 }, { "epoch": 0.07739006781885802, "grad_norm": 2.2704086303710938, "learning_rate": 1.9882701989660314e-05, "loss": 0.6067, "step": 2830 }, { "epoch": 0.07741741413257493, "grad_norm": 1.6225237846374512, "learning_rate": 1.988256669017272e-05, "loss": 0.5544, "step": 2831 }, { "epoch": 0.07744476044629184, "grad_norm": 1.7004858255386353, "learning_rate": 1.9882431313159173e-05, "loss": 1.05, "step": 2832 }, { "epoch": 0.07747210676000875, "grad_norm": 1.73942232131958, "learning_rate": 1.988229585862073e-05, "loss": 0.5633, "step": 2833 }, { "epoch": 0.07749945307372566, "grad_norm": 2.0540671348571777, "learning_rate": 1.988216032655846e-05, "loss": 0.5873, "step": 2834 }, { "epoch": 0.07752679938744257, "grad_norm": 2.5777790546417236, "learning_rate": 1.9882024716973426e-05, "loss": 0.5947, "step": 2835 }, { "epoch": 0.07755414570115948, "grad_norm": 2.617645263671875, "learning_rate": 1.9881889029866687e-05, "loss": 0.5832, "step": 2836 }, { "epoch": 0.0775814920148764, "grad_norm": 3.46006441116333, "learning_rate": 1.9881753265239312e-05, "loss": 0.6293, "step": 2837 }, { "epoch": 0.0776088383285933, "grad_norm": 3.5240478515625, "learning_rate": 1.988161742309236e-05, "loss": 0.6018, "step": 2838 }, { "epoch": 0.07763618464231022, "grad_norm": 4.123600959777832, "learning_rate": 1.9881481503426904e-05, "loss": 0.6012, "step": 2839 }, { "epoch": 0.07766353095602713, "grad_norm": 3.409613847732544, "learning_rate": 1.9881345506244e-05, "loss": 0.5624, "step": 2840 }, { "epoch": 0.07769087726974404, "grad_norm": 2.5696675777435303, "learning_rate": 1.9881209431544726e-05, "loss": 0.5938, "step": 2841 }, { "epoch": 0.07771822358346095, "grad_norm": 2.212770700454712, "learning_rate": 1.9881073279330146e-05, "loss": 0.5836, "step": 2842 }, { "epoch": 0.07774556989717786, "grad_norm": 2.534813165664673, "learning_rate": 1.9880937049601324e-05, "loss": 0.5985, "step": 2843 }, { "epoch": 0.07777291621089477, "grad_norm": 1.9733344316482544, "learning_rate": 1.988080074235933e-05, "loss": 0.6273, "step": 2844 }, { "epoch": 0.07780026252461168, "grad_norm": 1.3329365253448486, "learning_rate": 1.988066435760524e-05, "loss": 1.0857, "step": 2845 }, { "epoch": 0.0778276088383286, "grad_norm": 3.2889575958251953, "learning_rate": 1.9880527895340115e-05, "loss": 0.572, "step": 2846 }, { "epoch": 0.0778549551520455, "grad_norm": 3.6516311168670654, "learning_rate": 1.9880391355565033e-05, "loss": 0.6077, "step": 2847 }, { "epoch": 0.07788230146576242, "grad_norm": 3.32891845703125, "learning_rate": 1.9880254738281057e-05, "loss": 0.6165, "step": 2848 }, { "epoch": 0.07790964777947933, "grad_norm": 1.9464747905731201, "learning_rate": 1.9880118043489266e-05, "loss": 0.6125, "step": 2849 }, { "epoch": 0.07793699409319624, "grad_norm": 2.0961661338806152, "learning_rate": 1.9879981271190728e-05, "loss": 0.6185, "step": 2850 }, { "epoch": 0.07796434040691315, "grad_norm": 2.192357063293457, "learning_rate": 1.9879844421386517e-05, "loss": 0.5405, "step": 2851 }, { "epoch": 0.07799168672063006, "grad_norm": 2.1320858001708984, "learning_rate": 1.9879707494077708e-05, "loss": 0.6266, "step": 2852 }, { "epoch": 0.07801903303434697, "grad_norm": 6.311386585235596, "learning_rate": 1.9879570489265373e-05, "loss": 0.6085, "step": 2853 }, { "epoch": 0.07804637934806388, "grad_norm": 2.712542772293091, "learning_rate": 1.987943340695059e-05, "loss": 0.6243, "step": 2854 }, { "epoch": 0.0780737256617808, "grad_norm": 1.9566164016723633, "learning_rate": 1.9879296247134433e-05, "loss": 0.6203, "step": 2855 }, { "epoch": 0.0781010719754977, "grad_norm": 2.711559772491455, "learning_rate": 1.987915900981797e-05, "loss": 0.5933, "step": 2856 }, { "epoch": 0.07812841828921462, "grad_norm": 3.042536735534668, "learning_rate": 1.987902169500229e-05, "loss": 0.5956, "step": 2857 }, { "epoch": 0.07815576460293153, "grad_norm": 3.144752025604248, "learning_rate": 1.9878884302688467e-05, "loss": 0.604, "step": 2858 }, { "epoch": 0.07818311091664844, "grad_norm": 4.383182048797607, "learning_rate": 1.9878746832877576e-05, "loss": 0.5762, "step": 2859 }, { "epoch": 0.07821045723036535, "grad_norm": 2.5020909309387207, "learning_rate": 1.9878609285570695e-05, "loss": 0.5818, "step": 2860 }, { "epoch": 0.07823780354408226, "grad_norm": 3.113635540008545, "learning_rate": 1.9878471660768905e-05, "loss": 0.6294, "step": 2861 }, { "epoch": 0.07826514985779917, "grad_norm": 2.0062246322631836, "learning_rate": 1.987833395847328e-05, "loss": 0.6053, "step": 2862 }, { "epoch": 0.07829249617151608, "grad_norm": 2.37129282951355, "learning_rate": 1.9878196178684912e-05, "loss": 0.5773, "step": 2863 }, { "epoch": 0.078319842485233, "grad_norm": 1.2864034175872803, "learning_rate": 1.987805832140487e-05, "loss": 1.0647, "step": 2864 }, { "epoch": 0.0783471887989499, "grad_norm": 1.3323924541473389, "learning_rate": 1.9877920386634243e-05, "loss": 1.0418, "step": 2865 }, { "epoch": 0.07837453511266682, "grad_norm": 5.346224784851074, "learning_rate": 1.9877782374374114e-05, "loss": 0.5885, "step": 2866 }, { "epoch": 0.07840188142638373, "grad_norm": 1.9748519659042358, "learning_rate": 1.9877644284625555e-05, "loss": 0.5986, "step": 2867 }, { "epoch": 0.07842922774010064, "grad_norm": 1.9419734477996826, "learning_rate": 1.987750611738966e-05, "loss": 0.6072, "step": 2868 }, { "epoch": 0.07845657405381755, "grad_norm": 1.971542239189148, "learning_rate": 1.987736787266751e-05, "loss": 0.5878, "step": 2869 }, { "epoch": 0.07848392036753446, "grad_norm": 1.875051736831665, "learning_rate": 1.9877229550460186e-05, "loss": 0.5907, "step": 2870 }, { "epoch": 0.07851126668125137, "grad_norm": 2.0436508655548096, "learning_rate": 1.9877091150768777e-05, "loss": 0.6846, "step": 2871 }, { "epoch": 0.07853861299496828, "grad_norm": 7.5421295166015625, "learning_rate": 1.987695267359437e-05, "loss": 0.5899, "step": 2872 }, { "epoch": 0.0785659593086852, "grad_norm": 2.3246958255767822, "learning_rate": 1.9876814118938046e-05, "loss": 0.631, "step": 2873 }, { "epoch": 0.0785933056224021, "grad_norm": 1.3235466480255127, "learning_rate": 1.9876675486800898e-05, "loss": 1.0354, "step": 2874 }, { "epoch": 0.07862065193611902, "grad_norm": 1.906980276107788, "learning_rate": 1.9876536777184005e-05, "loss": 0.6045, "step": 2875 }, { "epoch": 0.07864799824983593, "grad_norm": 1.8066703081130981, "learning_rate": 1.9876397990088464e-05, "loss": 0.5958, "step": 2876 }, { "epoch": 0.07867534456355284, "grad_norm": 1.894116997718811, "learning_rate": 1.987625912551536e-05, "loss": 0.6016, "step": 2877 }, { "epoch": 0.07870269087726975, "grad_norm": 1.7308956384658813, "learning_rate": 1.987612018346578e-05, "loss": 0.5975, "step": 2878 }, { "epoch": 0.07873003719098666, "grad_norm": 2.16797137260437, "learning_rate": 1.9875981163940824e-05, "loss": 0.6521, "step": 2879 }, { "epoch": 0.07875738350470357, "grad_norm": 1.6580272912979126, "learning_rate": 1.9875842066941568e-05, "loss": 0.5952, "step": 2880 }, { "epoch": 0.07878472981842048, "grad_norm": 2.1909024715423584, "learning_rate": 1.987570289246911e-05, "loss": 0.6127, "step": 2881 }, { "epoch": 0.07881207613213739, "grad_norm": 2.017538547515869, "learning_rate": 1.9875563640524548e-05, "loss": 0.5825, "step": 2882 }, { "epoch": 0.0788394224458543, "grad_norm": 2.209122896194458, "learning_rate": 1.9875424311108962e-05, "loss": 0.5626, "step": 2883 }, { "epoch": 0.07886676875957122, "grad_norm": 5.749057292938232, "learning_rate": 1.9875284904223455e-05, "loss": 0.6132, "step": 2884 }, { "epoch": 0.07889411507328813, "grad_norm": 2.24983549118042, "learning_rate": 1.9875145419869114e-05, "loss": 0.6629, "step": 2885 }, { "epoch": 0.07892146138700504, "grad_norm": 1.7053561210632324, "learning_rate": 1.987500585804704e-05, "loss": 0.5984, "step": 2886 }, { "epoch": 0.07894880770072195, "grad_norm": 2.453495502471924, "learning_rate": 1.9874866218758322e-05, "loss": 0.6239, "step": 2887 }, { "epoch": 0.07897615401443886, "grad_norm": 2.0360219478607178, "learning_rate": 1.987472650200406e-05, "loss": 0.5369, "step": 2888 }, { "epoch": 0.07900350032815577, "grad_norm": 3.0492329597473145, "learning_rate": 1.9874586707785345e-05, "loss": 0.5349, "step": 2889 }, { "epoch": 0.07903084664187268, "grad_norm": 2.4078452587127686, "learning_rate": 1.9874446836103278e-05, "loss": 0.5873, "step": 2890 }, { "epoch": 0.07905819295558959, "grad_norm": 3.611048460006714, "learning_rate": 1.9874306886958953e-05, "loss": 0.602, "step": 2891 }, { "epoch": 0.0790855392693065, "grad_norm": 2.203306198120117, "learning_rate": 1.987416686035347e-05, "loss": 0.5743, "step": 2892 }, { "epoch": 0.07911288558302342, "grad_norm": 4.326272010803223, "learning_rate": 1.9874026756287927e-05, "loss": 0.5945, "step": 2893 }, { "epoch": 0.07914023189674033, "grad_norm": 3.1625852584838867, "learning_rate": 1.9873886574763423e-05, "loss": 0.5714, "step": 2894 }, { "epoch": 0.07916757821045722, "grad_norm": 2.168628692626953, "learning_rate": 1.9873746315781057e-05, "loss": 0.592, "step": 2895 }, { "epoch": 0.07919492452417413, "grad_norm": 2.7759640216827393, "learning_rate": 1.987360597934193e-05, "loss": 0.5813, "step": 2896 }, { "epoch": 0.07922227083789105, "grad_norm": 1.7897663116455078, "learning_rate": 1.9873465565447143e-05, "loss": 0.5754, "step": 2897 }, { "epoch": 0.07924961715160796, "grad_norm": 1.8640235662460327, "learning_rate": 1.98733250740978e-05, "loss": 0.587, "step": 2898 }, { "epoch": 0.07927696346532487, "grad_norm": 3.013239622116089, "learning_rate": 1.9873184505294995e-05, "loss": 0.5894, "step": 2899 }, { "epoch": 0.07930430977904178, "grad_norm": 1.3910958766937256, "learning_rate": 1.987304385903984e-05, "loss": 1.0403, "step": 2900 }, { "epoch": 0.07933165609275869, "grad_norm": 2.1366047859191895, "learning_rate": 1.987290313533343e-05, "loss": 0.6176, "step": 2901 }, { "epoch": 0.0793590024064756, "grad_norm": 2.495481014251709, "learning_rate": 1.987276233417688e-05, "loss": 0.612, "step": 2902 }, { "epoch": 0.07938634872019251, "grad_norm": 3.074289560317993, "learning_rate": 1.987262145557128e-05, "loss": 0.5893, "step": 2903 }, { "epoch": 0.07941369503390942, "grad_norm": 1.2484022378921509, "learning_rate": 1.987248049951775e-05, "loss": 1.0583, "step": 2904 }, { "epoch": 0.07944104134762633, "grad_norm": 2.2222933769226074, "learning_rate": 1.9872339466017388e-05, "loss": 0.6413, "step": 2905 }, { "epoch": 0.07946838766134325, "grad_norm": 1.5612419843673706, "learning_rate": 1.9872198355071296e-05, "loss": 0.5946, "step": 2906 }, { "epoch": 0.07949573397506016, "grad_norm": 1.6938420534133911, "learning_rate": 1.987205716668059e-05, "loss": 0.5898, "step": 2907 }, { "epoch": 0.07952308028877707, "grad_norm": 1.9279271364212036, "learning_rate": 1.9871915900846373e-05, "loss": 0.5944, "step": 2908 }, { "epoch": 0.07955042660249398, "grad_norm": 1.5482121706008911, "learning_rate": 1.9871774557569755e-05, "loss": 0.4965, "step": 2909 }, { "epoch": 0.07957777291621089, "grad_norm": 2.8394737243652344, "learning_rate": 1.9871633136851845e-05, "loss": 0.6214, "step": 2910 }, { "epoch": 0.0796051192299278, "grad_norm": 2.125999927520752, "learning_rate": 1.987149163869375e-05, "loss": 0.5405, "step": 2911 }, { "epoch": 0.07963246554364471, "grad_norm": 1.9484957456588745, "learning_rate": 1.9871350063096576e-05, "loss": 0.5442, "step": 2912 }, { "epoch": 0.07965981185736162, "grad_norm": 2.2884318828582764, "learning_rate": 1.9871208410061446e-05, "loss": 0.5988, "step": 2913 }, { "epoch": 0.07968715817107853, "grad_norm": 1.8431490659713745, "learning_rate": 1.9871066679589462e-05, "loss": 0.4814, "step": 2914 }, { "epoch": 0.07971450448479545, "grad_norm": 2.2328414916992188, "learning_rate": 1.9870924871681737e-05, "loss": 0.5886, "step": 2915 }, { "epoch": 0.07974185079851236, "grad_norm": 2.3539228439331055, "learning_rate": 1.9870782986339384e-05, "loss": 0.5841, "step": 2916 }, { "epoch": 0.07976919711222927, "grad_norm": 2.4890811443328857, "learning_rate": 1.9870641023563516e-05, "loss": 0.5463, "step": 2917 }, { "epoch": 0.07979654342594618, "grad_norm": 4.400850772857666, "learning_rate": 1.987049898335525e-05, "loss": 0.5912, "step": 2918 }, { "epoch": 0.07982388973966309, "grad_norm": 2.0472304821014404, "learning_rate": 1.987035686571569e-05, "loss": 0.625, "step": 2919 }, { "epoch": 0.07985123605338, "grad_norm": 2.281712770462036, "learning_rate": 1.9870214670645966e-05, "loss": 0.4838, "step": 2920 }, { "epoch": 0.07987858236709691, "grad_norm": 3.225263833999634, "learning_rate": 1.987007239814718e-05, "loss": 0.6091, "step": 2921 }, { "epoch": 0.07990592868081382, "grad_norm": 2.9588170051574707, "learning_rate": 1.986993004822046e-05, "loss": 0.562, "step": 2922 }, { "epoch": 0.07993327499453073, "grad_norm": 1.867847204208374, "learning_rate": 1.9869787620866912e-05, "loss": 0.6498, "step": 2923 }, { "epoch": 0.07996062130824765, "grad_norm": 1.7001053094863892, "learning_rate": 1.986964511608766e-05, "loss": 0.561, "step": 2924 }, { "epoch": 0.07998796762196456, "grad_norm": 2.440169334411621, "learning_rate": 1.986950253388382e-05, "loss": 0.6027, "step": 2925 }, { "epoch": 0.08001531393568147, "grad_norm": 1.6707148551940918, "learning_rate": 1.9869359874256506e-05, "loss": 0.4615, "step": 2926 }, { "epoch": 0.08004266024939838, "grad_norm": 2.1890604496002197, "learning_rate": 1.9869217137206845e-05, "loss": 0.5851, "step": 2927 }, { "epoch": 0.08007000656311529, "grad_norm": 3.280851125717163, "learning_rate": 1.986907432273595e-05, "loss": 0.6044, "step": 2928 }, { "epoch": 0.0800973528768322, "grad_norm": 1.906886339187622, "learning_rate": 1.9868931430844945e-05, "loss": 0.6017, "step": 2929 }, { "epoch": 0.08012469919054911, "grad_norm": 1.4978454113006592, "learning_rate": 1.9868788461534954e-05, "loss": 1.0217, "step": 2930 }, { "epoch": 0.08015204550426602, "grad_norm": 2.399925947189331, "learning_rate": 1.986864541480709e-05, "loss": 0.6061, "step": 2931 }, { "epoch": 0.08017939181798293, "grad_norm": 1.6730420589447021, "learning_rate": 1.9868502290662483e-05, "loss": 0.6151, "step": 2932 }, { "epoch": 0.08020673813169985, "grad_norm": 1.3382067680358887, "learning_rate": 1.986835908910225e-05, "loss": 1.0247, "step": 2933 }, { "epoch": 0.08023408444541676, "grad_norm": 2.3068630695343018, "learning_rate": 1.9868215810127524e-05, "loss": 0.5916, "step": 2934 }, { "epoch": 0.08026143075913367, "grad_norm": 2.299484968185425, "learning_rate": 1.9868072453739416e-05, "loss": 0.5751, "step": 2935 }, { "epoch": 0.08028877707285058, "grad_norm": 1.8352692127227783, "learning_rate": 1.986792901993906e-05, "loss": 0.6082, "step": 2936 }, { "epoch": 0.08031612338656749, "grad_norm": 2.1092350482940674, "learning_rate": 1.9867785508727573e-05, "loss": 0.5869, "step": 2937 }, { "epoch": 0.0803434697002844, "grad_norm": 1.3495991230010986, "learning_rate": 1.9867641920106088e-05, "loss": 0.9998, "step": 2938 }, { "epoch": 0.08037081601400131, "grad_norm": 1.6021711826324463, "learning_rate": 1.986749825407573e-05, "loss": 0.9883, "step": 2939 }, { "epoch": 0.08039816232771822, "grad_norm": 1.8094969987869263, "learning_rate": 1.9867354510637624e-05, "loss": 0.5822, "step": 2940 }, { "epoch": 0.08042550864143513, "grad_norm": 1.774761438369751, "learning_rate": 1.98672106897929e-05, "loss": 0.5692, "step": 2941 }, { "epoch": 0.08045285495515204, "grad_norm": 1.6760503053665161, "learning_rate": 1.9867066791542682e-05, "loss": 0.6078, "step": 2942 }, { "epoch": 0.08048020126886896, "grad_norm": 2.106731414794922, "learning_rate": 1.9866922815888106e-05, "loss": 0.5655, "step": 2943 }, { "epoch": 0.08050754758258587, "grad_norm": 1.5293850898742676, "learning_rate": 1.9866778762830297e-05, "loss": 0.5899, "step": 2944 }, { "epoch": 0.08053489389630278, "grad_norm": 2.2493207454681396, "learning_rate": 1.9866634632370384e-05, "loss": 0.6188, "step": 2945 }, { "epoch": 0.08056224021001969, "grad_norm": 1.7425328493118286, "learning_rate": 1.98664904245095e-05, "loss": 0.59, "step": 2946 }, { "epoch": 0.0805895865237366, "grad_norm": 1.6175544261932373, "learning_rate": 1.9866346139248773e-05, "loss": 0.5403, "step": 2947 }, { "epoch": 0.08061693283745351, "grad_norm": 1.721569299697876, "learning_rate": 1.986620177658934e-05, "loss": 0.5665, "step": 2948 }, { "epoch": 0.08064427915117042, "grad_norm": 1.5953865051269531, "learning_rate": 1.986605733653233e-05, "loss": 0.5564, "step": 2949 }, { "epoch": 0.08067162546488733, "grad_norm": 1.7783151865005493, "learning_rate": 1.9865912819078876e-05, "loss": 0.5801, "step": 2950 }, { "epoch": 0.08069897177860424, "grad_norm": 1.93881094455719, "learning_rate": 1.986576822423011e-05, "loss": 0.5868, "step": 2951 }, { "epoch": 0.08072631809232116, "grad_norm": 1.6857496500015259, "learning_rate": 1.986562355198717e-05, "loss": 0.5784, "step": 2952 }, { "epoch": 0.08075366440603807, "grad_norm": 2.5352048873901367, "learning_rate": 1.9865478802351195e-05, "loss": 0.573, "step": 2953 }, { "epoch": 0.08078101071975498, "grad_norm": 1.8694640398025513, "learning_rate": 1.986533397532331e-05, "loss": 0.5738, "step": 2954 }, { "epoch": 0.08080835703347189, "grad_norm": 2.2786545753479004, "learning_rate": 1.9865189070904658e-05, "loss": 0.5555, "step": 2955 }, { "epoch": 0.0808357033471888, "grad_norm": 2.1287941932678223, "learning_rate": 1.986504408909637e-05, "loss": 0.5668, "step": 2956 }, { "epoch": 0.08086304966090571, "grad_norm": 1.417326807975769, "learning_rate": 1.9864899029899593e-05, "loss": 0.5943, "step": 2957 }, { "epoch": 0.08089039597462262, "grad_norm": 2.6157543659210205, "learning_rate": 1.986475389331546e-05, "loss": 0.5687, "step": 2958 }, { "epoch": 0.08091774228833953, "grad_norm": 1.6035977602005005, "learning_rate": 1.9864608679345104e-05, "loss": 1.0367, "step": 2959 }, { "epoch": 0.08094508860205644, "grad_norm": 1.5263773202896118, "learning_rate": 1.986446338798967e-05, "loss": 0.4642, "step": 2960 }, { "epoch": 0.08097243491577336, "grad_norm": 2.0132639408111572, "learning_rate": 1.9864318019250296e-05, "loss": 0.5688, "step": 2961 }, { "epoch": 0.08099978122949027, "grad_norm": 1.9365392923355103, "learning_rate": 1.9864172573128125e-05, "loss": 0.6071, "step": 2962 }, { "epoch": 0.08102712754320718, "grad_norm": 1.7655563354492188, "learning_rate": 1.9864027049624298e-05, "loss": 0.5696, "step": 2963 }, { "epoch": 0.08105447385692409, "grad_norm": 1.6273113489151, "learning_rate": 1.9863881448739955e-05, "loss": 0.5953, "step": 2964 }, { "epoch": 0.081081820170641, "grad_norm": 5.456992149353027, "learning_rate": 1.9863735770476236e-05, "loss": 0.4721, "step": 2965 }, { "epoch": 0.08110916648435791, "grad_norm": 1.4463672637939453, "learning_rate": 1.9863590014834285e-05, "loss": 1.0563, "step": 2966 }, { "epoch": 0.08113651279807482, "grad_norm": 1.8734328746795654, "learning_rate": 1.9863444181815247e-05, "loss": 0.6105, "step": 2967 }, { "epoch": 0.08116385911179173, "grad_norm": 2.5294852256774902, "learning_rate": 1.9863298271420264e-05, "loss": 0.5918, "step": 2968 }, { "epoch": 0.08119120542550864, "grad_norm": 1.513641595840454, "learning_rate": 1.9863152283650482e-05, "loss": 0.5705, "step": 2969 }, { "epoch": 0.08121855173922556, "grad_norm": 1.6691157817840576, "learning_rate": 1.986300621850705e-05, "loss": 0.593, "step": 2970 }, { "epoch": 0.08124589805294247, "grad_norm": 2.230234384536743, "learning_rate": 1.9862860075991107e-05, "loss": 0.5971, "step": 2971 }, { "epoch": 0.08127324436665938, "grad_norm": 3.14027738571167, "learning_rate": 1.98627138561038e-05, "loss": 0.6408, "step": 2972 }, { "epoch": 0.08130059068037629, "grad_norm": 1.3864284753799438, "learning_rate": 1.986256755884628e-05, "loss": 1.0115, "step": 2973 }, { "epoch": 0.0813279369940932, "grad_norm": 2.7577199935913086, "learning_rate": 1.9862421184219697e-05, "loss": 0.605, "step": 2974 }, { "epoch": 0.08135528330781011, "grad_norm": 1.8836994171142578, "learning_rate": 1.986227473222519e-05, "loss": 0.6085, "step": 2975 }, { "epoch": 0.08138262962152702, "grad_norm": 1.4947220087051392, "learning_rate": 1.9862128202863914e-05, "loss": 0.5791, "step": 2976 }, { "epoch": 0.08140997593524393, "grad_norm": 1.526172399520874, "learning_rate": 1.986198159613702e-05, "loss": 0.6272, "step": 2977 }, { "epoch": 0.08143732224896084, "grad_norm": 1.8696287870407104, "learning_rate": 1.9861834912045657e-05, "loss": 0.6084, "step": 2978 }, { "epoch": 0.08146466856267776, "grad_norm": 2.0244531631469727, "learning_rate": 1.986168815059097e-05, "loss": 0.585, "step": 2979 }, { "epoch": 0.08149201487639467, "grad_norm": 1.627630352973938, "learning_rate": 1.9861541311774117e-05, "loss": 0.579, "step": 2980 }, { "epoch": 0.08151936119011158, "grad_norm": 1.829180359840393, "learning_rate": 1.986139439559625e-05, "loss": 0.5567, "step": 2981 }, { "epoch": 0.08154670750382849, "grad_norm": 1.5877676010131836, "learning_rate": 1.9861247402058518e-05, "loss": 0.576, "step": 2982 }, { "epoch": 0.0815740538175454, "grad_norm": 1.5788090229034424, "learning_rate": 1.9861100331162075e-05, "loss": 0.9677, "step": 2983 }, { "epoch": 0.08160140013126231, "grad_norm": 1.6853435039520264, "learning_rate": 1.986095318290807e-05, "loss": 0.5817, "step": 2984 }, { "epoch": 0.08162874644497922, "grad_norm": 1.5950442552566528, "learning_rate": 1.986080595729767e-05, "loss": 0.5731, "step": 2985 }, { "epoch": 0.08165609275869613, "grad_norm": 1.8134918212890625, "learning_rate": 1.9860658654332016e-05, "loss": 0.6968, "step": 2986 }, { "epoch": 0.08168343907241304, "grad_norm": 1.9774245023727417, "learning_rate": 1.9860511274012273e-05, "loss": 0.579, "step": 2987 }, { "epoch": 0.08171078538612996, "grad_norm": 1.514689564704895, "learning_rate": 1.9860363816339596e-05, "loss": 0.5845, "step": 2988 }, { "epoch": 0.08173813169984687, "grad_norm": 1.7102258205413818, "learning_rate": 1.9860216281315136e-05, "loss": 0.5981, "step": 2989 }, { "epoch": 0.08176547801356378, "grad_norm": 1.9867509603500366, "learning_rate": 1.9860068668940058e-05, "loss": 0.6136, "step": 2990 }, { "epoch": 0.08179282432728069, "grad_norm": 1.3680181503295898, "learning_rate": 1.9859920979215512e-05, "loss": 1.0118, "step": 2991 }, { "epoch": 0.0818201706409976, "grad_norm": 1.7533814907073975, "learning_rate": 1.9859773212142663e-05, "loss": 0.588, "step": 2992 }, { "epoch": 0.08184751695471451, "grad_norm": 1.88871431350708, "learning_rate": 1.9859625367722668e-05, "loss": 0.5974, "step": 2993 }, { "epoch": 0.08187486326843142, "grad_norm": 2.031702756881714, "learning_rate": 1.9859477445956686e-05, "loss": 0.6313, "step": 2994 }, { "epoch": 0.08190220958214833, "grad_norm": 1.7878423929214478, "learning_rate": 1.985932944684588e-05, "loss": 0.556, "step": 2995 }, { "epoch": 0.08192955589586524, "grad_norm": 2.0205917358398438, "learning_rate": 1.9859181370391407e-05, "loss": 0.5893, "step": 2996 }, { "epoch": 0.08195690220958214, "grad_norm": 2.045494556427002, "learning_rate": 1.9859033216594433e-05, "loss": 0.6492, "step": 2997 }, { "epoch": 0.08198424852329905, "grad_norm": 1.3681340217590332, "learning_rate": 1.9858884985456115e-05, "loss": 1.019, "step": 2998 }, { "epoch": 0.08201159483701596, "grad_norm": 6.530986309051514, "learning_rate": 1.985873667697762e-05, "loss": 0.5822, "step": 2999 }, { "epoch": 0.08203894115073287, "grad_norm": 1.3101427555084229, "learning_rate": 1.985858829116011e-05, "loss": 1.0194, "step": 3000 }, { "epoch": 0.08206628746444979, "grad_norm": 1.5448962450027466, "learning_rate": 1.985843982800475e-05, "loss": 0.5886, "step": 3001 }, { "epoch": 0.0820936337781667, "grad_norm": 1.4854042530059814, "learning_rate": 1.9858291287512704e-05, "loss": 0.5417, "step": 3002 }, { "epoch": 0.08212098009188361, "grad_norm": 1.5064102411270142, "learning_rate": 1.985814266968514e-05, "loss": 0.5604, "step": 3003 }, { "epoch": 0.08214832640560052, "grad_norm": 1.6099340915679932, "learning_rate": 1.9857993974523216e-05, "loss": 0.5964, "step": 3004 }, { "epoch": 0.08217567271931743, "grad_norm": 1.7750589847564697, "learning_rate": 1.9857845202028106e-05, "loss": 0.5918, "step": 3005 }, { "epoch": 0.08220301903303434, "grad_norm": 1.6670620441436768, "learning_rate": 1.9857696352200975e-05, "loss": 0.5651, "step": 3006 }, { "epoch": 0.08223036534675125, "grad_norm": 1.412664532661438, "learning_rate": 1.9857547425042992e-05, "loss": 0.5947, "step": 3007 }, { "epoch": 0.08225771166046816, "grad_norm": 2.0831353664398193, "learning_rate": 1.985739842055532e-05, "loss": 0.5544, "step": 3008 }, { "epoch": 0.08228505797418507, "grad_norm": 1.5859678983688354, "learning_rate": 1.9857249338739132e-05, "loss": 0.5923, "step": 3009 }, { "epoch": 0.08231240428790199, "grad_norm": 1.4417644739151, "learning_rate": 1.98571001795956e-05, "loss": 0.5545, "step": 3010 }, { "epoch": 0.0823397506016189, "grad_norm": 1.9649711847305298, "learning_rate": 1.9856950943125887e-05, "loss": 0.5801, "step": 3011 }, { "epoch": 0.08236709691533581, "grad_norm": 1.7094433307647705, "learning_rate": 1.9856801629331167e-05, "loss": 0.5951, "step": 3012 }, { "epoch": 0.08239444322905272, "grad_norm": 1.5252150297164917, "learning_rate": 1.985665223821262e-05, "loss": 0.9784, "step": 3013 }, { "epoch": 0.08242178954276963, "grad_norm": 1.8343216180801392, "learning_rate": 1.98565027697714e-05, "loss": 0.6508, "step": 3014 }, { "epoch": 0.08244913585648654, "grad_norm": 1.711763620376587, "learning_rate": 1.9856353224008695e-05, "loss": 0.6009, "step": 3015 }, { "epoch": 0.08247648217020345, "grad_norm": 1.8256421089172363, "learning_rate": 1.9856203600925674e-05, "loss": 0.5575, "step": 3016 }, { "epoch": 0.08250382848392036, "grad_norm": 2.6950714588165283, "learning_rate": 1.9856053900523507e-05, "loss": 0.4648, "step": 3017 }, { "epoch": 0.08253117479763727, "grad_norm": 1.741756796836853, "learning_rate": 1.9855904122803374e-05, "loss": 0.5742, "step": 3018 }, { "epoch": 0.08255852111135419, "grad_norm": 2.0733022689819336, "learning_rate": 1.9855754267766442e-05, "loss": 0.6015, "step": 3019 }, { "epoch": 0.0825858674250711, "grad_norm": 1.2511910200119019, "learning_rate": 1.9855604335413894e-05, "loss": 0.9778, "step": 3020 }, { "epoch": 0.08261321373878801, "grad_norm": 1.5205471515655518, "learning_rate": 1.9855454325746907e-05, "loss": 0.5935, "step": 3021 }, { "epoch": 0.08264056005250492, "grad_norm": 2.029444456100464, "learning_rate": 1.985530423876665e-05, "loss": 0.5435, "step": 3022 }, { "epoch": 0.08266790636622183, "grad_norm": 1.5061039924621582, "learning_rate": 1.9855154074474305e-05, "loss": 0.5629, "step": 3023 }, { "epoch": 0.08269525267993874, "grad_norm": 2.5331554412841797, "learning_rate": 1.9855003832871048e-05, "loss": 0.6024, "step": 3024 }, { "epoch": 0.08272259899365565, "grad_norm": 1.878641963005066, "learning_rate": 1.9854853513958064e-05, "loss": 0.6114, "step": 3025 }, { "epoch": 0.08274994530737256, "grad_norm": 1.5970046520233154, "learning_rate": 1.9854703117736526e-05, "loss": 0.4723, "step": 3026 }, { "epoch": 0.08277729162108947, "grad_norm": 1.526861548423767, "learning_rate": 1.9854552644207615e-05, "loss": 0.4587, "step": 3027 }, { "epoch": 0.08280463793480639, "grad_norm": 2.413384199142456, "learning_rate": 1.9854402093372514e-05, "loss": 0.5987, "step": 3028 }, { "epoch": 0.0828319842485233, "grad_norm": 1.9812284708023071, "learning_rate": 1.9854251465232396e-05, "loss": 0.5404, "step": 3029 }, { "epoch": 0.08285933056224021, "grad_norm": 1.4555689096450806, "learning_rate": 1.9854100759788456e-05, "loss": 0.6486, "step": 3030 }, { "epoch": 0.08288667687595712, "grad_norm": 2.0046329498291016, "learning_rate": 1.9853949977041863e-05, "loss": 0.6391, "step": 3031 }, { "epoch": 0.08291402318967403, "grad_norm": 1.6736003160476685, "learning_rate": 1.9853799116993807e-05, "loss": 0.6006, "step": 3032 }, { "epoch": 0.08294136950339094, "grad_norm": 1.8828712701797485, "learning_rate": 1.985364817964547e-05, "loss": 0.579, "step": 3033 }, { "epoch": 0.08296871581710785, "grad_norm": 1.6967257261276245, "learning_rate": 1.9853497164998038e-05, "loss": 0.5921, "step": 3034 }, { "epoch": 0.08299606213082476, "grad_norm": 1.3962980508804321, "learning_rate": 1.985334607305269e-05, "loss": 1.0074, "step": 3035 }, { "epoch": 0.08302340844454167, "grad_norm": 1.6295809745788574, "learning_rate": 1.9853194903810618e-05, "loss": 0.4082, "step": 3036 }, { "epoch": 0.08305075475825859, "grad_norm": 1.9514458179473877, "learning_rate": 1.9853043657273002e-05, "loss": 0.585, "step": 3037 }, { "epoch": 0.0830781010719755, "grad_norm": 1.9191621541976929, "learning_rate": 1.9852892333441037e-05, "loss": 0.5236, "step": 3038 }, { "epoch": 0.08310544738569241, "grad_norm": 2.1589157581329346, "learning_rate": 1.98527409323159e-05, "loss": 0.6097, "step": 3039 }, { "epoch": 0.08313279369940932, "grad_norm": 1.5548986196517944, "learning_rate": 1.9852589453898787e-05, "loss": 0.5926, "step": 3040 }, { "epoch": 0.08316014001312623, "grad_norm": 1.5350741147994995, "learning_rate": 1.985243789819088e-05, "loss": 0.9638, "step": 3041 }, { "epoch": 0.08318748632684314, "grad_norm": 1.635574460029602, "learning_rate": 1.985228626519337e-05, "loss": 0.5806, "step": 3042 }, { "epoch": 0.08321483264056005, "grad_norm": 1.8287861347198486, "learning_rate": 1.9852134554907444e-05, "loss": 0.5594, "step": 3043 }, { "epoch": 0.08324217895427696, "grad_norm": 1.7824996709823608, "learning_rate": 1.98519827673343e-05, "loss": 0.5901, "step": 3044 }, { "epoch": 0.08326952526799387, "grad_norm": 1.3747971057891846, "learning_rate": 1.985183090247512e-05, "loss": 1.0003, "step": 3045 }, { "epoch": 0.08329687158171079, "grad_norm": 1.5120010375976562, "learning_rate": 1.9851678960331103e-05, "loss": 0.6098, "step": 3046 }, { "epoch": 0.0833242178954277, "grad_norm": 1.952392816543579, "learning_rate": 1.9851526940903435e-05, "loss": 0.6611, "step": 3047 }, { "epoch": 0.08335156420914461, "grad_norm": 1.7103757858276367, "learning_rate": 1.985137484419331e-05, "loss": 0.5733, "step": 3048 }, { "epoch": 0.08337891052286152, "grad_norm": 2.0011773109436035, "learning_rate": 1.9851222670201922e-05, "loss": 0.4476, "step": 3049 }, { "epoch": 0.08340625683657843, "grad_norm": 1.557023525238037, "learning_rate": 1.9851070418930466e-05, "loss": 0.5749, "step": 3050 }, { "epoch": 0.08343360315029534, "grad_norm": 1.4316303730010986, "learning_rate": 1.9850918090380132e-05, "loss": 0.596, "step": 3051 }, { "epoch": 0.08346094946401225, "grad_norm": 1.5748039484024048, "learning_rate": 1.985076568455212e-05, "loss": 0.6708, "step": 3052 }, { "epoch": 0.08348829577772916, "grad_norm": 1.4676997661590576, "learning_rate": 1.9850613201447625e-05, "loss": 0.6015, "step": 3053 }, { "epoch": 0.08351564209144607, "grad_norm": 1.4170162677764893, "learning_rate": 1.9850460641067836e-05, "loss": 0.5815, "step": 3054 }, { "epoch": 0.08354298840516299, "grad_norm": 2.0031158924102783, "learning_rate": 1.985030800341396e-05, "loss": 0.5776, "step": 3055 }, { "epoch": 0.0835703347188799, "grad_norm": 2.0420689582824707, "learning_rate": 1.985015528848719e-05, "loss": 0.6053, "step": 3056 }, { "epoch": 0.08359768103259681, "grad_norm": 1.6631416082382202, "learning_rate": 1.9850002496288723e-05, "loss": 0.6655, "step": 3057 }, { "epoch": 0.08362502734631372, "grad_norm": 1.8969835042953491, "learning_rate": 1.9849849626819758e-05, "loss": 0.5643, "step": 3058 }, { "epoch": 0.08365237366003063, "grad_norm": 1.9021780490875244, "learning_rate": 1.9849696680081496e-05, "loss": 0.5798, "step": 3059 }, { "epoch": 0.08367971997374754, "grad_norm": 1.9860305786132812, "learning_rate": 1.9849543656075134e-05, "loss": 0.5995, "step": 3060 }, { "epoch": 0.08370706628746445, "grad_norm": 1.933053970336914, "learning_rate": 1.9849390554801874e-05, "loss": 0.5529, "step": 3061 }, { "epoch": 0.08373441260118136, "grad_norm": 4.898226261138916, "learning_rate": 1.984923737626292e-05, "loss": 0.5412, "step": 3062 }, { "epoch": 0.08376175891489827, "grad_norm": 1.6653978824615479, "learning_rate": 1.9849084120459468e-05, "loss": 0.5799, "step": 3063 }, { "epoch": 0.08378910522861518, "grad_norm": 1.53117835521698, "learning_rate": 1.984893078739272e-05, "loss": 0.6062, "step": 3064 }, { "epoch": 0.0838164515423321, "grad_norm": 1.5310604572296143, "learning_rate": 1.9848777377063885e-05, "loss": 0.5706, "step": 3065 }, { "epoch": 0.08384379785604901, "grad_norm": 1.62632155418396, "learning_rate": 1.9848623889474165e-05, "loss": 0.5331, "step": 3066 }, { "epoch": 0.08387114416976592, "grad_norm": 1.6818686723709106, "learning_rate": 1.9848470324624756e-05, "loss": 0.6048, "step": 3067 }, { "epoch": 0.08389849048348283, "grad_norm": 1.8728456497192383, "learning_rate": 1.9848316682516873e-05, "loss": 0.5857, "step": 3068 }, { "epoch": 0.08392583679719974, "grad_norm": 1.5856308937072754, "learning_rate": 1.9848162963151722e-05, "loss": 0.6293, "step": 3069 }, { "epoch": 0.08395318311091665, "grad_norm": 1.6949310302734375, "learning_rate": 1.9848009166530494e-05, "loss": 0.5814, "step": 3070 }, { "epoch": 0.08398052942463356, "grad_norm": 1.3914999961853027, "learning_rate": 1.9847855292654414e-05, "loss": 0.6191, "step": 3071 }, { "epoch": 0.08400787573835047, "grad_norm": 2.0826404094696045, "learning_rate": 1.9847701341524676e-05, "loss": 0.5748, "step": 3072 }, { "epoch": 0.08403522205206738, "grad_norm": 1.5868732929229736, "learning_rate": 1.9847547313142495e-05, "loss": 0.5916, "step": 3073 }, { "epoch": 0.0840625683657843, "grad_norm": 1.9852540493011475, "learning_rate": 1.9847393207509076e-05, "loss": 0.5682, "step": 3074 }, { "epoch": 0.08408991467950121, "grad_norm": 2.188372850418091, "learning_rate": 1.9847239024625625e-05, "loss": 0.4669, "step": 3075 }, { "epoch": 0.08411726099321812, "grad_norm": 1.7820180654525757, "learning_rate": 1.9847084764493362e-05, "loss": 0.5793, "step": 3076 }, { "epoch": 0.08414460730693503, "grad_norm": 1.3856462240219116, "learning_rate": 1.9846930427113484e-05, "loss": 0.9833, "step": 3077 }, { "epoch": 0.08417195362065194, "grad_norm": 1.5295218229293823, "learning_rate": 1.984677601248721e-05, "loss": 0.5457, "step": 3078 }, { "epoch": 0.08419929993436885, "grad_norm": 1.7531455755233765, "learning_rate": 1.984662152061575e-05, "loss": 0.5936, "step": 3079 }, { "epoch": 0.08422664624808576, "grad_norm": 1.4880372285842896, "learning_rate": 1.9846466951500313e-05, "loss": 0.5568, "step": 3080 }, { "epoch": 0.08425399256180267, "grad_norm": 1.6325607299804688, "learning_rate": 1.9846312305142118e-05, "loss": 0.5545, "step": 3081 }, { "epoch": 0.08428133887551958, "grad_norm": 1.6441152095794678, "learning_rate": 1.984615758154237e-05, "loss": 0.5798, "step": 3082 }, { "epoch": 0.0843086851892365, "grad_norm": 1.2117996215820312, "learning_rate": 1.9846002780702287e-05, "loss": 0.605, "step": 3083 }, { "epoch": 0.0843360315029534, "grad_norm": 1.8906370401382446, "learning_rate": 1.9845847902623087e-05, "loss": 0.5488, "step": 3084 }, { "epoch": 0.08436337781667032, "grad_norm": 1.32269287109375, "learning_rate": 1.9845692947305978e-05, "loss": 0.5804, "step": 3085 }, { "epoch": 0.08439072413038723, "grad_norm": 1.2484194040298462, "learning_rate": 1.984553791475218e-05, "loss": 0.9706, "step": 3086 }, { "epoch": 0.08441807044410414, "grad_norm": 1.4521726369857788, "learning_rate": 1.9845382804962903e-05, "loss": 0.5838, "step": 3087 }, { "epoch": 0.08444541675782105, "grad_norm": 1.8795682191848755, "learning_rate": 1.9845227617939376e-05, "loss": 0.5637, "step": 3088 }, { "epoch": 0.08447276307153796, "grad_norm": 1.851592779159546, "learning_rate": 1.9845072353682803e-05, "loss": 0.6332, "step": 3089 }, { "epoch": 0.08450010938525487, "grad_norm": 1.4683698415756226, "learning_rate": 1.984491701219441e-05, "loss": 1.0199, "step": 3090 }, { "epoch": 0.08452745569897178, "grad_norm": 1.3586747646331787, "learning_rate": 1.9844761593475417e-05, "loss": 0.5588, "step": 3091 }, { "epoch": 0.0845548020126887, "grad_norm": 1.6932374238967896, "learning_rate": 1.9844606097527034e-05, "loss": 0.6163, "step": 3092 }, { "epoch": 0.0845821483264056, "grad_norm": 1.617069959640503, "learning_rate": 1.9844450524350494e-05, "loss": 0.5813, "step": 3093 }, { "epoch": 0.08460949464012252, "grad_norm": 1.3916257619857788, "learning_rate": 1.9844294873947002e-05, "loss": 0.435, "step": 3094 }, { "epoch": 0.08463684095383943, "grad_norm": 2.2578914165496826, "learning_rate": 1.9844139146317795e-05, "loss": 0.5806, "step": 3095 }, { "epoch": 0.08466418726755634, "grad_norm": 1.6269493103027344, "learning_rate": 1.9843983341464082e-05, "loss": 0.5318, "step": 3096 }, { "epoch": 0.08469153358127325, "grad_norm": 1.6527382135391235, "learning_rate": 1.9843827459387088e-05, "loss": 0.6026, "step": 3097 }, { "epoch": 0.08471887989499016, "grad_norm": 1.6649502515792847, "learning_rate": 1.984367150008804e-05, "loss": 0.6034, "step": 3098 }, { "epoch": 0.08474622620870706, "grad_norm": 1.9074915647506714, "learning_rate": 1.984351546356816e-05, "loss": 0.5725, "step": 3099 }, { "epoch": 0.08477357252242397, "grad_norm": 1.3991761207580566, "learning_rate": 1.9843359349828675e-05, "loss": 0.5874, "step": 3100 }, { "epoch": 0.08480091883614088, "grad_norm": 1.6190481185913086, "learning_rate": 1.9843203158870802e-05, "loss": 0.5775, "step": 3101 }, { "epoch": 0.08482826514985779, "grad_norm": 1.783976435661316, "learning_rate": 1.984304689069577e-05, "loss": 0.5489, "step": 3102 }, { "epoch": 0.0848556114635747, "grad_norm": 1.6610445976257324, "learning_rate": 1.9842890545304807e-05, "loss": 0.6038, "step": 3103 }, { "epoch": 0.08488295777729161, "grad_norm": 2.0794224739074707, "learning_rate": 1.9842734122699135e-05, "loss": 0.532, "step": 3104 }, { "epoch": 0.08491030409100853, "grad_norm": 1.475127100944519, "learning_rate": 1.9842577622879985e-05, "loss": 0.5683, "step": 3105 }, { "epoch": 0.08493765040472544, "grad_norm": 1.797182321548462, "learning_rate": 1.9842421045848585e-05, "loss": 0.5591, "step": 3106 }, { "epoch": 0.08496499671844235, "grad_norm": 1.70051109790802, "learning_rate": 1.984226439160616e-05, "loss": 0.5791, "step": 3107 }, { "epoch": 0.08499234303215926, "grad_norm": 2.022657632827759, "learning_rate": 1.9842107660153942e-05, "loss": 0.6775, "step": 3108 }, { "epoch": 0.08501968934587617, "grad_norm": 1.9667302370071411, "learning_rate": 1.984195085149316e-05, "loss": 0.43, "step": 3109 }, { "epoch": 0.08504703565959308, "grad_norm": 1.7352269887924194, "learning_rate": 1.984179396562504e-05, "loss": 0.6549, "step": 3110 }, { "epoch": 0.08507438197330999, "grad_norm": 1.8440649509429932, "learning_rate": 1.9841637002550817e-05, "loss": 0.6018, "step": 3111 }, { "epoch": 0.0851017282870269, "grad_norm": 2.0790529251098633, "learning_rate": 1.9841479962271724e-05, "loss": 0.567, "step": 3112 }, { "epoch": 0.08512907460074381, "grad_norm": 1.7938820123672485, "learning_rate": 1.984132284478899e-05, "loss": 0.6025, "step": 3113 }, { "epoch": 0.08515642091446073, "grad_norm": 1.5641499757766724, "learning_rate": 1.9841165650103845e-05, "loss": 0.5753, "step": 3114 }, { "epoch": 0.08518376722817764, "grad_norm": 1.692751407623291, "learning_rate": 1.9841008378217528e-05, "loss": 0.5886, "step": 3115 }, { "epoch": 0.08521111354189455, "grad_norm": 1.8732860088348389, "learning_rate": 1.984085102913127e-05, "loss": 0.6274, "step": 3116 }, { "epoch": 0.08523845985561146, "grad_norm": 1.9219976663589478, "learning_rate": 1.98406936028463e-05, "loss": 0.5783, "step": 3117 }, { "epoch": 0.08526580616932837, "grad_norm": 1.7890435457229614, "learning_rate": 1.9840536099363862e-05, "loss": 0.5576, "step": 3118 }, { "epoch": 0.08529315248304528, "grad_norm": 1.506083607673645, "learning_rate": 1.984037851868519e-05, "loss": 0.486, "step": 3119 }, { "epoch": 0.08532049879676219, "grad_norm": 2.1118619441986084, "learning_rate": 1.9840220860811515e-05, "loss": 0.5874, "step": 3120 }, { "epoch": 0.0853478451104791, "grad_norm": 2.1478729248046875, "learning_rate": 1.9840063125744076e-05, "loss": 0.5723, "step": 3121 }, { "epoch": 0.08537519142419601, "grad_norm": 1.7242411375045776, "learning_rate": 1.9839905313484112e-05, "loss": 0.5472, "step": 3122 }, { "epoch": 0.08540253773791293, "grad_norm": 1.6664553880691528, "learning_rate": 1.983974742403286e-05, "loss": 0.5899, "step": 3123 }, { "epoch": 0.08542988405162984, "grad_norm": 1.6066867113113403, "learning_rate": 1.983958945739156e-05, "loss": 0.5935, "step": 3124 }, { "epoch": 0.08545723036534675, "grad_norm": 1.7713261842727661, "learning_rate": 1.9839431413561448e-05, "loss": 0.5638, "step": 3125 }, { "epoch": 0.08548457667906366, "grad_norm": 1.5641295909881592, "learning_rate": 1.9839273292543765e-05, "loss": 0.5869, "step": 3126 }, { "epoch": 0.08551192299278057, "grad_norm": 1.7251383066177368, "learning_rate": 1.983911509433975e-05, "loss": 0.5881, "step": 3127 }, { "epoch": 0.08553926930649748, "grad_norm": 1.7230654954910278, "learning_rate": 1.983895681895065e-05, "loss": 0.5926, "step": 3128 }, { "epoch": 0.08556661562021439, "grad_norm": 1.981380820274353, "learning_rate": 1.98387984663777e-05, "loss": 0.5819, "step": 3129 }, { "epoch": 0.0855939619339313, "grad_norm": 1.5181907415390015, "learning_rate": 1.9838640036622147e-05, "loss": 0.5739, "step": 3130 }, { "epoch": 0.08562130824764821, "grad_norm": 1.8077188730239868, "learning_rate": 1.9838481529685235e-05, "loss": 0.5686, "step": 3131 }, { "epoch": 0.08564865456136513, "grad_norm": 1.8927351236343384, "learning_rate": 1.9838322945568198e-05, "loss": 0.5797, "step": 3132 }, { "epoch": 0.08567600087508204, "grad_norm": 1.4219443798065186, "learning_rate": 1.983816428427229e-05, "loss": 0.5753, "step": 3133 }, { "epoch": 0.08570334718879895, "grad_norm": 1.6689714193344116, "learning_rate": 1.983800554579875e-05, "loss": 0.6073, "step": 3134 }, { "epoch": 0.08573069350251586, "grad_norm": 1.7396221160888672, "learning_rate": 1.9837846730148825e-05, "loss": 0.436, "step": 3135 }, { "epoch": 0.08575803981623277, "grad_norm": 1.5628337860107422, "learning_rate": 1.9837687837323764e-05, "loss": 0.4785, "step": 3136 }, { "epoch": 0.08578538612994968, "grad_norm": 1.5862234830856323, "learning_rate": 1.983752886732481e-05, "loss": 0.981, "step": 3137 }, { "epoch": 0.08581273244366659, "grad_norm": 1.6914416551589966, "learning_rate": 1.9837369820153206e-05, "loss": 0.5872, "step": 3138 }, { "epoch": 0.0858400787573835, "grad_norm": 1.4812105894088745, "learning_rate": 1.9837210695810208e-05, "loss": 0.5542, "step": 3139 }, { "epoch": 0.08586742507110041, "grad_norm": 2.4482412338256836, "learning_rate": 1.9837051494297057e-05, "loss": 0.5483, "step": 3140 }, { "epoch": 0.08589477138481733, "grad_norm": 1.3630365133285522, "learning_rate": 1.983689221561501e-05, "loss": 0.5846, "step": 3141 }, { "epoch": 0.08592211769853424, "grad_norm": 1.7696470022201538, "learning_rate": 1.983673285976531e-05, "loss": 0.5995, "step": 3142 }, { "epoch": 0.08594946401225115, "grad_norm": 1.8297330141067505, "learning_rate": 1.9836573426749207e-05, "loss": 0.6132, "step": 3143 }, { "epoch": 0.08597681032596806, "grad_norm": 1.6799007654190063, "learning_rate": 1.9836413916567956e-05, "loss": 0.5902, "step": 3144 }, { "epoch": 0.08600415663968497, "grad_norm": 1.4912340641021729, "learning_rate": 1.9836254329222807e-05, "loss": 0.9575, "step": 3145 }, { "epoch": 0.08603150295340188, "grad_norm": 1.7585164308547974, "learning_rate": 1.983609466471501e-05, "loss": 0.5544, "step": 3146 }, { "epoch": 0.08605884926711879, "grad_norm": 1.5180872678756714, "learning_rate": 1.9835934923045817e-05, "loss": 0.5607, "step": 3147 }, { "epoch": 0.0860861955808357, "grad_norm": 1.8428953886032104, "learning_rate": 1.9835775104216483e-05, "loss": 0.5812, "step": 3148 }, { "epoch": 0.08611354189455261, "grad_norm": 2.1439931392669678, "learning_rate": 1.9835615208228262e-05, "loss": 0.5798, "step": 3149 }, { "epoch": 0.08614088820826953, "grad_norm": 1.5872914791107178, "learning_rate": 1.9835455235082407e-05, "loss": 0.5461, "step": 3150 }, { "epoch": 0.08616823452198644, "grad_norm": 1.7238842248916626, "learning_rate": 1.9835295184780176e-05, "loss": 0.5573, "step": 3151 }, { "epoch": 0.08619558083570335, "grad_norm": 1.972470760345459, "learning_rate": 1.9835135057322817e-05, "loss": 0.528, "step": 3152 }, { "epoch": 0.08622292714942026, "grad_norm": 1.5196276903152466, "learning_rate": 1.9834974852711596e-05, "loss": 0.9583, "step": 3153 }, { "epoch": 0.08625027346313717, "grad_norm": 2.4201302528381348, "learning_rate": 1.9834814570947766e-05, "loss": 0.6912, "step": 3154 }, { "epoch": 0.08627761977685408, "grad_norm": 1.797087550163269, "learning_rate": 1.983465421203258e-05, "loss": 0.5774, "step": 3155 }, { "epoch": 0.08630496609057099, "grad_norm": 1.7865935564041138, "learning_rate": 1.9834493775967302e-05, "loss": 0.5659, "step": 3156 }, { "epoch": 0.0863323124042879, "grad_norm": 1.5209710597991943, "learning_rate": 1.9834333262753186e-05, "loss": 0.5515, "step": 3157 }, { "epoch": 0.08635965871800481, "grad_norm": 1.5218783617019653, "learning_rate": 1.9834172672391495e-05, "loss": 0.575, "step": 3158 }, { "epoch": 0.08638700503172173, "grad_norm": 1.7778648138046265, "learning_rate": 1.9834012004883483e-05, "loss": 0.5468, "step": 3159 }, { "epoch": 0.08641435134543864, "grad_norm": 1.7762200832366943, "learning_rate": 1.9833851260230418e-05, "loss": 0.601, "step": 3160 }, { "epoch": 0.08644169765915555, "grad_norm": 1.3857426643371582, "learning_rate": 1.9833690438433556e-05, "loss": 0.5707, "step": 3161 }, { "epoch": 0.08646904397287246, "grad_norm": 2.3015167713165283, "learning_rate": 1.983352953949416e-05, "loss": 0.4426, "step": 3162 }, { "epoch": 0.08649639028658937, "grad_norm": 1.5658525228500366, "learning_rate": 1.9833368563413497e-05, "loss": 0.5766, "step": 3163 }, { "epoch": 0.08652373660030628, "grad_norm": 1.7981152534484863, "learning_rate": 1.9833207510192817e-05, "loss": 0.5738, "step": 3164 }, { "epoch": 0.08655108291402319, "grad_norm": 1.7999004125595093, "learning_rate": 1.9833046379833396e-05, "loss": 0.5717, "step": 3165 }, { "epoch": 0.0865784292277401, "grad_norm": 2.2058205604553223, "learning_rate": 1.9832885172336493e-05, "loss": 0.5973, "step": 3166 }, { "epoch": 0.08660577554145701, "grad_norm": 2.7600414752960205, "learning_rate": 1.9832723887703373e-05, "loss": 0.5797, "step": 3167 }, { "epoch": 0.08663312185517393, "grad_norm": 1.5702378749847412, "learning_rate": 1.98325625259353e-05, "loss": 0.5783, "step": 3168 }, { "epoch": 0.08666046816889084, "grad_norm": 1.5327091217041016, "learning_rate": 1.9832401087033543e-05, "loss": 0.585, "step": 3169 }, { "epoch": 0.08668781448260775, "grad_norm": 1.6551485061645508, "learning_rate": 1.9832239570999367e-05, "loss": 0.5798, "step": 3170 }, { "epoch": 0.08671516079632466, "grad_norm": 2.206583261489868, "learning_rate": 1.9832077977834035e-05, "loss": 0.5358, "step": 3171 }, { "epoch": 0.08674250711004157, "grad_norm": 1.8192505836486816, "learning_rate": 1.9831916307538818e-05, "loss": 0.5888, "step": 3172 }, { "epoch": 0.08676985342375848, "grad_norm": 2.0162322521209717, "learning_rate": 1.9831754560114986e-05, "loss": 0.9377, "step": 3173 }, { "epoch": 0.08679719973747539, "grad_norm": 1.6123944520950317, "learning_rate": 1.983159273556381e-05, "loss": 0.584, "step": 3174 }, { "epoch": 0.0868245460511923, "grad_norm": 1.7957485914230347, "learning_rate": 1.9831430833886552e-05, "loss": 0.5345, "step": 3175 }, { "epoch": 0.08685189236490921, "grad_norm": 1.7193164825439453, "learning_rate": 1.9831268855084487e-05, "loss": 0.5971, "step": 3176 }, { "epoch": 0.08687923867862613, "grad_norm": 1.592602014541626, "learning_rate": 1.9831106799158886e-05, "loss": 0.6324, "step": 3177 }, { "epoch": 0.08690658499234304, "grad_norm": 1.588688850402832, "learning_rate": 1.9830944666111017e-05, "loss": 0.5502, "step": 3178 }, { "epoch": 0.08693393130605995, "grad_norm": 1.6564737558364868, "learning_rate": 1.9830782455942153e-05, "loss": 1.0062, "step": 3179 }, { "epoch": 0.08696127761977686, "grad_norm": 1.5613131523132324, "learning_rate": 1.9830620168653568e-05, "loss": 0.5854, "step": 3180 }, { "epoch": 0.08698862393349377, "grad_norm": 1.9938175678253174, "learning_rate": 1.9830457804246533e-05, "loss": 0.6451, "step": 3181 }, { "epoch": 0.08701597024721068, "grad_norm": 1.8037453889846802, "learning_rate": 1.9830295362722326e-05, "loss": 0.6103, "step": 3182 }, { "epoch": 0.08704331656092759, "grad_norm": 1.6360385417938232, "learning_rate": 1.9830132844082216e-05, "loss": 0.5622, "step": 3183 }, { "epoch": 0.0870706628746445, "grad_norm": 1.5385112762451172, "learning_rate": 1.982997024832748e-05, "loss": 0.5476, "step": 3184 }, { "epoch": 0.08709800918836141, "grad_norm": 1.6154237985610962, "learning_rate": 1.98298075754594e-05, "loss": 0.5816, "step": 3185 }, { "epoch": 0.08712535550207832, "grad_norm": 1.6556594371795654, "learning_rate": 1.982964482547924e-05, "loss": 0.5969, "step": 3186 }, { "epoch": 0.08715270181579524, "grad_norm": 1.871180772781372, "learning_rate": 1.9829481998388284e-05, "loss": 0.5402, "step": 3187 }, { "epoch": 0.08718004812951215, "grad_norm": 1.964542269706726, "learning_rate": 1.9829319094187806e-05, "loss": 0.5845, "step": 3188 }, { "epoch": 0.08720739444322906, "grad_norm": 1.7185478210449219, "learning_rate": 1.982915611287909e-05, "loss": 0.5961, "step": 3189 }, { "epoch": 0.08723474075694597, "grad_norm": 1.7084909677505493, "learning_rate": 1.9828993054463406e-05, "loss": 0.5914, "step": 3190 }, { "epoch": 0.08726208707066288, "grad_norm": 1.3632382154464722, "learning_rate": 1.9828829918942038e-05, "loss": 0.424, "step": 3191 }, { "epoch": 0.08728943338437979, "grad_norm": 1.51784348487854, "learning_rate": 1.9828666706316267e-05, "loss": 0.5412, "step": 3192 }, { "epoch": 0.0873167796980967, "grad_norm": 2.175973653793335, "learning_rate": 1.9828503416587375e-05, "loss": 0.5654, "step": 3193 }, { "epoch": 0.08734412601181361, "grad_norm": 1.7136211395263672, "learning_rate": 1.9828340049756637e-05, "loss": 0.5703, "step": 3194 }, { "epoch": 0.08737147232553052, "grad_norm": 1.6762126684188843, "learning_rate": 1.9828176605825335e-05, "loss": 0.6457, "step": 3195 }, { "epoch": 0.08739881863924744, "grad_norm": 1.6299575567245483, "learning_rate": 1.9828013084794753e-05, "loss": 0.5821, "step": 3196 }, { "epoch": 0.08742616495296435, "grad_norm": 2.055584669113159, "learning_rate": 1.982784948666618e-05, "loss": 0.5948, "step": 3197 }, { "epoch": 0.08745351126668126, "grad_norm": 1.6550540924072266, "learning_rate": 1.982768581144089e-05, "loss": 0.5882, "step": 3198 }, { "epoch": 0.08748085758039817, "grad_norm": 1.6624544858932495, "learning_rate": 1.982752205912017e-05, "loss": 0.609, "step": 3199 }, { "epoch": 0.08750820389411507, "grad_norm": 1.5853103399276733, "learning_rate": 1.9827358229705304e-05, "loss": 0.5251, "step": 3200 }, { "epoch": 0.08753555020783198, "grad_norm": 3.679980754852295, "learning_rate": 1.982719432319758e-05, "loss": 1.028, "step": 3201 }, { "epoch": 0.08756289652154889, "grad_norm": 1.4853904247283936, "learning_rate": 1.9827030339598284e-05, "loss": 0.5644, "step": 3202 }, { "epoch": 0.0875902428352658, "grad_norm": 2.119246244430542, "learning_rate": 1.98268662789087e-05, "loss": 0.6284, "step": 3203 }, { "epoch": 0.08761758914898271, "grad_norm": 1.444206953048706, "learning_rate": 1.9826702141130112e-05, "loss": 0.5663, "step": 3204 }, { "epoch": 0.08764493546269962, "grad_norm": 1.459197998046875, "learning_rate": 1.982653792626381e-05, "loss": 0.5789, "step": 3205 }, { "epoch": 0.08767228177641653, "grad_norm": 1.5727934837341309, "learning_rate": 1.982637363431109e-05, "loss": 0.5791, "step": 3206 }, { "epoch": 0.08769962809013344, "grad_norm": 1.738446831703186, "learning_rate": 1.982620926527323e-05, "loss": 0.54, "step": 3207 }, { "epoch": 0.08772697440385036, "grad_norm": 1.4406414031982422, "learning_rate": 1.9826044819151526e-05, "loss": 0.5807, "step": 3208 }, { "epoch": 0.08775432071756727, "grad_norm": 1.565505862236023, "learning_rate": 1.9825880295947262e-05, "loss": 0.5771, "step": 3209 }, { "epoch": 0.08778166703128418, "grad_norm": 1.596057653427124, "learning_rate": 1.9825715695661736e-05, "loss": 0.6457, "step": 3210 }, { "epoch": 0.08780901334500109, "grad_norm": 1.3950140476226807, "learning_rate": 1.9825551018296235e-05, "loss": 0.5829, "step": 3211 }, { "epoch": 0.087836359658718, "grad_norm": 1.595482587814331, "learning_rate": 1.982538626385205e-05, "loss": 0.6083, "step": 3212 }, { "epoch": 0.08786370597243491, "grad_norm": 2.0951268672943115, "learning_rate": 1.9825221432330473e-05, "loss": 0.565, "step": 3213 }, { "epoch": 0.08789105228615182, "grad_norm": 1.4465981721878052, "learning_rate": 1.9825056523732805e-05, "loss": 0.4455, "step": 3214 }, { "epoch": 0.08791839859986873, "grad_norm": 1.318833351135254, "learning_rate": 1.982489153806033e-05, "loss": 0.4603, "step": 3215 }, { "epoch": 0.08794574491358564, "grad_norm": 1.486783742904663, "learning_rate": 1.9824726475314345e-05, "loss": 0.6071, "step": 3216 }, { "epoch": 0.08797309122730256, "grad_norm": 1.6622066497802734, "learning_rate": 1.9824561335496147e-05, "loss": 0.5667, "step": 3217 }, { "epoch": 0.08800043754101947, "grad_norm": 1.765238881111145, "learning_rate": 1.982439611860703e-05, "loss": 0.6111, "step": 3218 }, { "epoch": 0.08802778385473638, "grad_norm": 1.5532563924789429, "learning_rate": 1.9824230824648287e-05, "loss": 0.5931, "step": 3219 }, { "epoch": 0.08805513016845329, "grad_norm": 2.4153382778167725, "learning_rate": 1.9824065453621224e-05, "loss": 0.6509, "step": 3220 }, { "epoch": 0.0880824764821702, "grad_norm": 1.6670171022415161, "learning_rate": 1.982390000552713e-05, "loss": 0.6089, "step": 3221 }, { "epoch": 0.08810982279588711, "grad_norm": 2.0775771141052246, "learning_rate": 1.9823734480367303e-05, "loss": 0.5539, "step": 3222 }, { "epoch": 0.08813716910960402, "grad_norm": 3.1954822540283203, "learning_rate": 1.9823568878143045e-05, "loss": 0.5749, "step": 3223 }, { "epoch": 0.08816451542332093, "grad_norm": 2.050745964050293, "learning_rate": 1.9823403198855654e-05, "loss": 0.5951, "step": 3224 }, { "epoch": 0.08819186173703784, "grad_norm": 1.9056041240692139, "learning_rate": 1.9823237442506425e-05, "loss": 0.5946, "step": 3225 }, { "epoch": 0.08821920805075475, "grad_norm": 2.4059829711914062, "learning_rate": 1.982307160909667e-05, "loss": 0.5814, "step": 3226 }, { "epoch": 0.08824655436447167, "grad_norm": 2.025623083114624, "learning_rate": 1.9822905698627682e-05, "loss": 0.5886, "step": 3227 }, { "epoch": 0.08827390067818858, "grad_norm": 2.0244553089141846, "learning_rate": 1.9822739711100757e-05, "loss": 0.5871, "step": 3228 }, { "epoch": 0.08830124699190549, "grad_norm": 1.5399724245071411, "learning_rate": 1.9822573646517208e-05, "loss": 0.9651, "step": 3229 }, { "epoch": 0.0883285933056224, "grad_norm": 2.0958478450775146, "learning_rate": 1.9822407504878333e-05, "loss": 0.617, "step": 3230 }, { "epoch": 0.08835593961933931, "grad_norm": 5.287610054016113, "learning_rate": 1.9822241286185436e-05, "loss": 0.9813, "step": 3231 }, { "epoch": 0.08838328593305622, "grad_norm": 1.8595918416976929, "learning_rate": 1.9822074990439816e-05, "loss": 0.6665, "step": 3232 }, { "epoch": 0.08841063224677313, "grad_norm": 2.086102247238159, "learning_rate": 1.9821908617642787e-05, "loss": 0.5798, "step": 3233 }, { "epoch": 0.08843797856049004, "grad_norm": 3.5746212005615234, "learning_rate": 1.9821742167795646e-05, "loss": 0.5572, "step": 3234 }, { "epoch": 0.08846532487420695, "grad_norm": 1.7023684978485107, "learning_rate": 1.9821575640899706e-05, "loss": 0.5415, "step": 3235 }, { "epoch": 0.08849267118792387, "grad_norm": 1.8799736499786377, "learning_rate": 1.9821409036956262e-05, "loss": 0.5886, "step": 3236 }, { "epoch": 0.08852001750164078, "grad_norm": 1.420770525932312, "learning_rate": 1.9821242355966634e-05, "loss": 0.5993, "step": 3237 }, { "epoch": 0.08854736381535769, "grad_norm": 1.5558080673217773, "learning_rate": 1.9821075597932122e-05, "loss": 0.9743, "step": 3238 }, { "epoch": 0.0885747101290746, "grad_norm": 2.288214683532715, "learning_rate": 1.9820908762854034e-05, "loss": 0.6717, "step": 3239 }, { "epoch": 0.08860205644279151, "grad_norm": 1.6475229263305664, "learning_rate": 1.9820741850733682e-05, "loss": 0.6164, "step": 3240 }, { "epoch": 0.08862940275650842, "grad_norm": 1.8047538995742798, "learning_rate": 1.9820574861572373e-05, "loss": 0.5998, "step": 3241 }, { "epoch": 0.08865674907022533, "grad_norm": 2.1766324043273926, "learning_rate": 1.982040779537142e-05, "loss": 0.6101, "step": 3242 }, { "epoch": 0.08868409538394224, "grad_norm": 1.5704758167266846, "learning_rate": 1.9820240652132126e-05, "loss": 0.5893, "step": 3243 }, { "epoch": 0.08871144169765915, "grad_norm": 1.5593208074569702, "learning_rate": 1.9820073431855814e-05, "loss": 0.5895, "step": 3244 }, { "epoch": 0.08873878801137607, "grad_norm": 2.210692882537842, "learning_rate": 1.9819906134543787e-05, "loss": 0.5662, "step": 3245 }, { "epoch": 0.08876613432509298, "grad_norm": 1.5088419914245605, "learning_rate": 1.9819738760197362e-05, "loss": 0.5972, "step": 3246 }, { "epoch": 0.08879348063880989, "grad_norm": 1.8500763177871704, "learning_rate": 1.9819571308817848e-05, "loss": 0.6123, "step": 3247 }, { "epoch": 0.0888208269525268, "grad_norm": 1.8795734643936157, "learning_rate": 1.9819403780406562e-05, "loss": 0.5863, "step": 3248 }, { "epoch": 0.08884817326624371, "grad_norm": 1.4178433418273926, "learning_rate": 1.9819236174964816e-05, "loss": 0.9561, "step": 3249 }, { "epoch": 0.08887551957996062, "grad_norm": 1.968462586402893, "learning_rate": 1.9819068492493925e-05, "loss": 0.5606, "step": 3250 }, { "epoch": 0.08890286589367753, "grad_norm": 2.2055394649505615, "learning_rate": 1.9818900732995205e-05, "loss": 0.6278, "step": 3251 }, { "epoch": 0.08893021220739444, "grad_norm": 1.6171406507492065, "learning_rate": 1.9818732896469976e-05, "loss": 0.5995, "step": 3252 }, { "epoch": 0.08895755852111135, "grad_norm": 2.038165807723999, "learning_rate": 1.9818564982919545e-05, "loss": 0.6604, "step": 3253 }, { "epoch": 0.08898490483482827, "grad_norm": 2.108900547027588, "learning_rate": 1.9818396992345236e-05, "loss": 0.5926, "step": 3254 }, { "epoch": 0.08901225114854518, "grad_norm": 1.8219940662384033, "learning_rate": 1.9818228924748365e-05, "loss": 0.5834, "step": 3255 }, { "epoch": 0.08903959746226209, "grad_norm": 1.8459111452102661, "learning_rate": 1.9818060780130256e-05, "loss": 0.5849, "step": 3256 }, { "epoch": 0.089066943775979, "grad_norm": 1.558424949645996, "learning_rate": 1.981789255849222e-05, "loss": 0.5514, "step": 3257 }, { "epoch": 0.08909429008969591, "grad_norm": 1.8739054203033447, "learning_rate": 1.9817724259835577e-05, "loss": 0.6097, "step": 3258 }, { "epoch": 0.08912163640341282, "grad_norm": 2.003831624984741, "learning_rate": 1.9817555884161656e-05, "loss": 0.5745, "step": 3259 }, { "epoch": 0.08914898271712973, "grad_norm": 1.562369704246521, "learning_rate": 1.981738743147177e-05, "loss": 0.5757, "step": 3260 }, { "epoch": 0.08917632903084664, "grad_norm": 1.860162615776062, "learning_rate": 1.981721890176724e-05, "loss": 0.5801, "step": 3261 }, { "epoch": 0.08920367534456355, "grad_norm": 1.5041311979293823, "learning_rate": 1.9817050295049392e-05, "loss": 0.5643, "step": 3262 }, { "epoch": 0.08923102165828047, "grad_norm": 1.7001835107803345, "learning_rate": 1.9816881611319546e-05, "loss": 0.5366, "step": 3263 }, { "epoch": 0.08925836797199738, "grad_norm": 1.9966248273849487, "learning_rate": 1.9816712850579026e-05, "loss": 0.5624, "step": 3264 }, { "epoch": 0.08928571428571429, "grad_norm": 1.4636942148208618, "learning_rate": 1.981654401282916e-05, "loss": 0.605, "step": 3265 }, { "epoch": 0.0893130605994312, "grad_norm": 2.7723605632781982, "learning_rate": 1.9816375098071267e-05, "loss": 0.5654, "step": 3266 }, { "epoch": 0.08934040691314811, "grad_norm": 2.630200147628784, "learning_rate": 1.9816206106306675e-05, "loss": 0.5862, "step": 3267 }, { "epoch": 0.08936775322686502, "grad_norm": 2.6835484504699707, "learning_rate": 1.9816037037536705e-05, "loss": 0.65, "step": 3268 }, { "epoch": 0.08939509954058193, "grad_norm": 1.8005179166793823, "learning_rate": 1.981586789176269e-05, "loss": 0.6051, "step": 3269 }, { "epoch": 0.08942244585429884, "grad_norm": 1.8319029808044434, "learning_rate": 1.9815698668985953e-05, "loss": 0.5655, "step": 3270 }, { "epoch": 0.08944979216801575, "grad_norm": 1.8699815273284912, "learning_rate": 1.981552936920782e-05, "loss": 0.5971, "step": 3271 }, { "epoch": 0.08947713848173267, "grad_norm": 3.079832077026367, "learning_rate": 1.9815359992429627e-05, "loss": 0.5907, "step": 3272 }, { "epoch": 0.08950448479544958, "grad_norm": 1.4485667943954468, "learning_rate": 1.9815190538652692e-05, "loss": 0.5833, "step": 3273 }, { "epoch": 0.08953183110916649, "grad_norm": 1.7741084098815918, "learning_rate": 1.9815021007878355e-05, "loss": 0.5537, "step": 3274 }, { "epoch": 0.0895591774228834, "grad_norm": 2.0092976093292236, "learning_rate": 1.9814851400107934e-05, "loss": 0.6199, "step": 3275 }, { "epoch": 0.08958652373660031, "grad_norm": 1.614473819732666, "learning_rate": 1.9814681715342773e-05, "loss": 0.5775, "step": 3276 }, { "epoch": 0.08961387005031722, "grad_norm": 2.0699281692504883, "learning_rate": 1.981451195358419e-05, "loss": 0.6121, "step": 3277 }, { "epoch": 0.08964121636403413, "grad_norm": 1.783171534538269, "learning_rate": 1.9814342114833523e-05, "loss": 0.5475, "step": 3278 }, { "epoch": 0.08966856267775104, "grad_norm": 1.4211822748184204, "learning_rate": 1.9814172199092107e-05, "loss": 0.5363, "step": 3279 }, { "epoch": 0.08969590899146795, "grad_norm": 1.4857879877090454, "learning_rate": 1.981400220636127e-05, "loss": 0.9521, "step": 3280 }, { "epoch": 0.08972325530518487, "grad_norm": 1.7271760702133179, "learning_rate": 1.981383213664235e-05, "loss": 0.5817, "step": 3281 }, { "epoch": 0.08975060161890178, "grad_norm": 1.7222821712493896, "learning_rate": 1.9813661989936675e-05, "loss": 0.5908, "step": 3282 }, { "epoch": 0.08977794793261869, "grad_norm": 2.00243878364563, "learning_rate": 1.981349176624559e-05, "loss": 0.6589, "step": 3283 }, { "epoch": 0.0898052942463356, "grad_norm": 1.8100457191467285, "learning_rate": 1.9813321465570416e-05, "loss": 0.5616, "step": 3284 }, { "epoch": 0.08983264056005251, "grad_norm": 1.8215622901916504, "learning_rate": 1.98131510879125e-05, "loss": 0.5482, "step": 3285 }, { "epoch": 0.08985998687376942, "grad_norm": 2.036996841430664, "learning_rate": 1.9812980633273175e-05, "loss": 0.6395, "step": 3286 }, { "epoch": 0.08988733318748633, "grad_norm": 1.4604825973510742, "learning_rate": 1.981281010165378e-05, "loss": 0.4643, "step": 3287 }, { "epoch": 0.08991467950120324, "grad_norm": 1.7281955480575562, "learning_rate": 1.9812639493055653e-05, "loss": 0.5669, "step": 3288 }, { "epoch": 0.08994202581492015, "grad_norm": 1.6685127019882202, "learning_rate": 1.9812468807480127e-05, "loss": 0.5823, "step": 3289 }, { "epoch": 0.08996937212863707, "grad_norm": 2.3129372596740723, "learning_rate": 1.9812298044928546e-05, "loss": 0.562, "step": 3290 }, { "epoch": 0.08999671844235398, "grad_norm": 2.8515875339508057, "learning_rate": 1.9812127205402247e-05, "loss": 0.5532, "step": 3291 }, { "epoch": 0.09002406475607089, "grad_norm": 1.9459445476531982, "learning_rate": 1.981195628890257e-05, "loss": 0.555, "step": 3292 }, { "epoch": 0.0900514110697878, "grad_norm": 1.9905587434768677, "learning_rate": 1.9811785295430863e-05, "loss": 0.6378, "step": 3293 }, { "epoch": 0.09007875738350471, "grad_norm": 1.3149702548980713, "learning_rate": 1.9811614224988458e-05, "loss": 0.5436, "step": 3294 }, { "epoch": 0.09010610369722162, "grad_norm": 2.111525535583496, "learning_rate": 1.98114430775767e-05, "loss": 0.5785, "step": 3295 }, { "epoch": 0.09013345001093853, "grad_norm": 2.285125255584717, "learning_rate": 1.9811271853196933e-05, "loss": 0.5815, "step": 3296 }, { "epoch": 0.09016079632465544, "grad_norm": 1.7280360460281372, "learning_rate": 1.98111005518505e-05, "loss": 0.5593, "step": 3297 }, { "epoch": 0.09018814263837235, "grad_norm": 1.97151780128479, "learning_rate": 1.9810929173538744e-05, "loss": 0.5745, "step": 3298 }, { "epoch": 0.09021548895208926, "grad_norm": 1.491568684577942, "learning_rate": 1.981075771826301e-05, "loss": 0.5969, "step": 3299 }, { "epoch": 0.09024283526580618, "grad_norm": 1.6240121126174927, "learning_rate": 1.9810586186024642e-05, "loss": 0.5551, "step": 3300 }, { "epoch": 0.09027018157952309, "grad_norm": 1.5765866041183472, "learning_rate": 1.9810414576824986e-05, "loss": 0.5875, "step": 3301 }, { "epoch": 0.09029752789323998, "grad_norm": 1.59962797164917, "learning_rate": 1.981024289066539e-05, "loss": 0.575, "step": 3302 }, { "epoch": 0.0903248742069569, "grad_norm": 1.6568750143051147, "learning_rate": 1.98100711275472e-05, "loss": 0.5689, "step": 3303 }, { "epoch": 0.0903522205206738, "grad_norm": 1.6820781230926514, "learning_rate": 1.980989928747176e-05, "loss": 0.5521, "step": 3304 }, { "epoch": 0.09037956683439072, "grad_norm": 1.5545867681503296, "learning_rate": 1.9809727370440425e-05, "loss": 0.5788, "step": 3305 }, { "epoch": 0.09040691314810763, "grad_norm": 2.596743106842041, "learning_rate": 1.9809555376454535e-05, "loss": 0.5849, "step": 3306 }, { "epoch": 0.09043425946182454, "grad_norm": 3.11258864402771, "learning_rate": 1.9809383305515444e-05, "loss": 0.5771, "step": 3307 }, { "epoch": 0.09046160577554145, "grad_norm": 1.6975661516189575, "learning_rate": 1.9809211157624504e-05, "loss": 0.6166, "step": 3308 }, { "epoch": 0.09048895208925836, "grad_norm": 1.8274757862091064, "learning_rate": 1.9809038932783063e-05, "loss": 0.5245, "step": 3309 }, { "epoch": 0.09051629840297527, "grad_norm": 1.5233960151672363, "learning_rate": 1.980886663099247e-05, "loss": 0.5664, "step": 3310 }, { "epoch": 0.09054364471669218, "grad_norm": 1.80507230758667, "learning_rate": 1.9808694252254078e-05, "loss": 0.5988, "step": 3311 }, { "epoch": 0.0905709910304091, "grad_norm": 1.7187130451202393, "learning_rate": 1.980852179656924e-05, "loss": 0.4556, "step": 3312 }, { "epoch": 0.090598337344126, "grad_norm": 1.9031833410263062, "learning_rate": 1.9808349263939314e-05, "loss": 0.5688, "step": 3313 }, { "epoch": 0.09062568365784292, "grad_norm": 1.9909892082214355, "learning_rate": 1.9808176654365643e-05, "loss": 0.6055, "step": 3314 }, { "epoch": 0.09065302997155983, "grad_norm": 1.8795757293701172, "learning_rate": 1.980800396784959e-05, "loss": 0.5556, "step": 3315 }, { "epoch": 0.09068037628527674, "grad_norm": 1.8185731172561646, "learning_rate": 1.9807831204392504e-05, "loss": 0.5594, "step": 3316 }, { "epoch": 0.09070772259899365, "grad_norm": 1.4204505681991577, "learning_rate": 1.9807658363995745e-05, "loss": 0.9786, "step": 3317 }, { "epoch": 0.09073506891271056, "grad_norm": 1.9099687337875366, "learning_rate": 1.980748544666066e-05, "loss": 0.6194, "step": 3318 }, { "epoch": 0.09076241522642747, "grad_norm": 1.4914687871932983, "learning_rate": 1.9807312452388617e-05, "loss": 0.5662, "step": 3319 }, { "epoch": 0.09078976154014438, "grad_norm": 1.4408862590789795, "learning_rate": 1.980713938118097e-05, "loss": 0.5619, "step": 3320 }, { "epoch": 0.0908171078538613, "grad_norm": 2.2045652866363525, "learning_rate": 1.9806966233039068e-05, "loss": 0.5893, "step": 3321 }, { "epoch": 0.0908444541675782, "grad_norm": 1.4986422061920166, "learning_rate": 1.9806793007964277e-05, "loss": 0.5644, "step": 3322 }, { "epoch": 0.09087180048129512, "grad_norm": 1.2807046175003052, "learning_rate": 1.980661970595796e-05, "loss": 0.9469, "step": 3323 }, { "epoch": 0.09089914679501203, "grad_norm": 2.0680429935455322, "learning_rate": 1.9806446327021467e-05, "loss": 0.6018, "step": 3324 }, { "epoch": 0.09092649310872894, "grad_norm": 1.4808216094970703, "learning_rate": 1.9806272871156165e-05, "loss": 0.5848, "step": 3325 }, { "epoch": 0.09095383942244585, "grad_norm": 1.529800295829773, "learning_rate": 1.980609933836341e-05, "loss": 0.5962, "step": 3326 }, { "epoch": 0.09098118573616276, "grad_norm": 1.6995184421539307, "learning_rate": 1.9805925728644563e-05, "loss": 0.5694, "step": 3327 }, { "epoch": 0.09100853204987967, "grad_norm": 1.3425081968307495, "learning_rate": 1.980575204200099e-05, "loss": 0.624, "step": 3328 }, { "epoch": 0.09103587836359658, "grad_norm": 1.4746313095092773, "learning_rate": 1.9805578278434053e-05, "loss": 0.5688, "step": 3329 }, { "epoch": 0.0910632246773135, "grad_norm": 4.846518516540527, "learning_rate": 1.9805404437945113e-05, "loss": 0.4428, "step": 3330 }, { "epoch": 0.0910905709910304, "grad_norm": 2.1780343055725098, "learning_rate": 1.980523052053554e-05, "loss": 0.585, "step": 3331 }, { "epoch": 0.09111791730474732, "grad_norm": 2.087617874145508, "learning_rate": 1.9805056526206687e-05, "loss": 0.653, "step": 3332 }, { "epoch": 0.09114526361846423, "grad_norm": 1.7013100385665894, "learning_rate": 1.9804882454959927e-05, "loss": 0.5751, "step": 3333 }, { "epoch": 0.09117260993218114, "grad_norm": 1.926317811012268, "learning_rate": 1.980470830679662e-05, "loss": 0.6036, "step": 3334 }, { "epoch": 0.09119995624589805, "grad_norm": 1.6203596591949463, "learning_rate": 1.980453408171814e-05, "loss": 0.5547, "step": 3335 }, { "epoch": 0.09122730255961496, "grad_norm": 2.089478015899658, "learning_rate": 1.9804359779725847e-05, "loss": 0.5739, "step": 3336 }, { "epoch": 0.09125464887333187, "grad_norm": 2.1716723442077637, "learning_rate": 1.980418540082111e-05, "loss": 0.6064, "step": 3337 }, { "epoch": 0.09128199518704878, "grad_norm": 1.5356305837631226, "learning_rate": 1.98040109450053e-05, "loss": 0.5308, "step": 3338 }, { "epoch": 0.0913093415007657, "grad_norm": 1.8517169952392578, "learning_rate": 1.9803836412279782e-05, "loss": 0.5412, "step": 3339 }, { "epoch": 0.0913366878144826, "grad_norm": 1.8044565916061401, "learning_rate": 1.980366180264593e-05, "loss": 0.4643, "step": 3340 }, { "epoch": 0.09136403412819952, "grad_norm": 1.405454158782959, "learning_rate": 1.9803487116105105e-05, "loss": 0.9536, "step": 3341 }, { "epoch": 0.09139138044191643, "grad_norm": 2.435938596725464, "learning_rate": 1.9803312352658685e-05, "loss": 0.4214, "step": 3342 }, { "epoch": 0.09141872675563334, "grad_norm": 1.9313466548919678, "learning_rate": 1.980313751230804e-05, "loss": 0.6796, "step": 3343 }, { "epoch": 0.09144607306935025, "grad_norm": 1.9894821643829346, "learning_rate": 1.9802962595054536e-05, "loss": 0.5946, "step": 3344 }, { "epoch": 0.09147341938306716, "grad_norm": 2.545473098754883, "learning_rate": 1.980278760089955e-05, "loss": 0.5757, "step": 3345 }, { "epoch": 0.09150076569678407, "grad_norm": 1.4265533685684204, "learning_rate": 1.9802612529844457e-05, "loss": 1.0111, "step": 3346 }, { "epoch": 0.09152811201050098, "grad_norm": 1.5097253322601318, "learning_rate": 1.9802437381890625e-05, "loss": 0.5857, "step": 3347 }, { "epoch": 0.0915554583242179, "grad_norm": 1.5570539236068726, "learning_rate": 1.980226215703943e-05, "loss": 0.6681, "step": 3348 }, { "epoch": 0.0915828046379348, "grad_norm": 1.7539774179458618, "learning_rate": 1.980208685529225e-05, "loss": 0.594, "step": 3349 }, { "epoch": 0.09161015095165172, "grad_norm": 1.7510172128677368, "learning_rate": 1.980191147665045e-05, "loss": 0.5475, "step": 3350 }, { "epoch": 0.09163749726536863, "grad_norm": 1.6355000734329224, "learning_rate": 1.9801736021115422e-05, "loss": 0.5685, "step": 3351 }, { "epoch": 0.09166484357908554, "grad_norm": 1.2722560167312622, "learning_rate": 1.9801560488688528e-05, "loss": 0.6137, "step": 3352 }, { "epoch": 0.09169218989280245, "grad_norm": 1.8131946325302124, "learning_rate": 1.9801384879371153e-05, "loss": 0.4136, "step": 3353 }, { "epoch": 0.09171953620651936, "grad_norm": 1.6909180879592896, "learning_rate": 1.980120919316467e-05, "loss": 0.5325, "step": 3354 }, { "epoch": 0.09174688252023627, "grad_norm": 2.07450270652771, "learning_rate": 1.980103343007046e-05, "loss": 0.5961, "step": 3355 }, { "epoch": 0.09177422883395318, "grad_norm": 1.6299686431884766, "learning_rate": 1.98008575900899e-05, "loss": 0.4536, "step": 3356 }, { "epoch": 0.0918015751476701, "grad_norm": 1.5782780647277832, "learning_rate": 1.980068167322437e-05, "loss": 0.5688, "step": 3357 }, { "epoch": 0.091828921461387, "grad_norm": 1.4797619581222534, "learning_rate": 1.9800505679475256e-05, "loss": 0.5635, "step": 3358 }, { "epoch": 0.09185626777510392, "grad_norm": 1.8762826919555664, "learning_rate": 1.980032960884393e-05, "loss": 0.9697, "step": 3359 }, { "epoch": 0.09188361408882083, "grad_norm": 1.3147289752960205, "learning_rate": 1.9800153461331774e-05, "loss": 0.5614, "step": 3360 }, { "epoch": 0.09191096040253774, "grad_norm": 1.2900915145874023, "learning_rate": 1.979997723694017e-05, "loss": 0.5677, "step": 3361 }, { "epoch": 0.09193830671625465, "grad_norm": 2.776503801345825, "learning_rate": 1.9799800935670506e-05, "loss": 0.5241, "step": 3362 }, { "epoch": 0.09196565302997156, "grad_norm": 1.5834026336669922, "learning_rate": 1.9799624557524162e-05, "loss": 0.5857, "step": 3363 }, { "epoch": 0.09199299934368847, "grad_norm": 1.7612017393112183, "learning_rate": 1.979944810250252e-05, "loss": 0.584, "step": 3364 }, { "epoch": 0.09202034565740538, "grad_norm": 1.4524608850479126, "learning_rate": 1.9799271570606966e-05, "loss": 0.5732, "step": 3365 }, { "epoch": 0.0920476919711223, "grad_norm": 1.416350245475769, "learning_rate": 1.9799094961838883e-05, "loss": 0.9618, "step": 3366 }, { "epoch": 0.0920750382848392, "grad_norm": 1.831768274307251, "learning_rate": 1.979891827619966e-05, "loss": 0.5052, "step": 3367 }, { "epoch": 0.09210238459855612, "grad_norm": 2.634068727493286, "learning_rate": 1.979874151369068e-05, "loss": 0.925, "step": 3368 }, { "epoch": 0.09212973091227303, "grad_norm": 1.764265775680542, "learning_rate": 1.979856467431333e-05, "loss": 0.6194, "step": 3369 }, { "epoch": 0.09215707722598994, "grad_norm": 1.620465874671936, "learning_rate": 1.9798387758068997e-05, "loss": 0.596, "step": 3370 }, { "epoch": 0.09218442353970685, "grad_norm": 1.837081789970398, "learning_rate": 1.979821076495907e-05, "loss": 0.5998, "step": 3371 }, { "epoch": 0.09221176985342376, "grad_norm": 2.3582727909088135, "learning_rate": 1.979803369498494e-05, "loss": 0.5926, "step": 3372 }, { "epoch": 0.09223911616714067, "grad_norm": 1.531725287437439, "learning_rate": 1.979785654814799e-05, "loss": 0.5716, "step": 3373 }, { "epoch": 0.09226646248085758, "grad_norm": 1.3809072971343994, "learning_rate": 1.9797679324449612e-05, "loss": 0.5687, "step": 3374 }, { "epoch": 0.0922938087945745, "grad_norm": 1.3384265899658203, "learning_rate": 1.9797502023891198e-05, "loss": 0.5738, "step": 3375 }, { "epoch": 0.0923211551082914, "grad_norm": 2.0876240730285645, "learning_rate": 1.979732464647414e-05, "loss": 0.5298, "step": 3376 }, { "epoch": 0.09234850142200832, "grad_norm": 1.4607288837432861, "learning_rate": 1.9797147192199825e-05, "loss": 0.5705, "step": 3377 }, { "epoch": 0.09237584773572523, "grad_norm": 1.790418028831482, "learning_rate": 1.9796969661069648e-05, "loss": 0.6419, "step": 3378 }, { "epoch": 0.09240319404944214, "grad_norm": 3.1778955459594727, "learning_rate": 1.9796792053085e-05, "loss": 0.9879, "step": 3379 }, { "epoch": 0.09243054036315905, "grad_norm": 1.2311179637908936, "learning_rate": 1.9796614368247277e-05, "loss": 0.44, "step": 3380 }, { "epoch": 0.09245788667687596, "grad_norm": 1.9823317527770996, "learning_rate": 1.979643660655787e-05, "loss": 0.6282, "step": 3381 }, { "epoch": 0.09248523299059287, "grad_norm": 1.485213279724121, "learning_rate": 1.9796258768018174e-05, "loss": 0.5515, "step": 3382 }, { "epoch": 0.09251257930430978, "grad_norm": 1.6893255710601807, "learning_rate": 1.979608085262959e-05, "loss": 0.5829, "step": 3383 }, { "epoch": 0.0925399256180267, "grad_norm": 1.4303072690963745, "learning_rate": 1.97959028603935e-05, "loss": 0.9717, "step": 3384 }, { "epoch": 0.0925672719317436, "grad_norm": 1.7473480701446533, "learning_rate": 1.9795724791311315e-05, "loss": 0.5517, "step": 3385 }, { "epoch": 0.09259461824546052, "grad_norm": 1.4066903591156006, "learning_rate": 1.9795546645384424e-05, "loss": 0.4191, "step": 3386 }, { "epoch": 0.09262196455917743, "grad_norm": 1.3866199254989624, "learning_rate": 1.979536842261423e-05, "loss": 0.6242, "step": 3387 }, { "epoch": 0.09264931087289434, "grad_norm": 1.2880527973175049, "learning_rate": 1.9795190123002122e-05, "loss": 0.3908, "step": 3388 }, { "epoch": 0.09267665718661125, "grad_norm": 1.639142394065857, "learning_rate": 1.9795011746549506e-05, "loss": 0.6419, "step": 3389 }, { "epoch": 0.09270400350032816, "grad_norm": 1.8696651458740234, "learning_rate": 1.979483329325778e-05, "loss": 0.5875, "step": 3390 }, { "epoch": 0.09273134981404507, "grad_norm": 1.5285444259643555, "learning_rate": 1.9794654763128343e-05, "loss": 0.9854, "step": 3391 }, { "epoch": 0.09275869612776198, "grad_norm": 1.9869332313537598, "learning_rate": 1.9794476156162595e-05, "loss": 0.5826, "step": 3392 }, { "epoch": 0.0927860424414789, "grad_norm": 9.104601860046387, "learning_rate": 1.979429747236194e-05, "loss": 0.9581, "step": 3393 }, { "epoch": 0.0928133887551958, "grad_norm": 1.707399606704712, "learning_rate": 1.9794118711727776e-05, "loss": 0.5846, "step": 3394 }, { "epoch": 0.09284073506891272, "grad_norm": 2.0566561222076416, "learning_rate": 1.9793939874261506e-05, "loss": 0.573, "step": 3395 }, { "epoch": 0.09286808138262963, "grad_norm": 1.7117910385131836, "learning_rate": 1.9793760959964537e-05, "loss": 0.5305, "step": 3396 }, { "epoch": 0.09289542769634654, "grad_norm": 2.0231194496154785, "learning_rate": 1.9793581968838268e-05, "loss": 0.6119, "step": 3397 }, { "epoch": 0.09292277401006345, "grad_norm": 2.002120018005371, "learning_rate": 1.9793402900884105e-05, "loss": 0.5833, "step": 3398 }, { "epoch": 0.09295012032378036, "grad_norm": 4.285058498382568, "learning_rate": 1.979322375610345e-05, "loss": 0.6038, "step": 3399 }, { "epoch": 0.09297746663749727, "grad_norm": 1.6812777519226074, "learning_rate": 1.979304453449771e-05, "loss": 0.5658, "step": 3400 }, { "epoch": 0.09300481295121418, "grad_norm": 1.420581340789795, "learning_rate": 1.97928652360683e-05, "loss": 0.9176, "step": 3401 }, { "epoch": 0.0930321592649311, "grad_norm": 1.8139716386795044, "learning_rate": 1.979268586081661e-05, "loss": 0.6055, "step": 3402 }, { "epoch": 0.09305950557864799, "grad_norm": 2.2304279804229736, "learning_rate": 1.9792506408744057e-05, "loss": 0.5683, "step": 3403 }, { "epoch": 0.0930868518923649, "grad_norm": 1.8073943853378296, "learning_rate": 1.9792326879852047e-05, "loss": 0.5796, "step": 3404 }, { "epoch": 0.09311419820608181, "grad_norm": 2.125816583633423, "learning_rate": 1.979214727414199e-05, "loss": 0.5875, "step": 3405 }, { "epoch": 0.09314154451979872, "grad_norm": 23.123716354370117, "learning_rate": 1.9791967591615294e-05, "loss": 0.4247, "step": 3406 }, { "epoch": 0.09316889083351564, "grad_norm": 1.5033894777297974, "learning_rate": 1.9791787832273365e-05, "loss": 0.5695, "step": 3407 }, { "epoch": 0.09319623714723255, "grad_norm": 1.5022281408309937, "learning_rate": 1.979160799611762e-05, "loss": 0.584, "step": 3408 }, { "epoch": 0.09322358346094946, "grad_norm": 1.6364654302597046, "learning_rate": 1.9791428083149462e-05, "loss": 1.0054, "step": 3409 }, { "epoch": 0.09325092977466637, "grad_norm": 2.1603832244873047, "learning_rate": 1.9791248093370304e-05, "loss": 0.5627, "step": 3410 }, { "epoch": 0.09327827608838328, "grad_norm": 1.67061185836792, "learning_rate": 1.9791068026781566e-05, "loss": 0.5903, "step": 3411 }, { "epoch": 0.09330562240210019, "grad_norm": 1.4998115301132202, "learning_rate": 1.9790887883384652e-05, "loss": 0.5964, "step": 3412 }, { "epoch": 0.0933329687158171, "grad_norm": 1.5613740682601929, "learning_rate": 1.979070766318098e-05, "loss": 0.5365, "step": 3413 }, { "epoch": 0.09336031502953401, "grad_norm": 1.8549691438674927, "learning_rate": 1.979052736617196e-05, "loss": 0.6101, "step": 3414 }, { "epoch": 0.09338766134325092, "grad_norm": 1.7995498180389404, "learning_rate": 1.979034699235901e-05, "loss": 0.9591, "step": 3415 }, { "epoch": 0.09341500765696784, "grad_norm": 1.6316903829574585, "learning_rate": 1.979016654174354e-05, "loss": 0.5369, "step": 3416 }, { "epoch": 0.09344235397068475, "grad_norm": 1.8100684881210327, "learning_rate": 1.978998601432697e-05, "loss": 0.6697, "step": 3417 }, { "epoch": 0.09346970028440166, "grad_norm": 1.6602927446365356, "learning_rate": 1.9789805410110715e-05, "loss": 0.5838, "step": 3418 }, { "epoch": 0.09349704659811857, "grad_norm": 2.5618653297424316, "learning_rate": 1.9789624729096193e-05, "loss": 0.5751, "step": 3419 }, { "epoch": 0.09352439291183548, "grad_norm": 1.5013232231140137, "learning_rate": 1.9789443971284815e-05, "loss": 0.5881, "step": 3420 }, { "epoch": 0.09355173922555239, "grad_norm": 1.3782240152359009, "learning_rate": 1.9789263136678008e-05, "loss": 0.5954, "step": 3421 }, { "epoch": 0.0935790855392693, "grad_norm": 1.6798503398895264, "learning_rate": 1.978908222527719e-05, "loss": 0.597, "step": 3422 }, { "epoch": 0.09360643185298621, "grad_norm": 1.578036904335022, "learning_rate": 1.9788901237083772e-05, "loss": 0.5851, "step": 3423 }, { "epoch": 0.09363377816670312, "grad_norm": 2.1926136016845703, "learning_rate": 1.978872017209918e-05, "loss": 0.5363, "step": 3424 }, { "epoch": 0.09366112448042004, "grad_norm": 1.623090386390686, "learning_rate": 1.9788539030324832e-05, "loss": 0.6415, "step": 3425 }, { "epoch": 0.09368847079413695, "grad_norm": 1.4348877668380737, "learning_rate": 1.978835781176215e-05, "loss": 0.5499, "step": 3426 }, { "epoch": 0.09371581710785386, "grad_norm": 2.1800060272216797, "learning_rate": 1.9788176516412555e-05, "loss": 1.0146, "step": 3427 }, { "epoch": 0.09374316342157077, "grad_norm": 1.9842774868011475, "learning_rate": 1.978799514427747e-05, "loss": 0.4392, "step": 3428 }, { "epoch": 0.09377050973528768, "grad_norm": 1.7344478368759155, "learning_rate": 1.9787813695358324e-05, "loss": 0.5775, "step": 3429 }, { "epoch": 0.09379785604900459, "grad_norm": 1.8803563117980957, "learning_rate": 1.9787632169656526e-05, "loss": 0.6162, "step": 3430 }, { "epoch": 0.0938252023627215, "grad_norm": 1.3828574419021606, "learning_rate": 1.9787450567173515e-05, "loss": 0.9653, "step": 3431 }, { "epoch": 0.09385254867643841, "grad_norm": 2.6528327465057373, "learning_rate": 1.97872688879107e-05, "loss": 0.5443, "step": 3432 }, { "epoch": 0.09387989499015532, "grad_norm": 1.5318994522094727, "learning_rate": 1.9787087131869525e-05, "loss": 0.5547, "step": 3433 }, { "epoch": 0.09390724130387224, "grad_norm": 1.740614891052246, "learning_rate": 1.9786905299051398e-05, "loss": 0.6104, "step": 3434 }, { "epoch": 0.09393458761758915, "grad_norm": 1.5719197988510132, "learning_rate": 1.9786723389457758e-05, "loss": 0.5969, "step": 3435 }, { "epoch": 0.09396193393130606, "grad_norm": 1.6891034841537476, "learning_rate": 1.9786541403090023e-05, "loss": 0.5945, "step": 3436 }, { "epoch": 0.09398928024502297, "grad_norm": 1.9126349687576294, "learning_rate": 1.9786359339949628e-05, "loss": 0.5711, "step": 3437 }, { "epoch": 0.09401662655873988, "grad_norm": 1.8500950336456299, "learning_rate": 1.9786177200038e-05, "loss": 0.6337, "step": 3438 }, { "epoch": 0.09404397287245679, "grad_norm": 1.871942162513733, "learning_rate": 1.978599498335656e-05, "loss": 0.5754, "step": 3439 }, { "epoch": 0.0940713191861737, "grad_norm": 1.7375009059906006, "learning_rate": 1.9785812689906746e-05, "loss": 0.5627, "step": 3440 }, { "epoch": 0.09409866549989061, "grad_norm": 1.4600541591644287, "learning_rate": 1.9785630319689985e-05, "loss": 0.45, "step": 3441 }, { "epoch": 0.09412601181360752, "grad_norm": 1.3632045984268188, "learning_rate": 1.978544787270771e-05, "loss": 0.5666, "step": 3442 }, { "epoch": 0.09415335812732444, "grad_norm": 1.5534098148345947, "learning_rate": 1.978526534896135e-05, "loss": 0.9556, "step": 3443 }, { "epoch": 0.09418070444104135, "grad_norm": 1.7111012935638428, "learning_rate": 1.9785082748452334e-05, "loss": 0.605, "step": 3444 }, { "epoch": 0.09420805075475826, "grad_norm": 1.6634231805801392, "learning_rate": 1.9784900071182102e-05, "loss": 0.6106, "step": 3445 }, { "epoch": 0.09423539706847517, "grad_norm": 1.5713558197021484, "learning_rate": 1.978471731715208e-05, "loss": 0.6045, "step": 3446 }, { "epoch": 0.09426274338219208, "grad_norm": 1.27010178565979, "learning_rate": 1.9784534486363706e-05, "loss": 0.9363, "step": 3447 }, { "epoch": 0.09429008969590899, "grad_norm": 1.2657157182693481, "learning_rate": 1.9784351578818412e-05, "loss": 0.6059, "step": 3448 }, { "epoch": 0.0943174360096259, "grad_norm": 2.657679319381714, "learning_rate": 1.978416859451763e-05, "loss": 1.0022, "step": 3449 }, { "epoch": 0.09434478232334281, "grad_norm": 1.6368523836135864, "learning_rate": 1.9783985533462805e-05, "loss": 0.5715, "step": 3450 }, { "epoch": 0.09437212863705972, "grad_norm": 1.764824390411377, "learning_rate": 1.9783802395655362e-05, "loss": 0.5785, "step": 3451 }, { "epoch": 0.09439947495077664, "grad_norm": 1.7103164196014404, "learning_rate": 1.9783619181096745e-05, "loss": 0.6013, "step": 3452 }, { "epoch": 0.09442682126449355, "grad_norm": 1.5238176584243774, "learning_rate": 1.9783435889788385e-05, "loss": 0.5425, "step": 3453 }, { "epoch": 0.09445416757821046, "grad_norm": 1.7307462692260742, "learning_rate": 1.9783252521731732e-05, "loss": 0.9621, "step": 3454 }, { "epoch": 0.09448151389192737, "grad_norm": 1.4745782613754272, "learning_rate": 1.978306907692821e-05, "loss": 0.5952, "step": 3455 }, { "epoch": 0.09450886020564428, "grad_norm": 2.0447745323181152, "learning_rate": 1.9782885555379265e-05, "loss": 0.5308, "step": 3456 }, { "epoch": 0.09453620651936119, "grad_norm": 1.3778308629989624, "learning_rate": 1.9782701957086338e-05, "loss": 0.6126, "step": 3457 }, { "epoch": 0.0945635528330781, "grad_norm": 2.0517492294311523, "learning_rate": 1.9782518282050863e-05, "loss": 0.5318, "step": 3458 }, { "epoch": 0.09459089914679501, "grad_norm": 2.0424630641937256, "learning_rate": 1.978233453027429e-05, "loss": 0.5831, "step": 3459 }, { "epoch": 0.09461824546051192, "grad_norm": 1.6401020288467407, "learning_rate": 1.978215070175805e-05, "loss": 0.6612, "step": 3460 }, { "epoch": 0.09464559177422883, "grad_norm": 1.4210997819900513, "learning_rate": 1.9781966796503595e-05, "loss": 1.0116, "step": 3461 }, { "epoch": 0.09467293808794575, "grad_norm": 8.679905891418457, "learning_rate": 1.9781782814512362e-05, "loss": 0.5561, "step": 3462 }, { "epoch": 0.09470028440166266, "grad_norm": 1.9120814800262451, "learning_rate": 1.97815987557858e-05, "loss": 0.967, "step": 3463 }, { "epoch": 0.09472763071537957, "grad_norm": 1.6380629539489746, "learning_rate": 1.978141462032534e-05, "loss": 0.5985, "step": 3464 }, { "epoch": 0.09475497702909648, "grad_norm": 2.771111488342285, "learning_rate": 1.9781230408132443e-05, "loss": 0.5424, "step": 3465 }, { "epoch": 0.09478232334281339, "grad_norm": 1.7662767171859741, "learning_rate": 1.978104611920854e-05, "loss": 0.5858, "step": 3466 }, { "epoch": 0.0948096696565303, "grad_norm": 1.7818583250045776, "learning_rate": 1.9780861753555088e-05, "loss": 0.5945, "step": 3467 }, { "epoch": 0.09483701597024721, "grad_norm": 1.751849889755249, "learning_rate": 1.9780677311173524e-05, "loss": 0.5591, "step": 3468 }, { "epoch": 0.09486436228396412, "grad_norm": 1.8209468126296997, "learning_rate": 1.97804927920653e-05, "loss": 0.5828, "step": 3469 }, { "epoch": 0.09489170859768103, "grad_norm": 1.5887815952301025, "learning_rate": 1.9780308196231865e-05, "loss": 0.564, "step": 3470 }, { "epoch": 0.09491905491139795, "grad_norm": 1.4929993152618408, "learning_rate": 1.978012352367466e-05, "loss": 0.5575, "step": 3471 }, { "epoch": 0.09494640122511486, "grad_norm": 1.6756021976470947, "learning_rate": 1.9779938774395143e-05, "loss": 0.5813, "step": 3472 }, { "epoch": 0.09497374753883177, "grad_norm": 2.065448522567749, "learning_rate": 1.977975394839476e-05, "loss": 0.5774, "step": 3473 }, { "epoch": 0.09500109385254868, "grad_norm": 2.0938758850097656, "learning_rate": 1.9779569045674952e-05, "loss": 0.5834, "step": 3474 }, { "epoch": 0.09502844016626559, "grad_norm": 1.7007126808166504, "learning_rate": 1.9779384066237182e-05, "loss": 0.5424, "step": 3475 }, { "epoch": 0.0950557864799825, "grad_norm": 1.9360828399658203, "learning_rate": 1.9779199010082893e-05, "loss": 0.4349, "step": 3476 }, { "epoch": 0.09508313279369941, "grad_norm": 5.3964338302612305, "learning_rate": 1.9779013877213545e-05, "loss": 0.5844, "step": 3477 }, { "epoch": 0.09511047910741632, "grad_norm": 2.808147430419922, "learning_rate": 1.977882866763058e-05, "loss": 0.4403, "step": 3478 }, { "epoch": 0.09513782542113323, "grad_norm": 1.5257939100265503, "learning_rate": 1.977864338133546e-05, "loss": 0.5867, "step": 3479 }, { "epoch": 0.09516517173485015, "grad_norm": 1.8161519765853882, "learning_rate": 1.9778458018329633e-05, "loss": 0.615, "step": 3480 }, { "epoch": 0.09519251804856706, "grad_norm": 2.283156394958496, "learning_rate": 1.9778272578614556e-05, "loss": 0.6691, "step": 3481 }, { "epoch": 0.09521986436228397, "grad_norm": 1.8993887901306152, "learning_rate": 1.977808706219168e-05, "loss": 0.6036, "step": 3482 }, { "epoch": 0.09524721067600088, "grad_norm": 1.9867933988571167, "learning_rate": 1.9777901469062467e-05, "loss": 0.549, "step": 3483 }, { "epoch": 0.09527455698971779, "grad_norm": 1.3346539735794067, "learning_rate": 1.9777715799228366e-05, "loss": 0.5957, "step": 3484 }, { "epoch": 0.0953019033034347, "grad_norm": 1.484014630317688, "learning_rate": 1.9777530052690833e-05, "loss": 0.5566, "step": 3485 }, { "epoch": 0.09532924961715161, "grad_norm": 1.7425421476364136, "learning_rate": 1.9777344229451334e-05, "loss": 0.5885, "step": 3486 }, { "epoch": 0.09535659593086852, "grad_norm": 1.5883830785751343, "learning_rate": 1.9777158329511317e-05, "loss": 0.5588, "step": 3487 }, { "epoch": 0.09538394224458543, "grad_norm": 1.361592411994934, "learning_rate": 1.977697235287225e-05, "loss": 0.6139, "step": 3488 }, { "epoch": 0.09541128855830235, "grad_norm": 2.0543363094329834, "learning_rate": 1.977678629953558e-05, "loss": 0.5766, "step": 3489 }, { "epoch": 0.09543863487201926, "grad_norm": 2.322936773300171, "learning_rate": 1.9776600169502775e-05, "loss": 0.6249, "step": 3490 }, { "epoch": 0.09546598118573617, "grad_norm": 1.67502760887146, "learning_rate": 1.9776413962775295e-05, "loss": 0.5795, "step": 3491 }, { "epoch": 0.09549332749945308, "grad_norm": 1.3728877305984497, "learning_rate": 1.9776227679354596e-05, "loss": 0.5721, "step": 3492 }, { "epoch": 0.09552067381316999, "grad_norm": 1.9761731624603271, "learning_rate": 1.9776041319242144e-05, "loss": 0.5965, "step": 3493 }, { "epoch": 0.0955480201268869, "grad_norm": 2.022982597351074, "learning_rate": 1.9775854882439398e-05, "loss": 0.5773, "step": 3494 }, { "epoch": 0.09557536644060381, "grad_norm": 1.696171522140503, "learning_rate": 1.9775668368947823e-05, "loss": 0.6063, "step": 3495 }, { "epoch": 0.09560271275432072, "grad_norm": 1.643144965171814, "learning_rate": 1.977548177876888e-05, "loss": 0.5827, "step": 3496 }, { "epoch": 0.09563005906803763, "grad_norm": 1.863045573234558, "learning_rate": 1.9775295111904033e-05, "loss": 0.5955, "step": 3497 }, { "epoch": 0.09565740538175455, "grad_norm": 1.4485775232315063, "learning_rate": 1.9775108368354746e-05, "loss": 0.4273, "step": 3498 }, { "epoch": 0.09568475169547146, "grad_norm": 1.4714819192886353, "learning_rate": 1.9774921548122488e-05, "loss": 0.5752, "step": 3499 }, { "epoch": 0.09571209800918837, "grad_norm": 1.5268371105194092, "learning_rate": 1.9774734651208717e-05, "loss": 0.5835, "step": 3500 }, { "epoch": 0.09573944432290528, "grad_norm": 1.3590408563613892, "learning_rate": 1.9774547677614906e-05, "loss": 0.946, "step": 3501 }, { "epoch": 0.09576679063662219, "grad_norm": 1.940170407295227, "learning_rate": 1.977436062734252e-05, "loss": 0.535, "step": 3502 }, { "epoch": 0.0957941369503391, "grad_norm": 1.414124846458435, "learning_rate": 1.9774173500393022e-05, "loss": 0.5974, "step": 3503 }, { "epoch": 0.09582148326405601, "grad_norm": 3.0973567962646484, "learning_rate": 1.977398629676789e-05, "loss": 0.5531, "step": 3504 }, { "epoch": 0.09584882957777291, "grad_norm": 1.9417842626571655, "learning_rate": 1.977379901646858e-05, "loss": 0.5414, "step": 3505 }, { "epoch": 0.09587617589148982, "grad_norm": 2.1844372749328613, "learning_rate": 1.977361165949657e-05, "loss": 0.581, "step": 3506 }, { "epoch": 0.09590352220520673, "grad_norm": 1.3910387754440308, "learning_rate": 1.9773424225853328e-05, "loss": 0.9883, "step": 3507 }, { "epoch": 0.09593086851892364, "grad_norm": 1.3643295764923096, "learning_rate": 1.9773236715540322e-05, "loss": 0.959, "step": 3508 }, { "epoch": 0.09595821483264055, "grad_norm": 1.756990671157837, "learning_rate": 1.9773049128559025e-05, "loss": 0.5712, "step": 3509 }, { "epoch": 0.09598556114635746, "grad_norm": 2.374303102493286, "learning_rate": 1.9772861464910908e-05, "loss": 0.5921, "step": 3510 }, { "epoch": 0.09601290746007438, "grad_norm": 1.8835232257843018, "learning_rate": 1.977267372459744e-05, "loss": 0.5818, "step": 3511 }, { "epoch": 0.09604025377379129, "grad_norm": 1.9200302362442017, "learning_rate": 1.97724859076201e-05, "loss": 0.5949, "step": 3512 }, { "epoch": 0.0960676000875082, "grad_norm": 1.4984532594680786, "learning_rate": 1.977229801398036e-05, "loss": 0.5791, "step": 3513 }, { "epoch": 0.09609494640122511, "grad_norm": 1.6360399723052979, "learning_rate": 1.9772110043679687e-05, "loss": 0.5751, "step": 3514 }, { "epoch": 0.09612229271494202, "grad_norm": 1.8739662170410156, "learning_rate": 1.9771921996719566e-05, "loss": 0.591, "step": 3515 }, { "epoch": 0.09614963902865893, "grad_norm": 1.3459540605545044, "learning_rate": 1.9771733873101463e-05, "loss": 0.5923, "step": 3516 }, { "epoch": 0.09617698534237584, "grad_norm": 1.7664422988891602, "learning_rate": 1.977154567282686e-05, "loss": 0.4849, "step": 3517 }, { "epoch": 0.09620433165609275, "grad_norm": 1.40593683719635, "learning_rate": 1.9771357395897234e-05, "loss": 0.5921, "step": 3518 }, { "epoch": 0.09623167796980966, "grad_norm": 1.5392396450042725, "learning_rate": 1.9771169042314056e-05, "loss": 0.9773, "step": 3519 }, { "epoch": 0.09625902428352658, "grad_norm": 1.4347000122070312, "learning_rate": 1.9770980612078804e-05, "loss": 0.9861, "step": 3520 }, { "epoch": 0.09628637059724349, "grad_norm": 1.6883844137191772, "learning_rate": 1.9770792105192963e-05, "loss": 0.5425, "step": 3521 }, { "epoch": 0.0963137169109604, "grad_norm": 2.172283411026001, "learning_rate": 1.9770603521658005e-05, "loss": 0.5452, "step": 3522 }, { "epoch": 0.09634106322467731, "grad_norm": 1.676888346672058, "learning_rate": 1.9770414861475413e-05, "loss": 0.5392, "step": 3523 }, { "epoch": 0.09636840953839422, "grad_norm": 2.307358980178833, "learning_rate": 1.977022612464667e-05, "loss": 0.5767, "step": 3524 }, { "epoch": 0.09639575585211113, "grad_norm": 1.5764696598052979, "learning_rate": 1.9770037311173247e-05, "loss": 0.975, "step": 3525 }, { "epoch": 0.09642310216582804, "grad_norm": 1.543431043624878, "learning_rate": 1.9769848421056632e-05, "loss": 0.5781, "step": 3526 }, { "epoch": 0.09645044847954495, "grad_norm": 1.8242162466049194, "learning_rate": 1.9769659454298308e-05, "loss": 0.5825, "step": 3527 }, { "epoch": 0.09647779479326186, "grad_norm": 1.4702798128128052, "learning_rate": 1.976947041089975e-05, "loss": 0.9822, "step": 3528 }, { "epoch": 0.09650514110697878, "grad_norm": 1.5482256412506104, "learning_rate": 1.9769281290862453e-05, "loss": 0.605, "step": 3529 }, { "epoch": 0.09653248742069569, "grad_norm": 1.4763685464859009, "learning_rate": 1.9769092094187887e-05, "loss": 0.5352, "step": 3530 }, { "epoch": 0.0965598337344126, "grad_norm": 2.227818489074707, "learning_rate": 1.9768902820877548e-05, "loss": 0.6334, "step": 3531 }, { "epoch": 0.09658718004812951, "grad_norm": 1.6967813968658447, "learning_rate": 1.9768713470932912e-05, "loss": 0.5947, "step": 3532 }, { "epoch": 0.09661452636184642, "grad_norm": 3.5690789222717285, "learning_rate": 1.976852404435547e-05, "loss": 0.5511, "step": 3533 }, { "epoch": 0.09664187267556333, "grad_norm": 1.7940880060195923, "learning_rate": 1.9768334541146705e-05, "loss": 0.5888, "step": 3534 }, { "epoch": 0.09666921898928024, "grad_norm": 1.5546374320983887, "learning_rate": 1.9768144961308105e-05, "loss": 0.5964, "step": 3535 }, { "epoch": 0.09669656530299715, "grad_norm": 1.9278215169906616, "learning_rate": 1.9767955304841155e-05, "loss": 0.4355, "step": 3536 }, { "epoch": 0.09672391161671406, "grad_norm": 1.8932355642318726, "learning_rate": 1.976776557174735e-05, "loss": 0.5774, "step": 3537 }, { "epoch": 0.09675125793043098, "grad_norm": 1.4153822660446167, "learning_rate": 1.976757576202817e-05, "loss": 0.4499, "step": 3538 }, { "epoch": 0.09677860424414789, "grad_norm": 1.476120948791504, "learning_rate": 1.9767385875685103e-05, "loss": 0.578, "step": 3539 }, { "epoch": 0.0968059505578648, "grad_norm": 1.634706974029541, "learning_rate": 1.9767195912719647e-05, "loss": 0.5765, "step": 3540 }, { "epoch": 0.09683329687158171, "grad_norm": 1.7298234701156616, "learning_rate": 1.9767005873133284e-05, "loss": 0.5688, "step": 3541 }, { "epoch": 0.09686064318529862, "grad_norm": 1.6292624473571777, "learning_rate": 1.9766815756927513e-05, "loss": 0.5707, "step": 3542 }, { "epoch": 0.09688798949901553, "grad_norm": 1.7551183700561523, "learning_rate": 1.9766625564103817e-05, "loss": 0.6405, "step": 3543 }, { "epoch": 0.09691533581273244, "grad_norm": 1.850494623184204, "learning_rate": 1.9766435294663697e-05, "loss": 0.537, "step": 3544 }, { "epoch": 0.09694268212644935, "grad_norm": 2.1809470653533936, "learning_rate": 1.9766244948608638e-05, "loss": 0.9628, "step": 3545 }, { "epoch": 0.09697002844016626, "grad_norm": 1.8398497104644775, "learning_rate": 1.9766054525940136e-05, "loss": 0.5047, "step": 3546 }, { "epoch": 0.09699737475388318, "grad_norm": 1.5678578615188599, "learning_rate": 1.9765864026659682e-05, "loss": 0.4671, "step": 3547 }, { "epoch": 0.09702472106760009, "grad_norm": 1.8620080947875977, "learning_rate": 1.9765673450768775e-05, "loss": 0.5508, "step": 3548 }, { "epoch": 0.097052067381317, "grad_norm": 2.0101561546325684, "learning_rate": 1.9765482798268908e-05, "loss": 0.623, "step": 3549 }, { "epoch": 0.09707941369503391, "grad_norm": 16.251890182495117, "learning_rate": 1.976529206916158e-05, "loss": 0.4416, "step": 3550 }, { "epoch": 0.09710676000875082, "grad_norm": 1.8259763717651367, "learning_rate": 1.9765101263448278e-05, "loss": 0.5884, "step": 3551 }, { "epoch": 0.09713410632246773, "grad_norm": 1.5958658456802368, "learning_rate": 1.976491038113051e-05, "loss": 0.5595, "step": 3552 }, { "epoch": 0.09716145263618464, "grad_norm": 1.7353888750076294, "learning_rate": 1.9764719422209765e-05, "loss": 0.5722, "step": 3553 }, { "epoch": 0.09718879894990155, "grad_norm": 4.574222087860107, "learning_rate": 1.9764528386687546e-05, "loss": 0.5684, "step": 3554 }, { "epoch": 0.09721614526361846, "grad_norm": 1.6526345014572144, "learning_rate": 1.9764337274565348e-05, "loss": 0.5617, "step": 3555 }, { "epoch": 0.09724349157733538, "grad_norm": 1.9490222930908203, "learning_rate": 1.9764146085844674e-05, "loss": 0.5859, "step": 3556 }, { "epoch": 0.09727083789105229, "grad_norm": 1.476733684539795, "learning_rate": 1.976395482052702e-05, "loss": 0.5672, "step": 3557 }, { "epoch": 0.0972981842047692, "grad_norm": 1.5169365406036377, "learning_rate": 1.9763763478613887e-05, "loss": 0.5395, "step": 3558 }, { "epoch": 0.09732553051848611, "grad_norm": 1.4250524044036865, "learning_rate": 1.976357206010678e-05, "loss": 0.4228, "step": 3559 }, { "epoch": 0.09735287683220302, "grad_norm": 1.5815354585647583, "learning_rate": 1.9763380565007198e-05, "loss": 0.5574, "step": 3560 }, { "epoch": 0.09738022314591993, "grad_norm": 1.6111804246902466, "learning_rate": 1.9763188993316643e-05, "loss": 0.5917, "step": 3561 }, { "epoch": 0.09740756945963684, "grad_norm": 1.6804449558258057, "learning_rate": 1.9762997345036617e-05, "loss": 0.6264, "step": 3562 }, { "epoch": 0.09743491577335375, "grad_norm": 1.494398832321167, "learning_rate": 1.9762805620168623e-05, "loss": 0.5582, "step": 3563 }, { "epoch": 0.09746226208707066, "grad_norm": 1.3293302059173584, "learning_rate": 1.9762613818714168e-05, "loss": 0.6089, "step": 3564 }, { "epoch": 0.09748960840078758, "grad_norm": 1.5503828525543213, "learning_rate": 1.9762421940674757e-05, "loss": 0.5647, "step": 3565 }, { "epoch": 0.09751695471450449, "grad_norm": 1.7155694961547852, "learning_rate": 1.976222998605189e-05, "loss": 0.6668, "step": 3566 }, { "epoch": 0.0975443010282214, "grad_norm": 2.3165552616119385, "learning_rate": 1.9762037954847078e-05, "loss": 0.6608, "step": 3567 }, { "epoch": 0.09757164734193831, "grad_norm": 1.7621186971664429, "learning_rate": 1.9761845847061822e-05, "loss": 1.0186, "step": 3568 }, { "epoch": 0.09759899365565522, "grad_norm": 1.8756301403045654, "learning_rate": 1.9761653662697638e-05, "loss": 0.5948, "step": 3569 }, { "epoch": 0.09762633996937213, "grad_norm": 1.6508163213729858, "learning_rate": 1.9761461401756025e-05, "loss": 0.4179, "step": 3570 }, { "epoch": 0.09765368628308904, "grad_norm": 2.3179805278778076, "learning_rate": 1.9761269064238496e-05, "loss": 0.588, "step": 3571 }, { "epoch": 0.09768103259680595, "grad_norm": 1.6693075895309448, "learning_rate": 1.9761076650146556e-05, "loss": 0.6203, "step": 3572 }, { "epoch": 0.09770837891052286, "grad_norm": 1.4031014442443848, "learning_rate": 1.976088415948172e-05, "loss": 0.4308, "step": 3573 }, { "epoch": 0.09773572522423978, "grad_norm": 1.6931724548339844, "learning_rate": 1.9760691592245492e-05, "loss": 0.5899, "step": 3574 }, { "epoch": 0.09776307153795669, "grad_norm": 1.7467464208602905, "learning_rate": 1.9760498948439382e-05, "loss": 0.5551, "step": 3575 }, { "epoch": 0.0977904178516736, "grad_norm": 2.325582504272461, "learning_rate": 1.9760306228064913e-05, "loss": 0.5383, "step": 3576 }, { "epoch": 0.09781776416539051, "grad_norm": 1.7089508771896362, "learning_rate": 1.976011343112358e-05, "loss": 0.5905, "step": 3577 }, { "epoch": 0.09784511047910742, "grad_norm": 1.8736101388931274, "learning_rate": 1.9759920557616912e-05, "loss": 0.583, "step": 3578 }, { "epoch": 0.09787245679282433, "grad_norm": 1.5870941877365112, "learning_rate": 1.975972760754641e-05, "loss": 0.9662, "step": 3579 }, { "epoch": 0.09789980310654124, "grad_norm": 2.498835802078247, "learning_rate": 1.975953458091359e-05, "loss": 0.605, "step": 3580 }, { "epoch": 0.09792714942025815, "grad_norm": 2.0914995670318604, "learning_rate": 1.975934147771997e-05, "loss": 0.6507, "step": 3581 }, { "epoch": 0.09795449573397506, "grad_norm": 1.6281821727752686, "learning_rate": 1.975914829796706e-05, "loss": 0.607, "step": 3582 }, { "epoch": 0.09798184204769197, "grad_norm": 1.5608689785003662, "learning_rate": 1.975895504165638e-05, "loss": 0.5204, "step": 3583 }, { "epoch": 0.09800918836140889, "grad_norm": 2.1000287532806396, "learning_rate": 1.9758761708789443e-05, "loss": 0.5874, "step": 3584 }, { "epoch": 0.0980365346751258, "grad_norm": 1.9447208642959595, "learning_rate": 1.975856829936777e-05, "loss": 0.5695, "step": 3585 }, { "epoch": 0.09806388098884271, "grad_norm": 2.041182518005371, "learning_rate": 1.975837481339287e-05, "loss": 0.5925, "step": 3586 }, { "epoch": 0.09809122730255962, "grad_norm": 2.588902473449707, "learning_rate": 1.9758181250866268e-05, "loss": 0.6173, "step": 3587 }, { "epoch": 0.09811857361627653, "grad_norm": 2.153956174850464, "learning_rate": 1.9757987611789483e-05, "loss": 0.6293, "step": 3588 }, { "epoch": 0.09814591992999344, "grad_norm": 1.8938781023025513, "learning_rate": 1.975779389616403e-05, "loss": 0.5551, "step": 3589 }, { "epoch": 0.09817326624371035, "grad_norm": 2.1391794681549072, "learning_rate": 1.9757600103991426e-05, "loss": 0.5644, "step": 3590 }, { "epoch": 0.09820061255742726, "grad_norm": 3.461702823638916, "learning_rate": 1.97574062352732e-05, "loss": 0.5908, "step": 3591 }, { "epoch": 0.09822795887114417, "grad_norm": 1.8968784809112549, "learning_rate": 1.9757212290010865e-05, "loss": 0.6271, "step": 3592 }, { "epoch": 0.09825530518486109, "grad_norm": 1.484123945236206, "learning_rate": 1.975701826820595e-05, "loss": 0.556, "step": 3593 }, { "epoch": 0.098282651498578, "grad_norm": 1.5100041627883911, "learning_rate": 1.9756824169859968e-05, "loss": 0.4311, "step": 3594 }, { "epoch": 0.09830999781229491, "grad_norm": 2.1601366996765137, "learning_rate": 1.9756629994974447e-05, "loss": 0.5811, "step": 3595 }, { "epoch": 0.09833734412601182, "grad_norm": 2.208117723464966, "learning_rate": 1.975643574355091e-05, "loss": 0.589, "step": 3596 }, { "epoch": 0.09836469043972873, "grad_norm": 2.3529725074768066, "learning_rate": 1.975624141559088e-05, "loss": 0.5504, "step": 3597 }, { "epoch": 0.09839203675344564, "grad_norm": 2.1916956901550293, "learning_rate": 1.975604701109588e-05, "loss": 0.6525, "step": 3598 }, { "epoch": 0.09841938306716255, "grad_norm": 1.8830536603927612, "learning_rate": 1.9755852530067442e-05, "loss": 0.595, "step": 3599 }, { "epoch": 0.09844672938087946, "grad_norm": 1.5782842636108398, "learning_rate": 1.9755657972507082e-05, "loss": 0.5422, "step": 3600 }, { "epoch": 0.09847407569459637, "grad_norm": 1.896626353263855, "learning_rate": 1.9755463338416333e-05, "loss": 0.5743, "step": 3601 }, { "epoch": 0.09850142200831329, "grad_norm": 1.81056809425354, "learning_rate": 1.9755268627796717e-05, "loss": 0.574, "step": 3602 }, { "epoch": 0.0985287683220302, "grad_norm": 1.850174069404602, "learning_rate": 1.9755073840649764e-05, "loss": 0.9928, "step": 3603 }, { "epoch": 0.09855611463574711, "grad_norm": 2.182854652404785, "learning_rate": 1.9754878976977006e-05, "loss": 0.5744, "step": 3604 }, { "epoch": 0.09858346094946402, "grad_norm": 1.974082350730896, "learning_rate": 1.975468403677996e-05, "loss": 0.5524, "step": 3605 }, { "epoch": 0.09861080726318093, "grad_norm": 1.8413922786712646, "learning_rate": 1.975448902006017e-05, "loss": 0.5587, "step": 3606 }, { "epoch": 0.09863815357689783, "grad_norm": 1.897868275642395, "learning_rate": 1.975429392681916e-05, "loss": 0.581, "step": 3607 }, { "epoch": 0.09866549989061474, "grad_norm": 5.616458415985107, "learning_rate": 1.9754098757058454e-05, "loss": 0.5879, "step": 3608 }, { "epoch": 0.09869284620433165, "grad_norm": 1.8617440462112427, "learning_rate": 1.9753903510779592e-05, "loss": 0.582, "step": 3609 }, { "epoch": 0.09872019251804856, "grad_norm": 1.7167026996612549, "learning_rate": 1.97537081879841e-05, "loss": 0.5703, "step": 3610 }, { "epoch": 0.09874753883176547, "grad_norm": 1.9717416763305664, "learning_rate": 1.975351278867351e-05, "loss": 0.6152, "step": 3611 }, { "epoch": 0.09877488514548238, "grad_norm": 1.6318881511688232, "learning_rate": 1.9753317312849362e-05, "loss": 0.6123, "step": 3612 }, { "epoch": 0.0988022314591993, "grad_norm": 1.890572428703308, "learning_rate": 1.9753121760513183e-05, "loss": 0.5644, "step": 3613 }, { "epoch": 0.0988295777729162, "grad_norm": 1.6586129665374756, "learning_rate": 1.9752926131666504e-05, "loss": 0.97, "step": 3614 }, { "epoch": 0.09885692408663312, "grad_norm": 1.7837843894958496, "learning_rate": 1.975273042631087e-05, "loss": 0.5553, "step": 3615 }, { "epoch": 0.09888427040035003, "grad_norm": 1.9956104755401611, "learning_rate": 1.975253464444781e-05, "loss": 0.5596, "step": 3616 }, { "epoch": 0.09891161671406694, "grad_norm": 2.2196743488311768, "learning_rate": 1.975233878607886e-05, "loss": 0.5295, "step": 3617 }, { "epoch": 0.09893896302778385, "grad_norm": 1.5514222383499146, "learning_rate": 1.9752142851205552e-05, "loss": 0.5604, "step": 3618 }, { "epoch": 0.09896630934150076, "grad_norm": 1.8581230640411377, "learning_rate": 1.9751946839829432e-05, "loss": 0.5635, "step": 3619 }, { "epoch": 0.09899365565521767, "grad_norm": 1.416532039642334, "learning_rate": 1.975175075195203e-05, "loss": 0.563, "step": 3620 }, { "epoch": 0.09902100196893458, "grad_norm": 2.513559341430664, "learning_rate": 1.9751554587574894e-05, "loss": 0.5498, "step": 3621 }, { "epoch": 0.0990483482826515, "grad_norm": 1.519115924835205, "learning_rate": 1.9751358346699554e-05, "loss": 0.9651, "step": 3622 }, { "epoch": 0.0990756945963684, "grad_norm": 2.0406606197357178, "learning_rate": 1.975116202932755e-05, "loss": 0.6209, "step": 3623 }, { "epoch": 0.09910304091008532, "grad_norm": 2.1184730529785156, "learning_rate": 1.9750965635460426e-05, "loss": 0.5349, "step": 3624 }, { "epoch": 0.09913038722380223, "grad_norm": 1.731109857559204, "learning_rate": 1.9750769165099718e-05, "loss": 0.587, "step": 3625 }, { "epoch": 0.09915773353751914, "grad_norm": 1.6984570026397705, "learning_rate": 1.9750572618246973e-05, "loss": 0.608, "step": 3626 }, { "epoch": 0.09918507985123605, "grad_norm": 2.0134658813476562, "learning_rate": 1.975037599490373e-05, "loss": 0.557, "step": 3627 }, { "epoch": 0.09921242616495296, "grad_norm": 1.725921392440796, "learning_rate": 1.975017929507153e-05, "loss": 0.5985, "step": 3628 }, { "epoch": 0.09923977247866987, "grad_norm": 1.7803819179534912, "learning_rate": 1.9749982518751918e-05, "loss": 0.5635, "step": 3629 }, { "epoch": 0.09926711879238678, "grad_norm": 3.970921754837036, "learning_rate": 1.9749785665946435e-05, "loss": 0.6221, "step": 3630 }, { "epoch": 0.0992944651061037, "grad_norm": 1.4487576484680176, "learning_rate": 1.9749588736656628e-05, "loss": 0.5627, "step": 3631 }, { "epoch": 0.0993218114198206, "grad_norm": 1.3326314687728882, "learning_rate": 1.9749391730884042e-05, "loss": 0.5839, "step": 3632 }, { "epoch": 0.09934915773353752, "grad_norm": 1.6101425886154175, "learning_rate": 1.9749194648630222e-05, "loss": 0.5948, "step": 3633 }, { "epoch": 0.09937650404725443, "grad_norm": 1.7668440341949463, "learning_rate": 1.9748997489896713e-05, "loss": 0.5859, "step": 3634 }, { "epoch": 0.09940385036097134, "grad_norm": 1.7738523483276367, "learning_rate": 1.9748800254685063e-05, "loss": 0.5685, "step": 3635 }, { "epoch": 0.09943119667468825, "grad_norm": 1.5269964933395386, "learning_rate": 1.9748602942996818e-05, "loss": 0.4388, "step": 3636 }, { "epoch": 0.09945854298840516, "grad_norm": 1.6736714839935303, "learning_rate": 1.9748405554833525e-05, "loss": 0.5481, "step": 3637 }, { "epoch": 0.09948588930212207, "grad_norm": 1.4365264177322388, "learning_rate": 1.9748208090196734e-05, "loss": 0.598, "step": 3638 }, { "epoch": 0.09951323561583898, "grad_norm": 1.7408833503723145, "learning_rate": 1.9748010549087995e-05, "loss": 0.5544, "step": 3639 }, { "epoch": 0.0995405819295559, "grad_norm": 1.693354606628418, "learning_rate": 1.9747812931508858e-05, "loss": 0.5911, "step": 3640 }, { "epoch": 0.0995679282432728, "grad_norm": 1.883791446685791, "learning_rate": 1.974761523746087e-05, "loss": 0.5811, "step": 3641 }, { "epoch": 0.09959527455698972, "grad_norm": 1.7210851907730103, "learning_rate": 1.9747417466945587e-05, "loss": 0.5649, "step": 3642 }, { "epoch": 0.09962262087070663, "grad_norm": 1.838503360748291, "learning_rate": 1.9747219619964554e-05, "loss": 0.5897, "step": 3643 }, { "epoch": 0.09964996718442354, "grad_norm": 2.2217366695404053, "learning_rate": 1.9747021696519323e-05, "loss": 0.5374, "step": 3644 }, { "epoch": 0.09967731349814045, "grad_norm": 1.5308170318603516, "learning_rate": 1.9746823696611454e-05, "loss": 0.5669, "step": 3645 }, { "epoch": 0.09970465981185736, "grad_norm": 1.815037488937378, "learning_rate": 1.9746625620242498e-05, "loss": 0.5662, "step": 3646 }, { "epoch": 0.09973200612557427, "grad_norm": 2.0321810245513916, "learning_rate": 1.9746427467414002e-05, "loss": 0.5599, "step": 3647 }, { "epoch": 0.09975935243929118, "grad_norm": 1.6106619834899902, "learning_rate": 1.974622923812753e-05, "loss": 0.9455, "step": 3648 }, { "epoch": 0.0997866987530081, "grad_norm": 1.8926457166671753, "learning_rate": 1.974603093238463e-05, "loss": 0.536, "step": 3649 }, { "epoch": 0.099814045066725, "grad_norm": 2.3311572074890137, "learning_rate": 1.974583255018686e-05, "loss": 0.5624, "step": 3650 }, { "epoch": 0.09984139138044192, "grad_norm": 1.9836912155151367, "learning_rate": 1.9745634091535777e-05, "loss": 0.5854, "step": 3651 }, { "epoch": 0.09986873769415883, "grad_norm": 1.6191613674163818, "learning_rate": 1.9745435556432936e-05, "loss": 0.6277, "step": 3652 }, { "epoch": 0.09989608400787574, "grad_norm": 1.3918325901031494, "learning_rate": 1.9745236944879897e-05, "loss": 0.5528, "step": 3653 }, { "epoch": 0.09992343032159265, "grad_norm": 1.5056653022766113, "learning_rate": 1.9745038256878217e-05, "loss": 0.5656, "step": 3654 }, { "epoch": 0.09995077663530956, "grad_norm": 2.034219264984131, "learning_rate": 1.9744839492429456e-05, "loss": 0.5639, "step": 3655 }, { "epoch": 0.09997812294902647, "grad_norm": 1.316508173942566, "learning_rate": 1.9744640651535168e-05, "loss": 0.5952, "step": 3656 }, { "epoch": 0.10000546926274338, "grad_norm": 1.6294753551483154, "learning_rate": 1.9744441734196916e-05, "loss": 0.5988, "step": 3657 }, { "epoch": 0.1000328155764603, "grad_norm": 1.4795397520065308, "learning_rate": 1.9744242740416264e-05, "loss": 0.5928, "step": 3658 }, { "epoch": 0.1000601618901772, "grad_norm": 1.992835283279419, "learning_rate": 1.974404367019477e-05, "loss": 0.6104, "step": 3659 }, { "epoch": 0.10008750820389412, "grad_norm": 1.6872859001159668, "learning_rate": 1.974384452353399e-05, "loss": 0.632, "step": 3660 }, { "epoch": 0.10011485451761103, "grad_norm": 2.0802834033966064, "learning_rate": 1.97436453004355e-05, "loss": 0.5846, "step": 3661 }, { "epoch": 0.10014220083132794, "grad_norm": 1.9097024202346802, "learning_rate": 1.974344600090085e-05, "loss": 0.5764, "step": 3662 }, { "epoch": 0.10016954714504485, "grad_norm": 1.562559962272644, "learning_rate": 1.9743246624931608e-05, "loss": 0.58, "step": 3663 }, { "epoch": 0.10019689345876176, "grad_norm": 1.4171531200408936, "learning_rate": 1.9743047172529338e-05, "loss": 0.5699, "step": 3664 }, { "epoch": 0.10022423977247867, "grad_norm": 1.7640652656555176, "learning_rate": 1.9742847643695606e-05, "loss": 0.5665, "step": 3665 }, { "epoch": 0.10025158608619558, "grad_norm": 2.027691125869751, "learning_rate": 1.9742648038431976e-05, "loss": 0.5689, "step": 3666 }, { "epoch": 0.1002789323999125, "grad_norm": 1.3830732107162476, "learning_rate": 1.9742448356740015e-05, "loss": 0.5796, "step": 3667 }, { "epoch": 0.1003062787136294, "grad_norm": 1.724525809288025, "learning_rate": 1.9742248598621288e-05, "loss": 0.587, "step": 3668 }, { "epoch": 0.10033362502734632, "grad_norm": 1.6803709268569946, "learning_rate": 1.9742048764077362e-05, "loss": 0.5629, "step": 3669 }, { "epoch": 0.10036097134106323, "grad_norm": 1.5146766901016235, "learning_rate": 1.9741848853109805e-05, "loss": 0.6404, "step": 3670 }, { "epoch": 0.10038831765478014, "grad_norm": 1.9757888317108154, "learning_rate": 1.9741648865720187e-05, "loss": 0.5666, "step": 3671 }, { "epoch": 0.10041566396849705, "grad_norm": 1.7312884330749512, "learning_rate": 1.9741448801910074e-05, "loss": 0.5055, "step": 3672 }, { "epoch": 0.10044301028221396, "grad_norm": 1.9765652418136597, "learning_rate": 1.9741248661681035e-05, "loss": 0.9477, "step": 3673 }, { "epoch": 0.10047035659593087, "grad_norm": 1.6405916213989258, "learning_rate": 1.9741048445034642e-05, "loss": 0.5421, "step": 3674 }, { "epoch": 0.10049770290964778, "grad_norm": 1.7325397729873657, "learning_rate": 1.9740848151972466e-05, "loss": 0.5548, "step": 3675 }, { "epoch": 0.1005250492233647, "grad_norm": 1.9773461818695068, "learning_rate": 1.9740647782496078e-05, "loss": 0.5748, "step": 3676 }, { "epoch": 0.1005523955370816, "grad_norm": 2.0074689388275146, "learning_rate": 1.9740447336607048e-05, "loss": 0.5892, "step": 3677 }, { "epoch": 0.10057974185079852, "grad_norm": 1.9188673496246338, "learning_rate": 1.9740246814306952e-05, "loss": 0.589, "step": 3678 }, { "epoch": 0.10060708816451543, "grad_norm": 1.5865802764892578, "learning_rate": 1.9740046215597363e-05, "loss": 0.9119, "step": 3679 }, { "epoch": 0.10063443447823234, "grad_norm": 1.5584502220153809, "learning_rate": 1.9739845540479848e-05, "loss": 0.5933, "step": 3680 }, { "epoch": 0.10066178079194925, "grad_norm": 1.6701006889343262, "learning_rate": 1.9739644788955988e-05, "loss": 0.5805, "step": 3681 }, { "epoch": 0.10068912710566616, "grad_norm": 1.768677830696106, "learning_rate": 1.9739443961027357e-05, "loss": 0.568, "step": 3682 }, { "epoch": 0.10071647341938307, "grad_norm": 1.6820931434631348, "learning_rate": 1.9739243056695523e-05, "loss": 0.591, "step": 3683 }, { "epoch": 0.10074381973309998, "grad_norm": 1.8853665590286255, "learning_rate": 1.9739042075962074e-05, "loss": 0.6318, "step": 3684 }, { "epoch": 0.10077116604681689, "grad_norm": 1.959633231163025, "learning_rate": 1.9738841018828577e-05, "loss": 0.5822, "step": 3685 }, { "epoch": 0.1007985123605338, "grad_norm": 1.826547384262085, "learning_rate": 1.9738639885296615e-05, "loss": 0.5632, "step": 3686 }, { "epoch": 0.10082585867425072, "grad_norm": 2.138273000717163, "learning_rate": 1.9738438675367763e-05, "loss": 0.5815, "step": 3687 }, { "epoch": 0.10085320498796763, "grad_norm": 1.5488446950912476, "learning_rate": 1.9738237389043603e-05, "loss": 0.5505, "step": 3688 }, { "epoch": 0.10088055130168454, "grad_norm": 1.9879398345947266, "learning_rate": 1.9738036026325708e-05, "loss": 0.547, "step": 3689 }, { "epoch": 0.10090789761540145, "grad_norm": 1.9539752006530762, "learning_rate": 1.9737834587215663e-05, "loss": 0.5893, "step": 3690 }, { "epoch": 0.10093524392911836, "grad_norm": 1.7959545850753784, "learning_rate": 1.9737633071715043e-05, "loss": 0.5877, "step": 3691 }, { "epoch": 0.10096259024283527, "grad_norm": 1.8898680210113525, "learning_rate": 1.9737431479825435e-05, "loss": 0.5879, "step": 3692 }, { "epoch": 0.10098993655655218, "grad_norm": 1.8928897380828857, "learning_rate": 1.9737229811548414e-05, "loss": 0.5814, "step": 3693 }, { "epoch": 0.10101728287026909, "grad_norm": 1.7510759830474854, "learning_rate": 1.9737028066885568e-05, "loss": 0.5962, "step": 3694 }, { "epoch": 0.101044629183986, "grad_norm": 5.847419261932373, "learning_rate": 1.9736826245838477e-05, "loss": 0.5888, "step": 3695 }, { "epoch": 0.10107197549770292, "grad_norm": 1.3853232860565186, "learning_rate": 1.9736624348408723e-05, "loss": 0.5812, "step": 3696 }, { "epoch": 0.10109932181141983, "grad_norm": 1.9190571308135986, "learning_rate": 1.9736422374597895e-05, "loss": 0.9335, "step": 3697 }, { "epoch": 0.10112666812513674, "grad_norm": 1.5995384454727173, "learning_rate": 1.973622032440757e-05, "loss": 0.5132, "step": 3698 }, { "epoch": 0.10115401443885365, "grad_norm": 1.6718107461929321, "learning_rate": 1.973601819783934e-05, "loss": 0.547, "step": 3699 }, { "epoch": 0.10118136075257056, "grad_norm": 1.326148271560669, "learning_rate": 1.973581599489478e-05, "loss": 0.4488, "step": 3700 }, { "epoch": 0.10120870706628747, "grad_norm": 1.9279255867004395, "learning_rate": 1.973561371557549e-05, "loss": 0.5131, "step": 3701 }, { "epoch": 0.10123605338000438, "grad_norm": 2.252253293991089, "learning_rate": 1.9735411359883046e-05, "loss": 0.5834, "step": 3702 }, { "epoch": 0.10126339969372129, "grad_norm": 1.5125715732574463, "learning_rate": 1.9735208927819042e-05, "loss": 0.5783, "step": 3703 }, { "epoch": 0.1012907460074382, "grad_norm": 1.8496654033660889, "learning_rate": 1.9735006419385067e-05, "loss": 0.566, "step": 3704 }, { "epoch": 0.10131809232115511, "grad_norm": 1.743434190750122, "learning_rate": 1.9734803834582702e-05, "loss": 0.5738, "step": 3705 }, { "epoch": 0.10134543863487203, "grad_norm": 1.670745611190796, "learning_rate": 1.9734601173413538e-05, "loss": 0.5775, "step": 3706 }, { "epoch": 0.10137278494858894, "grad_norm": 2.4143030643463135, "learning_rate": 1.9734398435879173e-05, "loss": 0.5915, "step": 3707 }, { "epoch": 0.10140013126230583, "grad_norm": 1.647474765777588, "learning_rate": 1.973419562198119e-05, "loss": 0.5946, "step": 3708 }, { "epoch": 0.10142747757602275, "grad_norm": 1.8412443399429321, "learning_rate": 1.973399273172118e-05, "loss": 0.5373, "step": 3709 }, { "epoch": 0.10145482388973966, "grad_norm": 1.638190746307373, "learning_rate": 1.973378976510074e-05, "loss": 0.5556, "step": 3710 }, { "epoch": 0.10148217020345657, "grad_norm": 1.869309425354004, "learning_rate": 1.9733586722121455e-05, "loss": 0.6561, "step": 3711 }, { "epoch": 0.10150951651717348, "grad_norm": 1.9463750123977661, "learning_rate": 1.9733383602784922e-05, "loss": 0.5984, "step": 3712 }, { "epoch": 0.10153686283089039, "grad_norm": 2.3618969917297363, "learning_rate": 1.9733180407092733e-05, "loss": 0.5286, "step": 3713 }, { "epoch": 0.1015642091446073, "grad_norm": 1.7241660356521606, "learning_rate": 1.9732977135046484e-05, "loss": 0.5769, "step": 3714 }, { "epoch": 0.10159155545832421, "grad_norm": 1.9701623916625977, "learning_rate": 1.973277378664777e-05, "loss": 0.5583, "step": 3715 }, { "epoch": 0.10161890177204112, "grad_norm": 1.724043607711792, "learning_rate": 1.9732570361898184e-05, "loss": 0.6247, "step": 3716 }, { "epoch": 0.10164624808575803, "grad_norm": 2.08526611328125, "learning_rate": 1.973236686079932e-05, "loss": 0.5952, "step": 3717 }, { "epoch": 0.10167359439947495, "grad_norm": 1.9309155941009521, "learning_rate": 1.973216328335278e-05, "loss": 0.5921, "step": 3718 }, { "epoch": 0.10170094071319186, "grad_norm": 2.1815149784088135, "learning_rate": 1.9731959629560157e-05, "loss": 0.6141, "step": 3719 }, { "epoch": 0.10172828702690877, "grad_norm": 1.8347002267837524, "learning_rate": 1.973175589942305e-05, "loss": 0.5964, "step": 3720 }, { "epoch": 0.10175563334062568, "grad_norm": 1.908419132232666, "learning_rate": 1.973155209294306e-05, "loss": 0.5745, "step": 3721 }, { "epoch": 0.10178297965434259, "grad_norm": 2.1540634632110596, "learning_rate": 1.9731348210121776e-05, "loss": 0.5816, "step": 3722 }, { "epoch": 0.1018103259680595, "grad_norm": 2.3341293334960938, "learning_rate": 1.9731144250960804e-05, "loss": 0.5657, "step": 3723 }, { "epoch": 0.10183767228177641, "grad_norm": 1.5596562623977661, "learning_rate": 1.973094021546175e-05, "loss": 0.5739, "step": 3724 }, { "epoch": 0.10186501859549332, "grad_norm": 2.528923273086548, "learning_rate": 1.9730736103626203e-05, "loss": 0.5649, "step": 3725 }, { "epoch": 0.10189236490921023, "grad_norm": 2.3603382110595703, "learning_rate": 1.973053191545577e-05, "loss": 0.5898, "step": 3726 }, { "epoch": 0.10191971122292715, "grad_norm": 1.7982662916183472, "learning_rate": 1.9730327650952054e-05, "loss": 0.6377, "step": 3727 }, { "epoch": 0.10194705753664406, "grad_norm": 2.21079158782959, "learning_rate": 1.9730123310116657e-05, "loss": 0.959, "step": 3728 }, { "epoch": 0.10197440385036097, "grad_norm": 1.5733810663223267, "learning_rate": 1.972991889295118e-05, "loss": 0.6012, "step": 3729 }, { "epoch": 0.10200175016407788, "grad_norm": 1.4597612619400024, "learning_rate": 1.9729714399457228e-05, "loss": 0.6048, "step": 3730 }, { "epoch": 0.10202909647779479, "grad_norm": 1.4471468925476074, "learning_rate": 1.9729509829636404e-05, "loss": 0.5679, "step": 3731 }, { "epoch": 0.1020564427915117, "grad_norm": 1.4270994663238525, "learning_rate": 1.9729305183490316e-05, "loss": 0.5427, "step": 3732 }, { "epoch": 0.10208378910522861, "grad_norm": 2.2173948287963867, "learning_rate": 1.9729100461020562e-05, "loss": 0.5264, "step": 3733 }, { "epoch": 0.10211113541894552, "grad_norm": 2.587393045425415, "learning_rate": 1.972889566222876e-05, "loss": 0.5737, "step": 3734 }, { "epoch": 0.10213848173266243, "grad_norm": 1.6505416631698608, "learning_rate": 1.9728690787116502e-05, "loss": 0.5932, "step": 3735 }, { "epoch": 0.10216582804637935, "grad_norm": 1.8404650688171387, "learning_rate": 1.9728485835685408e-05, "loss": 0.9371, "step": 3736 }, { "epoch": 0.10219317436009626, "grad_norm": 1.6492894887924194, "learning_rate": 1.972828080793708e-05, "loss": 0.5723, "step": 3737 }, { "epoch": 0.10222052067381317, "grad_norm": 1.3340532779693604, "learning_rate": 1.972807570387312e-05, "loss": 0.6024, "step": 3738 }, { "epoch": 0.10224786698753008, "grad_norm": 1.8563828468322754, "learning_rate": 1.972787052349515e-05, "loss": 0.4443, "step": 3739 }, { "epoch": 0.10227521330124699, "grad_norm": 1.5291045904159546, "learning_rate": 1.9727665266804772e-05, "loss": 0.5817, "step": 3740 }, { "epoch": 0.1023025596149639, "grad_norm": 1.7418010234832764, "learning_rate": 1.97274599338036e-05, "loss": 0.597, "step": 3741 }, { "epoch": 0.10232990592868081, "grad_norm": 1.6769553422927856, "learning_rate": 1.972725452449324e-05, "loss": 0.597, "step": 3742 }, { "epoch": 0.10235725224239772, "grad_norm": 2.2974693775177, "learning_rate": 1.9727049038875306e-05, "loss": 0.5457, "step": 3743 }, { "epoch": 0.10238459855611463, "grad_norm": 3.104825258255005, "learning_rate": 1.9726843476951408e-05, "loss": 0.5219, "step": 3744 }, { "epoch": 0.10241194486983154, "grad_norm": 1.751336693763733, "learning_rate": 1.9726637838723164e-05, "loss": 0.5789, "step": 3745 }, { "epoch": 0.10243929118354846, "grad_norm": 1.4802868366241455, "learning_rate": 1.972643212419218e-05, "loss": 0.6276, "step": 3746 }, { "epoch": 0.10246663749726537, "grad_norm": 2.2002482414245605, "learning_rate": 1.9726226333360074e-05, "loss": 0.6227, "step": 3747 }, { "epoch": 0.10249398381098228, "grad_norm": 1.7093092203140259, "learning_rate": 1.972602046622846e-05, "loss": 0.5549, "step": 3748 }, { "epoch": 0.10252133012469919, "grad_norm": 1.8939961194992065, "learning_rate": 1.9725814522798955e-05, "loss": 0.5483, "step": 3749 }, { "epoch": 0.1025486764384161, "grad_norm": 1.9862194061279297, "learning_rate": 1.9725608503073167e-05, "loss": 0.5566, "step": 3750 }, { "epoch": 0.10257602275213301, "grad_norm": 1.7924587726593018, "learning_rate": 1.9725402407052723e-05, "loss": 0.9278, "step": 3751 }, { "epoch": 0.10260336906584992, "grad_norm": 1.4610995054244995, "learning_rate": 1.972519623473923e-05, "loss": 0.5607, "step": 3752 }, { "epoch": 0.10263071537956683, "grad_norm": 1.9232450723648071, "learning_rate": 1.9724989986134312e-05, "loss": 0.4528, "step": 3753 }, { "epoch": 0.10265806169328374, "grad_norm": 2.218057870864868, "learning_rate": 1.9724783661239585e-05, "loss": 0.5872, "step": 3754 }, { "epoch": 0.10268540800700066, "grad_norm": 1.5581218004226685, "learning_rate": 1.9724577260056663e-05, "loss": 0.5708, "step": 3755 }, { "epoch": 0.10271275432071757, "grad_norm": 1.851166844367981, "learning_rate": 1.9724370782587172e-05, "loss": 0.529, "step": 3756 }, { "epoch": 0.10274010063443448, "grad_norm": 2.0480058193206787, "learning_rate": 1.972416422883273e-05, "loss": 0.6539, "step": 3757 }, { "epoch": 0.10276744694815139, "grad_norm": 1.6835378408432007, "learning_rate": 1.972395759879495e-05, "loss": 0.6252, "step": 3758 }, { "epoch": 0.1027947932618683, "grad_norm": 2.0295419692993164, "learning_rate": 1.9723750892475468e-05, "loss": 0.6288, "step": 3759 }, { "epoch": 0.10282213957558521, "grad_norm": 1.3709385395050049, "learning_rate": 1.9723544109875893e-05, "loss": 0.9596, "step": 3760 }, { "epoch": 0.10284948588930212, "grad_norm": 1.6790341138839722, "learning_rate": 1.9723337250997847e-05, "loss": 0.6166, "step": 3761 }, { "epoch": 0.10287683220301903, "grad_norm": 1.46967613697052, "learning_rate": 1.972313031584296e-05, "loss": 0.575, "step": 3762 }, { "epoch": 0.10290417851673594, "grad_norm": 1.62379789352417, "learning_rate": 1.9722923304412854e-05, "loss": 0.9639, "step": 3763 }, { "epoch": 0.10293152483045286, "grad_norm": 1.5870025157928467, "learning_rate": 1.9722716216709148e-05, "loss": 0.5662, "step": 3764 }, { "epoch": 0.10295887114416977, "grad_norm": 1.500977873802185, "learning_rate": 1.972250905273347e-05, "loss": 0.9917, "step": 3765 }, { "epoch": 0.10298621745788668, "grad_norm": 2.034818649291992, "learning_rate": 1.9722301812487446e-05, "loss": 0.6534, "step": 3766 }, { "epoch": 0.10301356377160359, "grad_norm": 1.3036547899246216, "learning_rate": 1.9722094495972698e-05, "loss": 0.5751, "step": 3767 }, { "epoch": 0.1030409100853205, "grad_norm": 1.3415770530700684, "learning_rate": 1.972188710319086e-05, "loss": 0.9219, "step": 3768 }, { "epoch": 0.10306825639903741, "grad_norm": 2.0814201831817627, "learning_rate": 1.9721679634143546e-05, "loss": 0.9548, "step": 3769 }, { "epoch": 0.10309560271275432, "grad_norm": 1.5573972463607788, "learning_rate": 1.9721472088832395e-05, "loss": 0.6314, "step": 3770 }, { "epoch": 0.10312294902647123, "grad_norm": 1.5530234575271606, "learning_rate": 1.9721264467259032e-05, "loss": 0.5613, "step": 3771 }, { "epoch": 0.10315029534018814, "grad_norm": 1.8806735277175903, "learning_rate": 1.9721056769425084e-05, "loss": 0.5753, "step": 3772 }, { "epoch": 0.10317764165390506, "grad_norm": 2.3436834812164307, "learning_rate": 1.972084899533218e-05, "loss": 0.5351, "step": 3773 }, { "epoch": 0.10320498796762197, "grad_norm": 1.5409992933273315, "learning_rate": 1.9720641144981955e-05, "loss": 0.6251, "step": 3774 }, { "epoch": 0.10323233428133888, "grad_norm": 1.336705207824707, "learning_rate": 1.9720433218376036e-05, "loss": 0.5824, "step": 3775 }, { "epoch": 0.10325968059505579, "grad_norm": 1.468302845954895, "learning_rate": 1.972022521551605e-05, "loss": 0.5919, "step": 3776 }, { "epoch": 0.1032870269087727, "grad_norm": 1.7291203737258911, "learning_rate": 1.9720017136403635e-05, "loss": 0.574, "step": 3777 }, { "epoch": 0.10331437322248961, "grad_norm": 1.940913200378418, "learning_rate": 1.9719808981040423e-05, "loss": 0.5321, "step": 3778 }, { "epoch": 0.10334171953620652, "grad_norm": 1.2513612508773804, "learning_rate": 1.971960074942804e-05, "loss": 0.6004, "step": 3779 }, { "epoch": 0.10336906584992343, "grad_norm": 1.655436396598816, "learning_rate": 1.9719392441568126e-05, "loss": 0.5675, "step": 3780 }, { "epoch": 0.10339641216364034, "grad_norm": 1.5832394361495972, "learning_rate": 1.9719184057462314e-05, "loss": 0.5896, "step": 3781 }, { "epoch": 0.10342375847735726, "grad_norm": 1.453398585319519, "learning_rate": 1.971897559711224e-05, "loss": 0.5887, "step": 3782 }, { "epoch": 0.10345110479107417, "grad_norm": 1.779249906539917, "learning_rate": 1.9718767060519536e-05, "loss": 0.966, "step": 3783 }, { "epoch": 0.10347845110479108, "grad_norm": 1.6834044456481934, "learning_rate": 1.9718558447685844e-05, "loss": 0.6107, "step": 3784 }, { "epoch": 0.10350579741850799, "grad_norm": 1.9885079860687256, "learning_rate": 1.971834975861279e-05, "loss": 0.575, "step": 3785 }, { "epoch": 0.1035331437322249, "grad_norm": 1.5627751350402832, "learning_rate": 1.9718140993302018e-05, "loss": 0.5753, "step": 3786 }, { "epoch": 0.10356049004594181, "grad_norm": 1.7759078741073608, "learning_rate": 1.9717932151755165e-05, "loss": 0.5708, "step": 3787 }, { "epoch": 0.10358783635965872, "grad_norm": 1.48174250125885, "learning_rate": 1.971772323397387e-05, "loss": 0.5582, "step": 3788 }, { "epoch": 0.10361518267337563, "grad_norm": 1.5862510204315186, "learning_rate": 1.9717514239959776e-05, "loss": 0.9808, "step": 3789 }, { "epoch": 0.10364252898709254, "grad_norm": 1.498845100402832, "learning_rate": 1.971730516971451e-05, "loss": 0.9549, "step": 3790 }, { "epoch": 0.10366987530080946, "grad_norm": 1.929505705833435, "learning_rate": 1.971709602323972e-05, "loss": 0.574, "step": 3791 }, { "epoch": 0.10369722161452637, "grad_norm": 1.8060011863708496, "learning_rate": 1.9716886800537048e-05, "loss": 0.5975, "step": 3792 }, { "epoch": 0.10372456792824328, "grad_norm": 1.4421864748001099, "learning_rate": 1.971667750160814e-05, "loss": 0.5618, "step": 3793 }, { "epoch": 0.10375191424196019, "grad_norm": 1.9396567344665527, "learning_rate": 1.9716468126454622e-05, "loss": 0.5504, "step": 3794 }, { "epoch": 0.1037792605556771, "grad_norm": 1.8690192699432373, "learning_rate": 1.9716258675078152e-05, "loss": 0.572, "step": 3795 }, { "epoch": 0.10380660686939401, "grad_norm": 1.6404404640197754, "learning_rate": 1.9716049147480362e-05, "loss": 0.622, "step": 3796 }, { "epoch": 0.10383395318311092, "grad_norm": 1.1480462551116943, "learning_rate": 1.9715839543662905e-05, "loss": 0.4403, "step": 3797 }, { "epoch": 0.10386129949682783, "grad_norm": 2.1949374675750732, "learning_rate": 1.9715629863627418e-05, "loss": 0.6174, "step": 3798 }, { "epoch": 0.10388864581054474, "grad_norm": 1.8187183141708374, "learning_rate": 1.971542010737555e-05, "loss": 0.6102, "step": 3799 }, { "epoch": 0.10391599212426166, "grad_norm": 1.7888153791427612, "learning_rate": 1.9715210274908945e-05, "loss": 0.5611, "step": 3800 }, { "epoch": 0.10394333843797857, "grad_norm": 1.3978744745254517, "learning_rate": 1.971500036622925e-05, "loss": 0.5754, "step": 3801 }, { "epoch": 0.10397068475169548, "grad_norm": 1.5417654514312744, "learning_rate": 1.9714790381338115e-05, "loss": 0.5891, "step": 3802 }, { "epoch": 0.10399803106541239, "grad_norm": 2.044009208679199, "learning_rate": 1.971458032023718e-05, "loss": 0.9597, "step": 3803 }, { "epoch": 0.1040253773791293, "grad_norm": 1.6469043493270874, "learning_rate": 1.9714370182928095e-05, "loss": 0.6043, "step": 3804 }, { "epoch": 0.10405272369284621, "grad_norm": 1.6646943092346191, "learning_rate": 1.971415996941251e-05, "loss": 0.5414, "step": 3805 }, { "epoch": 0.10408007000656312, "grad_norm": 1.3473454713821411, "learning_rate": 1.9713949679692076e-05, "loss": 0.5816, "step": 3806 }, { "epoch": 0.10410741632028003, "grad_norm": 1.7251312732696533, "learning_rate": 1.971373931376844e-05, "loss": 0.5502, "step": 3807 }, { "epoch": 0.10413476263399694, "grad_norm": 1.8151001930236816, "learning_rate": 1.9713528871643253e-05, "loss": 0.6011, "step": 3808 }, { "epoch": 0.10416210894771386, "grad_norm": 1.5583090782165527, "learning_rate": 1.9713318353318167e-05, "loss": 0.5037, "step": 3809 }, { "epoch": 0.10418945526143075, "grad_norm": 1.8292776346206665, "learning_rate": 1.9713107758794827e-05, "loss": 0.5752, "step": 3810 }, { "epoch": 0.10421680157514766, "grad_norm": 1.4642350673675537, "learning_rate": 1.9712897088074893e-05, "loss": 0.5626, "step": 3811 }, { "epoch": 0.10424414788886457, "grad_norm": 1.6371806859970093, "learning_rate": 1.9712686341160016e-05, "loss": 0.5825, "step": 3812 }, { "epoch": 0.10427149420258149, "grad_norm": 1.7202717065811157, "learning_rate": 1.9712475518051845e-05, "loss": 0.5987, "step": 3813 }, { "epoch": 0.1042988405162984, "grad_norm": 1.6115779876708984, "learning_rate": 1.971226461875204e-05, "loss": 0.5169, "step": 3814 }, { "epoch": 0.10432618683001531, "grad_norm": 2.1336917877197266, "learning_rate": 1.9712053643262253e-05, "loss": 0.5816, "step": 3815 }, { "epoch": 0.10435353314373222, "grad_norm": 1.8442647457122803, "learning_rate": 1.9711842591584136e-05, "loss": 0.5354, "step": 3816 }, { "epoch": 0.10438087945744913, "grad_norm": 1.7093005180358887, "learning_rate": 1.9711631463719345e-05, "loss": 0.5871, "step": 3817 }, { "epoch": 0.10440822577116604, "grad_norm": 1.6216596364974976, "learning_rate": 1.971142025966954e-05, "loss": 0.5263, "step": 3818 }, { "epoch": 0.10443557208488295, "grad_norm": 1.5705387592315674, "learning_rate": 1.9711208979436378e-05, "loss": 0.5929, "step": 3819 }, { "epoch": 0.10446291839859986, "grad_norm": 1.7551989555358887, "learning_rate": 1.9710997623021515e-05, "loss": 0.5802, "step": 3820 }, { "epoch": 0.10449026471231677, "grad_norm": 1.669795274734497, "learning_rate": 1.9710786190426606e-05, "loss": 0.6069, "step": 3821 }, { "epoch": 0.10451761102603369, "grad_norm": 1.6197246313095093, "learning_rate": 1.971057468165331e-05, "loss": 0.5409, "step": 3822 }, { "epoch": 0.1045449573397506, "grad_norm": 1.7137004137039185, "learning_rate": 1.9710363096703294e-05, "loss": 0.6056, "step": 3823 }, { "epoch": 0.10457230365346751, "grad_norm": 1.9189313650131226, "learning_rate": 1.971015143557821e-05, "loss": 0.5868, "step": 3824 }, { "epoch": 0.10459964996718442, "grad_norm": 1.7913167476654053, "learning_rate": 1.970993969827972e-05, "loss": 0.5775, "step": 3825 }, { "epoch": 0.10462699628090133, "grad_norm": 1.9346791505813599, "learning_rate": 1.9709727884809488e-05, "loss": 0.5345, "step": 3826 }, { "epoch": 0.10465434259461824, "grad_norm": 1.390708565711975, "learning_rate": 1.970951599516917e-05, "loss": 0.5715, "step": 3827 }, { "epoch": 0.10468168890833515, "grad_norm": 1.9210364818572998, "learning_rate": 1.9709304029360434e-05, "loss": 0.979, "step": 3828 }, { "epoch": 0.10470903522205206, "grad_norm": 2.4303085803985596, "learning_rate": 1.970909198738494e-05, "loss": 0.6239, "step": 3829 }, { "epoch": 0.10473638153576897, "grad_norm": 1.7530722618103027, "learning_rate": 1.970887986924435e-05, "loss": 0.5766, "step": 3830 }, { "epoch": 0.10476372784948589, "grad_norm": 1.9685046672821045, "learning_rate": 1.9708667674940328e-05, "loss": 0.5622, "step": 3831 }, { "epoch": 0.1047910741632028, "grad_norm": 1.4385384321212769, "learning_rate": 1.9708455404474546e-05, "loss": 0.9705, "step": 3832 }, { "epoch": 0.10481842047691971, "grad_norm": 2.226994276046753, "learning_rate": 1.970824305784866e-05, "loss": 0.5833, "step": 3833 }, { "epoch": 0.10484576679063662, "grad_norm": 1.8024287223815918, "learning_rate": 1.9708030635064342e-05, "loss": 0.5873, "step": 3834 }, { "epoch": 0.10487311310435353, "grad_norm": 1.8746055364608765, "learning_rate": 1.9707818136123254e-05, "loss": 0.5669, "step": 3835 }, { "epoch": 0.10490045941807044, "grad_norm": 1.9457917213439941, "learning_rate": 1.9707605561027063e-05, "loss": 0.5459, "step": 3836 }, { "epoch": 0.10492780573178735, "grad_norm": 3.4790985584259033, "learning_rate": 1.970739290977744e-05, "loss": 0.6367, "step": 3837 }, { "epoch": 0.10495515204550426, "grad_norm": 1.9064021110534668, "learning_rate": 1.970718018237605e-05, "loss": 0.5997, "step": 3838 }, { "epoch": 0.10498249835922117, "grad_norm": 1.7082830667495728, "learning_rate": 1.9706967378824565e-05, "loss": 0.5774, "step": 3839 }, { "epoch": 0.10500984467293809, "grad_norm": 2.3097238540649414, "learning_rate": 1.970675449912465e-05, "loss": 0.5579, "step": 3840 }, { "epoch": 0.105037190986655, "grad_norm": 1.6179817914962769, "learning_rate": 1.9706541543277985e-05, "loss": 0.5426, "step": 3841 }, { "epoch": 0.10506453730037191, "grad_norm": 2.0062475204467773, "learning_rate": 1.9706328511286225e-05, "loss": 0.5595, "step": 3842 }, { "epoch": 0.10509188361408882, "grad_norm": 2.623255491256714, "learning_rate": 1.9706115403151052e-05, "loss": 0.5748, "step": 3843 }, { "epoch": 0.10511922992780573, "grad_norm": 1.564866065979004, "learning_rate": 1.9705902218874136e-05, "loss": 0.5717, "step": 3844 }, { "epoch": 0.10514657624152264, "grad_norm": 7.27280330657959, "learning_rate": 1.970568895845715e-05, "loss": 0.5774, "step": 3845 }, { "epoch": 0.10517392255523955, "grad_norm": 1.9834405183792114, "learning_rate": 1.9705475621901762e-05, "loss": 0.5754, "step": 3846 }, { "epoch": 0.10520126886895646, "grad_norm": 1.9681837558746338, "learning_rate": 1.9705262209209652e-05, "loss": 0.5916, "step": 3847 }, { "epoch": 0.10522861518267337, "grad_norm": 1.7806025743484497, "learning_rate": 1.970504872038249e-05, "loss": 0.5898, "step": 3848 }, { "epoch": 0.10525596149639029, "grad_norm": 1.7405178546905518, "learning_rate": 1.9704835155421954e-05, "loss": 0.5237, "step": 3849 }, { "epoch": 0.1052833078101072, "grad_norm": 1.9124867916107178, "learning_rate": 1.9704621514329717e-05, "loss": 0.5745, "step": 3850 }, { "epoch": 0.10531065412382411, "grad_norm": 1.7706739902496338, "learning_rate": 1.9704407797107453e-05, "loss": 0.5811, "step": 3851 }, { "epoch": 0.10533800043754102, "grad_norm": 1.7066419124603271, "learning_rate": 1.9704194003756842e-05, "loss": 0.6329, "step": 3852 }, { "epoch": 0.10536534675125793, "grad_norm": 3.006643772125244, "learning_rate": 1.9703980134279562e-05, "loss": 0.5697, "step": 3853 }, { "epoch": 0.10539269306497484, "grad_norm": 2.0152273178100586, "learning_rate": 1.9703766188677283e-05, "loss": 0.5716, "step": 3854 }, { "epoch": 0.10542003937869175, "grad_norm": 1.67819082736969, "learning_rate": 1.9703552166951696e-05, "loss": 0.5575, "step": 3855 }, { "epoch": 0.10544738569240866, "grad_norm": 1.6970106363296509, "learning_rate": 1.9703338069104467e-05, "loss": 0.5903, "step": 3856 }, { "epoch": 0.10547473200612557, "grad_norm": 1.8896445035934448, "learning_rate": 1.9703123895137287e-05, "loss": 0.5736, "step": 3857 }, { "epoch": 0.10550207831984248, "grad_norm": 2.2634148597717285, "learning_rate": 1.9702909645051827e-05, "loss": 0.6724, "step": 3858 }, { "epoch": 0.1055294246335594, "grad_norm": 1.5611985921859741, "learning_rate": 1.9702695318849774e-05, "loss": 0.5471, "step": 3859 }, { "epoch": 0.10555677094727631, "grad_norm": 1.7815337181091309, "learning_rate": 1.9702480916532805e-05, "loss": 0.6443, "step": 3860 }, { "epoch": 0.10558411726099322, "grad_norm": 2.136352062225342, "learning_rate": 1.9702266438102608e-05, "loss": 0.5632, "step": 3861 }, { "epoch": 0.10561146357471013, "grad_norm": 1.6201326847076416, "learning_rate": 1.9702051883560855e-05, "loss": 0.5906, "step": 3862 }, { "epoch": 0.10563880988842704, "grad_norm": 2.105861186981201, "learning_rate": 1.9701837252909238e-05, "loss": 0.5718, "step": 3863 }, { "epoch": 0.10566615620214395, "grad_norm": 1.6827824115753174, "learning_rate": 1.9701622546149438e-05, "loss": 0.6023, "step": 3864 }, { "epoch": 0.10569350251586086, "grad_norm": 2.7653262615203857, "learning_rate": 1.970140776328314e-05, "loss": 0.5978, "step": 3865 }, { "epoch": 0.10572084882957777, "grad_norm": 1.697881817817688, "learning_rate": 1.9701192904312027e-05, "loss": 0.5629, "step": 3866 }, { "epoch": 0.10574819514329468, "grad_norm": 1.4824994802474976, "learning_rate": 1.9700977969237786e-05, "loss": 0.5697, "step": 3867 }, { "epoch": 0.1057755414570116, "grad_norm": 3.544703960418701, "learning_rate": 1.9700762958062106e-05, "loss": 0.5591, "step": 3868 }, { "epoch": 0.10580288777072851, "grad_norm": 1.7133721113204956, "learning_rate": 1.9700547870786666e-05, "loss": 0.642, "step": 3869 }, { "epoch": 0.10583023408444542, "grad_norm": 1.758725643157959, "learning_rate": 1.9700332707413162e-05, "loss": 0.5458, "step": 3870 }, { "epoch": 0.10585758039816233, "grad_norm": 1.695986032485962, "learning_rate": 1.9700117467943275e-05, "loss": 0.5708, "step": 3871 }, { "epoch": 0.10588492671187924, "grad_norm": 2.1636343002319336, "learning_rate": 1.9699902152378696e-05, "loss": 0.4694, "step": 3872 }, { "epoch": 0.10591227302559615, "grad_norm": 4.217648029327393, "learning_rate": 1.9699686760721115e-05, "loss": 0.6238, "step": 3873 }, { "epoch": 0.10593961933931306, "grad_norm": 1.8333778381347656, "learning_rate": 1.9699471292972224e-05, "loss": 0.5917, "step": 3874 }, { "epoch": 0.10596696565302997, "grad_norm": 1.586949348449707, "learning_rate": 1.9699255749133706e-05, "loss": 0.599, "step": 3875 }, { "epoch": 0.10599431196674688, "grad_norm": 1.6919331550598145, "learning_rate": 1.9699040129207258e-05, "loss": 0.5892, "step": 3876 }, { "epoch": 0.1060216582804638, "grad_norm": 1.9151291847229004, "learning_rate": 1.969882443319457e-05, "loss": 0.424, "step": 3877 }, { "epoch": 0.1060490045941807, "grad_norm": 1.9677046537399292, "learning_rate": 1.9698608661097333e-05, "loss": 0.9608, "step": 3878 }, { "epoch": 0.10607635090789762, "grad_norm": 1.6072839498519897, "learning_rate": 1.969839281291724e-05, "loss": 0.5598, "step": 3879 }, { "epoch": 0.10610369722161453, "grad_norm": 1.9948813915252686, "learning_rate": 1.969817688865599e-05, "loss": 0.5884, "step": 3880 }, { "epoch": 0.10613104353533144, "grad_norm": 1.9130643606185913, "learning_rate": 1.9697960888315267e-05, "loss": 0.5279, "step": 3881 }, { "epoch": 0.10615838984904835, "grad_norm": 1.2735614776611328, "learning_rate": 1.9697744811896767e-05, "loss": 0.3887, "step": 3882 }, { "epoch": 0.10618573616276526, "grad_norm": 2.912881374359131, "learning_rate": 1.9697528659402196e-05, "loss": 0.5758, "step": 3883 }, { "epoch": 0.10621308247648217, "grad_norm": 1.419845700263977, "learning_rate": 1.9697312430833237e-05, "loss": 0.5667, "step": 3884 }, { "epoch": 0.10624042879019908, "grad_norm": 1.462132215499878, "learning_rate": 1.969709612619159e-05, "loss": 0.5801, "step": 3885 }, { "epoch": 0.106267775103916, "grad_norm": 2.4115724563598633, "learning_rate": 1.9696879745478956e-05, "loss": 0.4108, "step": 3886 }, { "epoch": 0.1062951214176329, "grad_norm": 2.116276502609253, "learning_rate": 1.9696663288697027e-05, "loss": 0.5718, "step": 3887 }, { "epoch": 0.10632246773134982, "grad_norm": 2.265699625015259, "learning_rate": 1.9696446755847505e-05, "loss": 0.5355, "step": 3888 }, { "epoch": 0.10634981404506673, "grad_norm": 123.11615753173828, "learning_rate": 1.9696230146932086e-05, "loss": 0.5536, "step": 3889 }, { "epoch": 0.10637716035878364, "grad_norm": 1.571667194366455, "learning_rate": 1.9696013461952472e-05, "loss": 0.5874, "step": 3890 }, { "epoch": 0.10640450667250055, "grad_norm": 1.6066685914993286, "learning_rate": 1.969579670091036e-05, "loss": 0.5952, "step": 3891 }, { "epoch": 0.10643185298621746, "grad_norm": 1.6475809812545776, "learning_rate": 1.969557986380745e-05, "loss": 0.5758, "step": 3892 }, { "epoch": 0.10645919929993437, "grad_norm": 2.2612006664276123, "learning_rate": 1.9695362950645447e-05, "loss": 0.5773, "step": 3893 }, { "epoch": 0.10648654561365128, "grad_norm": 1.753494381904602, "learning_rate": 1.9695145961426048e-05, "loss": 0.5625, "step": 3894 }, { "epoch": 0.1065138919273682, "grad_norm": 2.724257230758667, "learning_rate": 1.9694928896150958e-05, "loss": 0.6806, "step": 3895 }, { "epoch": 0.1065412382410851, "grad_norm": 1.879593014717102, "learning_rate": 1.969471175482188e-05, "loss": 0.6092, "step": 3896 }, { "epoch": 0.10656858455480202, "grad_norm": 2.144901752471924, "learning_rate": 1.9694494537440514e-05, "loss": 0.5832, "step": 3897 }, { "epoch": 0.10659593086851893, "grad_norm": 2.146392345428467, "learning_rate": 1.9694277244008568e-05, "loss": 0.6161, "step": 3898 }, { "epoch": 0.10662327718223584, "grad_norm": 1.8867782354354858, "learning_rate": 1.9694059874527748e-05, "loss": 0.5862, "step": 3899 }, { "epoch": 0.10665062349595275, "grad_norm": 2.684178590774536, "learning_rate": 1.9693842428999754e-05, "loss": 0.5823, "step": 3900 }, { "epoch": 0.10667796980966966, "grad_norm": 3.658813953399658, "learning_rate": 1.9693624907426294e-05, "loss": 0.611, "step": 3901 }, { "epoch": 0.10670531612338657, "grad_norm": 2.1688592433929443, "learning_rate": 1.9693407309809076e-05, "loss": 0.6837, "step": 3902 }, { "epoch": 0.10673266243710348, "grad_norm": 2.6195549964904785, "learning_rate": 1.9693189636149804e-05, "loss": 0.591, "step": 3903 }, { "epoch": 0.1067600087508204, "grad_norm": 2.3269550800323486, "learning_rate": 1.9692971886450188e-05, "loss": 0.5927, "step": 3904 }, { "epoch": 0.1067873550645373, "grad_norm": 2.1263906955718994, "learning_rate": 1.9692754060711937e-05, "loss": 0.6126, "step": 3905 }, { "epoch": 0.10681470137825422, "grad_norm": 2.2694594860076904, "learning_rate": 1.9692536158936754e-05, "loss": 0.588, "step": 3906 }, { "epoch": 0.10684204769197113, "grad_norm": 2.01850962638855, "learning_rate": 1.9692318181126358e-05, "loss": 0.589, "step": 3907 }, { "epoch": 0.10686939400568804, "grad_norm": 2.119119882583618, "learning_rate": 1.969210012728245e-05, "loss": 0.624, "step": 3908 }, { "epoch": 0.10689674031940495, "grad_norm": 2.506561040878296, "learning_rate": 1.9691881997406742e-05, "loss": 0.6211, "step": 3909 }, { "epoch": 0.10692408663312186, "grad_norm": 4.114643573760986, "learning_rate": 1.969166379150095e-05, "loss": 0.5725, "step": 3910 }, { "epoch": 0.10695143294683876, "grad_norm": 3.5610880851745605, "learning_rate": 1.9691445509566787e-05, "loss": 0.6958, "step": 3911 }, { "epoch": 0.10697877926055567, "grad_norm": 5.957381248474121, "learning_rate": 1.9691227151605958e-05, "loss": 0.5736, "step": 3912 }, { "epoch": 0.10700612557427258, "grad_norm": 2.7321228981018066, "learning_rate": 1.9691008717620177e-05, "loss": 0.496, "step": 3913 }, { "epoch": 0.10703347188798949, "grad_norm": 2.9731357097625732, "learning_rate": 1.969079020761116e-05, "loss": 0.6093, "step": 3914 }, { "epoch": 0.1070608182017064, "grad_norm": 4.394106388092041, "learning_rate": 1.9690571621580625e-05, "loss": 0.6725, "step": 3915 }, { "epoch": 0.10708816451542331, "grad_norm": 3.238248109817505, "learning_rate": 1.969035295953028e-05, "loss": 0.6077, "step": 3916 }, { "epoch": 0.10711551082914023, "grad_norm": 2.455212116241455, "learning_rate": 1.9690134221461842e-05, "loss": 0.6043, "step": 3917 }, { "epoch": 0.10714285714285714, "grad_norm": 2.607987403869629, "learning_rate": 1.968991540737703e-05, "loss": 0.6049, "step": 3918 }, { "epoch": 0.10717020345657405, "grad_norm": 23.463520050048828, "learning_rate": 1.968969651727756e-05, "loss": 0.6032, "step": 3919 }, { "epoch": 0.10719754977029096, "grad_norm": 2.496211290359497, "learning_rate": 1.9689477551165144e-05, "loss": 1.0216, "step": 3920 }, { "epoch": 0.10722489608400787, "grad_norm": 2.731267213821411, "learning_rate": 1.9689258509041505e-05, "loss": 0.6244, "step": 3921 }, { "epoch": 0.10725224239772478, "grad_norm": 2.229275703430176, "learning_rate": 1.9689039390908362e-05, "loss": 0.5919, "step": 3922 }, { "epoch": 0.10727958871144169, "grad_norm": 3.0565969944000244, "learning_rate": 1.968882019676743e-05, "loss": 0.6057, "step": 3923 }, { "epoch": 0.1073069350251586, "grad_norm": 3.3128085136413574, "learning_rate": 1.968860092662043e-05, "loss": 0.6111, "step": 3924 }, { "epoch": 0.10733428133887551, "grad_norm": 3.0332820415496826, "learning_rate": 1.9688381580469084e-05, "loss": 0.5731, "step": 3925 }, { "epoch": 0.10736162765259243, "grad_norm": 3.4544525146484375, "learning_rate": 1.9688162158315108e-05, "loss": 0.6555, "step": 3926 }, { "epoch": 0.10738897396630934, "grad_norm": 3.2231855392456055, "learning_rate": 1.968794266016023e-05, "loss": 0.5983, "step": 3927 }, { "epoch": 0.10741632028002625, "grad_norm": 5.573910236358643, "learning_rate": 1.9687723086006164e-05, "loss": 0.6105, "step": 3928 }, { "epoch": 0.10744366659374316, "grad_norm": 5.450027942657471, "learning_rate": 1.968750343585464e-05, "loss": 0.6221, "step": 3929 }, { "epoch": 0.10747101290746007, "grad_norm": 3.8986406326293945, "learning_rate": 1.9687283709707375e-05, "loss": 0.6016, "step": 3930 }, { "epoch": 0.10749835922117698, "grad_norm": 2.7359437942504883, "learning_rate": 1.96870639075661e-05, "loss": 0.6648, "step": 3931 }, { "epoch": 0.10752570553489389, "grad_norm": 2.8223588466644287, "learning_rate": 1.968684402943253e-05, "loss": 0.6368, "step": 3932 }, { "epoch": 0.1075530518486108, "grad_norm": 3.128676176071167, "learning_rate": 1.96866240753084e-05, "loss": 0.4985, "step": 3933 }, { "epoch": 0.10758039816232771, "grad_norm": 3.5110561847686768, "learning_rate": 1.9686404045195428e-05, "loss": 0.6157, "step": 3934 }, { "epoch": 0.10760774447604463, "grad_norm": 4.6364922523498535, "learning_rate": 1.9686183939095337e-05, "loss": 0.5947, "step": 3935 }, { "epoch": 0.10763509078976154, "grad_norm": 2.5857994556427, "learning_rate": 1.9685963757009862e-05, "loss": 1.0511, "step": 3936 }, { "epoch": 0.10766243710347845, "grad_norm": 4.641363620758057, "learning_rate": 1.968574349894073e-05, "loss": 0.5955, "step": 3937 }, { "epoch": 0.10768978341719536, "grad_norm": 10.37004566192627, "learning_rate": 1.9685523164889667e-05, "loss": 0.6277, "step": 3938 }, { "epoch": 0.10771712973091227, "grad_norm": 9.632312774658203, "learning_rate": 1.9685302754858397e-05, "loss": 0.6122, "step": 3939 }, { "epoch": 0.10774447604462918, "grad_norm": 4.080882549285889, "learning_rate": 1.9685082268848654e-05, "loss": 0.6331, "step": 3940 }, { "epoch": 0.10777182235834609, "grad_norm": 3.5299980640411377, "learning_rate": 1.9684861706862163e-05, "loss": 0.4981, "step": 3941 }, { "epoch": 0.107799168672063, "grad_norm": 4.393587589263916, "learning_rate": 1.9684641068900662e-05, "loss": 0.5903, "step": 3942 }, { "epoch": 0.10782651498577991, "grad_norm": 2.6572582721710205, "learning_rate": 1.9684420354965876e-05, "loss": 0.4741, "step": 3943 }, { "epoch": 0.10785386129949683, "grad_norm": 4.045711994171143, "learning_rate": 1.9684199565059536e-05, "loss": 0.6149, "step": 3944 }, { "epoch": 0.10788120761321374, "grad_norm": 4.85690450668335, "learning_rate": 1.968397869918338e-05, "loss": 0.6984, "step": 3945 }, { "epoch": 0.10790855392693065, "grad_norm": 2.524484157562256, "learning_rate": 1.9683757757339133e-05, "loss": 0.661, "step": 3946 }, { "epoch": 0.10793590024064756, "grad_norm": 3.1522300243377686, "learning_rate": 1.968353673952853e-05, "loss": 0.6292, "step": 3947 }, { "epoch": 0.10796324655436447, "grad_norm": 2.504636287689209, "learning_rate": 1.968331564575331e-05, "loss": 0.6494, "step": 3948 }, { "epoch": 0.10799059286808138, "grad_norm": 2.4249978065490723, "learning_rate": 1.96830944760152e-05, "loss": 0.65, "step": 3949 }, { "epoch": 0.10801793918179829, "grad_norm": 3.088690757751465, "learning_rate": 1.9682873230315943e-05, "loss": 0.607, "step": 3950 }, { "epoch": 0.1080452854955152, "grad_norm": 2.606602191925049, "learning_rate": 1.9682651908657267e-05, "loss": 0.6175, "step": 3951 }, { "epoch": 0.10807263180923211, "grad_norm": 2.279128313064575, "learning_rate": 1.9682430511040915e-05, "loss": 0.6364, "step": 3952 }, { "epoch": 0.10809997812294903, "grad_norm": 2.9204299449920654, "learning_rate": 1.968220903746862e-05, "loss": 0.7138, "step": 3953 }, { "epoch": 0.10812732443666594, "grad_norm": 1.8384346961975098, "learning_rate": 1.9681987487942118e-05, "loss": 0.5355, "step": 3954 }, { "epoch": 0.10815467075038285, "grad_norm": 2.7717325687408447, "learning_rate": 1.968176586246315e-05, "loss": 1.0415, "step": 3955 }, { "epoch": 0.10818201706409976, "grad_norm": 2.45576810836792, "learning_rate": 1.9681544161033454e-05, "loss": 0.6239, "step": 3956 }, { "epoch": 0.10820936337781667, "grad_norm": 2.0422873497009277, "learning_rate": 1.968132238365477e-05, "loss": 0.6189, "step": 3957 }, { "epoch": 0.10823670969153358, "grad_norm": 2.455821990966797, "learning_rate": 1.9681100530328833e-05, "loss": 0.6139, "step": 3958 }, { "epoch": 0.10826405600525049, "grad_norm": 2.625154495239258, "learning_rate": 1.9680878601057388e-05, "loss": 0.6105, "step": 3959 }, { "epoch": 0.1082914023189674, "grad_norm": 2.756596803665161, "learning_rate": 1.9680656595842175e-05, "loss": 0.6155, "step": 3960 }, { "epoch": 0.10831874863268431, "grad_norm": 3.220353364944458, "learning_rate": 1.9680434514684936e-05, "loss": 0.615, "step": 3961 }, { "epoch": 0.10834609494640123, "grad_norm": 3.4612109661102295, "learning_rate": 1.968021235758741e-05, "loss": 0.5771, "step": 3962 }, { "epoch": 0.10837344126011814, "grad_norm": 3.1125500202178955, "learning_rate": 1.9679990124551347e-05, "loss": 0.6506, "step": 3963 }, { "epoch": 0.10840078757383505, "grad_norm": 3.1463143825531006, "learning_rate": 1.967976781557848e-05, "loss": 0.614, "step": 3964 }, { "epoch": 0.10842813388755196, "grad_norm": 2.8324389457702637, "learning_rate": 1.9679545430670565e-05, "loss": 0.5981, "step": 3965 }, { "epoch": 0.10845548020126887, "grad_norm": 3.0102176666259766, "learning_rate": 1.9679322969829336e-05, "loss": 1.0534, "step": 3966 }, { "epoch": 0.10848282651498578, "grad_norm": 2.232187271118164, "learning_rate": 1.9679100433056544e-05, "loss": 0.9941, "step": 3967 }, { "epoch": 0.10851017282870269, "grad_norm": 2.981992721557617, "learning_rate": 1.9678877820353933e-05, "loss": 0.5949, "step": 3968 }, { "epoch": 0.1085375191424196, "grad_norm": 2.8926045894622803, "learning_rate": 1.9678655131723244e-05, "loss": 0.6414, "step": 3969 }, { "epoch": 0.10856486545613651, "grad_norm": 5.79648494720459, "learning_rate": 1.9678432367166237e-05, "loss": 0.6001, "step": 3970 }, { "epoch": 0.10859221176985343, "grad_norm": 2.9179718494415283, "learning_rate": 1.9678209526684648e-05, "loss": 0.6147, "step": 3971 }, { "epoch": 0.10861955808357034, "grad_norm": 1.985122561454773, "learning_rate": 1.967798661028023e-05, "loss": 0.6, "step": 3972 }, { "epoch": 0.10864690439728725, "grad_norm": 2.9797022342681885, "learning_rate": 1.9677763617954732e-05, "loss": 0.6183, "step": 3973 }, { "epoch": 0.10867425071100416, "grad_norm": 4.770840644836426, "learning_rate": 1.96775405497099e-05, "loss": 0.6055, "step": 3974 }, { "epoch": 0.10870159702472107, "grad_norm": 2.599966049194336, "learning_rate": 1.9677317405547485e-05, "loss": 0.636, "step": 3975 }, { "epoch": 0.10872894333843798, "grad_norm": 5.538642883300781, "learning_rate": 1.967709418546924e-05, "loss": 0.5838, "step": 3976 }, { "epoch": 0.10875628965215489, "grad_norm": 4.2239603996276855, "learning_rate": 1.9676870889476914e-05, "loss": 1.069, "step": 3977 }, { "epoch": 0.1087836359658718, "grad_norm": 2.8084607124328613, "learning_rate": 1.967664751757226e-05, "loss": 0.6126, "step": 3978 }, { "epoch": 0.10881098227958871, "grad_norm": 3.9384524822235107, "learning_rate": 1.9676424069757028e-05, "loss": 0.6134, "step": 3979 }, { "epoch": 0.10883832859330562, "grad_norm": 2.5686862468719482, "learning_rate": 1.9676200546032972e-05, "loss": 1.037, "step": 3980 }, { "epoch": 0.10886567490702254, "grad_norm": 3.766261577606201, "learning_rate": 1.9675976946401845e-05, "loss": 0.6124, "step": 3981 }, { "epoch": 0.10889302122073945, "grad_norm": 16.998779296875, "learning_rate": 1.967575327086541e-05, "loss": 0.6909, "step": 3982 }, { "epoch": 0.10892036753445636, "grad_norm": 2.744234085083008, "learning_rate": 1.9675529519425405e-05, "loss": 0.6006, "step": 3983 }, { "epoch": 0.10894771384817327, "grad_norm": 5.304773807525635, "learning_rate": 1.9675305692083596e-05, "loss": 0.5751, "step": 3984 }, { "epoch": 0.10897506016189018, "grad_norm": 3.6770193576812744, "learning_rate": 1.9675081788841735e-05, "loss": 0.6354, "step": 3985 }, { "epoch": 0.10900240647560709, "grad_norm": 5.335519313812256, "learning_rate": 1.9674857809701586e-05, "loss": 0.7143, "step": 3986 }, { "epoch": 0.109029752789324, "grad_norm": 5.156371593475342, "learning_rate": 1.9674633754664892e-05, "loss": 0.6018, "step": 3987 }, { "epoch": 0.10905709910304091, "grad_norm": 7.434057235717773, "learning_rate": 1.9674409623733423e-05, "loss": 0.6049, "step": 3988 }, { "epoch": 0.10908444541675782, "grad_norm": 5.681389331817627, "learning_rate": 1.9674185416908937e-05, "loss": 0.6652, "step": 3989 }, { "epoch": 0.10911179173047474, "grad_norm": 2.5146563053131104, "learning_rate": 1.9673961134193182e-05, "loss": 0.5475, "step": 3990 }, { "epoch": 0.10913913804419165, "grad_norm": 3.818847894668579, "learning_rate": 1.9673736775587927e-05, "loss": 0.65, "step": 3991 }, { "epoch": 0.10916648435790856, "grad_norm": 11.134625434875488, "learning_rate": 1.967351234109493e-05, "loss": 0.6714, "step": 3992 }, { "epoch": 0.10919383067162547, "grad_norm": 9.433490753173828, "learning_rate": 1.9673287830715953e-05, "loss": 0.6156, "step": 3993 }, { "epoch": 0.10922117698534238, "grad_norm": 9.778977394104004, "learning_rate": 1.9673063244452755e-05, "loss": 0.6318, "step": 3994 }, { "epoch": 0.10924852329905929, "grad_norm": 7.641572952270508, "learning_rate": 1.9672838582307094e-05, "loss": 0.6742, "step": 3995 }, { "epoch": 0.1092758696127762, "grad_norm": 5.706486225128174, "learning_rate": 1.967261384428074e-05, "loss": 1.1469, "step": 3996 }, { "epoch": 0.10930321592649311, "grad_norm": 8.627176284790039, "learning_rate": 1.967238903037545e-05, "loss": 0.6208, "step": 3997 }, { "epoch": 0.10933056224021002, "grad_norm": 6.12158203125, "learning_rate": 1.9672164140592994e-05, "loss": 0.6693, "step": 3998 }, { "epoch": 0.10935790855392694, "grad_norm": 4.54768180847168, "learning_rate": 1.967193917493513e-05, "loss": 0.6285, "step": 3999 }, { "epoch": 0.10938525486764385, "grad_norm": 6.414261817932129, "learning_rate": 1.9671714133403622e-05, "loss": 0.6549, "step": 4000 }, { "epoch": 0.10941260118136076, "grad_norm": 7.209465503692627, "learning_rate": 1.9671489016000242e-05, "loss": 0.7042, "step": 4001 }, { "epoch": 0.10943994749507767, "grad_norm": 10.185748100280762, "learning_rate": 1.9671263822726755e-05, "loss": 0.7261, "step": 4002 }, { "epoch": 0.10946729380879458, "grad_norm": 2.1501851081848145, "learning_rate": 1.967103855358492e-05, "loss": 1.0561, "step": 4003 }, { "epoch": 0.10949464012251149, "grad_norm": 7.243074417114258, "learning_rate": 1.967081320857651e-05, "loss": 0.6983, "step": 4004 }, { "epoch": 0.1095219864362284, "grad_norm": 36.06739044189453, "learning_rate": 1.9670587787703296e-05, "loss": 0.6775, "step": 4005 }, { "epoch": 0.10954933274994531, "grad_norm": 3.550072431564331, "learning_rate": 1.967036229096704e-05, "loss": 0.6963, "step": 4006 }, { "epoch": 0.10957667906366222, "grad_norm": 3.788275957107544, "learning_rate": 1.9670136718369512e-05, "loss": 0.6634, "step": 4007 }, { "epoch": 0.10960402537737914, "grad_norm": 4.052183151245117, "learning_rate": 1.9669911069912483e-05, "loss": 0.6726, "step": 4008 }, { "epoch": 0.10963137169109605, "grad_norm": 2.017810344696045, "learning_rate": 1.966968534559772e-05, "loss": 0.5312, "step": 4009 }, { "epoch": 0.10965871800481296, "grad_norm": 2.406313419342041, "learning_rate": 1.9669459545427e-05, "loss": 1.0288, "step": 4010 }, { "epoch": 0.10968606431852987, "grad_norm": 12.932793617248535, "learning_rate": 1.9669233669402093e-05, "loss": 0.7018, "step": 4011 }, { "epoch": 0.10971341063224678, "grad_norm": 4.101798057556152, "learning_rate": 1.9669007717524767e-05, "loss": 0.7288, "step": 4012 }, { "epoch": 0.10974075694596368, "grad_norm": 1.7601593732833862, "learning_rate": 1.96687816897968e-05, "loss": 1.0638, "step": 4013 }, { "epoch": 0.10976810325968059, "grad_norm": 4.528652191162109, "learning_rate": 1.9668555586219956e-05, "loss": 0.7071, "step": 4014 }, { "epoch": 0.1097954495733975, "grad_norm": 4.4835333824157715, "learning_rate": 1.9668329406796018e-05, "loss": 0.7305, "step": 4015 }, { "epoch": 0.10982279588711441, "grad_norm": 3.677133798599243, "learning_rate": 1.9668103151526758e-05, "loss": 0.675, "step": 4016 }, { "epoch": 0.10985014220083132, "grad_norm": 1.914605975151062, "learning_rate": 1.9667876820413947e-05, "loss": 0.5429, "step": 4017 }, { "epoch": 0.10987748851454823, "grad_norm": 5.948558807373047, "learning_rate": 1.966765041345936e-05, "loss": 0.6626, "step": 4018 }, { "epoch": 0.10990483482826514, "grad_norm": 5.731485366821289, "learning_rate": 1.9667423930664783e-05, "loss": 0.6636, "step": 4019 }, { "epoch": 0.10993218114198205, "grad_norm": 2.1307530403137207, "learning_rate": 1.966719737203198e-05, "loss": 1.0446, "step": 4020 }, { "epoch": 0.10995952745569897, "grad_norm": 6.555508136749268, "learning_rate": 1.966697073756274e-05, "loss": 0.6502, "step": 4021 }, { "epoch": 0.10998687376941588, "grad_norm": 4.815396785736084, "learning_rate": 1.966674402725883e-05, "loss": 0.6389, "step": 4022 }, { "epoch": 0.11001422008313279, "grad_norm": 4.2263031005859375, "learning_rate": 1.9666517241122035e-05, "loss": 0.6428, "step": 4023 }, { "epoch": 0.1100415663968497, "grad_norm": 7.875990867614746, "learning_rate": 1.9666290379154134e-05, "loss": 0.6989, "step": 4024 }, { "epoch": 0.11006891271056661, "grad_norm": 3.4233856201171875, "learning_rate": 1.9666063441356905e-05, "loss": 0.6768, "step": 4025 }, { "epoch": 0.11009625902428352, "grad_norm": 3.7067902088165283, "learning_rate": 1.966583642773213e-05, "loss": 0.7206, "step": 4026 }, { "epoch": 0.11012360533800043, "grad_norm": 4.758691310882568, "learning_rate": 1.9665609338281583e-05, "loss": 0.6518, "step": 4027 }, { "epoch": 0.11015095165171734, "grad_norm": 3.5124053955078125, "learning_rate": 1.9665382173007056e-05, "loss": 0.6367, "step": 4028 }, { "epoch": 0.11017829796543425, "grad_norm": 2.703896999359131, "learning_rate": 1.9665154931910324e-05, "loss": 0.6509, "step": 4029 }, { "epoch": 0.11020564427915117, "grad_norm": 3.467271089553833, "learning_rate": 1.9664927614993173e-05, "loss": 0.6529, "step": 4030 }, { "epoch": 0.11023299059286808, "grad_norm": 3.492642641067505, "learning_rate": 1.9664700222257385e-05, "loss": 0.6801, "step": 4031 }, { "epoch": 0.11026033690658499, "grad_norm": 4.771225929260254, "learning_rate": 1.966447275370474e-05, "loss": 0.6268, "step": 4032 }, { "epoch": 0.1102876832203019, "grad_norm": 3.96818208694458, "learning_rate": 1.966424520933703e-05, "loss": 0.6592, "step": 4033 }, { "epoch": 0.11031502953401881, "grad_norm": 3.2390806674957275, "learning_rate": 1.9664017589156033e-05, "loss": 0.6441, "step": 4034 }, { "epoch": 0.11034237584773572, "grad_norm": 3.6579604148864746, "learning_rate": 1.966378989316354e-05, "loss": 0.6219, "step": 4035 }, { "epoch": 0.11036972216145263, "grad_norm": 6.478364944458008, "learning_rate": 1.9663562121361334e-05, "loss": 0.6433, "step": 4036 }, { "epoch": 0.11039706847516954, "grad_norm": 14.6367769241333, "learning_rate": 1.96633342737512e-05, "loss": 0.655, "step": 4037 }, { "epoch": 0.11042441478888645, "grad_norm": 3.1104226112365723, "learning_rate": 1.9663106350334932e-05, "loss": 0.6364, "step": 4038 }, { "epoch": 0.11045176110260337, "grad_norm": 5.316206932067871, "learning_rate": 1.9662878351114314e-05, "loss": 0.6439, "step": 4039 }, { "epoch": 0.11047910741632028, "grad_norm": 3.257514476776123, "learning_rate": 1.9662650276091132e-05, "loss": 0.6373, "step": 4040 }, { "epoch": 0.11050645373003719, "grad_norm": 8.43961238861084, "learning_rate": 1.966242212526718e-05, "loss": 0.6644, "step": 4041 }, { "epoch": 0.1105338000437541, "grad_norm": 2.735002279281616, "learning_rate": 1.9662193898644247e-05, "loss": 0.6667, "step": 4042 }, { "epoch": 0.11056114635747101, "grad_norm": 2.476501703262329, "learning_rate": 1.966196559622412e-05, "loss": 0.6391, "step": 4043 }, { "epoch": 0.11058849267118792, "grad_norm": 2.689648389816284, "learning_rate": 1.966173721800859e-05, "loss": 0.6696, "step": 4044 }, { "epoch": 0.11061583898490483, "grad_norm": 6.331570148468018, "learning_rate": 1.9661508763999456e-05, "loss": 0.6203, "step": 4045 }, { "epoch": 0.11064318529862174, "grad_norm": 2.8466849327087402, "learning_rate": 1.96612802341985e-05, "loss": 0.6177, "step": 4046 }, { "epoch": 0.11067053161233865, "grad_norm": 2.1510677337646484, "learning_rate": 1.966105162860752e-05, "loss": 0.6528, "step": 4047 }, { "epoch": 0.11069787792605557, "grad_norm": 1.8872929811477661, "learning_rate": 1.9660822947228312e-05, "loss": 0.6138, "step": 4048 }, { "epoch": 0.11072522423977248, "grad_norm": 2.108480215072632, "learning_rate": 1.9660594190062667e-05, "loss": 0.6126, "step": 4049 }, { "epoch": 0.11075257055348939, "grad_norm": 2.7764203548431396, "learning_rate": 1.9660365357112374e-05, "loss": 0.6276, "step": 4050 }, { "epoch": 0.1107799168672063, "grad_norm": 1.8286433219909668, "learning_rate": 1.9660136448379235e-05, "loss": 0.6428, "step": 4051 }, { "epoch": 0.11080726318092321, "grad_norm": 9.306726455688477, "learning_rate": 1.9659907463865048e-05, "loss": 0.614, "step": 4052 }, { "epoch": 0.11083460949464012, "grad_norm": 2.0423731803894043, "learning_rate": 1.9659678403571603e-05, "loss": 0.6151, "step": 4053 }, { "epoch": 0.11086195580835703, "grad_norm": 2.3370039463043213, "learning_rate": 1.96594492675007e-05, "loss": 0.6624, "step": 4054 }, { "epoch": 0.11088930212207394, "grad_norm": 2.135502815246582, "learning_rate": 1.9659220055654135e-05, "loss": 0.6352, "step": 4055 }, { "epoch": 0.11091664843579085, "grad_norm": 2.9512076377868652, "learning_rate": 1.9658990768033707e-05, "loss": 0.5935, "step": 4056 }, { "epoch": 0.11094399474950777, "grad_norm": 2.0273211002349854, "learning_rate": 1.9658761404641214e-05, "loss": 0.6111, "step": 4057 }, { "epoch": 0.11097134106322468, "grad_norm": 2.4016380310058594, "learning_rate": 1.9658531965478456e-05, "loss": 0.6125, "step": 4058 }, { "epoch": 0.11099868737694159, "grad_norm": 2.1887645721435547, "learning_rate": 1.9658302450547236e-05, "loss": 0.5952, "step": 4059 }, { "epoch": 0.1110260336906585, "grad_norm": 1.8623247146606445, "learning_rate": 1.965807285984935e-05, "loss": 0.6212, "step": 4060 }, { "epoch": 0.11105338000437541, "grad_norm": 2.6150004863739014, "learning_rate": 1.9657843193386598e-05, "loss": 0.6217, "step": 4061 }, { "epoch": 0.11108072631809232, "grad_norm": 1.6461511850357056, "learning_rate": 1.9657613451160785e-05, "loss": 1.0391, "step": 4062 }, { "epoch": 0.11110807263180923, "grad_norm": 2.4056379795074463, "learning_rate": 1.9657383633173716e-05, "loss": 0.6094, "step": 4063 }, { "epoch": 0.11113541894552614, "grad_norm": 1.7199569940567017, "learning_rate": 1.9657153739427186e-05, "loss": 0.459, "step": 4064 }, { "epoch": 0.11116276525924305, "grad_norm": 2.420910358428955, "learning_rate": 1.9656923769923006e-05, "loss": 0.5987, "step": 4065 }, { "epoch": 0.11119011157295997, "grad_norm": 2.8344619274139404, "learning_rate": 1.9656693724662973e-05, "loss": 0.6144, "step": 4066 }, { "epoch": 0.11121745788667688, "grad_norm": 2.876215696334839, "learning_rate": 1.9656463603648895e-05, "loss": 0.6444, "step": 4067 }, { "epoch": 0.11124480420039379, "grad_norm": 3.767042636871338, "learning_rate": 1.9656233406882583e-05, "loss": 0.6671, "step": 4068 }, { "epoch": 0.1112721505141107, "grad_norm": 2.007049083709717, "learning_rate": 1.9656003134365834e-05, "loss": 0.6412, "step": 4069 }, { "epoch": 0.11129949682782761, "grad_norm": 2.2310101985931396, "learning_rate": 1.965577278610046e-05, "loss": 0.59, "step": 4070 }, { "epoch": 0.11132684314154452, "grad_norm": 1.6641771793365479, "learning_rate": 1.9655542362088262e-05, "loss": 0.5983, "step": 4071 }, { "epoch": 0.11135418945526143, "grad_norm": 1.6379985809326172, "learning_rate": 1.9655311862331055e-05, "loss": 0.6237, "step": 4072 }, { "epoch": 0.11138153576897834, "grad_norm": 2.2476015090942383, "learning_rate": 1.965508128683064e-05, "loss": 0.7057, "step": 4073 }, { "epoch": 0.11140888208269525, "grad_norm": 1.7801192998886108, "learning_rate": 1.965485063558883e-05, "loss": 0.5785, "step": 4074 }, { "epoch": 0.11143622839641217, "grad_norm": 2.654843330383301, "learning_rate": 1.9654619908607437e-05, "loss": 0.6073, "step": 4075 }, { "epoch": 0.11146357471012908, "grad_norm": 2.1723108291625977, "learning_rate": 1.9654389105888266e-05, "loss": 0.615, "step": 4076 }, { "epoch": 0.11149092102384599, "grad_norm": 1.6785863637924194, "learning_rate": 1.9654158227433128e-05, "loss": 0.5964, "step": 4077 }, { "epoch": 0.1115182673375629, "grad_norm": 1.8334234952926636, "learning_rate": 1.965392727324384e-05, "loss": 0.5868, "step": 4078 }, { "epoch": 0.11154561365127981, "grad_norm": 2.0922601222991943, "learning_rate": 1.965369624332221e-05, "loss": 0.6095, "step": 4079 }, { "epoch": 0.11157295996499672, "grad_norm": 3.0084619522094727, "learning_rate": 1.9653465137670042e-05, "loss": 0.5818, "step": 4080 }, { "epoch": 0.11160030627871363, "grad_norm": 2.614210844039917, "learning_rate": 1.9653233956289163e-05, "loss": 0.5536, "step": 4081 }, { "epoch": 0.11162765259243054, "grad_norm": 2.1605241298675537, "learning_rate": 1.965300269918138e-05, "loss": 0.6897, "step": 4082 }, { "epoch": 0.11165499890614745, "grad_norm": 2.301351547241211, "learning_rate": 1.9652771366348502e-05, "loss": 0.6195, "step": 4083 }, { "epoch": 0.11168234521986437, "grad_norm": 1.9936274290084839, "learning_rate": 1.9652539957792353e-05, "loss": 0.6224, "step": 4084 }, { "epoch": 0.11170969153358128, "grad_norm": 1.7869211435317993, "learning_rate": 1.9652308473514745e-05, "loss": 0.5767, "step": 4085 }, { "epoch": 0.11173703784729819, "grad_norm": 2.105529308319092, "learning_rate": 1.9652076913517494e-05, "loss": 0.622, "step": 4086 }, { "epoch": 0.1117643841610151, "grad_norm": 1.678269863128662, "learning_rate": 1.9651845277802416e-05, "loss": 0.6067, "step": 4087 }, { "epoch": 0.11179173047473201, "grad_norm": 2.768113851547241, "learning_rate": 1.9651613566371327e-05, "loss": 0.6075, "step": 4088 }, { "epoch": 0.11181907678844892, "grad_norm": 2.3338029384613037, "learning_rate": 1.9651381779226043e-05, "loss": 0.6071, "step": 4089 }, { "epoch": 0.11184642310216583, "grad_norm": 2.70615291595459, "learning_rate": 1.9651149916368387e-05, "loss": 0.5663, "step": 4090 }, { "epoch": 0.11187376941588274, "grad_norm": 1.7163723707199097, "learning_rate": 1.965091797780018e-05, "loss": 1.0307, "step": 4091 }, { "epoch": 0.11190111572959965, "grad_norm": 1.5403902530670166, "learning_rate": 1.9650685963523234e-05, "loss": 1.0352, "step": 4092 }, { "epoch": 0.11192846204331657, "grad_norm": 19.10251235961914, "learning_rate": 1.9650453873539372e-05, "loss": 0.5893, "step": 4093 }, { "epoch": 0.11195580835703348, "grad_norm": 3.701664447784424, "learning_rate": 1.9650221707850416e-05, "loss": 0.6097, "step": 4094 }, { "epoch": 0.11198315467075039, "grad_norm": 7.852379322052002, "learning_rate": 1.9649989466458188e-05, "loss": 0.6504, "step": 4095 }, { "epoch": 0.1120105009844673, "grad_norm": 3.941967725753784, "learning_rate": 1.9649757149364504e-05, "loss": 0.5783, "step": 4096 }, { "epoch": 0.11203784729818421, "grad_norm": 3.9122114181518555, "learning_rate": 1.9649524756571195e-05, "loss": 0.586, "step": 4097 }, { "epoch": 0.11206519361190112, "grad_norm": 4.154620170593262, "learning_rate": 1.964929228808008e-05, "loss": 0.5938, "step": 4098 }, { "epoch": 0.11209253992561803, "grad_norm": 1.8058415651321411, "learning_rate": 1.9649059743892983e-05, "loss": 0.9915, "step": 4099 }, { "epoch": 0.11211988623933494, "grad_norm": 3.6000893115997314, "learning_rate": 1.9648827124011723e-05, "loss": 0.5218, "step": 4100 }, { "epoch": 0.11214723255305185, "grad_norm": 2.934796094894409, "learning_rate": 1.9648594428438132e-05, "loss": 0.5452, "step": 4101 }, { "epoch": 0.11217457886676876, "grad_norm": 4.848275661468506, "learning_rate": 1.9648361657174036e-05, "loss": 0.5761, "step": 4102 }, { "epoch": 0.11220192518048568, "grad_norm": 14.694073677062988, "learning_rate": 1.9648128810221254e-05, "loss": 0.5924, "step": 4103 }, { "epoch": 0.11222927149420259, "grad_norm": 19.10735321044922, "learning_rate": 1.964789588758162e-05, "loss": 0.4852, "step": 4104 }, { "epoch": 0.1122566178079195, "grad_norm": 2.7405543327331543, "learning_rate": 1.9647662889256955e-05, "loss": 0.5647, "step": 4105 }, { "epoch": 0.11228396412163641, "grad_norm": 3.2728593349456787, "learning_rate": 1.9647429815249093e-05, "loss": 0.6061, "step": 4106 }, { "epoch": 0.11231131043535332, "grad_norm": 2.568013906478882, "learning_rate": 1.9647196665559855e-05, "loss": 0.5955, "step": 4107 }, { "epoch": 0.11233865674907023, "grad_norm": 2.97031831741333, "learning_rate": 1.9646963440191075e-05, "loss": 0.5789, "step": 4108 }, { "epoch": 0.11236600306278714, "grad_norm": 3.359531879425049, "learning_rate": 1.9646730139144584e-05, "loss": 0.6026, "step": 4109 }, { "epoch": 0.11239334937650405, "grad_norm": 2.405867099761963, "learning_rate": 1.964649676242221e-05, "loss": 0.5691, "step": 4110 }, { "epoch": 0.11242069569022096, "grad_norm": 2.2809598445892334, "learning_rate": 1.9646263310025782e-05, "loss": 0.5941, "step": 4111 }, { "epoch": 0.11244804200393788, "grad_norm": 3.7838070392608643, "learning_rate": 1.9646029781957134e-05, "loss": 0.5887, "step": 4112 }, { "epoch": 0.11247538831765479, "grad_norm": 1.4445836544036865, "learning_rate": 1.9645796178218097e-05, "loss": 1.0009, "step": 4113 }, { "epoch": 0.11250273463137168, "grad_norm": 2.1026558876037598, "learning_rate": 1.9645562498810503e-05, "loss": 0.6318, "step": 4114 }, { "epoch": 0.1125300809450886, "grad_norm": 2.2353336811065674, "learning_rate": 1.9645328743736188e-05, "loss": 0.5973, "step": 4115 }, { "epoch": 0.1125574272588055, "grad_norm": 1.655156135559082, "learning_rate": 1.964509491299698e-05, "loss": 0.4928, "step": 4116 }, { "epoch": 0.11258477357252242, "grad_norm": 2.014955520629883, "learning_rate": 1.9644861006594716e-05, "loss": 0.5984, "step": 4117 }, { "epoch": 0.11261211988623933, "grad_norm": 2.002747058868408, "learning_rate": 1.9644627024531235e-05, "loss": 0.6195, "step": 4118 }, { "epoch": 0.11263946619995624, "grad_norm": 9.964433670043945, "learning_rate": 1.964439296680837e-05, "loss": 0.5376, "step": 4119 }, { "epoch": 0.11266681251367315, "grad_norm": 1.7710126638412476, "learning_rate": 1.9644158833427954e-05, "loss": 0.6057, "step": 4120 }, { "epoch": 0.11269415882739006, "grad_norm": 1.6117658615112305, "learning_rate": 1.9643924624391824e-05, "loss": 0.5831, "step": 4121 }, { "epoch": 0.11272150514110697, "grad_norm": 1.7732548713684082, "learning_rate": 1.964369033970182e-05, "loss": 0.5897, "step": 4122 }, { "epoch": 0.11274885145482388, "grad_norm": 1.950255274772644, "learning_rate": 1.9643455979359785e-05, "loss": 0.5788, "step": 4123 }, { "epoch": 0.1127761977685408, "grad_norm": 1.9756029844284058, "learning_rate": 1.9643221543367546e-05, "loss": 0.629, "step": 4124 }, { "epoch": 0.1128035440822577, "grad_norm": 1.919413685798645, "learning_rate": 1.964298703172695e-05, "loss": 0.5879, "step": 4125 }, { "epoch": 0.11283089039597462, "grad_norm": 1.7723408937454224, "learning_rate": 1.964275244443983e-05, "loss": 0.6056, "step": 4126 }, { "epoch": 0.11285823670969153, "grad_norm": 1.7876369953155518, "learning_rate": 1.964251778150804e-05, "loss": 0.5894, "step": 4127 }, { "epoch": 0.11288558302340844, "grad_norm": 2.0950043201446533, "learning_rate": 1.9642283042933403e-05, "loss": 0.5835, "step": 4128 }, { "epoch": 0.11291292933712535, "grad_norm": 2.2246782779693604, "learning_rate": 1.9642048228717775e-05, "loss": 0.6628, "step": 4129 }, { "epoch": 0.11294027565084226, "grad_norm": 2.357292652130127, "learning_rate": 1.964181333886299e-05, "loss": 0.6015, "step": 4130 }, { "epoch": 0.11296762196455917, "grad_norm": 3.648507833480835, "learning_rate": 1.964157837337089e-05, "loss": 0.4946, "step": 4131 }, { "epoch": 0.11299496827827608, "grad_norm": 1.7987146377563477, "learning_rate": 1.9641343332243326e-05, "loss": 0.6015, "step": 4132 }, { "epoch": 0.113022314591993, "grad_norm": 2.1108241081237793, "learning_rate": 1.9641108215482135e-05, "loss": 0.6145, "step": 4133 }, { "epoch": 0.1130496609057099, "grad_norm": 2.309791326522827, "learning_rate": 1.9640873023089162e-05, "loss": 0.5786, "step": 4134 }, { "epoch": 0.11307700721942682, "grad_norm": 2.2805068492889404, "learning_rate": 1.964063775506625e-05, "loss": 0.5926, "step": 4135 }, { "epoch": 0.11310435353314373, "grad_norm": 1.9946900606155396, "learning_rate": 1.9640402411415257e-05, "loss": 0.5946, "step": 4136 }, { "epoch": 0.11313169984686064, "grad_norm": 1.8092514276504517, "learning_rate": 1.964016699213801e-05, "loss": 0.5883, "step": 4137 }, { "epoch": 0.11315904616057755, "grad_norm": 2.3273983001708984, "learning_rate": 1.9639931497236376e-05, "loss": 0.5918, "step": 4138 }, { "epoch": 0.11318639247429446, "grad_norm": 1.5831284523010254, "learning_rate": 1.9639695926712185e-05, "loss": 1.0151, "step": 4139 }, { "epoch": 0.11321373878801137, "grad_norm": 1.7021565437316895, "learning_rate": 1.9639460280567295e-05, "loss": 0.5622, "step": 4140 }, { "epoch": 0.11324108510172828, "grad_norm": 1.6651328802108765, "learning_rate": 1.963922455880355e-05, "loss": 0.5924, "step": 4141 }, { "epoch": 0.1132684314154452, "grad_norm": 2.1853127479553223, "learning_rate": 1.9638988761422806e-05, "loss": 0.5805, "step": 4142 }, { "epoch": 0.1132957777291621, "grad_norm": 1.655725359916687, "learning_rate": 1.9638752888426903e-05, "loss": 0.6184, "step": 4143 }, { "epoch": 0.11332312404287902, "grad_norm": 1.7339211702346802, "learning_rate": 1.9638516939817697e-05, "loss": 0.5995, "step": 4144 }, { "epoch": 0.11335047035659593, "grad_norm": 1.9181883335113525, "learning_rate": 1.963828091559704e-05, "loss": 0.5687, "step": 4145 }, { "epoch": 0.11337781667031284, "grad_norm": 1.9780678749084473, "learning_rate": 1.963804481576678e-05, "loss": 0.6105, "step": 4146 }, { "epoch": 0.11340516298402975, "grad_norm": 2.0628983974456787, "learning_rate": 1.9637808640328772e-05, "loss": 0.5752, "step": 4147 }, { "epoch": 0.11343250929774666, "grad_norm": 1.7997833490371704, "learning_rate": 1.9637572389284867e-05, "loss": 0.5906, "step": 4148 }, { "epoch": 0.11345985561146357, "grad_norm": 2.0968849658966064, "learning_rate": 1.963733606263692e-05, "loss": 0.6457, "step": 4149 }, { "epoch": 0.11348720192518048, "grad_norm": 1.7661099433898926, "learning_rate": 1.9637099660386777e-05, "loss": 0.5956, "step": 4150 }, { "epoch": 0.1135145482388974, "grad_norm": 1.7521421909332275, "learning_rate": 1.963686318253631e-05, "loss": 0.585, "step": 4151 }, { "epoch": 0.1135418945526143, "grad_norm": 2.3149499893188477, "learning_rate": 1.9636626629087354e-05, "loss": 0.5741, "step": 4152 }, { "epoch": 0.11356924086633122, "grad_norm": 1.8211963176727295, "learning_rate": 1.963639000004178e-05, "loss": 0.5973, "step": 4153 }, { "epoch": 0.11359658718004813, "grad_norm": 3.3710663318634033, "learning_rate": 1.9636153295401438e-05, "loss": 0.5927, "step": 4154 }, { "epoch": 0.11362393349376504, "grad_norm": 1.501058578491211, "learning_rate": 1.963591651516818e-05, "loss": 0.6037, "step": 4155 }, { "epoch": 0.11365127980748195, "grad_norm": 2.412919759750366, "learning_rate": 1.963567965934387e-05, "loss": 0.6265, "step": 4156 }, { "epoch": 0.11367862612119886, "grad_norm": 2.499978542327881, "learning_rate": 1.963544272793037e-05, "loss": 0.5729, "step": 4157 }, { "epoch": 0.11370597243491577, "grad_norm": 2.485905170440674, "learning_rate": 1.963520572092953e-05, "loss": 0.6816, "step": 4158 }, { "epoch": 0.11373331874863268, "grad_norm": 2.8025732040405273, "learning_rate": 1.9634968638343213e-05, "loss": 0.5461, "step": 4159 }, { "epoch": 0.1137606650623496, "grad_norm": 3.9681811332702637, "learning_rate": 1.963473148017328e-05, "loss": 0.6174, "step": 4160 }, { "epoch": 0.1137880113760665, "grad_norm": 1.751436471939087, "learning_rate": 1.9634494246421586e-05, "loss": 0.5924, "step": 4161 }, { "epoch": 0.11381535768978342, "grad_norm": 1.9691357612609863, "learning_rate": 1.963425693709e-05, "loss": 0.6217, "step": 4162 }, { "epoch": 0.11384270400350033, "grad_norm": 1.7708780765533447, "learning_rate": 1.963401955218038e-05, "loss": 0.6088, "step": 4163 }, { "epoch": 0.11387005031721724, "grad_norm": 2.069152593612671, "learning_rate": 1.9633782091694585e-05, "loss": 1.0116, "step": 4164 }, { "epoch": 0.11389739663093415, "grad_norm": 3.0469868183135986, "learning_rate": 1.9633544555634482e-05, "loss": 0.6053, "step": 4165 }, { "epoch": 0.11392474294465106, "grad_norm": 2.768083333969116, "learning_rate": 1.9633306944001937e-05, "loss": 0.6661, "step": 4166 }, { "epoch": 0.11395208925836797, "grad_norm": 1.9861770868301392, "learning_rate": 1.9633069256798802e-05, "loss": 0.5587, "step": 4167 }, { "epoch": 0.11397943557208488, "grad_norm": 1.8272156715393066, "learning_rate": 1.9632831494026953e-05, "loss": 0.5801, "step": 4168 }, { "epoch": 0.1140067818858018, "grad_norm": 2.2477853298187256, "learning_rate": 1.9632593655688255e-05, "loss": 0.6056, "step": 4169 }, { "epoch": 0.1140341281995187, "grad_norm": 22.892026901245117, "learning_rate": 1.963235574178457e-05, "loss": 0.6144, "step": 4170 }, { "epoch": 0.11406147451323562, "grad_norm": 2.132035255432129, "learning_rate": 1.963211775231776e-05, "loss": 0.6001, "step": 4171 }, { "epoch": 0.11408882082695253, "grad_norm": 2.243589162826538, "learning_rate": 1.96318796872897e-05, "loss": 0.613, "step": 4172 }, { "epoch": 0.11411616714066944, "grad_norm": 2.5819005966186523, "learning_rate": 1.9631641546702256e-05, "loss": 0.6042, "step": 4173 }, { "epoch": 0.11414351345438635, "grad_norm": 3.913090229034424, "learning_rate": 1.9631403330557296e-05, "loss": 0.589, "step": 4174 }, { "epoch": 0.11417085976810326, "grad_norm": 2.4378750324249268, "learning_rate": 1.9631165038856685e-05, "loss": 0.55, "step": 4175 }, { "epoch": 0.11419820608182017, "grad_norm": 2.0290439128875732, "learning_rate": 1.963092667160229e-05, "loss": 0.586, "step": 4176 }, { "epoch": 0.11422555239553708, "grad_norm": 2.110215902328491, "learning_rate": 1.963068822879599e-05, "loss": 0.589, "step": 4177 }, { "epoch": 0.114252898709254, "grad_norm": 1.9206314086914062, "learning_rate": 1.9630449710439654e-05, "loss": 0.5456, "step": 4178 }, { "epoch": 0.1142802450229709, "grad_norm": 1.9911878108978271, "learning_rate": 1.9630211116535146e-05, "loss": 0.5823, "step": 4179 }, { "epoch": 0.11430759133668782, "grad_norm": 1.5356274843215942, "learning_rate": 1.9629972447084346e-05, "loss": 0.5511, "step": 4180 }, { "epoch": 0.11433493765040473, "grad_norm": 2.3022356033325195, "learning_rate": 1.9629733702089116e-05, "loss": 0.6437, "step": 4181 }, { "epoch": 0.11436228396412164, "grad_norm": 1.9436713457107544, "learning_rate": 1.962949488155134e-05, "loss": 0.5852, "step": 4182 }, { "epoch": 0.11438963027783855, "grad_norm": 2.756492853164673, "learning_rate": 1.9629255985472885e-05, "loss": 0.6025, "step": 4183 }, { "epoch": 0.11441697659155546, "grad_norm": 1.8368659019470215, "learning_rate": 1.9629017013855627e-05, "loss": 0.5986, "step": 4184 }, { "epoch": 0.11444432290527237, "grad_norm": 2.0103702545166016, "learning_rate": 1.962877796670144e-05, "loss": 0.6144, "step": 4185 }, { "epoch": 0.11447166921898928, "grad_norm": 2.0658318996429443, "learning_rate": 1.9628538844012205e-05, "loss": 0.5887, "step": 4186 }, { "epoch": 0.1144990155327062, "grad_norm": 2.791273832321167, "learning_rate": 1.9628299645789785e-05, "loss": 0.5654, "step": 4187 }, { "epoch": 0.1145263618464231, "grad_norm": 2.3489303588867188, "learning_rate": 1.9628060372036067e-05, "loss": 0.5713, "step": 4188 }, { "epoch": 0.11455370816014002, "grad_norm": 2.4042129516601562, "learning_rate": 1.9627821022752924e-05, "loss": 0.6563, "step": 4189 }, { "epoch": 0.11458105447385693, "grad_norm": 2.2664828300476074, "learning_rate": 1.9627581597942234e-05, "loss": 0.5914, "step": 4190 }, { "epoch": 0.11460840078757384, "grad_norm": 1.6852059364318848, "learning_rate": 1.962734209760588e-05, "loss": 0.5748, "step": 4191 }, { "epoch": 0.11463574710129075, "grad_norm": 2.523848056793213, "learning_rate": 1.9627102521745732e-05, "loss": 0.5756, "step": 4192 }, { "epoch": 0.11466309341500766, "grad_norm": 2.6736810207366943, "learning_rate": 1.9626862870363673e-05, "loss": 0.6076, "step": 4193 }, { "epoch": 0.11469043972872457, "grad_norm": 1.9765883684158325, "learning_rate": 1.962662314346159e-05, "loss": 0.5584, "step": 4194 }, { "epoch": 0.11471778604244148, "grad_norm": 1.6772204637527466, "learning_rate": 1.9626383341041352e-05, "loss": 0.5798, "step": 4195 }, { "epoch": 0.1147451323561584, "grad_norm": 2.142127752304077, "learning_rate": 1.9626143463104848e-05, "loss": 0.6057, "step": 4196 }, { "epoch": 0.1147724786698753, "grad_norm": 2.595250368118286, "learning_rate": 1.9625903509653957e-05, "loss": 0.6118, "step": 4197 }, { "epoch": 0.11479982498359222, "grad_norm": 2.361039161682129, "learning_rate": 1.9625663480690564e-05, "loss": 0.5854, "step": 4198 }, { "epoch": 0.11482717129730913, "grad_norm": 3.8477303981781006, "learning_rate": 1.9625423376216548e-05, "loss": 0.5919, "step": 4199 }, { "epoch": 0.11485451761102604, "grad_norm": 1.8191789388656616, "learning_rate": 1.9625183196233797e-05, "loss": 0.4825, "step": 4200 }, { "epoch": 0.11488186392474295, "grad_norm": 5.340747356414795, "learning_rate": 1.9624942940744187e-05, "loss": 0.5968, "step": 4201 }, { "epoch": 0.11490921023845986, "grad_norm": 2.6893320083618164, "learning_rate": 1.9624702609749614e-05, "loss": 0.6085, "step": 4202 }, { "epoch": 0.11493655655217677, "grad_norm": 3.105234146118164, "learning_rate": 1.9624462203251953e-05, "loss": 0.595, "step": 4203 }, { "epoch": 0.11496390286589368, "grad_norm": 1.9150625467300415, "learning_rate": 1.96242217212531e-05, "loss": 0.5783, "step": 4204 }, { "epoch": 0.1149912491796106, "grad_norm": 2.3587143421173096, "learning_rate": 1.9623981163754933e-05, "loss": 0.5981, "step": 4205 }, { "epoch": 0.1150185954933275, "grad_norm": 1.4585812091827393, "learning_rate": 1.962374053075934e-05, "loss": 0.553, "step": 4206 }, { "epoch": 0.11504594180704442, "grad_norm": 2.55436372756958, "learning_rate": 1.9623499822268214e-05, "loss": 0.5325, "step": 4207 }, { "epoch": 0.11507328812076133, "grad_norm": 1.9762530326843262, "learning_rate": 1.9623259038283436e-05, "loss": 0.5714, "step": 4208 }, { "epoch": 0.11510063443447824, "grad_norm": 1.7419371604919434, "learning_rate": 1.9623018178806904e-05, "loss": 0.5753, "step": 4209 }, { "epoch": 0.11512798074819515, "grad_norm": 2.110564708709717, "learning_rate": 1.9622777243840502e-05, "loss": 0.5652, "step": 4210 }, { "epoch": 0.11515532706191206, "grad_norm": 2.3521387577056885, "learning_rate": 1.962253623338612e-05, "loss": 0.5224, "step": 4211 }, { "epoch": 0.11518267337562897, "grad_norm": 2.0092177391052246, "learning_rate": 1.962229514744565e-05, "loss": 0.5048, "step": 4212 }, { "epoch": 0.11521001968934588, "grad_norm": 1.6762102842330933, "learning_rate": 1.9622053986020982e-05, "loss": 0.6149, "step": 4213 }, { "epoch": 0.1152373660030628, "grad_norm": 1.6775274276733398, "learning_rate": 1.9621812749114006e-05, "loss": 0.5766, "step": 4214 }, { "epoch": 0.1152647123167797, "grad_norm": 2.1295456886291504, "learning_rate": 1.962157143672662e-05, "loss": 0.6589, "step": 4215 }, { "epoch": 0.1152920586304966, "grad_norm": 1.9437206983566284, "learning_rate": 1.962133004886071e-05, "loss": 0.6179, "step": 4216 }, { "epoch": 0.11531940494421351, "grad_norm": 1.8110612630844116, "learning_rate": 1.9621088585518174e-05, "loss": 0.6059, "step": 4217 }, { "epoch": 0.11534675125793042, "grad_norm": 2.0166006088256836, "learning_rate": 1.962084704670091e-05, "loss": 0.6012, "step": 4218 }, { "epoch": 0.11537409757164734, "grad_norm": 1.7322605848312378, "learning_rate": 1.9620605432410804e-05, "loss": 0.568, "step": 4219 }, { "epoch": 0.11540144388536425, "grad_norm": 2.3623292446136475, "learning_rate": 1.962036374264976e-05, "loss": 0.6021, "step": 4220 }, { "epoch": 0.11542879019908116, "grad_norm": 1.5609468221664429, "learning_rate": 1.9620121977419666e-05, "loss": 0.562, "step": 4221 }, { "epoch": 0.11545613651279807, "grad_norm": 1.8605396747589111, "learning_rate": 1.9619880136722424e-05, "loss": 0.6412, "step": 4222 }, { "epoch": 0.11548348282651498, "grad_norm": 1.791515827178955, "learning_rate": 1.961963822055993e-05, "loss": 0.5132, "step": 4223 }, { "epoch": 0.11551082914023189, "grad_norm": 1.8439525365829468, "learning_rate": 1.9619396228934083e-05, "loss": 0.6273, "step": 4224 }, { "epoch": 0.1155381754539488, "grad_norm": 2.0759663581848145, "learning_rate": 1.9619154161846774e-05, "loss": 0.572, "step": 4225 }, { "epoch": 0.11556552176766571, "grad_norm": 2.5425639152526855, "learning_rate": 1.9618912019299912e-05, "loss": 0.5842, "step": 4226 }, { "epoch": 0.11559286808138262, "grad_norm": 2.0798158645629883, "learning_rate": 1.961866980129539e-05, "loss": 0.5535, "step": 4227 }, { "epoch": 0.11562021439509954, "grad_norm": 1.4870926141738892, "learning_rate": 1.9618427507835113e-05, "loss": 0.5975, "step": 4228 }, { "epoch": 0.11564756070881645, "grad_norm": 1.8060669898986816, "learning_rate": 1.961818513892098e-05, "loss": 0.5778, "step": 4229 }, { "epoch": 0.11567490702253336, "grad_norm": 2.1539852619171143, "learning_rate": 1.961794269455489e-05, "loss": 0.5823, "step": 4230 }, { "epoch": 0.11570225333625027, "grad_norm": 7.3172078132629395, "learning_rate": 1.9617700174738743e-05, "loss": 0.6041, "step": 4231 }, { "epoch": 0.11572959964996718, "grad_norm": 2.7271103858947754, "learning_rate": 1.9617457579474445e-05, "loss": 0.5886, "step": 4232 }, { "epoch": 0.11575694596368409, "grad_norm": 1.9090569019317627, "learning_rate": 1.9617214908763903e-05, "loss": 0.5825, "step": 4233 }, { "epoch": 0.115784292277401, "grad_norm": 2.2352371215820312, "learning_rate": 1.9616972162609017e-05, "loss": 0.5802, "step": 4234 }, { "epoch": 0.11581163859111791, "grad_norm": 4.2384748458862305, "learning_rate": 1.9616729341011688e-05, "loss": 0.643, "step": 4235 }, { "epoch": 0.11583898490483482, "grad_norm": 1.8366644382476807, "learning_rate": 1.9616486443973823e-05, "loss": 0.612, "step": 4236 }, { "epoch": 0.11586633121855174, "grad_norm": 1.6987578868865967, "learning_rate": 1.961624347149733e-05, "loss": 0.6155, "step": 4237 }, { "epoch": 0.11589367753226865, "grad_norm": 3.066912889480591, "learning_rate": 1.9616000423584114e-05, "loss": 0.4355, "step": 4238 }, { "epoch": 0.11592102384598556, "grad_norm": 1.9691284894943237, "learning_rate": 1.9615757300236078e-05, "loss": 0.629, "step": 4239 }, { "epoch": 0.11594837015970247, "grad_norm": 1.6230376958847046, "learning_rate": 1.9615514101455134e-05, "loss": 0.5946, "step": 4240 }, { "epoch": 0.11597571647341938, "grad_norm": 2.58974027633667, "learning_rate": 1.9615270827243188e-05, "loss": 0.5951, "step": 4241 }, { "epoch": 0.11600306278713629, "grad_norm": 1.870703101158142, "learning_rate": 1.9615027477602148e-05, "loss": 0.5905, "step": 4242 }, { "epoch": 0.1160304091008532, "grad_norm": 1.8081361055374146, "learning_rate": 1.9614784052533925e-05, "loss": 0.6113, "step": 4243 }, { "epoch": 0.11605775541457011, "grad_norm": 1.8885356187820435, "learning_rate": 1.961454055204042e-05, "loss": 0.5971, "step": 4244 }, { "epoch": 0.11608510172828702, "grad_norm": 1.7204264402389526, "learning_rate": 1.961429697612356e-05, "loss": 0.5915, "step": 4245 }, { "epoch": 0.11611244804200394, "grad_norm": 1.9628756046295166, "learning_rate": 1.961405332478524e-05, "loss": 0.5815, "step": 4246 }, { "epoch": 0.11613979435572085, "grad_norm": 1.9486905336380005, "learning_rate": 1.9613809598027378e-05, "loss": 0.5639, "step": 4247 }, { "epoch": 0.11616714066943776, "grad_norm": 2.8961293697357178, "learning_rate": 1.9613565795851885e-05, "loss": 0.6393, "step": 4248 }, { "epoch": 0.11619448698315467, "grad_norm": 2.3402769565582275, "learning_rate": 1.9613321918260674e-05, "loss": 0.5806, "step": 4249 }, { "epoch": 0.11622183329687158, "grad_norm": 1.6053853034973145, "learning_rate": 1.9613077965255657e-05, "loss": 0.62, "step": 4250 }, { "epoch": 0.11624917961058849, "grad_norm": 1.443865180015564, "learning_rate": 1.9612833936838755e-05, "loss": 0.5838, "step": 4251 }, { "epoch": 0.1162765259243054, "grad_norm": 1.7301949262619019, "learning_rate": 1.9612589833011868e-05, "loss": 0.6143, "step": 4252 }, { "epoch": 0.11630387223802231, "grad_norm": 1.9365261793136597, "learning_rate": 1.9612345653776924e-05, "loss": 0.5771, "step": 4253 }, { "epoch": 0.11633121855173922, "grad_norm": 2.45316219329834, "learning_rate": 1.961210139913583e-05, "loss": 0.5903, "step": 4254 }, { "epoch": 0.11635856486545613, "grad_norm": 1.8226929903030396, "learning_rate": 1.961185706909051e-05, "loss": 0.5983, "step": 4255 }, { "epoch": 0.11638591117917305, "grad_norm": 1.429077386856079, "learning_rate": 1.961161266364287e-05, "loss": 1.0047, "step": 4256 }, { "epoch": 0.11641325749288996, "grad_norm": 1.9757177829742432, "learning_rate": 1.961136818279484e-05, "loss": 0.624, "step": 4257 }, { "epoch": 0.11644060380660687, "grad_norm": 3.6145479679107666, "learning_rate": 1.961112362654833e-05, "loss": 0.5835, "step": 4258 }, { "epoch": 0.11646795012032378, "grad_norm": 1.8520623445510864, "learning_rate": 1.9610878994905258e-05, "loss": 0.5799, "step": 4259 }, { "epoch": 0.11649529643404069, "grad_norm": 1.7881169319152832, "learning_rate": 1.9610634287867543e-05, "loss": 0.6189, "step": 4260 }, { "epoch": 0.1165226427477576, "grad_norm": 1.9789183139801025, "learning_rate": 1.961038950543711e-05, "loss": 0.612, "step": 4261 }, { "epoch": 0.11654998906147451, "grad_norm": 1.7337080240249634, "learning_rate": 1.9610144647615876e-05, "loss": 0.5586, "step": 4262 }, { "epoch": 0.11657733537519142, "grad_norm": 2.160215377807617, "learning_rate": 1.9609899714405762e-05, "loss": 0.576, "step": 4263 }, { "epoch": 0.11660468168890833, "grad_norm": 1.5550545454025269, "learning_rate": 1.9609654705808686e-05, "loss": 0.5892, "step": 4264 }, { "epoch": 0.11663202800262525, "grad_norm": 1.912693738937378, "learning_rate": 1.9609409621826575e-05, "loss": 0.6255, "step": 4265 }, { "epoch": 0.11665937431634216, "grad_norm": 2.28753399848938, "learning_rate": 1.9609164462461355e-05, "loss": 0.5873, "step": 4266 }, { "epoch": 0.11668672063005907, "grad_norm": 1.4413045644760132, "learning_rate": 1.9608919227714938e-05, "loss": 0.9969, "step": 4267 }, { "epoch": 0.11671406694377598, "grad_norm": 1.8884408473968506, "learning_rate": 1.9608673917589254e-05, "loss": 0.5911, "step": 4268 }, { "epoch": 0.11674141325749289, "grad_norm": 2.1707687377929688, "learning_rate": 1.960842853208623e-05, "loss": 0.5647, "step": 4269 }, { "epoch": 0.1167687595712098, "grad_norm": 2.6119112968444824, "learning_rate": 1.9608183071207786e-05, "loss": 0.4626, "step": 4270 }, { "epoch": 0.11679610588492671, "grad_norm": 2.424112558364868, "learning_rate": 1.960793753495585e-05, "loss": 0.6141, "step": 4271 }, { "epoch": 0.11682345219864362, "grad_norm": 3.055025100708008, "learning_rate": 1.9607691923332348e-05, "loss": 0.6422, "step": 4272 }, { "epoch": 0.11685079851236053, "grad_norm": 1.9055283069610596, "learning_rate": 1.9607446236339208e-05, "loss": 0.5519, "step": 4273 }, { "epoch": 0.11687814482607745, "grad_norm": 2.113050699234009, "learning_rate": 1.9607200473978354e-05, "loss": 0.572, "step": 4274 }, { "epoch": 0.11690549113979436, "grad_norm": 2.4738240242004395, "learning_rate": 1.9606954636251717e-05, "loss": 0.5724, "step": 4275 }, { "epoch": 0.11693283745351127, "grad_norm": 1.705054759979248, "learning_rate": 1.9606708723161224e-05, "loss": 0.5817, "step": 4276 }, { "epoch": 0.11696018376722818, "grad_norm": 1.831290364265442, "learning_rate": 1.9606462734708808e-05, "loss": 0.4739, "step": 4277 }, { "epoch": 0.11698753008094509, "grad_norm": 1.4799875020980835, "learning_rate": 1.960621667089639e-05, "loss": 0.9476, "step": 4278 }, { "epoch": 0.117014876394662, "grad_norm": 1.9739854335784912, "learning_rate": 1.9605970531725908e-05, "loss": 0.5714, "step": 4279 }, { "epoch": 0.11704222270837891, "grad_norm": 1.9905866384506226, "learning_rate": 1.9605724317199293e-05, "loss": 0.6053, "step": 4280 }, { "epoch": 0.11706956902209582, "grad_norm": 3.513866662979126, "learning_rate": 1.960547802731847e-05, "loss": 0.6547, "step": 4281 }, { "epoch": 0.11709691533581273, "grad_norm": 3.228571891784668, "learning_rate": 1.9605231662085378e-05, "loss": 0.5656, "step": 4282 }, { "epoch": 0.11712426164952965, "grad_norm": 3.1318135261535645, "learning_rate": 1.9604985221501947e-05, "loss": 0.5558, "step": 4283 }, { "epoch": 0.11715160796324656, "grad_norm": 1.350012183189392, "learning_rate": 1.9604738705570106e-05, "loss": 1.0121, "step": 4284 }, { "epoch": 0.11717895427696347, "grad_norm": 2.0658578872680664, "learning_rate": 1.96044921142918e-05, "loss": 0.5539, "step": 4285 }, { "epoch": 0.11720630059068038, "grad_norm": 1.9913721084594727, "learning_rate": 1.960424544766895e-05, "loss": 0.6068, "step": 4286 }, { "epoch": 0.11723364690439729, "grad_norm": 1.8166948556900024, "learning_rate": 1.96039987057035e-05, "loss": 0.5601, "step": 4287 }, { "epoch": 0.1172609932181142, "grad_norm": 2.366914987564087, "learning_rate": 1.960375188839738e-05, "loss": 0.5892, "step": 4288 }, { "epoch": 0.11728833953183111, "grad_norm": 5.1824517250061035, "learning_rate": 1.9603504995752533e-05, "loss": 0.5978, "step": 4289 }, { "epoch": 0.11731568584554802, "grad_norm": 1.618047833442688, "learning_rate": 1.9603258027770888e-05, "loss": 0.5928, "step": 4290 }, { "epoch": 0.11734303215926493, "grad_norm": 1.8941524028778076, "learning_rate": 1.960301098445439e-05, "loss": 0.5807, "step": 4291 }, { "epoch": 0.11737037847298185, "grad_norm": 1.4518687725067139, "learning_rate": 1.960276386580497e-05, "loss": 0.9917, "step": 4292 }, { "epoch": 0.11739772478669876, "grad_norm": 1.4352011680603027, "learning_rate": 1.9602516671824573e-05, "loss": 0.9975, "step": 4293 }, { "epoch": 0.11742507110041567, "grad_norm": 1.7038273811340332, "learning_rate": 1.9602269402515134e-05, "loss": 0.9555, "step": 4294 }, { "epoch": 0.11745241741413258, "grad_norm": 2.2467238903045654, "learning_rate": 1.9602022057878595e-05, "loss": 0.5775, "step": 4295 }, { "epoch": 0.11747976372784949, "grad_norm": 2.2518489360809326, "learning_rate": 1.9601774637916892e-05, "loss": 0.578, "step": 4296 }, { "epoch": 0.1175071100415664, "grad_norm": 1.5757304430007935, "learning_rate": 1.9601527142631972e-05, "loss": 0.4973, "step": 4297 }, { "epoch": 0.11753445635528331, "grad_norm": 2.1680049896240234, "learning_rate": 1.960127957202577e-05, "loss": 0.6352, "step": 4298 }, { "epoch": 0.11756180266900022, "grad_norm": 1.604677677154541, "learning_rate": 1.960103192610024e-05, "loss": 0.5937, "step": 4299 }, { "epoch": 0.11758914898271713, "grad_norm": 3.428586721420288, "learning_rate": 1.960078420485731e-05, "loss": 0.5807, "step": 4300 }, { "epoch": 0.11761649529643405, "grad_norm": 2.7239835262298584, "learning_rate": 1.9600536408298932e-05, "loss": 0.5709, "step": 4301 }, { "epoch": 0.11764384161015096, "grad_norm": 1.9662399291992188, "learning_rate": 1.9600288536427046e-05, "loss": 0.5739, "step": 4302 }, { "epoch": 0.11767118792386787, "grad_norm": 1.6524438858032227, "learning_rate": 1.9600040589243597e-05, "loss": 0.5941, "step": 4303 }, { "epoch": 0.11769853423758478, "grad_norm": 9.980687141418457, "learning_rate": 1.9599792566750534e-05, "loss": 0.5839, "step": 4304 }, { "epoch": 0.11772588055130169, "grad_norm": 1.851434588432312, "learning_rate": 1.95995444689498e-05, "loss": 0.6306, "step": 4305 }, { "epoch": 0.1177532268650186, "grad_norm": 1.8037049770355225, "learning_rate": 1.959929629584334e-05, "loss": 0.4937, "step": 4306 }, { "epoch": 0.11778057317873551, "grad_norm": 2.036689281463623, "learning_rate": 1.95990480474331e-05, "loss": 0.6082, "step": 4307 }, { "epoch": 0.11780791949245242, "grad_norm": 2.6156511306762695, "learning_rate": 1.9598799723721034e-05, "loss": 0.6082, "step": 4308 }, { "epoch": 0.11783526580616933, "grad_norm": 2.362243890762329, "learning_rate": 1.9598551324709083e-05, "loss": 0.5915, "step": 4309 }, { "epoch": 0.11786261211988625, "grad_norm": 2.304128885269165, "learning_rate": 1.95983028503992e-05, "loss": 0.6093, "step": 4310 }, { "epoch": 0.11788995843360316, "grad_norm": 1.706653356552124, "learning_rate": 1.9598054300793328e-05, "loss": 0.6197, "step": 4311 }, { "epoch": 0.11791730474732007, "grad_norm": 1.910384178161621, "learning_rate": 1.9597805675893424e-05, "loss": 0.5926, "step": 4312 }, { "epoch": 0.11794465106103698, "grad_norm": 1.932998538017273, "learning_rate": 1.9597556975701436e-05, "loss": 0.618, "step": 4313 }, { "epoch": 0.11797199737475389, "grad_norm": 2.0083436965942383, "learning_rate": 1.9597308200219315e-05, "loss": 0.5584, "step": 4314 }, { "epoch": 0.1179993436884708, "grad_norm": 1.6969598531723022, "learning_rate": 1.959705934944901e-05, "loss": 0.5604, "step": 4315 }, { "epoch": 0.11802669000218771, "grad_norm": 2.36435866355896, "learning_rate": 1.9596810423392477e-05, "loss": 0.6911, "step": 4316 }, { "epoch": 0.11805403631590462, "grad_norm": 1.422057867050171, "learning_rate": 1.9596561422051664e-05, "loss": 0.5552, "step": 4317 }, { "epoch": 0.11808138262962152, "grad_norm": 1.9568272829055786, "learning_rate": 1.959631234542853e-05, "loss": 0.5673, "step": 4318 }, { "epoch": 0.11810872894333843, "grad_norm": 3.0980992317199707, "learning_rate": 1.9596063193525025e-05, "loss": 0.615, "step": 4319 }, { "epoch": 0.11813607525705534, "grad_norm": 2.3611817359924316, "learning_rate": 1.9595813966343106e-05, "loss": 0.5843, "step": 4320 }, { "epoch": 0.11816342157077225, "grad_norm": 1.5259730815887451, "learning_rate": 1.9595564663884726e-05, "loss": 0.5755, "step": 4321 }, { "epoch": 0.11819076788448916, "grad_norm": 2.170243501663208, "learning_rate": 1.9595315286151843e-05, "loss": 0.5944, "step": 4322 }, { "epoch": 0.11821811419820608, "grad_norm": 1.9021611213684082, "learning_rate": 1.959506583314641e-05, "loss": 0.6054, "step": 4323 }, { "epoch": 0.11824546051192299, "grad_norm": 1.7403526306152344, "learning_rate": 1.959481630487039e-05, "loss": 0.5747, "step": 4324 }, { "epoch": 0.1182728068256399, "grad_norm": 2.0498952865600586, "learning_rate": 1.9594566701325734e-05, "loss": 0.6251, "step": 4325 }, { "epoch": 0.11830015313935681, "grad_norm": 1.5160146951675415, "learning_rate": 1.95943170225144e-05, "loss": 0.6061, "step": 4326 }, { "epoch": 0.11832749945307372, "grad_norm": 1.9540899991989136, "learning_rate": 1.959406726843835e-05, "loss": 0.5561, "step": 4327 }, { "epoch": 0.11835484576679063, "grad_norm": 1.7883541584014893, "learning_rate": 1.9593817439099544e-05, "loss": 0.572, "step": 4328 }, { "epoch": 0.11838219208050754, "grad_norm": 1.8524928092956543, "learning_rate": 1.959356753449994e-05, "loss": 0.5912, "step": 4329 }, { "epoch": 0.11840953839422445, "grad_norm": 1.9965468645095825, "learning_rate": 1.9593317554641495e-05, "loss": 0.4808, "step": 4330 }, { "epoch": 0.11843688470794136, "grad_norm": 2.2932679653167725, "learning_rate": 1.9593067499526178e-05, "loss": 0.606, "step": 4331 }, { "epoch": 0.11846423102165828, "grad_norm": 2.249241828918457, "learning_rate": 1.9592817369155942e-05, "loss": 0.5921, "step": 4332 }, { "epoch": 0.11849157733537519, "grad_norm": 1.8276184797286987, "learning_rate": 1.9592567163532757e-05, "loss": 0.5766, "step": 4333 }, { "epoch": 0.1185189236490921, "grad_norm": 3.1755826473236084, "learning_rate": 1.9592316882658583e-05, "loss": 0.6223, "step": 4334 }, { "epoch": 0.11854626996280901, "grad_norm": 2.1974339485168457, "learning_rate": 1.959206652653538e-05, "loss": 0.6501, "step": 4335 }, { "epoch": 0.11857361627652592, "grad_norm": 1.5740960836410522, "learning_rate": 1.9591816095165112e-05, "loss": 0.5543, "step": 4336 }, { "epoch": 0.11860096259024283, "grad_norm": 1.2444342374801636, "learning_rate": 1.959156558854975e-05, "loss": 0.4652, "step": 4337 }, { "epoch": 0.11862830890395974, "grad_norm": 1.8696776628494263, "learning_rate": 1.9591315006691256e-05, "loss": 0.5858, "step": 4338 }, { "epoch": 0.11865565521767665, "grad_norm": 2.273557662963867, "learning_rate": 1.9591064349591593e-05, "loss": 0.5653, "step": 4339 }, { "epoch": 0.11868300153139356, "grad_norm": 1.724110722541809, "learning_rate": 1.9590813617252727e-05, "loss": 0.6323, "step": 4340 }, { "epoch": 0.11871034784511048, "grad_norm": 2.1475443840026855, "learning_rate": 1.9590562809676634e-05, "loss": 0.5912, "step": 4341 }, { "epoch": 0.11873769415882739, "grad_norm": 1.5386234521865845, "learning_rate": 1.959031192686527e-05, "loss": 0.5599, "step": 4342 }, { "epoch": 0.1187650404725443, "grad_norm": 4.044602870941162, "learning_rate": 1.959006096882061e-05, "loss": 0.5455, "step": 4343 }, { "epoch": 0.11879238678626121, "grad_norm": 1.6385594606399536, "learning_rate": 1.958980993554462e-05, "loss": 0.5943, "step": 4344 }, { "epoch": 0.11881973309997812, "grad_norm": 1.4790077209472656, "learning_rate": 1.9589558827039268e-05, "loss": 0.5858, "step": 4345 }, { "epoch": 0.11884707941369503, "grad_norm": 2.5394065380096436, "learning_rate": 1.958930764330653e-05, "loss": 0.6102, "step": 4346 }, { "epoch": 0.11887442572741194, "grad_norm": 1.7859474420547485, "learning_rate": 1.9589056384348364e-05, "loss": 0.5946, "step": 4347 }, { "epoch": 0.11890177204112885, "grad_norm": 3.2759742736816406, "learning_rate": 1.9588805050166754e-05, "loss": 0.5925, "step": 4348 }, { "epoch": 0.11892911835484576, "grad_norm": 1.5939403772354126, "learning_rate": 1.958855364076367e-05, "loss": 0.5986, "step": 4349 }, { "epoch": 0.11895646466856268, "grad_norm": 1.429129958152771, "learning_rate": 1.9588302156141077e-05, "loss": 0.5786, "step": 4350 }, { "epoch": 0.11898381098227959, "grad_norm": 1.743452787399292, "learning_rate": 1.9588050596300952e-05, "loss": 0.6047, "step": 4351 }, { "epoch": 0.1190111572959965, "grad_norm": 1.928307294845581, "learning_rate": 1.958779896124527e-05, "loss": 0.5898, "step": 4352 }, { "epoch": 0.11903850360971341, "grad_norm": 2.5087454319000244, "learning_rate": 1.9587547250976005e-05, "loss": 0.5968, "step": 4353 }, { "epoch": 0.11906584992343032, "grad_norm": 1.6587661504745483, "learning_rate": 1.958729546549513e-05, "loss": 0.5909, "step": 4354 }, { "epoch": 0.11909319623714723, "grad_norm": 1.8962972164154053, "learning_rate": 1.9587043604804616e-05, "loss": 0.6056, "step": 4355 }, { "epoch": 0.11912054255086414, "grad_norm": 2.065695285797119, "learning_rate": 1.9586791668906447e-05, "loss": 0.5514, "step": 4356 }, { "epoch": 0.11914788886458105, "grad_norm": 2.2238850593566895, "learning_rate": 1.958653965780259e-05, "loss": 0.5602, "step": 4357 }, { "epoch": 0.11917523517829796, "grad_norm": 1.8385199308395386, "learning_rate": 1.9586287571495034e-05, "loss": 0.5785, "step": 4358 }, { "epoch": 0.11920258149201488, "grad_norm": 1.7849481105804443, "learning_rate": 1.9586035409985747e-05, "loss": 0.5505, "step": 4359 }, { "epoch": 0.11922992780573179, "grad_norm": 2.2140557765960693, "learning_rate": 1.9585783173276708e-05, "loss": 0.5665, "step": 4360 }, { "epoch": 0.1192572741194487, "grad_norm": 1.7221295833587646, "learning_rate": 1.9585530861369898e-05, "loss": 0.6028, "step": 4361 }, { "epoch": 0.11928462043316561, "grad_norm": 1.6432271003723145, "learning_rate": 1.95852784742673e-05, "loss": 1.0281, "step": 4362 }, { "epoch": 0.11931196674688252, "grad_norm": 1.386130690574646, "learning_rate": 1.9585026011970884e-05, "loss": 0.5801, "step": 4363 }, { "epoch": 0.11933931306059943, "grad_norm": 1.4631019830703735, "learning_rate": 1.958477347448264e-05, "loss": 0.5727, "step": 4364 }, { "epoch": 0.11936665937431634, "grad_norm": 1.4129180908203125, "learning_rate": 1.9584520861804548e-05, "loss": 0.5887, "step": 4365 }, { "epoch": 0.11939400568803325, "grad_norm": 1.9532381296157837, "learning_rate": 1.9584268173938582e-05, "loss": 0.5871, "step": 4366 }, { "epoch": 0.11942135200175016, "grad_norm": 1.4690401554107666, "learning_rate": 1.9584015410886733e-05, "loss": 0.6064, "step": 4367 }, { "epoch": 0.11944869831546708, "grad_norm": 2.1039834022521973, "learning_rate": 1.9583762572650976e-05, "loss": 0.5948, "step": 4368 }, { "epoch": 0.11947604462918399, "grad_norm": 1.4714934825897217, "learning_rate": 1.9583509659233303e-05, "loss": 0.5825, "step": 4369 }, { "epoch": 0.1195033909429009, "grad_norm": 1.8416008949279785, "learning_rate": 1.9583256670635692e-05, "loss": 0.5458, "step": 4370 }, { "epoch": 0.11953073725661781, "grad_norm": 1.838014841079712, "learning_rate": 1.958300360686013e-05, "loss": 0.6323, "step": 4371 }, { "epoch": 0.11955808357033472, "grad_norm": 1.5350117683410645, "learning_rate": 1.95827504679086e-05, "loss": 0.5536, "step": 4372 }, { "epoch": 0.11958542988405163, "grad_norm": 1.3724256753921509, "learning_rate": 1.958249725378309e-05, "loss": 0.6017, "step": 4373 }, { "epoch": 0.11961277619776854, "grad_norm": 1.5630097389221191, "learning_rate": 1.958224396448559e-05, "loss": 0.4953, "step": 4374 }, { "epoch": 0.11964012251148545, "grad_norm": 1.811189889907837, "learning_rate": 1.9581990600018078e-05, "loss": 0.6014, "step": 4375 }, { "epoch": 0.11966746882520236, "grad_norm": 1.7070354223251343, "learning_rate": 1.958173716038255e-05, "loss": 0.5843, "step": 4376 }, { "epoch": 0.11969481513891927, "grad_norm": 1.8645099401474, "learning_rate": 1.9581483645580986e-05, "loss": 0.5804, "step": 4377 }, { "epoch": 0.11972216145263619, "grad_norm": 1.6048763990402222, "learning_rate": 1.958123005561538e-05, "loss": 0.5693, "step": 4378 }, { "epoch": 0.1197495077663531, "grad_norm": 2.120450735092163, "learning_rate": 1.9580976390487724e-05, "loss": 0.5392, "step": 4379 }, { "epoch": 0.11977685408007001, "grad_norm": 1.6307566165924072, "learning_rate": 1.9580722650200004e-05, "loss": 0.5767, "step": 4380 }, { "epoch": 0.11980420039378692, "grad_norm": 2.0804545879364014, "learning_rate": 1.9580468834754208e-05, "loss": 0.5775, "step": 4381 }, { "epoch": 0.11983154670750383, "grad_norm": 1.7353564500808716, "learning_rate": 1.9580214944152333e-05, "loss": 0.5215, "step": 4382 }, { "epoch": 0.11985889302122074, "grad_norm": 1.9395544528961182, "learning_rate": 1.9579960978396366e-05, "loss": 0.5894, "step": 4383 }, { "epoch": 0.11988623933493765, "grad_norm": 1.9023621082305908, "learning_rate": 1.9579706937488302e-05, "loss": 0.5426, "step": 4384 }, { "epoch": 0.11991358564865456, "grad_norm": 1.9604244232177734, "learning_rate": 1.9579452821430133e-05, "loss": 0.5368, "step": 4385 }, { "epoch": 0.11994093196237147, "grad_norm": 1.8379127979278564, "learning_rate": 1.957919863022385e-05, "loss": 0.5886, "step": 4386 }, { "epoch": 0.11996827827608839, "grad_norm": 1.6693285703659058, "learning_rate": 1.9578944363871453e-05, "loss": 0.5981, "step": 4387 }, { "epoch": 0.1199956245898053, "grad_norm": 1.5898517370224, "learning_rate": 1.9578690022374933e-05, "loss": 0.5936, "step": 4388 }, { "epoch": 0.12002297090352221, "grad_norm": 2.374490976333618, "learning_rate": 1.9578435605736284e-05, "loss": 0.5882, "step": 4389 }, { "epoch": 0.12005031721723912, "grad_norm": 2.109003782272339, "learning_rate": 1.9578181113957505e-05, "loss": 0.6324, "step": 4390 }, { "epoch": 0.12007766353095603, "grad_norm": 1.9028719663619995, "learning_rate": 1.957792654704059e-05, "loss": 0.5659, "step": 4391 }, { "epoch": 0.12010500984467294, "grad_norm": 1.4707638025283813, "learning_rate": 1.9577671904987536e-05, "loss": 0.5682, "step": 4392 }, { "epoch": 0.12013235615838985, "grad_norm": 1.885327935218811, "learning_rate": 1.9577417187800343e-05, "loss": 0.5444, "step": 4393 }, { "epoch": 0.12015970247210676, "grad_norm": 1.4754921197891235, "learning_rate": 1.9577162395481003e-05, "loss": 0.4351, "step": 4394 }, { "epoch": 0.12018704878582367, "grad_norm": 5.054952621459961, "learning_rate": 1.9576907528031525e-05, "loss": 0.5418, "step": 4395 }, { "epoch": 0.12021439509954059, "grad_norm": 1.760989785194397, "learning_rate": 1.95766525854539e-05, "loss": 0.5344, "step": 4396 }, { "epoch": 0.1202417414132575, "grad_norm": 2.2002453804016113, "learning_rate": 1.9576397567750128e-05, "loss": 0.5941, "step": 4397 }, { "epoch": 0.12026908772697441, "grad_norm": 1.4416526556015015, "learning_rate": 1.9576142474922214e-05, "loss": 0.578, "step": 4398 }, { "epoch": 0.12029643404069132, "grad_norm": 1.8885823488235474, "learning_rate": 1.957588730697216e-05, "loss": 0.5749, "step": 4399 }, { "epoch": 0.12032378035440823, "grad_norm": 1.8553502559661865, "learning_rate": 1.957563206390196e-05, "loss": 0.6001, "step": 4400 }, { "epoch": 0.12035112666812514, "grad_norm": 1.7493294477462769, "learning_rate": 1.9575376745713627e-05, "loss": 0.5711, "step": 4401 }, { "epoch": 0.12037847298184205, "grad_norm": 2.1959023475646973, "learning_rate": 1.9575121352409155e-05, "loss": 0.548, "step": 4402 }, { "epoch": 0.12040581929555896, "grad_norm": 1.9252711534500122, "learning_rate": 1.9574865883990552e-05, "loss": 0.5411, "step": 4403 }, { "epoch": 0.12043316560927587, "grad_norm": 1.7468414306640625, "learning_rate": 1.957461034045982e-05, "loss": 0.555, "step": 4404 }, { "epoch": 0.12046051192299279, "grad_norm": 1.4737898111343384, "learning_rate": 1.957435472181896e-05, "loss": 0.3853, "step": 4405 }, { "epoch": 0.1204878582367097, "grad_norm": 3.359196662902832, "learning_rate": 1.9574099028069988e-05, "loss": 0.6746, "step": 4406 }, { "epoch": 0.12051520455042661, "grad_norm": 1.9229371547698975, "learning_rate": 1.95738432592149e-05, "loss": 0.6135, "step": 4407 }, { "epoch": 0.12054255086414352, "grad_norm": 2.0297651290893555, "learning_rate": 1.957358741525571e-05, "loss": 0.6665, "step": 4408 }, { "epoch": 0.12056989717786043, "grad_norm": 1.7126753330230713, "learning_rate": 1.9573331496194417e-05, "loss": 0.5894, "step": 4409 }, { "epoch": 0.12059724349157734, "grad_norm": 1.5910009145736694, "learning_rate": 1.9573075502033035e-05, "loss": 0.5589, "step": 4410 }, { "epoch": 0.12062458980529425, "grad_norm": 1.7484842538833618, "learning_rate": 1.957281943277357e-05, "loss": 0.4679, "step": 4411 }, { "epoch": 0.12065193611901116, "grad_norm": 1.7323983907699585, "learning_rate": 1.9572563288418024e-05, "loss": 0.5764, "step": 4412 }, { "epoch": 0.12067928243272807, "grad_norm": 2.0668065547943115, "learning_rate": 1.9572307068968423e-05, "loss": 0.6197, "step": 4413 }, { "epoch": 0.12070662874644499, "grad_norm": 1.7531663179397583, "learning_rate": 1.9572050774426762e-05, "loss": 0.5655, "step": 4414 }, { "epoch": 0.1207339750601619, "grad_norm": 1.534980058670044, "learning_rate": 1.9571794404795055e-05, "loss": 0.5793, "step": 4415 }, { "epoch": 0.12076132137387881, "grad_norm": 1.7042670249938965, "learning_rate": 1.9571537960075317e-05, "loss": 0.5381, "step": 4416 }, { "epoch": 0.12078866768759572, "grad_norm": 1.599196195602417, "learning_rate": 1.9571281440269554e-05, "loss": 0.5666, "step": 4417 }, { "epoch": 0.12081601400131263, "grad_norm": 1.6736996173858643, "learning_rate": 1.9571024845379785e-05, "loss": 0.5695, "step": 4418 }, { "epoch": 0.12084336031502953, "grad_norm": 2.2260169982910156, "learning_rate": 1.957076817540802e-05, "loss": 0.6243, "step": 4419 }, { "epoch": 0.12087070662874644, "grad_norm": 1.2595221996307373, "learning_rate": 1.957051143035627e-05, "loss": 0.4712, "step": 4420 }, { "epoch": 0.12089805294246335, "grad_norm": 2.0440704822540283, "learning_rate": 1.9570254610226552e-05, "loss": 0.5597, "step": 4421 }, { "epoch": 0.12092539925618026, "grad_norm": 1.4731792211532593, "learning_rate": 1.956999771502088e-05, "loss": 0.988, "step": 4422 }, { "epoch": 0.12095274556989717, "grad_norm": 1.8335732221603394, "learning_rate": 1.9569740744741267e-05, "loss": 0.5639, "step": 4423 }, { "epoch": 0.12098009188361408, "grad_norm": 1.7202764749526978, "learning_rate": 1.9569483699389734e-05, "loss": 0.5832, "step": 4424 }, { "epoch": 0.121007438197331, "grad_norm": 1.4931327104568481, "learning_rate": 1.9569226578968294e-05, "loss": 0.5772, "step": 4425 }, { "epoch": 0.1210347845110479, "grad_norm": 1.5101823806762695, "learning_rate": 1.9568969383478965e-05, "loss": 0.5384, "step": 4426 }, { "epoch": 0.12106213082476482, "grad_norm": 1.7917275428771973, "learning_rate": 1.9568712112923763e-05, "loss": 0.574, "step": 4427 }, { "epoch": 0.12108947713848173, "grad_norm": 1.470727801322937, "learning_rate": 1.956845476730471e-05, "loss": 0.9345, "step": 4428 }, { "epoch": 0.12111682345219864, "grad_norm": 1.4710530042648315, "learning_rate": 1.9568197346623817e-05, "loss": 0.5939, "step": 4429 }, { "epoch": 0.12114416976591555, "grad_norm": 1.8965250253677368, "learning_rate": 1.9567939850883115e-05, "loss": 0.5415, "step": 4430 }, { "epoch": 0.12117151607963246, "grad_norm": 5.0502705574035645, "learning_rate": 1.956768228008461e-05, "loss": 0.589, "step": 4431 }, { "epoch": 0.12119886239334937, "grad_norm": 1.7930351495742798, "learning_rate": 1.9567424634230333e-05, "loss": 0.5837, "step": 4432 }, { "epoch": 0.12122620870706628, "grad_norm": 1.9613864421844482, "learning_rate": 1.9567166913322303e-05, "loss": 0.6583, "step": 4433 }, { "epoch": 0.1212535550207832, "grad_norm": 2.0459001064300537, "learning_rate": 1.9566909117362543e-05, "loss": 0.439, "step": 4434 }, { "epoch": 0.1212809013345001, "grad_norm": 1.6584267616271973, "learning_rate": 1.956665124635307e-05, "loss": 0.5308, "step": 4435 }, { "epoch": 0.12130824764821702, "grad_norm": 1.9218835830688477, "learning_rate": 1.9566393300295915e-05, "loss": 0.5489, "step": 4436 }, { "epoch": 0.12133559396193393, "grad_norm": 1.629376769065857, "learning_rate": 1.9566135279193093e-05, "loss": 0.6128, "step": 4437 }, { "epoch": 0.12136294027565084, "grad_norm": 2.223402976989746, "learning_rate": 1.956587718304663e-05, "loss": 0.6419, "step": 4438 }, { "epoch": 0.12139028658936775, "grad_norm": 20.241119384765625, "learning_rate": 1.9565619011858556e-05, "loss": 0.5735, "step": 4439 }, { "epoch": 0.12141763290308466, "grad_norm": 1.6471786499023438, "learning_rate": 1.956536076563089e-05, "loss": 0.4864, "step": 4440 }, { "epoch": 0.12144497921680157, "grad_norm": 1.8806029558181763, "learning_rate": 1.9565102444365663e-05, "loss": 0.5833, "step": 4441 }, { "epoch": 0.12147232553051848, "grad_norm": 1.904141902923584, "learning_rate": 1.9564844048064898e-05, "loss": 0.459, "step": 4442 }, { "epoch": 0.1214996718442354, "grad_norm": 1.7390953302383423, "learning_rate": 1.9564585576730627e-05, "loss": 0.5717, "step": 4443 }, { "epoch": 0.1215270181579523, "grad_norm": 1.7227091789245605, "learning_rate": 1.956432703036487e-05, "loss": 0.6049, "step": 4444 }, { "epoch": 0.12155436447166922, "grad_norm": 1.499820351600647, "learning_rate": 1.956406840896966e-05, "loss": 0.5858, "step": 4445 }, { "epoch": 0.12158171078538613, "grad_norm": 1.7537572383880615, "learning_rate": 1.9563809712547024e-05, "loss": 0.6342, "step": 4446 }, { "epoch": 0.12160905709910304, "grad_norm": 1.7976154088974, "learning_rate": 1.9563550941098995e-05, "loss": 1.0084, "step": 4447 }, { "epoch": 0.12163640341281995, "grad_norm": 1.6895278692245483, "learning_rate": 1.9563292094627595e-05, "loss": 0.5483, "step": 4448 }, { "epoch": 0.12166374972653686, "grad_norm": 1.624109148979187, "learning_rate": 1.9563033173134865e-05, "loss": 0.5498, "step": 4449 }, { "epoch": 0.12169109604025377, "grad_norm": 1.7508424520492554, "learning_rate": 1.9562774176622826e-05, "loss": 0.5857, "step": 4450 }, { "epoch": 0.12171844235397068, "grad_norm": 2.180832624435425, "learning_rate": 1.956251510509352e-05, "loss": 0.5616, "step": 4451 }, { "epoch": 0.1217457886676876, "grad_norm": 2.3847482204437256, "learning_rate": 1.9562255958548973e-05, "loss": 0.5589, "step": 4452 }, { "epoch": 0.1217731349814045, "grad_norm": 1.6074881553649902, "learning_rate": 1.9561996736991217e-05, "loss": 0.5622, "step": 4453 }, { "epoch": 0.12180048129512142, "grad_norm": 1.7362396717071533, "learning_rate": 1.956173744042229e-05, "loss": 0.5545, "step": 4454 }, { "epoch": 0.12182782760883833, "grad_norm": 1.6848666667938232, "learning_rate": 1.956147806884422e-05, "loss": 0.5901, "step": 4455 }, { "epoch": 0.12185517392255524, "grad_norm": 2.906649112701416, "learning_rate": 1.956121862225905e-05, "loss": 0.6196, "step": 4456 }, { "epoch": 0.12188252023627215, "grad_norm": 1.7370952367782593, "learning_rate": 1.956095910066881e-05, "loss": 0.4259, "step": 4457 }, { "epoch": 0.12190986654998906, "grad_norm": 1.4954102039337158, "learning_rate": 1.9560699504075536e-05, "loss": 0.5331, "step": 4458 }, { "epoch": 0.12193721286370597, "grad_norm": 1.5869466066360474, "learning_rate": 1.956043983248126e-05, "loss": 0.5642, "step": 4459 }, { "epoch": 0.12196455917742288, "grad_norm": 1.969429612159729, "learning_rate": 1.956018008588803e-05, "loss": 0.9741, "step": 4460 }, { "epoch": 0.1219919054911398, "grad_norm": 1.9912265539169312, "learning_rate": 1.9559920264297875e-05, "loss": 0.5931, "step": 4461 }, { "epoch": 0.1220192518048567, "grad_norm": 1.3770607709884644, "learning_rate": 1.9559660367712838e-05, "loss": 0.9663, "step": 4462 }, { "epoch": 0.12204659811857362, "grad_norm": 1.4276880025863647, "learning_rate": 1.9559400396134952e-05, "loss": 0.9832, "step": 4463 }, { "epoch": 0.12207394443229053, "grad_norm": 1.4027353525161743, "learning_rate": 1.9559140349566265e-05, "loss": 0.5972, "step": 4464 }, { "epoch": 0.12210129074600744, "grad_norm": 1.824803113937378, "learning_rate": 1.955888022800881e-05, "loss": 0.5748, "step": 4465 }, { "epoch": 0.12212863705972435, "grad_norm": 1.5836524963378906, "learning_rate": 1.955862003146463e-05, "loss": 0.5864, "step": 4466 }, { "epoch": 0.12215598337344126, "grad_norm": 1.5589470863342285, "learning_rate": 1.9558359759935765e-05, "loss": 0.6035, "step": 4467 }, { "epoch": 0.12218332968715817, "grad_norm": 1.4431525468826294, "learning_rate": 1.9558099413424258e-05, "loss": 0.5952, "step": 4468 }, { "epoch": 0.12221067600087508, "grad_norm": 1.5200942754745483, "learning_rate": 1.955783899193215e-05, "loss": 0.5985, "step": 4469 }, { "epoch": 0.122238022314592, "grad_norm": 1.7417315244674683, "learning_rate": 1.9557578495461484e-05, "loss": 0.6285, "step": 4470 }, { "epoch": 0.1222653686283089, "grad_norm": 1.5043452978134155, "learning_rate": 1.955731792401431e-05, "loss": 0.5729, "step": 4471 }, { "epoch": 0.12229271494202582, "grad_norm": 1.8145023584365845, "learning_rate": 1.955705727759266e-05, "loss": 0.5731, "step": 4472 }, { "epoch": 0.12232006125574273, "grad_norm": 2.0353260040283203, "learning_rate": 1.955679655619859e-05, "loss": 0.9682, "step": 4473 }, { "epoch": 0.12234740756945964, "grad_norm": 1.3435978889465332, "learning_rate": 1.9556535759834138e-05, "loss": 0.5966, "step": 4474 }, { "epoch": 0.12237475388317655, "grad_norm": 2.5446624755859375, "learning_rate": 1.9556274888501353e-05, "loss": 0.5766, "step": 4475 }, { "epoch": 0.12240210019689346, "grad_norm": 1.6122171878814697, "learning_rate": 1.9556013942202282e-05, "loss": 0.612, "step": 4476 }, { "epoch": 0.12242944651061037, "grad_norm": 2.5148391723632812, "learning_rate": 1.955575292093897e-05, "loss": 0.4492, "step": 4477 }, { "epoch": 0.12245679282432728, "grad_norm": 1.4395051002502441, "learning_rate": 1.9555491824713465e-05, "loss": 0.5356, "step": 4478 }, { "epoch": 0.12248413913804419, "grad_norm": 1.618475079536438, "learning_rate": 1.9555230653527814e-05, "loss": 0.5314, "step": 4479 }, { "epoch": 0.1225114854517611, "grad_norm": 1.6208080053329468, "learning_rate": 1.955496940738407e-05, "loss": 0.5532, "step": 4480 }, { "epoch": 0.12253883176547802, "grad_norm": 1.285990595817566, "learning_rate": 1.955470808628428e-05, "loss": 0.6216, "step": 4481 }, { "epoch": 0.12256617807919493, "grad_norm": 1.8304674625396729, "learning_rate": 1.9554446690230496e-05, "loss": 0.5814, "step": 4482 }, { "epoch": 0.12259352439291184, "grad_norm": 1.3815313577651978, "learning_rate": 1.9554185219224763e-05, "loss": 0.558, "step": 4483 }, { "epoch": 0.12262087070662875, "grad_norm": 1.643293023109436, "learning_rate": 1.9553923673269138e-05, "loss": 0.5751, "step": 4484 }, { "epoch": 0.12264821702034566, "grad_norm": 1.7824947834014893, "learning_rate": 1.955366205236567e-05, "loss": 0.5543, "step": 4485 }, { "epoch": 0.12267556333406257, "grad_norm": 1.5693320035934448, "learning_rate": 1.955340035651641e-05, "loss": 0.5728, "step": 4486 }, { "epoch": 0.12270290964777948, "grad_norm": 1.819879412651062, "learning_rate": 1.9553138585723413e-05, "loss": 0.972, "step": 4487 }, { "epoch": 0.12273025596149639, "grad_norm": 1.8086456060409546, "learning_rate": 1.9552876739988737e-05, "loss": 0.6158, "step": 4488 }, { "epoch": 0.1227576022752133, "grad_norm": 2.293360471725464, "learning_rate": 1.9552614819314426e-05, "loss": 0.4764, "step": 4489 }, { "epoch": 0.12278494858893022, "grad_norm": 1.6822692155838013, "learning_rate": 1.955235282370254e-05, "loss": 0.5914, "step": 4490 }, { "epoch": 0.12281229490264713, "grad_norm": 1.6408772468566895, "learning_rate": 1.955209075315514e-05, "loss": 0.6207, "step": 4491 }, { "epoch": 0.12283964121636404, "grad_norm": 1.4265923500061035, "learning_rate": 1.9551828607674272e-05, "loss": 0.6085, "step": 4492 }, { "epoch": 0.12286698753008095, "grad_norm": 1.5190173387527466, "learning_rate": 1.9551566387261994e-05, "loss": 0.5624, "step": 4493 }, { "epoch": 0.12289433384379786, "grad_norm": 2.96484637260437, "learning_rate": 1.955130409192037e-05, "loss": 0.5922, "step": 4494 }, { "epoch": 0.12292168015751477, "grad_norm": 2.588273286819458, "learning_rate": 1.9551041721651453e-05, "loss": 0.9698, "step": 4495 }, { "epoch": 0.12294902647123168, "grad_norm": 1.715113639831543, "learning_rate": 1.95507792764573e-05, "loss": 0.5548, "step": 4496 }, { "epoch": 0.12297637278494859, "grad_norm": 1.3974446058273315, "learning_rate": 1.9550516756339973e-05, "loss": 0.9505, "step": 4497 }, { "epoch": 0.1230037190986655, "grad_norm": 1.3580132722854614, "learning_rate": 1.9550254161301527e-05, "loss": 0.5931, "step": 4498 }, { "epoch": 0.12303106541238241, "grad_norm": 1.7623515129089355, "learning_rate": 1.9549991491344026e-05, "loss": 0.5751, "step": 4499 }, { "epoch": 0.12305841172609933, "grad_norm": 2.028320074081421, "learning_rate": 1.9549728746469533e-05, "loss": 0.5902, "step": 4500 }, { "epoch": 0.12308575803981624, "grad_norm": 1.6563212871551514, "learning_rate": 1.95494659266801e-05, "loss": 0.5836, "step": 4501 }, { "epoch": 0.12311310435353315, "grad_norm": 6.170462608337402, "learning_rate": 1.95492030319778e-05, "loss": 0.663, "step": 4502 }, { "epoch": 0.12314045066725006, "grad_norm": 2.0328783988952637, "learning_rate": 1.9548940062364686e-05, "loss": 0.9808, "step": 4503 }, { "epoch": 0.12316779698096697, "grad_norm": 1.9972875118255615, "learning_rate": 1.9548677017842825e-05, "loss": 0.571, "step": 4504 }, { "epoch": 0.12319514329468388, "grad_norm": 1.5171732902526855, "learning_rate": 1.9548413898414277e-05, "loss": 0.5697, "step": 4505 }, { "epoch": 0.12322248960840079, "grad_norm": 2.0234522819519043, "learning_rate": 1.9548150704081115e-05, "loss": 0.5833, "step": 4506 }, { "epoch": 0.1232498359221177, "grad_norm": 1.7694710493087769, "learning_rate": 1.9547887434845398e-05, "loss": 0.6099, "step": 4507 }, { "epoch": 0.12327718223583461, "grad_norm": 1.7361834049224854, "learning_rate": 1.9547624090709187e-05, "loss": 0.5734, "step": 4508 }, { "epoch": 0.12330452854955153, "grad_norm": 1.4269341230392456, "learning_rate": 1.954736067167455e-05, "loss": 0.5918, "step": 4509 }, { "epoch": 0.12333187486326844, "grad_norm": 2.145754337310791, "learning_rate": 1.954709717774356e-05, "loss": 0.5712, "step": 4510 }, { "epoch": 0.12335922117698535, "grad_norm": 2.9958248138427734, "learning_rate": 1.9546833608918278e-05, "loss": 0.4766, "step": 4511 }, { "epoch": 0.12338656749070226, "grad_norm": 1.8752646446228027, "learning_rate": 1.954656996520077e-05, "loss": 0.5632, "step": 4512 }, { "epoch": 0.12341391380441917, "grad_norm": 1.5713924169540405, "learning_rate": 1.9546306246593113e-05, "loss": 0.5612, "step": 4513 }, { "epoch": 0.12344126011813608, "grad_norm": 1.7253832817077637, "learning_rate": 1.954604245309737e-05, "loss": 0.6424, "step": 4514 }, { "epoch": 0.12346860643185299, "grad_norm": 1.821213722229004, "learning_rate": 1.9545778584715603e-05, "loss": 0.5673, "step": 4515 }, { "epoch": 0.1234959527455699, "grad_norm": 1.805070400238037, "learning_rate": 1.9545514641449895e-05, "loss": 0.5651, "step": 4516 }, { "epoch": 0.12352329905928681, "grad_norm": 1.6063036918640137, "learning_rate": 1.954525062330231e-05, "loss": 0.9974, "step": 4517 }, { "epoch": 0.12355064537300373, "grad_norm": 1.8464792966842651, "learning_rate": 1.9544986530274922e-05, "loss": 0.5076, "step": 4518 }, { "epoch": 0.12357799168672064, "grad_norm": 1.6100581884384155, "learning_rate": 1.95447223623698e-05, "loss": 0.4421, "step": 4519 }, { "epoch": 0.12360533800043755, "grad_norm": 1.7454651594161987, "learning_rate": 1.9544458119589015e-05, "loss": 0.5855, "step": 4520 }, { "epoch": 0.12363268431415445, "grad_norm": 1.9905213117599487, "learning_rate": 1.9544193801934642e-05, "loss": 0.602, "step": 4521 }, { "epoch": 0.12366003062787136, "grad_norm": 1.9384928941726685, "learning_rate": 1.9543929409408754e-05, "loss": 0.9608, "step": 4522 }, { "epoch": 0.12368737694158827, "grad_norm": 1.674608826637268, "learning_rate": 1.954366494201343e-05, "loss": 0.5917, "step": 4523 }, { "epoch": 0.12371472325530518, "grad_norm": 1.8800321817398071, "learning_rate": 1.9543400399750738e-05, "loss": 0.611, "step": 4524 }, { "epoch": 0.12374206956902209, "grad_norm": 2.5576558113098145, "learning_rate": 1.9543135782622754e-05, "loss": 0.5872, "step": 4525 }, { "epoch": 0.123769415882739, "grad_norm": 1.829137921333313, "learning_rate": 1.954287109063156e-05, "loss": 0.5984, "step": 4526 }, { "epoch": 0.12379676219645591, "grad_norm": 2.21966552734375, "learning_rate": 1.9542606323779223e-05, "loss": 0.5873, "step": 4527 }, { "epoch": 0.12382410851017282, "grad_norm": 1.4087955951690674, "learning_rate": 1.9542341482067826e-05, "loss": 0.9609, "step": 4528 }, { "epoch": 0.12385145482388973, "grad_norm": 2.6953492164611816, "learning_rate": 1.9542076565499445e-05, "loss": 0.9542, "step": 4529 }, { "epoch": 0.12387880113760665, "grad_norm": 1.2398920059204102, "learning_rate": 1.9541811574076163e-05, "loss": 0.9739, "step": 4530 }, { "epoch": 0.12390614745132356, "grad_norm": 1.596372127532959, "learning_rate": 1.954154650780005e-05, "loss": 0.5895, "step": 4531 }, { "epoch": 0.12393349376504047, "grad_norm": 1.4761614799499512, "learning_rate": 1.9541281366673194e-05, "loss": 0.5167, "step": 4532 }, { "epoch": 0.12396084007875738, "grad_norm": 1.566223382949829, "learning_rate": 1.9541016150697666e-05, "loss": 0.5803, "step": 4533 }, { "epoch": 0.12398818639247429, "grad_norm": 1.8246012926101685, "learning_rate": 1.9540750859875553e-05, "loss": 0.6184, "step": 4534 }, { "epoch": 0.1240155327061912, "grad_norm": 1.4623937606811523, "learning_rate": 1.9540485494208935e-05, "loss": 0.5772, "step": 4535 }, { "epoch": 0.12404287901990811, "grad_norm": 1.571886420249939, "learning_rate": 1.9540220053699894e-05, "loss": 0.5918, "step": 4536 }, { "epoch": 0.12407022533362502, "grad_norm": 1.430570125579834, "learning_rate": 1.9539954538350513e-05, "loss": 0.5905, "step": 4537 }, { "epoch": 0.12409757164734193, "grad_norm": 1.4367681741714478, "learning_rate": 1.9539688948162873e-05, "loss": 0.571, "step": 4538 }, { "epoch": 0.12412491796105884, "grad_norm": 1.690308690071106, "learning_rate": 1.9539423283139053e-05, "loss": 1.0041, "step": 4539 }, { "epoch": 0.12415226427477576, "grad_norm": 2.3358564376831055, "learning_rate": 1.9539157543281145e-05, "loss": 0.5666, "step": 4540 }, { "epoch": 0.12417961058849267, "grad_norm": 1.5973869562149048, "learning_rate": 1.953889172859123e-05, "loss": 0.5659, "step": 4541 }, { "epoch": 0.12420695690220958, "grad_norm": 1.8002490997314453, "learning_rate": 1.9538625839071398e-05, "loss": 0.5921, "step": 4542 }, { "epoch": 0.12423430321592649, "grad_norm": 3.2445738315582275, "learning_rate": 1.9538359874723725e-05, "loss": 0.9409, "step": 4543 }, { "epoch": 0.1242616495296434, "grad_norm": 1.739143967628479, "learning_rate": 1.9538093835550307e-05, "loss": 0.5956, "step": 4544 }, { "epoch": 0.12428899584336031, "grad_norm": 1.715912103652954, "learning_rate": 1.9537827721553224e-05, "loss": 0.5754, "step": 4545 }, { "epoch": 0.12431634215707722, "grad_norm": 1.4169788360595703, "learning_rate": 1.953756153273457e-05, "loss": 0.4209, "step": 4546 }, { "epoch": 0.12434368847079413, "grad_norm": 1.6685558557510376, "learning_rate": 1.9537295269096428e-05, "loss": 0.5696, "step": 4547 }, { "epoch": 0.12437103478451104, "grad_norm": 1.6564236879348755, "learning_rate": 1.953702893064089e-05, "loss": 0.5676, "step": 4548 }, { "epoch": 0.12439838109822796, "grad_norm": 1.710937738418579, "learning_rate": 1.953676251737004e-05, "loss": 0.6364, "step": 4549 }, { "epoch": 0.12442572741194487, "grad_norm": 3.532217264175415, "learning_rate": 1.9536496029285975e-05, "loss": 0.4501, "step": 4550 }, { "epoch": 0.12445307372566178, "grad_norm": 2.1578848361968994, "learning_rate": 1.953622946639078e-05, "loss": 0.5788, "step": 4551 }, { "epoch": 0.12448042003937869, "grad_norm": 1.7476177215576172, "learning_rate": 1.9535962828686556e-05, "loss": 0.5909, "step": 4552 }, { "epoch": 0.1245077663530956, "grad_norm": 1.6821106672286987, "learning_rate": 1.953569611617538e-05, "loss": 0.4211, "step": 4553 }, { "epoch": 0.12453511266681251, "grad_norm": 1.9538406133651733, "learning_rate": 1.953542932885935e-05, "loss": 0.5691, "step": 4554 }, { "epoch": 0.12456245898052942, "grad_norm": 1.715381383895874, "learning_rate": 1.9535162466740563e-05, "loss": 0.5824, "step": 4555 }, { "epoch": 0.12458980529424633, "grad_norm": 1.458040714263916, "learning_rate": 1.9534895529821116e-05, "loss": 0.5679, "step": 4556 }, { "epoch": 0.12461715160796324, "grad_norm": 1.9223389625549316, "learning_rate": 1.9534628518103092e-05, "loss": 0.5474, "step": 4557 }, { "epoch": 0.12464449792168016, "grad_norm": 2.201105833053589, "learning_rate": 1.9534361431588586e-05, "loss": 0.5401, "step": 4558 }, { "epoch": 0.12467184423539707, "grad_norm": 1.758480429649353, "learning_rate": 1.9534094270279703e-05, "loss": 0.5634, "step": 4559 }, { "epoch": 0.12469919054911398, "grad_norm": 1.6979767084121704, "learning_rate": 1.9533827034178535e-05, "loss": 0.5717, "step": 4560 }, { "epoch": 0.12472653686283089, "grad_norm": 2.006718158721924, "learning_rate": 1.9533559723287173e-05, "loss": 0.6356, "step": 4561 }, { "epoch": 0.1247538831765478, "grad_norm": 1.6903364658355713, "learning_rate": 1.953329233760772e-05, "loss": 0.5772, "step": 4562 }, { "epoch": 0.12478122949026471, "grad_norm": 1.6590688228607178, "learning_rate": 1.953302487714227e-05, "loss": 0.5696, "step": 4563 }, { "epoch": 0.12480857580398162, "grad_norm": 1.5169650316238403, "learning_rate": 1.9532757341892926e-05, "loss": 0.6328, "step": 4564 }, { "epoch": 0.12483592211769853, "grad_norm": 1.4968159198760986, "learning_rate": 1.953248973186178e-05, "loss": 0.5889, "step": 4565 }, { "epoch": 0.12486326843141544, "grad_norm": 1.5509876012802124, "learning_rate": 1.9532222047050938e-05, "loss": 0.5718, "step": 4566 }, { "epoch": 0.12489061474513236, "grad_norm": 2.4937400817871094, "learning_rate": 1.9531954287462494e-05, "loss": 0.6707, "step": 4567 }, { "epoch": 0.12491796105884927, "grad_norm": 3.7873971462249756, "learning_rate": 1.953168645309855e-05, "loss": 0.4538, "step": 4568 }, { "epoch": 0.12494530737256618, "grad_norm": 1.8618133068084717, "learning_rate": 1.953141854396121e-05, "loss": 0.4799, "step": 4569 }, { "epoch": 0.12497265368628309, "grad_norm": 1.7520660161972046, "learning_rate": 1.9531150560052574e-05, "loss": 0.612, "step": 4570 }, { "epoch": 0.125, "grad_norm": 1.6211005449295044, "learning_rate": 1.9530882501374743e-05, "loss": 0.5489, "step": 4571 }, { "epoch": 0.1250273463137169, "grad_norm": 1.7630137205123901, "learning_rate": 1.9530614367929822e-05, "loss": 0.6634, "step": 4572 }, { "epoch": 0.12505469262743382, "grad_norm": 1.419811725616455, "learning_rate": 1.9530346159719915e-05, "loss": 0.5609, "step": 4573 }, { "epoch": 0.12508203894115072, "grad_norm": 1.2856322526931763, "learning_rate": 1.9530077876747122e-05, "loss": 0.4514, "step": 4574 }, { "epoch": 0.12510938525486764, "grad_norm": 1.6701246500015259, "learning_rate": 1.952980951901355e-05, "loss": 0.9761, "step": 4575 }, { "epoch": 0.12513673156858454, "grad_norm": 2.342961311340332, "learning_rate": 1.9529541086521307e-05, "loss": 0.5788, "step": 4576 }, { "epoch": 0.12516407788230147, "grad_norm": 1.7726064920425415, "learning_rate": 1.9529272579272493e-05, "loss": 0.5777, "step": 4577 }, { "epoch": 0.12519142419601836, "grad_norm": 1.890694499015808, "learning_rate": 1.952900399726922e-05, "loss": 0.6113, "step": 4578 }, { "epoch": 0.1252187705097353, "grad_norm": 1.7373690605163574, "learning_rate": 1.9528735340513588e-05, "loss": 0.6234, "step": 4579 }, { "epoch": 0.12524611682345219, "grad_norm": 1.8082528114318848, "learning_rate": 1.9528466609007712e-05, "loss": 0.4699, "step": 4580 }, { "epoch": 0.1252734631371691, "grad_norm": 1.863615870475769, "learning_rate": 1.9528197802753696e-05, "loss": 0.5859, "step": 4581 }, { "epoch": 0.125300809450886, "grad_norm": 1.3515167236328125, "learning_rate": 1.9527928921753654e-05, "loss": 1.0028, "step": 4582 }, { "epoch": 0.12532815576460293, "grad_norm": 1.5740199089050293, "learning_rate": 1.9527659966009686e-05, "loss": 0.456, "step": 4583 }, { "epoch": 0.12535550207831983, "grad_norm": 1.4762482643127441, "learning_rate": 1.9527390935523907e-05, "loss": 0.5772, "step": 4584 }, { "epoch": 0.12538284839203676, "grad_norm": 1.7109793424606323, "learning_rate": 1.952712183029843e-05, "loss": 0.5865, "step": 4585 }, { "epoch": 0.12541019470575365, "grad_norm": 1.7396478652954102, "learning_rate": 1.9526852650335363e-05, "loss": 0.561, "step": 4586 }, { "epoch": 0.12543754101947058, "grad_norm": 1.4747945070266724, "learning_rate": 1.9526583395636815e-05, "loss": 0.9817, "step": 4587 }, { "epoch": 0.12546488733318747, "grad_norm": 5.118479251861572, "learning_rate": 1.9526314066204906e-05, "loss": 0.5655, "step": 4588 }, { "epoch": 0.1254922336469044, "grad_norm": 1.297204852104187, "learning_rate": 1.952604466204174e-05, "loss": 0.9224, "step": 4589 }, { "epoch": 0.1255195799606213, "grad_norm": 2.011735677719116, "learning_rate": 1.9525775183149435e-05, "loss": 0.5707, "step": 4590 }, { "epoch": 0.12554692627433822, "grad_norm": 1.8700544834136963, "learning_rate": 1.9525505629530108e-05, "loss": 0.5892, "step": 4591 }, { "epoch": 0.12557427258805512, "grad_norm": 1.6767889261245728, "learning_rate": 1.9525236001185866e-05, "loss": 0.5756, "step": 4592 }, { "epoch": 0.12560161890177204, "grad_norm": 1.6340396404266357, "learning_rate": 1.9524966298118827e-05, "loss": 0.5488, "step": 4593 }, { "epoch": 0.12562896521548894, "grad_norm": 2.3325419425964355, "learning_rate": 1.9524696520331115e-05, "loss": 0.5598, "step": 4594 }, { "epoch": 0.12565631152920587, "grad_norm": 1.8946492671966553, "learning_rate": 1.952442666782483e-05, "loss": 0.5704, "step": 4595 }, { "epoch": 0.12568365784292276, "grad_norm": 1.571224331855774, "learning_rate": 1.9524156740602107e-05, "loss": 0.5604, "step": 4596 }, { "epoch": 0.1257110041566397, "grad_norm": 1.6432063579559326, "learning_rate": 1.952388673866505e-05, "loss": 0.5937, "step": 4597 }, { "epoch": 0.12573835047035659, "grad_norm": 1.7309755086898804, "learning_rate": 1.9523616662015782e-05, "loss": 0.5368, "step": 4598 }, { "epoch": 0.1257656967840735, "grad_norm": 2.5144457817077637, "learning_rate": 1.9523346510656424e-05, "loss": 0.5581, "step": 4599 }, { "epoch": 0.1257930430977904, "grad_norm": 1.6125555038452148, "learning_rate": 1.9523076284589088e-05, "loss": 0.5634, "step": 4600 }, { "epoch": 0.12582038941150733, "grad_norm": 1.686599612236023, "learning_rate": 1.9522805983815903e-05, "loss": 0.9899, "step": 4601 }, { "epoch": 0.12584773572522423, "grad_norm": 2.763754367828369, "learning_rate": 1.9522535608338985e-05, "loss": 0.5418, "step": 4602 }, { "epoch": 0.12587508203894116, "grad_norm": 1.87754487991333, "learning_rate": 1.952226515816045e-05, "loss": 0.5477, "step": 4603 }, { "epoch": 0.12590242835265805, "grad_norm": 1.6994001865386963, "learning_rate": 1.952199463328243e-05, "loss": 0.5623, "step": 4604 }, { "epoch": 0.12592977466637498, "grad_norm": 1.7626675367355347, "learning_rate": 1.9521724033707038e-05, "loss": 0.5163, "step": 4605 }, { "epoch": 0.12595712098009187, "grad_norm": 1.477541208267212, "learning_rate": 1.9521453359436406e-05, "loss": 0.5605, "step": 4606 }, { "epoch": 0.1259844672938088, "grad_norm": 2.454176902770996, "learning_rate": 1.9521182610472647e-05, "loss": 0.5706, "step": 4607 }, { "epoch": 0.1260118136075257, "grad_norm": 2.2674100399017334, "learning_rate": 1.9520911786817888e-05, "loss": 0.5923, "step": 4608 }, { "epoch": 0.12603915992124262, "grad_norm": 1.6038026809692383, "learning_rate": 1.952064088847426e-05, "loss": 0.9898, "step": 4609 }, { "epoch": 0.12606650623495952, "grad_norm": 1.7096943855285645, "learning_rate": 1.952036991544388e-05, "loss": 0.5955, "step": 4610 }, { "epoch": 0.12609385254867644, "grad_norm": 1.831485629081726, "learning_rate": 1.9520098867728884e-05, "loss": 0.5802, "step": 4611 }, { "epoch": 0.12612119886239334, "grad_norm": 1.5872595310211182, "learning_rate": 1.9519827745331388e-05, "loss": 0.9472, "step": 4612 }, { "epoch": 0.12614854517611027, "grad_norm": 2.868499994277954, "learning_rate": 1.951955654825352e-05, "loss": 0.5881, "step": 4613 }, { "epoch": 0.12617589148982716, "grad_norm": 1.745133399963379, "learning_rate": 1.951928527649741e-05, "loss": 0.5436, "step": 4614 }, { "epoch": 0.1262032378035441, "grad_norm": 2.382246732711792, "learning_rate": 1.9519013930065188e-05, "loss": 0.5852, "step": 4615 }, { "epoch": 0.12623058411726099, "grad_norm": 1.8092797994613647, "learning_rate": 1.9518742508958982e-05, "loss": 0.5285, "step": 4616 }, { "epoch": 0.1262579304309779, "grad_norm": 1.7529455423355103, "learning_rate": 1.9518471013180917e-05, "loss": 0.5546, "step": 4617 }, { "epoch": 0.1262852767446948, "grad_norm": 1.923685073852539, "learning_rate": 1.9518199442733127e-05, "loss": 0.5861, "step": 4618 }, { "epoch": 0.12631262305841173, "grad_norm": 1.3689665794372559, "learning_rate": 1.9517927797617742e-05, "loss": 0.9518, "step": 4619 }, { "epoch": 0.12633996937212863, "grad_norm": 1.5424597263336182, "learning_rate": 1.9517656077836887e-05, "loss": 0.5206, "step": 4620 }, { "epoch": 0.12636731568584555, "grad_norm": 1.6085505485534668, "learning_rate": 1.9517384283392704e-05, "loss": 0.5889, "step": 4621 }, { "epoch": 0.12639466199956245, "grad_norm": 1.8801525831222534, "learning_rate": 1.9517112414287317e-05, "loss": 0.5377, "step": 4622 }, { "epoch": 0.12642200831327938, "grad_norm": 1.4442858695983887, "learning_rate": 1.9516840470522864e-05, "loss": 0.9582, "step": 4623 }, { "epoch": 0.12644935462699627, "grad_norm": 2.2830302715301514, "learning_rate": 1.9516568452101472e-05, "loss": 0.5797, "step": 4624 }, { "epoch": 0.1264767009407132, "grad_norm": 1.5512900352478027, "learning_rate": 1.9516296359025283e-05, "loss": 0.5788, "step": 4625 }, { "epoch": 0.1265040472544301, "grad_norm": 1.4067233800888062, "learning_rate": 1.9516024191296424e-05, "loss": 0.5765, "step": 4626 }, { "epoch": 0.12653139356814702, "grad_norm": 1.653943657875061, "learning_rate": 1.9515751948917034e-05, "loss": 0.6186, "step": 4627 }, { "epoch": 0.12655873988186392, "grad_norm": 2.2184019088745117, "learning_rate": 1.951547963188925e-05, "loss": 0.5745, "step": 4628 }, { "epoch": 0.12658608619558084, "grad_norm": 1.8246437311172485, "learning_rate": 1.95152072402152e-05, "loss": 0.6289, "step": 4629 }, { "epoch": 0.12661343250929774, "grad_norm": 1.2323074340820312, "learning_rate": 1.9514934773897034e-05, "loss": 0.5631, "step": 4630 }, { "epoch": 0.12664077882301467, "grad_norm": 1.4042316675186157, "learning_rate": 1.951466223293688e-05, "loss": 0.5944, "step": 4631 }, { "epoch": 0.12666812513673156, "grad_norm": 1.3650143146514893, "learning_rate": 1.951438961733688e-05, "loss": 0.5506, "step": 4632 }, { "epoch": 0.1266954714504485, "grad_norm": 2.0143027305603027, "learning_rate": 1.9514116927099167e-05, "loss": 0.4516, "step": 4633 }, { "epoch": 0.12672281776416539, "grad_norm": 2.6828396320343018, "learning_rate": 1.9513844162225884e-05, "loss": 0.6104, "step": 4634 }, { "epoch": 0.1267501640778823, "grad_norm": 3.268118381500244, "learning_rate": 1.9513571322719176e-05, "loss": 0.5813, "step": 4635 }, { "epoch": 0.1267775103915992, "grad_norm": 1.854486346244812, "learning_rate": 1.9513298408581175e-05, "loss": 0.5867, "step": 4636 }, { "epoch": 0.12680485670531613, "grad_norm": 1.4361525774002075, "learning_rate": 1.9513025419814028e-05, "loss": 0.5631, "step": 4637 }, { "epoch": 0.12683220301903303, "grad_norm": 1.5264443159103394, "learning_rate": 1.9512752356419873e-05, "loss": 0.9595, "step": 4638 }, { "epoch": 0.12685954933274995, "grad_norm": 1.495936632156372, "learning_rate": 1.9512479218400848e-05, "loss": 0.9651, "step": 4639 }, { "epoch": 0.12688689564646685, "grad_norm": 1.6221946477890015, "learning_rate": 1.9512206005759108e-05, "loss": 0.4575, "step": 4640 }, { "epoch": 0.12691424196018378, "grad_norm": 1.6015037298202515, "learning_rate": 1.9511932718496785e-05, "loss": 0.5944, "step": 4641 }, { "epoch": 0.12694158827390067, "grad_norm": 1.818726658821106, "learning_rate": 1.9511659356616025e-05, "loss": 0.5904, "step": 4642 }, { "epoch": 0.1269689345876176, "grad_norm": 1.644867181777954, "learning_rate": 1.951138592011898e-05, "loss": 0.5731, "step": 4643 }, { "epoch": 0.1269962809013345, "grad_norm": 1.6658046245574951, "learning_rate": 1.951111240900778e-05, "loss": 0.4357, "step": 4644 }, { "epoch": 0.12702362721505142, "grad_norm": 1.732163429260254, "learning_rate": 1.9510838823284586e-05, "loss": 0.9585, "step": 4645 }, { "epoch": 0.12705097352876832, "grad_norm": 1.6712101697921753, "learning_rate": 1.9510565162951538e-05, "loss": 0.9489, "step": 4646 }, { "epoch": 0.12707831984248524, "grad_norm": 1.7834943532943726, "learning_rate": 1.951029142801078e-05, "loss": 0.6141, "step": 4647 }, { "epoch": 0.12710566615620214, "grad_norm": 1.5739283561706543, "learning_rate": 1.9510017618464465e-05, "loss": 0.6201, "step": 4648 }, { "epoch": 0.12713301246991907, "grad_norm": 1.516461968421936, "learning_rate": 1.9509743734314737e-05, "loss": 0.5569, "step": 4649 }, { "epoch": 0.12716035878363596, "grad_norm": 1.980873465538025, "learning_rate": 1.9509469775563748e-05, "loss": 0.5665, "step": 4650 }, { "epoch": 0.1271877050973529, "grad_norm": 1.5183221101760864, "learning_rate": 1.9509195742213642e-05, "loss": 0.5739, "step": 4651 }, { "epoch": 0.12721505141106978, "grad_norm": 2.7871787548065186, "learning_rate": 1.950892163426657e-05, "loss": 0.9818, "step": 4652 }, { "epoch": 0.1272423977247867, "grad_norm": 2.0328192710876465, "learning_rate": 1.9508647451724687e-05, "loss": 0.4545, "step": 4653 }, { "epoch": 0.1272697440385036, "grad_norm": 1.4837898015975952, "learning_rate": 1.950837319459014e-05, "loss": 0.5672, "step": 4654 }, { "epoch": 0.12729709035222053, "grad_norm": 1.747545599937439, "learning_rate": 1.950809886286508e-05, "loss": 0.4061, "step": 4655 }, { "epoch": 0.12732443666593743, "grad_norm": 1.5513010025024414, "learning_rate": 1.9507824456551662e-05, "loss": 0.5815, "step": 4656 }, { "epoch": 0.12735178297965435, "grad_norm": 1.8499797582626343, "learning_rate": 1.9507549975652037e-05, "loss": 0.5846, "step": 4657 }, { "epoch": 0.12737912929337125, "grad_norm": 1.5327407121658325, "learning_rate": 1.9507275420168356e-05, "loss": 0.6123, "step": 4658 }, { "epoch": 0.12740647560708818, "grad_norm": 1.3945502042770386, "learning_rate": 1.9507000790102773e-05, "loss": 0.5872, "step": 4659 }, { "epoch": 0.12743382192080507, "grad_norm": 1.5310883522033691, "learning_rate": 1.950672608545745e-05, "loss": 0.5983, "step": 4660 }, { "epoch": 0.127461168234522, "grad_norm": 1.3692439794540405, "learning_rate": 1.9506451306234533e-05, "loss": 0.5796, "step": 4661 }, { "epoch": 0.1274885145482389, "grad_norm": 2.0239009857177734, "learning_rate": 1.950617645243618e-05, "loss": 0.5442, "step": 4662 }, { "epoch": 0.12751586086195582, "grad_norm": 1.652873158454895, "learning_rate": 1.950590152406455e-05, "loss": 0.5884, "step": 4663 }, { "epoch": 0.12754320717567272, "grad_norm": 2.0327982902526855, "learning_rate": 1.9505626521121797e-05, "loss": 0.5456, "step": 4664 }, { "epoch": 0.12757055348938964, "grad_norm": 1.5125786066055298, "learning_rate": 1.950535144361008e-05, "loss": 0.5608, "step": 4665 }, { "epoch": 0.12759789980310654, "grad_norm": 1.7079408168792725, "learning_rate": 1.9505076291531554e-05, "loss": 0.9238, "step": 4666 }, { "epoch": 0.12762524611682347, "grad_norm": 1.5592559576034546, "learning_rate": 1.950480106488838e-05, "loss": 0.5733, "step": 4667 }, { "epoch": 0.12765259243054036, "grad_norm": 1.829249382019043, "learning_rate": 1.9504525763682718e-05, "loss": 0.9446, "step": 4668 }, { "epoch": 0.1276799387442573, "grad_norm": 1.4530965089797974, "learning_rate": 1.9504250387916728e-05, "loss": 0.5869, "step": 4669 }, { "epoch": 0.12770728505797418, "grad_norm": 1.8751987218856812, "learning_rate": 1.9503974937592564e-05, "loss": 0.5887, "step": 4670 }, { "epoch": 0.1277346313716911, "grad_norm": 2.0650205612182617, "learning_rate": 1.9503699412712394e-05, "loss": 0.5499, "step": 4671 }, { "epoch": 0.127761977685408, "grad_norm": 2.1005759239196777, "learning_rate": 1.9503423813278376e-05, "loss": 0.5929, "step": 4672 }, { "epoch": 0.1277893239991249, "grad_norm": 1.9991655349731445, "learning_rate": 1.9503148139292672e-05, "loss": 0.9495, "step": 4673 }, { "epoch": 0.12781667031284183, "grad_norm": 1.6931349039077759, "learning_rate": 1.9502872390757447e-05, "loss": 0.5677, "step": 4674 }, { "epoch": 0.12784401662655873, "grad_norm": 1.4153996706008911, "learning_rate": 1.950259656767486e-05, "loss": 0.5911, "step": 4675 }, { "epoch": 0.12787136294027565, "grad_norm": 1.3816351890563965, "learning_rate": 1.9502320670047076e-05, "loss": 0.5598, "step": 4676 }, { "epoch": 0.12789870925399255, "grad_norm": 1.9504297971725464, "learning_rate": 1.9502044697876264e-05, "loss": 0.5735, "step": 4677 }, { "epoch": 0.12792605556770947, "grad_norm": 1.57367742061615, "learning_rate": 1.9501768651164587e-05, "loss": 0.5552, "step": 4678 }, { "epoch": 0.12795340188142637, "grad_norm": 1.3419890403747559, "learning_rate": 1.9501492529914204e-05, "loss": 0.5597, "step": 4679 }, { "epoch": 0.1279807481951433, "grad_norm": 1.911629557609558, "learning_rate": 1.950121633412729e-05, "loss": 0.573, "step": 4680 }, { "epoch": 0.1280080945088602, "grad_norm": 1.489035725593567, "learning_rate": 1.9500940063806004e-05, "loss": 0.5894, "step": 4681 }, { "epoch": 0.12803544082257712, "grad_norm": 1.413788914680481, "learning_rate": 1.9500663718952516e-05, "loss": 0.5643, "step": 4682 }, { "epoch": 0.12806278713629402, "grad_norm": 1.440026044845581, "learning_rate": 1.9500387299569002e-05, "loss": 0.6126, "step": 4683 }, { "epoch": 0.12809013345001094, "grad_norm": 1.6087292432785034, "learning_rate": 1.9500110805657618e-05, "loss": 0.974, "step": 4684 }, { "epoch": 0.12811747976372784, "grad_norm": 1.8958311080932617, "learning_rate": 1.9499834237220537e-05, "loss": 0.5506, "step": 4685 }, { "epoch": 0.12814482607744476, "grad_norm": 2.601501941680908, "learning_rate": 1.9499557594259933e-05, "loss": 0.5727, "step": 4686 }, { "epoch": 0.12817217239116166, "grad_norm": 1.2786437273025513, "learning_rate": 1.9499280876777976e-05, "loss": 0.587, "step": 4687 }, { "epoch": 0.12819951870487858, "grad_norm": 1.6596169471740723, "learning_rate": 1.949900408477683e-05, "loss": 0.584, "step": 4688 }, { "epoch": 0.12822686501859548, "grad_norm": 1.6955186128616333, "learning_rate": 1.949872721825867e-05, "loss": 0.5954, "step": 4689 }, { "epoch": 0.1282542113323124, "grad_norm": 1.2118993997573853, "learning_rate": 1.9498450277225667e-05, "loss": 0.5736, "step": 4690 }, { "epoch": 0.1282815576460293, "grad_norm": 1.2517105340957642, "learning_rate": 1.9498173261679996e-05, "loss": 0.5358, "step": 4691 }, { "epoch": 0.12830890395974623, "grad_norm": 1.639301061630249, "learning_rate": 1.9497896171623828e-05, "loss": 0.567, "step": 4692 }, { "epoch": 0.12833625027346313, "grad_norm": 1.3395394086837769, "learning_rate": 1.9497619007059342e-05, "loss": 0.5529, "step": 4693 }, { "epoch": 0.12836359658718005, "grad_norm": 1.3734756708145142, "learning_rate": 1.9497341767988703e-05, "loss": 0.5411, "step": 4694 }, { "epoch": 0.12839094290089695, "grad_norm": 1.5873459577560425, "learning_rate": 1.9497064454414092e-05, "loss": 0.5861, "step": 4695 }, { "epoch": 0.12841828921461387, "grad_norm": 2.137869358062744, "learning_rate": 1.9496787066337683e-05, "loss": 0.5458, "step": 4696 }, { "epoch": 0.12844563552833077, "grad_norm": 1.4176158905029297, "learning_rate": 1.949650960376165e-05, "loss": 0.9263, "step": 4697 }, { "epoch": 0.1284729818420477, "grad_norm": 1.9078279733657837, "learning_rate": 1.9496232066688173e-05, "loss": 0.5577, "step": 4698 }, { "epoch": 0.1285003281557646, "grad_norm": 1.4881564378738403, "learning_rate": 1.949595445511943e-05, "loss": 0.6074, "step": 4699 }, { "epoch": 0.12852767446948152, "grad_norm": 1.655643105506897, "learning_rate": 1.9495676769057596e-05, "loss": 0.5668, "step": 4700 }, { "epoch": 0.12855502078319841, "grad_norm": 1.5761762857437134, "learning_rate": 1.9495399008504848e-05, "loss": 0.937, "step": 4701 }, { "epoch": 0.12858236709691534, "grad_norm": 1.6001815795898438, "learning_rate": 1.949512117346337e-05, "loss": 0.554, "step": 4702 }, { "epoch": 0.12860971341063224, "grad_norm": 1.6212005615234375, "learning_rate": 1.9494843263935337e-05, "loss": 0.5274, "step": 4703 }, { "epoch": 0.12863705972434916, "grad_norm": 1.7323826551437378, "learning_rate": 1.9494565279922928e-05, "loss": 0.5624, "step": 4704 }, { "epoch": 0.12866440603806606, "grad_norm": 1.9757301807403564, "learning_rate": 1.949428722142833e-05, "loss": 0.5539, "step": 4705 }, { "epoch": 0.12869175235178298, "grad_norm": 1.6599674224853516, "learning_rate": 1.949400908845372e-05, "loss": 0.6464, "step": 4706 }, { "epoch": 0.12871909866549988, "grad_norm": 1.7518129348754883, "learning_rate": 1.949373088100128e-05, "loss": 0.4369, "step": 4707 }, { "epoch": 0.1287464449792168, "grad_norm": 1.7629730701446533, "learning_rate": 1.9493452599073194e-05, "loss": 0.6048, "step": 4708 }, { "epoch": 0.1287737912929337, "grad_norm": 1.5805681943893433, "learning_rate": 1.9493174242671643e-05, "loss": 0.5692, "step": 4709 }, { "epoch": 0.12880113760665063, "grad_norm": 1.7233558893203735, "learning_rate": 1.949289581179881e-05, "loss": 0.5455, "step": 4710 }, { "epoch": 0.12882848392036753, "grad_norm": 1.5581490993499756, "learning_rate": 1.9492617306456883e-05, "loss": 0.4411, "step": 4711 }, { "epoch": 0.12885583023408445, "grad_norm": 1.606123924255371, "learning_rate": 1.9492338726648044e-05, "loss": 0.5773, "step": 4712 }, { "epoch": 0.12888317654780135, "grad_norm": 1.2822601795196533, "learning_rate": 1.9492060072374483e-05, "loss": 0.5948, "step": 4713 }, { "epoch": 0.12891052286151827, "grad_norm": 1.3361204862594604, "learning_rate": 1.949178134363838e-05, "loss": 0.5686, "step": 4714 }, { "epoch": 0.12893786917523517, "grad_norm": 1.395074725151062, "learning_rate": 1.949150254044192e-05, "loss": 0.5903, "step": 4715 }, { "epoch": 0.1289652154889521, "grad_norm": 2.272120237350464, "learning_rate": 1.9491223662787296e-05, "loss": 0.4015, "step": 4716 }, { "epoch": 0.128992561802669, "grad_norm": 1.3523738384246826, "learning_rate": 1.9490944710676693e-05, "loss": 0.5407, "step": 4717 }, { "epoch": 0.12901990811638592, "grad_norm": 1.4167819023132324, "learning_rate": 1.94906656841123e-05, "loss": 0.6037, "step": 4718 }, { "epoch": 0.12904725443010281, "grad_norm": 1.4173117876052856, "learning_rate": 1.9490386583096306e-05, "loss": 0.5328, "step": 4719 }, { "epoch": 0.12907460074381974, "grad_norm": 2.045302152633667, "learning_rate": 1.94901074076309e-05, "loss": 0.5644, "step": 4720 }, { "epoch": 0.12910194705753664, "grad_norm": 2.6122686862945557, "learning_rate": 1.948982815771827e-05, "loss": 0.6379, "step": 4721 }, { "epoch": 0.12912929337125356, "grad_norm": 1.6127780675888062, "learning_rate": 1.948954883336061e-05, "loss": 0.5563, "step": 4722 }, { "epoch": 0.12915663968497046, "grad_norm": 2.031334161758423, "learning_rate": 1.948926943456011e-05, "loss": 0.5687, "step": 4723 }, { "epoch": 0.12918398599868738, "grad_norm": 1.51288902759552, "learning_rate": 1.9488989961318965e-05, "loss": 0.6046, "step": 4724 }, { "epoch": 0.12921133231240428, "grad_norm": 1.3406704664230347, "learning_rate": 1.948871041363936e-05, "loss": 0.4335, "step": 4725 }, { "epoch": 0.1292386786261212, "grad_norm": 1.8520585298538208, "learning_rate": 1.9488430791523494e-05, "loss": 0.6392, "step": 4726 }, { "epoch": 0.1292660249398381, "grad_norm": 1.9045246839523315, "learning_rate": 1.948815109497356e-05, "loss": 0.6087, "step": 4727 }, { "epoch": 0.12929337125355503, "grad_norm": 1.606109857559204, "learning_rate": 1.948787132399175e-05, "loss": 0.9493, "step": 4728 }, { "epoch": 0.12932071756727193, "grad_norm": 1.352738380432129, "learning_rate": 1.9487591478580258e-05, "loss": 0.58, "step": 4729 }, { "epoch": 0.12934806388098885, "grad_norm": 1.7258756160736084, "learning_rate": 1.948731155874128e-05, "loss": 0.5678, "step": 4730 }, { "epoch": 0.12937541019470575, "grad_norm": 1.3485101461410522, "learning_rate": 1.9487031564477016e-05, "loss": 0.563, "step": 4731 }, { "epoch": 0.12940275650842267, "grad_norm": 1.4518777132034302, "learning_rate": 1.948675149578966e-05, "loss": 0.5489, "step": 4732 }, { "epoch": 0.12943010282213957, "grad_norm": 1.4488941431045532, "learning_rate": 1.9486471352681403e-05, "loss": 0.5776, "step": 4733 }, { "epoch": 0.1294574491358565, "grad_norm": 2.3516786098480225, "learning_rate": 1.9486191135154453e-05, "loss": 0.5795, "step": 4734 }, { "epoch": 0.1294847954495734, "grad_norm": 5.633289813995361, "learning_rate": 1.9485910843211e-05, "loss": 0.9303, "step": 4735 }, { "epoch": 0.12951214176329032, "grad_norm": 1.7731338739395142, "learning_rate": 1.9485630476853248e-05, "loss": 0.6054, "step": 4736 }, { "epoch": 0.12953948807700721, "grad_norm": 1.4397958517074585, "learning_rate": 1.948535003608339e-05, "loss": 0.5812, "step": 4737 }, { "epoch": 0.12956683439072414, "grad_norm": 1.6358002424240112, "learning_rate": 1.9485069520903635e-05, "loss": 0.5699, "step": 4738 }, { "epoch": 0.12959418070444104, "grad_norm": 3.21307635307312, "learning_rate": 1.9484788931316182e-05, "loss": 0.5216, "step": 4739 }, { "epoch": 0.12962152701815796, "grad_norm": 1.8109917640686035, "learning_rate": 1.9484508267323227e-05, "loss": 0.5702, "step": 4740 }, { "epoch": 0.12964887333187486, "grad_norm": 1.8363292217254639, "learning_rate": 1.948422752892697e-05, "loss": 0.9574, "step": 4741 }, { "epoch": 0.12967621964559178, "grad_norm": 1.9081741571426392, "learning_rate": 1.9483946716129617e-05, "loss": 0.5955, "step": 4742 }, { "epoch": 0.12970356595930868, "grad_norm": 2.010485887527466, "learning_rate": 1.9483665828933373e-05, "loss": 0.6015, "step": 4743 }, { "epoch": 0.1297309122730256, "grad_norm": 1.6417148113250732, "learning_rate": 1.948338486734044e-05, "loss": 0.5679, "step": 4744 }, { "epoch": 0.1297582585867425, "grad_norm": 1.5074213743209839, "learning_rate": 1.948310383135302e-05, "loss": 0.6078, "step": 4745 }, { "epoch": 0.12978560490045943, "grad_norm": 1.6580530405044556, "learning_rate": 1.9482822720973322e-05, "loss": 0.5685, "step": 4746 }, { "epoch": 0.12981295121417633, "grad_norm": 1.4784761667251587, "learning_rate": 1.948254153620355e-05, "loss": 0.5568, "step": 4747 }, { "epoch": 0.12984029752789325, "grad_norm": 2.141054391860962, "learning_rate": 1.9482260277045902e-05, "loss": 0.52, "step": 4748 }, { "epoch": 0.12986764384161015, "grad_norm": 2.028331995010376, "learning_rate": 1.9481978943502594e-05, "loss": 0.5804, "step": 4749 }, { "epoch": 0.12989499015532707, "grad_norm": 1.5454704761505127, "learning_rate": 1.948169753557583e-05, "loss": 0.5675, "step": 4750 }, { "epoch": 0.12992233646904397, "grad_norm": 1.4715485572814941, "learning_rate": 1.9481416053267815e-05, "loss": 0.5968, "step": 4751 }, { "epoch": 0.1299496827827609, "grad_norm": 1.6130133867263794, "learning_rate": 1.948113449658076e-05, "loss": 0.5705, "step": 4752 }, { "epoch": 0.1299770290964778, "grad_norm": 1.6357805728912354, "learning_rate": 1.9480852865516877e-05, "loss": 0.583, "step": 4753 }, { "epoch": 0.13000437541019472, "grad_norm": 1.6170753240585327, "learning_rate": 1.9480571160078367e-05, "loss": 0.4837, "step": 4754 }, { "epoch": 0.13003172172391161, "grad_norm": 1.9435172080993652, "learning_rate": 1.9480289380267446e-05, "loss": 0.6662, "step": 4755 }, { "epoch": 0.13005906803762854, "grad_norm": 3.2510645389556885, "learning_rate": 1.9480007526086325e-05, "loss": 0.6065, "step": 4756 }, { "epoch": 0.13008641435134544, "grad_norm": 1.958702802658081, "learning_rate": 1.9479725597537208e-05, "loss": 0.4388, "step": 4757 }, { "epoch": 0.13011376066506236, "grad_norm": 1.7583184242248535, "learning_rate": 1.9479443594622312e-05, "loss": 0.5527, "step": 4758 }, { "epoch": 0.13014110697877926, "grad_norm": 2.274900436401367, "learning_rate": 1.9479161517343853e-05, "loss": 0.501, "step": 4759 }, { "epoch": 0.13016845329249618, "grad_norm": 2.7709755897521973, "learning_rate": 1.947887936570404e-05, "loss": 0.5435, "step": 4760 }, { "epoch": 0.13019579960621308, "grad_norm": 1.5797450542449951, "learning_rate": 1.9478597139705083e-05, "loss": 0.5192, "step": 4761 }, { "epoch": 0.13022314591993, "grad_norm": 1.7103992700576782, "learning_rate": 1.9478314839349197e-05, "loss": 0.4792, "step": 4762 }, { "epoch": 0.1302504922336469, "grad_norm": 2.6366076469421387, "learning_rate": 1.94780324646386e-05, "loss": 0.5466, "step": 4763 }, { "epoch": 0.13027783854736383, "grad_norm": 1.7132353782653809, "learning_rate": 1.9477750015575508e-05, "loss": 0.5945, "step": 4764 }, { "epoch": 0.13030518486108073, "grad_norm": 1.5223814249038696, "learning_rate": 1.9477467492162132e-05, "loss": 0.9619, "step": 4765 }, { "epoch": 0.13033253117479765, "grad_norm": 1.9322896003723145, "learning_rate": 1.947718489440069e-05, "loss": 0.5595, "step": 4766 }, { "epoch": 0.13035987748851455, "grad_norm": 2.287405490875244, "learning_rate": 1.9476902222293404e-05, "loss": 0.5978, "step": 4767 }, { "epoch": 0.13038722380223147, "grad_norm": 1.8697447776794434, "learning_rate": 1.947661947584248e-05, "loss": 0.5797, "step": 4768 }, { "epoch": 0.13041457011594837, "grad_norm": 1.8059115409851074, "learning_rate": 1.947633665505015e-05, "loss": 0.575, "step": 4769 }, { "epoch": 0.1304419164296653, "grad_norm": 1.959739089012146, "learning_rate": 1.9476053759918624e-05, "loss": 0.6277, "step": 4770 }, { "epoch": 0.1304692627433822, "grad_norm": 1.6178475618362427, "learning_rate": 1.9475770790450125e-05, "loss": 0.6508, "step": 4771 }, { "epoch": 0.13049660905709912, "grad_norm": 1.4724621772766113, "learning_rate": 1.9475487746646866e-05, "loss": 0.4278, "step": 4772 }, { "epoch": 0.130523955370816, "grad_norm": 1.6132010221481323, "learning_rate": 1.947520462851107e-05, "loss": 0.5913, "step": 4773 }, { "epoch": 0.13055130168453294, "grad_norm": 1.8154948949813843, "learning_rate": 1.9474921436044968e-05, "loss": 0.5533, "step": 4774 }, { "epoch": 0.13057864799824984, "grad_norm": 2.8662641048431396, "learning_rate": 1.947463816925077e-05, "loss": 0.5547, "step": 4775 }, { "epoch": 0.13060599431196673, "grad_norm": 1.6374485492706299, "learning_rate": 1.9474354828130704e-05, "loss": 0.5789, "step": 4776 }, { "epoch": 0.13063334062568366, "grad_norm": 1.8189079761505127, "learning_rate": 1.947407141268699e-05, "loss": 0.5878, "step": 4777 }, { "epoch": 0.13066068693940056, "grad_norm": 1.6934114694595337, "learning_rate": 1.947378792292185e-05, "loss": 0.5418, "step": 4778 }, { "epoch": 0.13068803325311748, "grad_norm": 1.822558879852295, "learning_rate": 1.9473504358837512e-05, "loss": 0.5378, "step": 4779 }, { "epoch": 0.13071537956683438, "grad_norm": 1.8295336961746216, "learning_rate": 1.9473220720436196e-05, "loss": 0.5752, "step": 4780 }, { "epoch": 0.1307427258805513, "grad_norm": 1.5324369668960571, "learning_rate": 1.9472937007720132e-05, "loss": 0.914, "step": 4781 }, { "epoch": 0.1307700721942682, "grad_norm": 1.6290926933288574, "learning_rate": 1.947265322069154e-05, "loss": 0.562, "step": 4782 }, { "epoch": 0.13079741850798512, "grad_norm": 1.4887810945510864, "learning_rate": 1.9472369359352652e-05, "loss": 0.5601, "step": 4783 }, { "epoch": 0.13082476482170202, "grad_norm": 1.5645312070846558, "learning_rate": 1.9472085423705692e-05, "loss": 0.6023, "step": 4784 }, { "epoch": 0.13085211113541895, "grad_norm": 1.7918132543563843, "learning_rate": 1.9471801413752886e-05, "loss": 0.5628, "step": 4785 }, { "epoch": 0.13087945744913584, "grad_norm": 1.4860857725143433, "learning_rate": 1.947151732949646e-05, "loss": 0.5605, "step": 4786 }, { "epoch": 0.13090680376285277, "grad_norm": 1.6360386610031128, "learning_rate": 1.9471233170938656e-05, "loss": 0.5461, "step": 4787 }, { "epoch": 0.13093415007656967, "grad_norm": 2.0427818298339844, "learning_rate": 1.9470948938081686e-05, "loss": 0.5532, "step": 4788 }, { "epoch": 0.1309614963902866, "grad_norm": 1.34511399269104, "learning_rate": 1.947066463092779e-05, "loss": 0.5971, "step": 4789 }, { "epoch": 0.1309888427040035, "grad_norm": 1.3351922035217285, "learning_rate": 1.9470380249479193e-05, "loss": 0.9384, "step": 4790 }, { "epoch": 0.1310161890177204, "grad_norm": 1.7548902034759521, "learning_rate": 1.9470095793738127e-05, "loss": 0.6126, "step": 4791 }, { "epoch": 0.1310435353314373, "grad_norm": 1.5340185165405273, "learning_rate": 1.946981126370683e-05, "loss": 0.5868, "step": 4792 }, { "epoch": 0.13107088164515424, "grad_norm": 1.4312248229980469, "learning_rate": 1.9469526659387524e-05, "loss": 0.5983, "step": 4793 }, { "epoch": 0.13109822795887113, "grad_norm": 2.215257406234741, "learning_rate": 1.9469241980782447e-05, "loss": 0.5674, "step": 4794 }, { "epoch": 0.13112557427258806, "grad_norm": 1.4072951078414917, "learning_rate": 1.9468957227893833e-05, "loss": 0.5764, "step": 4795 }, { "epoch": 0.13115292058630496, "grad_norm": 1.3185501098632812, "learning_rate": 1.946867240072391e-05, "loss": 0.5673, "step": 4796 }, { "epoch": 0.13118026690002188, "grad_norm": 2.2583248615264893, "learning_rate": 1.9468387499274923e-05, "loss": 0.464, "step": 4797 }, { "epoch": 0.13120761321373878, "grad_norm": 4.134210586547852, "learning_rate": 1.9468102523549096e-05, "loss": 0.4491, "step": 4798 }, { "epoch": 0.1312349595274557, "grad_norm": 1.6611034870147705, "learning_rate": 1.946781747354867e-05, "loss": 0.6092, "step": 4799 }, { "epoch": 0.1312623058411726, "grad_norm": 2.0920286178588867, "learning_rate": 1.9467532349275883e-05, "loss": 0.6119, "step": 4800 }, { "epoch": 0.13128965215488952, "grad_norm": 2.7772722244262695, "learning_rate": 1.946724715073297e-05, "loss": 0.5925, "step": 4801 }, { "epoch": 0.13131699846860642, "grad_norm": 1.588815450668335, "learning_rate": 1.9466961877922164e-05, "loss": 0.5851, "step": 4802 }, { "epoch": 0.13134434478232335, "grad_norm": 1.819935917854309, "learning_rate": 1.9466676530845708e-05, "loss": 0.552, "step": 4803 }, { "epoch": 0.13137169109604024, "grad_norm": 1.3952922821044922, "learning_rate": 1.9466391109505838e-05, "loss": 0.5806, "step": 4804 }, { "epoch": 0.13139903740975717, "grad_norm": 1.4882804155349731, "learning_rate": 1.9466105613904794e-05, "loss": 0.523, "step": 4805 }, { "epoch": 0.13142638372347407, "grad_norm": 2.5948996543884277, "learning_rate": 1.9465820044044816e-05, "loss": 0.6009, "step": 4806 }, { "epoch": 0.131453730037191, "grad_norm": 1.4846843481063843, "learning_rate": 1.9465534399928142e-05, "loss": 0.5817, "step": 4807 }, { "epoch": 0.1314810763509079, "grad_norm": 1.9268802404403687, "learning_rate": 1.9465248681557013e-05, "loss": 0.6329, "step": 4808 }, { "epoch": 0.1315084226646248, "grad_norm": 1.3528629541397095, "learning_rate": 1.946496288893368e-05, "loss": 0.5675, "step": 4809 }, { "epoch": 0.1315357689783417, "grad_norm": 1.583172082901001, "learning_rate": 1.9464677022060367e-05, "loss": 0.5652, "step": 4810 }, { "epoch": 0.13156311529205864, "grad_norm": 1.6257803440093994, "learning_rate": 1.9464391080939333e-05, "loss": 0.6394, "step": 4811 }, { "epoch": 0.13159046160577553, "grad_norm": 1.536709189414978, "learning_rate": 1.946410506557281e-05, "loss": 0.5408, "step": 4812 }, { "epoch": 0.13161780791949246, "grad_norm": 1.862783670425415, "learning_rate": 1.9463818975963048e-05, "loss": 0.5769, "step": 4813 }, { "epoch": 0.13164515423320935, "grad_norm": 1.3937820196151733, "learning_rate": 1.9463532812112288e-05, "loss": 0.5554, "step": 4814 }, { "epoch": 0.13167250054692628, "grad_norm": 1.5677990913391113, "learning_rate": 1.946324657402278e-05, "loss": 0.5987, "step": 4815 }, { "epoch": 0.13169984686064318, "grad_norm": 2.056150197982788, "learning_rate": 1.9462960261696765e-05, "loss": 0.5845, "step": 4816 }, { "epoch": 0.1317271931743601, "grad_norm": 1.6160407066345215, "learning_rate": 1.946267387513649e-05, "loss": 0.6452, "step": 4817 }, { "epoch": 0.131754539488077, "grad_norm": 1.5711199045181274, "learning_rate": 1.94623874143442e-05, "loss": 0.9362, "step": 4818 }, { "epoch": 0.13178188580179392, "grad_norm": 1.7854558229446411, "learning_rate": 1.9462100879322143e-05, "loss": 0.5744, "step": 4819 }, { "epoch": 0.13180923211551082, "grad_norm": 1.8093570470809937, "learning_rate": 1.9461814270072568e-05, "loss": 0.5853, "step": 4820 }, { "epoch": 0.13183657842922775, "grad_norm": 1.4085568189620972, "learning_rate": 1.9461527586597723e-05, "loss": 0.5512, "step": 4821 }, { "epoch": 0.13186392474294464, "grad_norm": 2.0503597259521484, "learning_rate": 1.9461240828899858e-05, "loss": 0.5668, "step": 4822 }, { "epoch": 0.13189127105666157, "grad_norm": 1.4616814851760864, "learning_rate": 1.946095399698122e-05, "loss": 0.9501, "step": 4823 }, { "epoch": 0.13191861737037847, "grad_norm": 1.8540773391723633, "learning_rate": 1.946066709084406e-05, "loss": 0.5883, "step": 4824 }, { "epoch": 0.1319459636840954, "grad_norm": 1.5131107568740845, "learning_rate": 1.946038011049063e-05, "loss": 0.5601, "step": 4825 }, { "epoch": 0.1319733099978123, "grad_norm": 2.5963690280914307, "learning_rate": 1.946009305592318e-05, "loss": 0.9683, "step": 4826 }, { "epoch": 0.1320006563115292, "grad_norm": 1.5130536556243896, "learning_rate": 1.945980592714396e-05, "loss": 0.594, "step": 4827 }, { "epoch": 0.1320280026252461, "grad_norm": 1.7638682126998901, "learning_rate": 1.945951872415523e-05, "loss": 0.5722, "step": 4828 }, { "epoch": 0.13205534893896304, "grad_norm": 2.115560293197632, "learning_rate": 1.9459231446959235e-05, "loss": 0.581, "step": 4829 }, { "epoch": 0.13208269525267993, "grad_norm": 1.4838634729385376, "learning_rate": 1.9458944095558233e-05, "loss": 0.9693, "step": 4830 }, { "epoch": 0.13211004156639686, "grad_norm": 3.1203248500823975, "learning_rate": 1.9458656669954476e-05, "loss": 0.5292, "step": 4831 }, { "epoch": 0.13213738788011375, "grad_norm": 1.8806136846542358, "learning_rate": 1.9458369170150215e-05, "loss": 0.5925, "step": 4832 }, { "epoch": 0.13216473419383068, "grad_norm": 1.5777711868286133, "learning_rate": 1.9458081596147715e-05, "loss": 0.6076, "step": 4833 }, { "epoch": 0.13219208050754758, "grad_norm": 1.7420166730880737, "learning_rate": 1.945779394794922e-05, "loss": 0.571, "step": 4834 }, { "epoch": 0.1322194268212645, "grad_norm": 1.8606122732162476, "learning_rate": 1.9457506225556998e-05, "loss": 0.6336, "step": 4835 }, { "epoch": 0.1322467731349814, "grad_norm": 1.5897492170333862, "learning_rate": 1.9457218428973304e-05, "loss": 0.9204, "step": 4836 }, { "epoch": 0.13227411944869832, "grad_norm": 1.7388889789581299, "learning_rate": 1.945693055820039e-05, "loss": 0.571, "step": 4837 }, { "epoch": 0.13230146576241522, "grad_norm": 1.8159276247024536, "learning_rate": 1.9456642613240518e-05, "loss": 0.5863, "step": 4838 }, { "epoch": 0.13232881207613215, "grad_norm": 1.7028706073760986, "learning_rate": 1.9456354594095943e-05, "loss": 0.5994, "step": 4839 }, { "epoch": 0.13235615838984904, "grad_norm": 1.5760217905044556, "learning_rate": 1.945606650076893e-05, "loss": 0.5835, "step": 4840 }, { "epoch": 0.13238350470356597, "grad_norm": 1.7457232475280762, "learning_rate": 1.9455778333261736e-05, "loss": 0.5769, "step": 4841 }, { "epoch": 0.13241085101728287, "grad_norm": 1.8229790925979614, "learning_rate": 1.945549009157662e-05, "loss": 0.5884, "step": 4842 }, { "epoch": 0.1324381973309998, "grad_norm": 1.519492506980896, "learning_rate": 1.945520177571585e-05, "loss": 0.5456, "step": 4843 }, { "epoch": 0.1324655436447167, "grad_norm": 1.5091675519943237, "learning_rate": 1.9454913385681678e-05, "loss": 0.6027, "step": 4844 }, { "epoch": 0.1324928899584336, "grad_norm": 1.7873176336288452, "learning_rate": 1.9454624921476377e-05, "loss": 0.612, "step": 4845 }, { "epoch": 0.1325202362721505, "grad_norm": 1.4928011894226074, "learning_rate": 1.94543363831022e-05, "loss": 0.9264, "step": 4846 }, { "epoch": 0.13254758258586744, "grad_norm": 2.57926082611084, "learning_rate": 1.9454047770561412e-05, "loss": 0.5838, "step": 4847 }, { "epoch": 0.13257492889958433, "grad_norm": 1.8267771005630493, "learning_rate": 1.9453759083856287e-05, "loss": 0.5926, "step": 4848 }, { "epoch": 0.13260227521330126, "grad_norm": 1.3062852621078491, "learning_rate": 1.9453470322989076e-05, "loss": 0.9428, "step": 4849 }, { "epoch": 0.13262962152701815, "grad_norm": 1.3014774322509766, "learning_rate": 1.9453181487962057e-05, "loss": 0.5822, "step": 4850 }, { "epoch": 0.13265696784073508, "grad_norm": 1.2448744773864746, "learning_rate": 1.9452892578777484e-05, "loss": 0.5557, "step": 4851 }, { "epoch": 0.13268431415445198, "grad_norm": 1.427561640739441, "learning_rate": 1.9452603595437632e-05, "loss": 0.5468, "step": 4852 }, { "epoch": 0.1327116604681689, "grad_norm": 1.5589730739593506, "learning_rate": 1.945231453794476e-05, "loss": 0.9793, "step": 4853 }, { "epoch": 0.1327390067818858, "grad_norm": 1.3641669750213623, "learning_rate": 1.9452025406301147e-05, "loss": 0.5507, "step": 4854 }, { "epoch": 0.13276635309560272, "grad_norm": 1.786866307258606, "learning_rate": 1.9451736200509054e-05, "loss": 0.627, "step": 4855 }, { "epoch": 0.13279369940931962, "grad_norm": 1.3546158075332642, "learning_rate": 1.945144692057075e-05, "loss": 0.6013, "step": 4856 }, { "epoch": 0.13282104572303655, "grad_norm": 1.898979663848877, "learning_rate": 1.9451157566488503e-05, "loss": 0.5953, "step": 4857 }, { "epoch": 0.13284839203675344, "grad_norm": 1.6835956573486328, "learning_rate": 1.9450868138264586e-05, "loss": 0.5895, "step": 4858 }, { "epoch": 0.13287573835047037, "grad_norm": 6.233742713928223, "learning_rate": 1.9450578635901264e-05, "loss": 0.9504, "step": 4859 }, { "epoch": 0.13290308466418727, "grad_norm": 1.952141523361206, "learning_rate": 1.945028905940082e-05, "loss": 0.5529, "step": 4860 }, { "epoch": 0.1329304309779042, "grad_norm": 1.5513073205947876, "learning_rate": 1.9449999408765512e-05, "loss": 0.625, "step": 4861 }, { "epoch": 0.1329577772916211, "grad_norm": 1.404071569442749, "learning_rate": 1.944970968399762e-05, "loss": 0.564, "step": 4862 }, { "epoch": 0.132985123605338, "grad_norm": 1.4563523530960083, "learning_rate": 1.9449419885099413e-05, "loss": 0.5761, "step": 4863 }, { "epoch": 0.1330124699190549, "grad_norm": 1.535114049911499, "learning_rate": 1.944913001207317e-05, "loss": 0.5692, "step": 4864 }, { "epoch": 0.13303981623277183, "grad_norm": 1.8348143100738525, "learning_rate": 1.944884006492116e-05, "loss": 0.5876, "step": 4865 }, { "epoch": 0.13306716254648873, "grad_norm": 1.4917129278182983, "learning_rate": 1.9448550043645658e-05, "loss": 0.5792, "step": 4866 }, { "epoch": 0.13309450886020566, "grad_norm": 1.796387791633606, "learning_rate": 1.944825994824894e-05, "loss": 0.52, "step": 4867 }, { "epoch": 0.13312185517392255, "grad_norm": 1.8395442962646484, "learning_rate": 1.944796977873328e-05, "loss": 0.6038, "step": 4868 }, { "epoch": 0.13314920148763948, "grad_norm": 1.7194671630859375, "learning_rate": 1.9447679535100958e-05, "loss": 0.5372, "step": 4869 }, { "epoch": 0.13317654780135638, "grad_norm": 1.9664980173110962, "learning_rate": 1.9447389217354247e-05, "loss": 0.5642, "step": 4870 }, { "epoch": 0.1332038941150733, "grad_norm": 1.5477957725524902, "learning_rate": 1.944709882549543e-05, "loss": 0.5945, "step": 4871 }, { "epoch": 0.1332312404287902, "grad_norm": 1.4557174444198608, "learning_rate": 1.944680835952678e-05, "loss": 0.5712, "step": 4872 }, { "epoch": 0.13325858674250712, "grad_norm": 1.85381281375885, "learning_rate": 1.9446517819450577e-05, "loss": 0.5846, "step": 4873 }, { "epoch": 0.13328593305622402, "grad_norm": 1.3883335590362549, "learning_rate": 1.9446227205269098e-05, "loss": 0.5553, "step": 4874 }, { "epoch": 0.13331327936994095, "grad_norm": 1.874150037765503, "learning_rate": 1.9445936516984626e-05, "loss": 0.5473, "step": 4875 }, { "epoch": 0.13334062568365784, "grad_norm": 1.4542125463485718, "learning_rate": 1.944564575459944e-05, "loss": 0.5719, "step": 4876 }, { "epoch": 0.13336797199737474, "grad_norm": 1.5383691787719727, "learning_rate": 1.9445354918115823e-05, "loss": 0.5828, "step": 4877 }, { "epoch": 0.13339531831109167, "grad_norm": 1.4095596075057983, "learning_rate": 1.944506400753605e-05, "loss": 0.5622, "step": 4878 }, { "epoch": 0.13342266462480856, "grad_norm": 1.6230812072753906, "learning_rate": 1.9444773022862415e-05, "loss": 0.6221, "step": 4879 }, { "epoch": 0.1334500109385255, "grad_norm": 1.767470359802246, "learning_rate": 1.944448196409719e-05, "loss": 0.5873, "step": 4880 }, { "epoch": 0.13347735725224238, "grad_norm": 1.7013778686523438, "learning_rate": 1.9444190831242663e-05, "loss": 0.5009, "step": 4881 }, { "epoch": 0.1335047035659593, "grad_norm": 1.8108822107315063, "learning_rate": 1.9443899624301113e-05, "loss": 0.6291, "step": 4882 }, { "epoch": 0.1335320498796762, "grad_norm": 2.1113789081573486, "learning_rate": 1.9443608343274833e-05, "loss": 0.5624, "step": 4883 }, { "epoch": 0.13355939619339313, "grad_norm": 1.516895055770874, "learning_rate": 1.94433169881661e-05, "loss": 0.4773, "step": 4884 }, { "epoch": 0.13358674250711003, "grad_norm": 1.8200469017028809, "learning_rate": 1.9443025558977206e-05, "loss": 0.5863, "step": 4885 }, { "epoch": 0.13361408882082695, "grad_norm": 1.7982478141784668, "learning_rate": 1.9442734055710433e-05, "loss": 0.5341, "step": 4886 }, { "epoch": 0.13364143513454385, "grad_norm": 1.6003615856170654, "learning_rate": 1.9442442478368068e-05, "loss": 0.5425, "step": 4887 }, { "epoch": 0.13366878144826078, "grad_norm": 2.024895191192627, "learning_rate": 1.94421508269524e-05, "loss": 0.5755, "step": 4888 }, { "epoch": 0.13369612776197767, "grad_norm": 1.5033273696899414, "learning_rate": 1.9441859101465713e-05, "loss": 0.9513, "step": 4889 }, { "epoch": 0.1337234740756946, "grad_norm": 2.2258243560791016, "learning_rate": 1.94415673019103e-05, "loss": 0.9307, "step": 4890 }, { "epoch": 0.1337508203894115, "grad_norm": 1.4031779766082764, "learning_rate": 1.9441275428288452e-05, "loss": 0.5783, "step": 4891 }, { "epoch": 0.13377816670312842, "grad_norm": 1.6772511005401611, "learning_rate": 1.944098348060245e-05, "loss": 0.5865, "step": 4892 }, { "epoch": 0.13380551301684532, "grad_norm": 1.6488325595855713, "learning_rate": 1.9440691458854594e-05, "loss": 0.5617, "step": 4893 }, { "epoch": 0.13383285933056224, "grad_norm": 1.6949735879898071, "learning_rate": 1.9440399363047166e-05, "loss": 0.5269, "step": 4894 }, { "epoch": 0.13386020564427914, "grad_norm": 1.8956347703933716, "learning_rate": 1.9440107193182465e-05, "loss": 0.5808, "step": 4895 }, { "epoch": 0.13388755195799606, "grad_norm": 1.7784459590911865, "learning_rate": 1.9439814949262778e-05, "loss": 0.5885, "step": 4896 }, { "epoch": 0.13391489827171296, "grad_norm": 1.476259708404541, "learning_rate": 1.9439522631290402e-05, "loss": 0.5355, "step": 4897 }, { "epoch": 0.1339422445854299, "grad_norm": 1.7080916166305542, "learning_rate": 1.9439230239267625e-05, "loss": 0.5664, "step": 4898 }, { "epoch": 0.13396959089914678, "grad_norm": 1.9031081199645996, "learning_rate": 1.9438937773196742e-05, "loss": 0.5725, "step": 4899 }, { "epoch": 0.1339969372128637, "grad_norm": 1.7423934936523438, "learning_rate": 1.9438645233080052e-05, "loss": 0.5575, "step": 4900 }, { "epoch": 0.1340242835265806, "grad_norm": 1.4285798072814941, "learning_rate": 1.9438352618919845e-05, "loss": 0.5946, "step": 4901 }, { "epoch": 0.13405162984029753, "grad_norm": 2.0852134227752686, "learning_rate": 1.9438059930718417e-05, "loss": 0.5835, "step": 4902 }, { "epoch": 0.13407897615401443, "grad_norm": 1.7457786798477173, "learning_rate": 1.9437767168478067e-05, "loss": 0.5823, "step": 4903 }, { "epoch": 0.13410632246773135, "grad_norm": 1.6694673299789429, "learning_rate": 1.9437474332201088e-05, "loss": 0.9265, "step": 4904 }, { "epoch": 0.13413366878144825, "grad_norm": 1.9734723567962646, "learning_rate": 1.9437181421889776e-05, "loss": 0.5354, "step": 4905 }, { "epoch": 0.13416101509516518, "grad_norm": 1.9623000621795654, "learning_rate": 1.9436888437546437e-05, "loss": 0.5712, "step": 4906 }, { "epoch": 0.13418836140888207, "grad_norm": 1.561654806137085, "learning_rate": 1.943659537917336e-05, "loss": 0.9755, "step": 4907 }, { "epoch": 0.134215707722599, "grad_norm": 1.4710348844528198, "learning_rate": 1.9436302246772852e-05, "loss": 0.5757, "step": 4908 }, { "epoch": 0.1342430540363159, "grad_norm": 1.261960506439209, "learning_rate": 1.9436009040347204e-05, "loss": 0.5515, "step": 4909 }, { "epoch": 0.13427040035003282, "grad_norm": 1.665297031402588, "learning_rate": 1.9435715759898724e-05, "loss": 0.5778, "step": 4910 }, { "epoch": 0.13429774666374972, "grad_norm": 1.5580216646194458, "learning_rate": 1.943542240542971e-05, "loss": 0.5533, "step": 4911 }, { "epoch": 0.13432509297746664, "grad_norm": 2.063612937927246, "learning_rate": 1.9435128976942458e-05, "loss": 0.6386, "step": 4912 }, { "epoch": 0.13435243929118354, "grad_norm": 1.890379786491394, "learning_rate": 1.9434835474439276e-05, "loss": 0.56, "step": 4913 }, { "epoch": 0.13437978560490046, "grad_norm": 1.577191948890686, "learning_rate": 1.9434541897922467e-05, "loss": 0.6029, "step": 4914 }, { "epoch": 0.13440713191861736, "grad_norm": 1.594662070274353, "learning_rate": 1.9434248247394328e-05, "loss": 0.5813, "step": 4915 }, { "epoch": 0.1344344782323343, "grad_norm": 1.7650182247161865, "learning_rate": 1.9433954522857173e-05, "loss": 0.5664, "step": 4916 }, { "epoch": 0.13446182454605118, "grad_norm": 1.5351101160049438, "learning_rate": 1.9433660724313293e-05, "loss": 0.5986, "step": 4917 }, { "epoch": 0.1344891708597681, "grad_norm": 1.576909065246582, "learning_rate": 1.9433366851765e-05, "loss": 0.5628, "step": 4918 }, { "epoch": 0.134516517173485, "grad_norm": 3.30334210395813, "learning_rate": 1.9433072905214606e-05, "loss": 0.3797, "step": 4919 }, { "epoch": 0.13454386348720193, "grad_norm": 1.6749193668365479, "learning_rate": 1.9432778884664403e-05, "loss": 0.6027, "step": 4920 }, { "epoch": 0.13457120980091883, "grad_norm": 1.8927571773529053, "learning_rate": 1.9432484790116704e-05, "loss": 0.5709, "step": 4921 }, { "epoch": 0.13459855611463575, "grad_norm": 1.6186455488204956, "learning_rate": 1.943219062157382e-05, "loss": 0.6478, "step": 4922 }, { "epoch": 0.13462590242835265, "grad_norm": 1.375698208808899, "learning_rate": 1.9431896379038057e-05, "loss": 0.6494, "step": 4923 }, { "epoch": 0.13465324874206958, "grad_norm": 1.6645349264144897, "learning_rate": 1.9431602062511717e-05, "loss": 0.5205, "step": 4924 }, { "epoch": 0.13468059505578647, "grad_norm": 2.8261804580688477, "learning_rate": 1.9431307671997113e-05, "loss": 0.5666, "step": 4925 }, { "epoch": 0.1347079413695034, "grad_norm": 1.437578558921814, "learning_rate": 1.9431013207496553e-05, "loss": 0.5856, "step": 4926 }, { "epoch": 0.1347352876832203, "grad_norm": 1.6698107719421387, "learning_rate": 1.9430718669012354e-05, "loss": 0.5758, "step": 4927 }, { "epoch": 0.13476263399693722, "grad_norm": 1.368825078010559, "learning_rate": 1.9430424056546817e-05, "loss": 0.5679, "step": 4928 }, { "epoch": 0.13478998031065412, "grad_norm": 1.4973716735839844, "learning_rate": 1.943012937010226e-05, "loss": 0.5676, "step": 4929 }, { "epoch": 0.13481732662437104, "grad_norm": 1.9750875234603882, "learning_rate": 1.9429834609680988e-05, "loss": 0.559, "step": 4930 }, { "epoch": 0.13484467293808794, "grad_norm": 1.655150294303894, "learning_rate": 1.942953977528532e-05, "loss": 0.6239, "step": 4931 }, { "epoch": 0.13487201925180486, "grad_norm": 1.5060455799102783, "learning_rate": 1.9429244866917565e-05, "loss": 0.5828, "step": 4932 }, { "epoch": 0.13489936556552176, "grad_norm": 1.3798205852508545, "learning_rate": 1.942894988458004e-05, "loss": 0.5823, "step": 4933 }, { "epoch": 0.1349267118792387, "grad_norm": 6.004847526550293, "learning_rate": 1.9428654828275054e-05, "loss": 0.6105, "step": 4934 }, { "epoch": 0.13495405819295558, "grad_norm": 1.8815653324127197, "learning_rate": 1.9428359698004927e-05, "loss": 0.6058, "step": 4935 }, { "epoch": 0.1349814045066725, "grad_norm": 1.763631820678711, "learning_rate": 1.942806449377197e-05, "loss": 0.573, "step": 4936 }, { "epoch": 0.1350087508203894, "grad_norm": 1.3939869403839111, "learning_rate": 1.94277692155785e-05, "loss": 0.6104, "step": 4937 }, { "epoch": 0.13503609713410633, "grad_norm": 1.8333594799041748, "learning_rate": 1.9427473863426832e-05, "loss": 0.6547, "step": 4938 }, { "epoch": 0.13506344344782323, "grad_norm": 1.621815800666809, "learning_rate": 1.9427178437319288e-05, "loss": 0.617, "step": 4939 }, { "epoch": 0.13509078976154015, "grad_norm": 1.604630470275879, "learning_rate": 1.942688293725818e-05, "loss": 0.592, "step": 4940 }, { "epoch": 0.13511813607525705, "grad_norm": 2.3487260341644287, "learning_rate": 1.942658736324583e-05, "loss": 0.6011, "step": 4941 }, { "epoch": 0.13514548238897398, "grad_norm": 2.9097490310668945, "learning_rate": 1.9426291715284554e-05, "loss": 0.9772, "step": 4942 }, { "epoch": 0.13517282870269087, "grad_norm": 2.0443639755249023, "learning_rate": 1.9425995993376672e-05, "loss": 0.5517, "step": 4943 }, { "epoch": 0.1352001750164078, "grad_norm": 7.1306939125061035, "learning_rate": 1.9425700197524505e-05, "loss": 0.478, "step": 4944 }, { "epoch": 0.1352275213301247, "grad_norm": 2.1807851791381836, "learning_rate": 1.9425404327730372e-05, "loss": 0.5906, "step": 4945 }, { "epoch": 0.13525486764384162, "grad_norm": 1.691264271736145, "learning_rate": 1.9425108383996596e-05, "loss": 0.5559, "step": 4946 }, { "epoch": 0.13528221395755852, "grad_norm": 1.6016123294830322, "learning_rate": 1.9424812366325497e-05, "loss": 0.4305, "step": 4947 }, { "epoch": 0.13530956027127544, "grad_norm": 1.8934775590896606, "learning_rate": 1.9424516274719392e-05, "loss": 0.5674, "step": 4948 }, { "epoch": 0.13533690658499234, "grad_norm": 1.430501103401184, "learning_rate": 1.9424220109180617e-05, "loss": 0.4245, "step": 4949 }, { "epoch": 0.13536425289870926, "grad_norm": 1.6945257186889648, "learning_rate": 1.9423923869711484e-05, "loss": 0.5837, "step": 4950 }, { "epoch": 0.13539159921242616, "grad_norm": 1.6922682523727417, "learning_rate": 1.9423627556314318e-05, "loss": 0.5753, "step": 4951 }, { "epoch": 0.1354189455261431, "grad_norm": 1.6881219148635864, "learning_rate": 1.9423331168991447e-05, "loss": 0.5572, "step": 4952 }, { "epoch": 0.13544629183985998, "grad_norm": 1.9149253368377686, "learning_rate": 1.94230347077452e-05, "loss": 0.575, "step": 4953 }, { "epoch": 0.1354736381535769, "grad_norm": 1.6555124521255493, "learning_rate": 1.942273817257789e-05, "loss": 0.5896, "step": 4954 }, { "epoch": 0.1355009844672938, "grad_norm": 1.708022952079773, "learning_rate": 1.9422441563491854e-05, "loss": 0.5894, "step": 4955 }, { "epoch": 0.13552833078101073, "grad_norm": 2.0038866996765137, "learning_rate": 1.942214488048942e-05, "loss": 0.5755, "step": 4956 }, { "epoch": 0.13555567709472763, "grad_norm": 1.43272066116333, "learning_rate": 1.9421848123572907e-05, "loss": 0.5757, "step": 4957 }, { "epoch": 0.13558302340844455, "grad_norm": 2.0500898361206055, "learning_rate": 1.9421551292744646e-05, "loss": 0.5974, "step": 4958 }, { "epoch": 0.13561036972216145, "grad_norm": 1.4638259410858154, "learning_rate": 1.9421254388006966e-05, "loss": 0.5858, "step": 4959 }, { "epoch": 0.13563771603587838, "grad_norm": 1.5428496599197388, "learning_rate": 1.94209574093622e-05, "loss": 0.5865, "step": 4960 }, { "epoch": 0.13566506234959527, "grad_norm": 1.900436282157898, "learning_rate": 1.9420660356812672e-05, "loss": 0.6002, "step": 4961 }, { "epoch": 0.1356924086633122, "grad_norm": 1.4405015707015991, "learning_rate": 1.9420363230360713e-05, "loss": 0.4279, "step": 4962 }, { "epoch": 0.1357197549770291, "grad_norm": 1.8984276056289673, "learning_rate": 1.942006603000866e-05, "loss": 0.5407, "step": 4963 }, { "epoch": 0.13574710129074602, "grad_norm": 1.475915789604187, "learning_rate": 1.9419768755758838e-05, "loss": 0.5447, "step": 4964 }, { "epoch": 0.13577444760446292, "grad_norm": 2.4157352447509766, "learning_rate": 1.9419471407613583e-05, "loss": 0.9262, "step": 4965 }, { "epoch": 0.13580179391817984, "grad_norm": 1.7380766868591309, "learning_rate": 1.9419173985575223e-05, "loss": 0.5854, "step": 4966 }, { "epoch": 0.13582914023189674, "grad_norm": 1.3673672676086426, "learning_rate": 1.9418876489646093e-05, "loss": 0.572, "step": 4967 }, { "epoch": 0.13585648654561366, "grad_norm": 2.3610763549804688, "learning_rate": 1.941857891982853e-05, "loss": 0.572, "step": 4968 }, { "epoch": 0.13588383285933056, "grad_norm": 1.9677462577819824, "learning_rate": 1.9418281276124865e-05, "loss": 0.6191, "step": 4969 }, { "epoch": 0.13591117917304749, "grad_norm": 1.3238615989685059, "learning_rate": 1.9417983558537435e-05, "loss": 0.5867, "step": 4970 }, { "epoch": 0.13593852548676438, "grad_norm": 1.9007577896118164, "learning_rate": 1.9417685767068573e-05, "loss": 0.5548, "step": 4971 }, { "epoch": 0.1359658718004813, "grad_norm": 1.5256870985031128, "learning_rate": 1.9417387901720616e-05, "loss": 0.3966, "step": 4972 }, { "epoch": 0.1359932181141982, "grad_norm": 1.8636449575424194, "learning_rate": 1.9417089962495902e-05, "loss": 0.3972, "step": 4973 }, { "epoch": 0.13602056442791513, "grad_norm": 1.3926212787628174, "learning_rate": 1.941679194939677e-05, "loss": 0.4013, "step": 4974 }, { "epoch": 0.13604791074163203, "grad_norm": 2.0856757164001465, "learning_rate": 1.9416493862425552e-05, "loss": 0.5918, "step": 4975 }, { "epoch": 0.13607525705534895, "grad_norm": 2.277611017227173, "learning_rate": 1.9416195701584592e-05, "loss": 0.6263, "step": 4976 }, { "epoch": 0.13610260336906585, "grad_norm": 1.9453983306884766, "learning_rate": 1.9415897466876226e-05, "loss": 0.5833, "step": 4977 }, { "epoch": 0.13612994968278275, "grad_norm": 1.339233160018921, "learning_rate": 1.9415599158302792e-05, "loss": 0.5466, "step": 4978 }, { "epoch": 0.13615729599649967, "grad_norm": 1.4716252088546753, "learning_rate": 1.9415300775866635e-05, "loss": 0.5603, "step": 4979 }, { "epoch": 0.13618464231021657, "grad_norm": 1.5945950746536255, "learning_rate": 1.941500231957009e-05, "loss": 0.591, "step": 4980 }, { "epoch": 0.1362119886239335, "grad_norm": 1.7522425651550293, "learning_rate": 1.9414703789415507e-05, "loss": 0.5824, "step": 4981 }, { "epoch": 0.1362393349376504, "grad_norm": 1.8614507913589478, "learning_rate": 1.9414405185405218e-05, "loss": 0.6207, "step": 4982 }, { "epoch": 0.13626668125136732, "grad_norm": 1.609320878982544, "learning_rate": 1.9414106507541567e-05, "loss": 0.5845, "step": 4983 }, { "epoch": 0.1362940275650842, "grad_norm": 1.533247709274292, "learning_rate": 1.9413807755826906e-05, "loss": 0.9412, "step": 4984 }, { "epoch": 0.13632137387880114, "grad_norm": 1.4682810306549072, "learning_rate": 1.941350893026357e-05, "loss": 0.5758, "step": 4985 }, { "epoch": 0.13634872019251804, "grad_norm": 1.7501938343048096, "learning_rate": 1.9413210030853907e-05, "loss": 0.578, "step": 4986 }, { "epoch": 0.13637606650623496, "grad_norm": 1.7086734771728516, "learning_rate": 1.9412911057600258e-05, "loss": 0.5674, "step": 4987 }, { "epoch": 0.13640341281995186, "grad_norm": 1.394020915031433, "learning_rate": 1.941261201050497e-05, "loss": 0.9666, "step": 4988 }, { "epoch": 0.13643075913366878, "grad_norm": 1.380810022354126, "learning_rate": 1.9412312889570393e-05, "loss": 0.5322, "step": 4989 }, { "epoch": 0.13645810544738568, "grad_norm": 1.2106175422668457, "learning_rate": 1.941201369479887e-05, "loss": 0.5504, "step": 4990 }, { "epoch": 0.1364854517611026, "grad_norm": 1.3489900827407837, "learning_rate": 1.9411714426192746e-05, "loss": 0.5935, "step": 4991 }, { "epoch": 0.1365127980748195, "grad_norm": 1.533959984779358, "learning_rate": 1.941141508375437e-05, "loss": 0.5291, "step": 4992 }, { "epoch": 0.13654014438853643, "grad_norm": 1.5140999555587769, "learning_rate": 1.9411115667486095e-05, "loss": 0.5767, "step": 4993 }, { "epoch": 0.13656749070225332, "grad_norm": 1.638850450515747, "learning_rate": 1.9410816177390262e-05, "loss": 0.5377, "step": 4994 }, { "epoch": 0.13659483701597025, "grad_norm": 1.4701972007751465, "learning_rate": 1.9410516613469227e-05, "loss": 0.5407, "step": 4995 }, { "epoch": 0.13662218332968715, "grad_norm": 1.583179235458374, "learning_rate": 1.941021697572534e-05, "loss": 0.5963, "step": 4996 }, { "epoch": 0.13664952964340407, "grad_norm": 1.4073519706726074, "learning_rate": 1.9409917264160945e-05, "loss": 0.5511, "step": 4997 }, { "epoch": 0.13667687595712097, "grad_norm": 1.4427917003631592, "learning_rate": 1.94096174787784e-05, "loss": 0.5751, "step": 4998 }, { "epoch": 0.1367042222708379, "grad_norm": 2.491366386413574, "learning_rate": 1.940931761958005e-05, "loss": 0.5446, "step": 4999 }, { "epoch": 0.1367315685845548, "grad_norm": 1.6922959089279175, "learning_rate": 1.9409017686568257e-05, "loss": 0.6549, "step": 5000 }, { "epoch": 0.13675891489827172, "grad_norm": 1.8570181131362915, "learning_rate": 1.9408717679745362e-05, "loss": 0.5703, "step": 5001 }, { "epoch": 0.1367862612119886, "grad_norm": 1.6431541442871094, "learning_rate": 1.940841759911373e-05, "loss": 0.5314, "step": 5002 }, { "epoch": 0.13681360752570554, "grad_norm": 2.3644464015960693, "learning_rate": 1.9408117444675707e-05, "loss": 0.5849, "step": 5003 }, { "epoch": 0.13684095383942244, "grad_norm": 1.4431074857711792, "learning_rate": 1.940781721643365e-05, "loss": 0.573, "step": 5004 }, { "epoch": 0.13686830015313936, "grad_norm": 2.425356149673462, "learning_rate": 1.9407516914389916e-05, "loss": 0.5504, "step": 5005 }, { "epoch": 0.13689564646685626, "grad_norm": 1.4717581272125244, "learning_rate": 1.940721653854686e-05, "loss": 0.565, "step": 5006 }, { "epoch": 0.13692299278057318, "grad_norm": 1.8198267221450806, "learning_rate": 1.9406916088906834e-05, "loss": 0.5851, "step": 5007 }, { "epoch": 0.13695033909429008, "grad_norm": 1.7699538469314575, "learning_rate": 1.94066155654722e-05, "loss": 0.5635, "step": 5008 }, { "epoch": 0.136977685408007, "grad_norm": 1.9944690465927124, "learning_rate": 1.9406314968245316e-05, "loss": 0.939, "step": 5009 }, { "epoch": 0.1370050317217239, "grad_norm": 1.5684514045715332, "learning_rate": 1.940601429722854e-05, "loss": 0.5754, "step": 5010 }, { "epoch": 0.13703237803544083, "grad_norm": 1.5400127172470093, "learning_rate": 1.9405713552424224e-05, "loss": 0.6325, "step": 5011 }, { "epoch": 0.13705972434915772, "grad_norm": 1.5078507661819458, "learning_rate": 1.9405412733834734e-05, "loss": 0.5785, "step": 5012 }, { "epoch": 0.13708707066287465, "grad_norm": 1.4997280836105347, "learning_rate": 1.940511184146243e-05, "loss": 0.5728, "step": 5013 }, { "epoch": 0.13711441697659155, "grad_norm": 1.853461742401123, "learning_rate": 1.9404810875309667e-05, "loss": 0.6126, "step": 5014 }, { "epoch": 0.13714176329030847, "grad_norm": 1.7742761373519897, "learning_rate": 1.9404509835378812e-05, "loss": 0.6001, "step": 5015 }, { "epoch": 0.13716910960402537, "grad_norm": 1.5887417793273926, "learning_rate": 1.9404208721672225e-05, "loss": 0.9507, "step": 5016 }, { "epoch": 0.1371964559177423, "grad_norm": 1.591400384902954, "learning_rate": 1.9403907534192267e-05, "loss": 0.925, "step": 5017 }, { "epoch": 0.1372238022314592, "grad_norm": 1.704148292541504, "learning_rate": 1.94036062729413e-05, "loss": 0.5553, "step": 5018 }, { "epoch": 0.13725114854517612, "grad_norm": 1.8818910121917725, "learning_rate": 1.9403304937921685e-05, "loss": 0.5996, "step": 5019 }, { "epoch": 0.137278494858893, "grad_norm": 1.613566279411316, "learning_rate": 1.9403003529135792e-05, "loss": 0.5952, "step": 5020 }, { "epoch": 0.13730584117260994, "grad_norm": 1.491384744644165, "learning_rate": 1.940270204658598e-05, "loss": 0.569, "step": 5021 }, { "epoch": 0.13733318748632684, "grad_norm": 2.0095374584198, "learning_rate": 1.940240049027462e-05, "loss": 0.584, "step": 5022 }, { "epoch": 0.13736053380004376, "grad_norm": 1.3643046617507935, "learning_rate": 1.9402098860204072e-05, "loss": 0.9234, "step": 5023 }, { "epoch": 0.13738788011376066, "grad_norm": 1.4221117496490479, "learning_rate": 1.940179715637671e-05, "loss": 0.5707, "step": 5024 }, { "epoch": 0.13741522642747758, "grad_norm": 1.6165263652801514, "learning_rate": 1.9401495378794886e-05, "loss": 0.5443, "step": 5025 }, { "epoch": 0.13744257274119448, "grad_norm": 1.4673123359680176, "learning_rate": 1.9401193527460983e-05, "loss": 0.5578, "step": 5026 }, { "epoch": 0.1374699190549114, "grad_norm": 1.9106109142303467, "learning_rate": 1.940089160237736e-05, "loss": 0.9245, "step": 5027 }, { "epoch": 0.1374972653686283, "grad_norm": 3.0475950241088867, "learning_rate": 1.9400589603546386e-05, "loss": 0.5863, "step": 5028 }, { "epoch": 0.13752461168234523, "grad_norm": 1.439286708831787, "learning_rate": 1.940028753097044e-05, "loss": 0.9542, "step": 5029 }, { "epoch": 0.13755195799606212, "grad_norm": 1.4791473150253296, "learning_rate": 1.9399985384651878e-05, "loss": 0.6153, "step": 5030 }, { "epoch": 0.13757930430977905, "grad_norm": 1.2508774995803833, "learning_rate": 1.9399683164593074e-05, "loss": 0.8985, "step": 5031 }, { "epoch": 0.13760665062349595, "grad_norm": 1.3430402278900146, "learning_rate": 1.9399380870796404e-05, "loss": 0.5967, "step": 5032 }, { "epoch": 0.13763399693721287, "grad_norm": 2.0869247913360596, "learning_rate": 1.9399078503264234e-05, "loss": 0.5574, "step": 5033 }, { "epoch": 0.13766134325092977, "grad_norm": 1.7765592336654663, "learning_rate": 1.939877606199894e-05, "loss": 0.5549, "step": 5034 }, { "epoch": 0.1376886895646467, "grad_norm": 1.626169204711914, "learning_rate": 1.9398473547002897e-05, "loss": 0.568, "step": 5035 }, { "epoch": 0.1377160358783636, "grad_norm": 1.1997281312942505, "learning_rate": 1.9398170958278468e-05, "loss": 0.5241, "step": 5036 }, { "epoch": 0.13774338219208052, "grad_norm": 1.5779258012771606, "learning_rate": 1.9397868295828034e-05, "loss": 0.9363, "step": 5037 }, { "epoch": 0.1377707285057974, "grad_norm": 1.4811739921569824, "learning_rate": 1.939756555965397e-05, "loss": 0.587, "step": 5038 }, { "epoch": 0.13779807481951434, "grad_norm": 1.6622471809387207, "learning_rate": 1.9397262749758648e-05, "loss": 0.6079, "step": 5039 }, { "epoch": 0.13782542113323124, "grad_norm": 2.0283291339874268, "learning_rate": 1.9396959866144442e-05, "loss": 0.5749, "step": 5040 }, { "epoch": 0.13785276744694816, "grad_norm": 1.2228925228118896, "learning_rate": 1.9396656908813734e-05, "loss": 0.5441, "step": 5041 }, { "epoch": 0.13788011376066506, "grad_norm": 1.5456475019454956, "learning_rate": 1.9396353877768894e-05, "loss": 0.5646, "step": 5042 }, { "epoch": 0.13790746007438198, "grad_norm": 1.4603019952774048, "learning_rate": 1.9396050773012302e-05, "loss": 0.6158, "step": 5043 }, { "epoch": 0.13793480638809888, "grad_norm": 1.5733928680419922, "learning_rate": 1.939574759454634e-05, "loss": 0.5826, "step": 5044 }, { "epoch": 0.1379621527018158, "grad_norm": 1.6067293882369995, "learning_rate": 1.9395444342373378e-05, "loss": 0.5497, "step": 5045 }, { "epoch": 0.1379894990155327, "grad_norm": 1.5646698474884033, "learning_rate": 1.93951410164958e-05, "loss": 0.5558, "step": 5046 }, { "epoch": 0.13801684532924963, "grad_norm": 1.595715880393982, "learning_rate": 1.9394837616915987e-05, "loss": 0.5886, "step": 5047 }, { "epoch": 0.13804419164296652, "grad_norm": 1.861280918121338, "learning_rate": 1.9394534143636315e-05, "loss": 0.5952, "step": 5048 }, { "epoch": 0.13807153795668345, "grad_norm": 1.423264741897583, "learning_rate": 1.9394230596659166e-05, "loss": 0.5883, "step": 5049 }, { "epoch": 0.13809888427040035, "grad_norm": 1.5703761577606201, "learning_rate": 1.9393926975986926e-05, "loss": 0.5718, "step": 5050 }, { "epoch": 0.13812623058411727, "grad_norm": 1.4069219827651978, "learning_rate": 1.9393623281621968e-05, "loss": 0.5854, "step": 5051 }, { "epoch": 0.13815357689783417, "grad_norm": 1.5125430822372437, "learning_rate": 1.939331951356668e-05, "loss": 0.5882, "step": 5052 }, { "epoch": 0.1381809232115511, "grad_norm": 1.5770955085754395, "learning_rate": 1.9393015671823443e-05, "loss": 0.6027, "step": 5053 }, { "epoch": 0.138208269525268, "grad_norm": 1.6069546937942505, "learning_rate": 1.9392711756394643e-05, "loss": 0.5617, "step": 5054 }, { "epoch": 0.13823561583898492, "grad_norm": 1.486025094985962, "learning_rate": 1.939240776728266e-05, "loss": 0.5603, "step": 5055 }, { "epoch": 0.1382629621527018, "grad_norm": 1.3119145631790161, "learning_rate": 1.9392103704489888e-05, "loss": 0.5708, "step": 5056 }, { "epoch": 0.13829030846641874, "grad_norm": 1.58665132522583, "learning_rate": 1.93917995680187e-05, "loss": 0.6013, "step": 5057 }, { "epoch": 0.13831765478013563, "grad_norm": 3.5360753536224365, "learning_rate": 1.9391495357871487e-05, "loss": 0.4797, "step": 5058 }, { "epoch": 0.13834500109385256, "grad_norm": 1.3470284938812256, "learning_rate": 1.939119107405064e-05, "loss": 0.5795, "step": 5059 }, { "epoch": 0.13837234740756946, "grad_norm": 1.9527283906936646, "learning_rate": 1.9390886716558536e-05, "loss": 0.5759, "step": 5060 }, { "epoch": 0.13839969372128638, "grad_norm": 1.4810127019882202, "learning_rate": 1.939058228539757e-05, "loss": 0.5203, "step": 5061 }, { "epoch": 0.13842704003500328, "grad_norm": 1.6912682056427002, "learning_rate": 1.9390277780570134e-05, "loss": 0.4489, "step": 5062 }, { "epoch": 0.1384543863487202, "grad_norm": 1.656618595123291, "learning_rate": 1.938997320207861e-05, "loss": 0.5222, "step": 5063 }, { "epoch": 0.1384817326624371, "grad_norm": 1.6935800313949585, "learning_rate": 1.9389668549925385e-05, "loss": 0.9616, "step": 5064 }, { "epoch": 0.13850907897615403, "grad_norm": 2.0296249389648438, "learning_rate": 1.9389363824112856e-05, "loss": 0.5907, "step": 5065 }, { "epoch": 0.13853642528987092, "grad_norm": 1.5985877513885498, "learning_rate": 1.938905902464341e-05, "loss": 0.5766, "step": 5066 }, { "epoch": 0.13856377160358785, "grad_norm": 1.5234640836715698, "learning_rate": 1.9388754151519436e-05, "loss": 0.5778, "step": 5067 }, { "epoch": 0.13859111791730475, "grad_norm": 1.4509906768798828, "learning_rate": 1.938844920474333e-05, "loss": 0.588, "step": 5068 }, { "epoch": 0.13861846423102167, "grad_norm": 1.543153166770935, "learning_rate": 1.9388144184317478e-05, "loss": 0.3973, "step": 5069 }, { "epoch": 0.13864581054473857, "grad_norm": 1.4548838138580322, "learning_rate": 1.9387839090244284e-05, "loss": 0.5423, "step": 5070 }, { "epoch": 0.1386731568584555, "grad_norm": 1.96396005153656, "learning_rate": 1.9387533922526128e-05, "loss": 0.5827, "step": 5071 }, { "epoch": 0.1387005031721724, "grad_norm": 1.5377888679504395, "learning_rate": 1.9387228681165413e-05, "loss": 0.6234, "step": 5072 }, { "epoch": 0.13872784948588932, "grad_norm": 1.3944555521011353, "learning_rate": 1.938692336616453e-05, "loss": 0.556, "step": 5073 }, { "epoch": 0.1387551957996062, "grad_norm": 2.340019464492798, "learning_rate": 1.9386617977525876e-05, "loss": 0.4266, "step": 5074 }, { "epoch": 0.13878254211332314, "grad_norm": 1.545858383178711, "learning_rate": 1.9386312515251843e-05, "loss": 0.5841, "step": 5075 }, { "epoch": 0.13880988842704003, "grad_norm": 1.720573902130127, "learning_rate": 1.9386006979344833e-05, "loss": 0.5658, "step": 5076 }, { "epoch": 0.13883723474075696, "grad_norm": 1.9311506748199463, "learning_rate": 1.938570136980724e-05, "loss": 0.6072, "step": 5077 }, { "epoch": 0.13886458105447386, "grad_norm": 1.7518178224563599, "learning_rate": 1.9385395686641457e-05, "loss": 0.6063, "step": 5078 }, { "epoch": 0.13889192736819075, "grad_norm": 1.439924716949463, "learning_rate": 1.938508992984989e-05, "loss": 0.5721, "step": 5079 }, { "epoch": 0.13891927368190768, "grad_norm": 2.6200547218322754, "learning_rate": 1.9384784099434933e-05, "loss": 0.9412, "step": 5080 }, { "epoch": 0.13894661999562458, "grad_norm": 2.0882248878479004, "learning_rate": 1.9384478195398985e-05, "loss": 0.576, "step": 5081 }, { "epoch": 0.1389739663093415, "grad_norm": 1.4572246074676514, "learning_rate": 1.9384172217744446e-05, "loss": 0.9571, "step": 5082 }, { "epoch": 0.1390013126230584, "grad_norm": 1.5891661643981934, "learning_rate": 1.938386616647372e-05, "loss": 0.5712, "step": 5083 }, { "epoch": 0.13902865893677532, "grad_norm": 1.5822707414627075, "learning_rate": 1.93835600415892e-05, "loss": 0.5667, "step": 5084 }, { "epoch": 0.13905600525049222, "grad_norm": 1.8035805225372314, "learning_rate": 1.9383253843093298e-05, "loss": 0.3909, "step": 5085 }, { "epoch": 0.13908335156420915, "grad_norm": 1.5275282859802246, "learning_rate": 1.9382947570988405e-05, "loss": 0.5768, "step": 5086 }, { "epoch": 0.13911069787792604, "grad_norm": 1.4063071012496948, "learning_rate": 1.938264122527693e-05, "loss": 0.5597, "step": 5087 }, { "epoch": 0.13913804419164297, "grad_norm": 1.6414716243743896, "learning_rate": 1.9382334805961277e-05, "loss": 0.6041, "step": 5088 }, { "epoch": 0.13916539050535986, "grad_norm": 1.6496989727020264, "learning_rate": 1.9382028313043846e-05, "loss": 0.6416, "step": 5089 }, { "epoch": 0.1391927368190768, "grad_norm": 1.5224136114120483, "learning_rate": 1.9381721746527045e-05, "loss": 0.563, "step": 5090 }, { "epoch": 0.1392200831327937, "grad_norm": 1.4799089431762695, "learning_rate": 1.9381415106413276e-05, "loss": 0.5895, "step": 5091 }, { "epoch": 0.1392474294465106, "grad_norm": 2.1605069637298584, "learning_rate": 1.9381108392704947e-05, "loss": 0.9584, "step": 5092 }, { "epoch": 0.1392747757602275, "grad_norm": 1.7950425148010254, "learning_rate": 1.938080160540446e-05, "loss": 0.428, "step": 5093 }, { "epoch": 0.13930212207394443, "grad_norm": 1.4619145393371582, "learning_rate": 1.9380494744514228e-05, "loss": 0.5985, "step": 5094 }, { "epoch": 0.13932946838766133, "grad_norm": 2.0948691368103027, "learning_rate": 1.938018781003665e-05, "loss": 0.5798, "step": 5095 }, { "epoch": 0.13935681470137826, "grad_norm": 1.3803660869598389, "learning_rate": 1.937988080197414e-05, "loss": 0.4321, "step": 5096 }, { "epoch": 0.13938416101509515, "grad_norm": 1.6936321258544922, "learning_rate": 1.9379573720329106e-05, "loss": 0.5833, "step": 5097 }, { "epoch": 0.13941150732881208, "grad_norm": 3.19195818901062, "learning_rate": 1.937926656510396e-05, "loss": 0.5586, "step": 5098 }, { "epoch": 0.13943885364252898, "grad_norm": 1.4671337604522705, "learning_rate": 1.93789593363011e-05, "loss": 0.586, "step": 5099 }, { "epoch": 0.1394661999562459, "grad_norm": 1.865817904472351, "learning_rate": 1.9378652033922948e-05, "loss": 0.5858, "step": 5100 }, { "epoch": 0.1394935462699628, "grad_norm": 4.0630784034729, "learning_rate": 1.9378344657971908e-05, "loss": 0.5636, "step": 5101 }, { "epoch": 0.13952089258367972, "grad_norm": 1.3330998420715332, "learning_rate": 1.9378037208450395e-05, "loss": 0.5788, "step": 5102 }, { "epoch": 0.13954823889739662, "grad_norm": 1.3741015195846558, "learning_rate": 1.9377729685360817e-05, "loss": 0.6242, "step": 5103 }, { "epoch": 0.13957558521111355, "grad_norm": 3.238726854324341, "learning_rate": 1.9377422088705592e-05, "loss": 0.9356, "step": 5104 }, { "epoch": 0.13960293152483044, "grad_norm": 1.843436598777771, "learning_rate": 1.9377114418487127e-05, "loss": 0.9082, "step": 5105 }, { "epoch": 0.13963027783854737, "grad_norm": 27.090791702270508, "learning_rate": 1.937680667470784e-05, "loss": 0.6116, "step": 5106 }, { "epoch": 0.13965762415226426, "grad_norm": 1.5596107244491577, "learning_rate": 1.9376498857370142e-05, "loss": 0.6313, "step": 5107 }, { "epoch": 0.1396849704659812, "grad_norm": 2.367103099822998, "learning_rate": 1.937619096647645e-05, "loss": 0.5203, "step": 5108 }, { "epoch": 0.1397123167796981, "grad_norm": 1.6565967798233032, "learning_rate": 1.9375883002029178e-05, "loss": 0.5891, "step": 5109 }, { "epoch": 0.139739663093415, "grad_norm": 1.6622769832611084, "learning_rate": 1.9375574964030742e-05, "loss": 0.5706, "step": 5110 }, { "epoch": 0.1397670094071319, "grad_norm": 1.5958296060562134, "learning_rate": 1.937526685248356e-05, "loss": 0.5625, "step": 5111 }, { "epoch": 0.13979435572084883, "grad_norm": 2.021455764770508, "learning_rate": 1.9374958667390048e-05, "loss": 0.5841, "step": 5112 }, { "epoch": 0.13982170203456573, "grad_norm": 1.6840018033981323, "learning_rate": 1.9374650408752624e-05, "loss": 0.593, "step": 5113 }, { "epoch": 0.13984904834828266, "grad_norm": 3.4895706176757812, "learning_rate": 1.9374342076573706e-05, "loss": 1.0036, "step": 5114 }, { "epoch": 0.13987639466199955, "grad_norm": 1.8435337543487549, "learning_rate": 1.9374033670855713e-05, "loss": 0.5732, "step": 5115 }, { "epoch": 0.13990374097571648, "grad_norm": 1.9662524461746216, "learning_rate": 1.9373725191601064e-05, "loss": 0.5793, "step": 5116 }, { "epoch": 0.13993108728943338, "grad_norm": 1.4689518213272095, "learning_rate": 1.9373416638812178e-05, "loss": 0.5914, "step": 5117 }, { "epoch": 0.1399584336031503, "grad_norm": 1.5723739862442017, "learning_rate": 1.9373108012491474e-05, "loss": 0.5851, "step": 5118 }, { "epoch": 0.1399857799168672, "grad_norm": 2.4618070125579834, "learning_rate": 1.9372799312641378e-05, "loss": 0.6032, "step": 5119 }, { "epoch": 0.14001312623058412, "grad_norm": 1.4031076431274414, "learning_rate": 1.937249053926431e-05, "loss": 0.626, "step": 5120 }, { "epoch": 0.14004047254430102, "grad_norm": 1.5705729722976685, "learning_rate": 1.9372181692362687e-05, "loss": 0.5741, "step": 5121 }, { "epoch": 0.14006781885801795, "grad_norm": 1.390165090560913, "learning_rate": 1.937187277193894e-05, "loss": 0.9782, "step": 5122 }, { "epoch": 0.14009516517173484, "grad_norm": 2.213325262069702, "learning_rate": 1.9371563777995488e-05, "loss": 0.594, "step": 5123 }, { "epoch": 0.14012251148545177, "grad_norm": 1.943017601966858, "learning_rate": 1.9371254710534756e-05, "loss": 0.6434, "step": 5124 }, { "epoch": 0.14014985779916866, "grad_norm": 1.6095808744430542, "learning_rate": 1.9370945569559168e-05, "loss": 0.6398, "step": 5125 }, { "epoch": 0.1401772041128856, "grad_norm": 1.5276440382003784, "learning_rate": 1.937063635507115e-05, "loss": 0.9624, "step": 5126 }, { "epoch": 0.1402045504266025, "grad_norm": 1.7017120122909546, "learning_rate": 1.937032706707312e-05, "loss": 0.5539, "step": 5127 }, { "epoch": 0.1402318967403194, "grad_norm": 1.4357142448425293, "learning_rate": 1.9370017705567517e-05, "loss": 0.545, "step": 5128 }, { "epoch": 0.1402592430540363, "grad_norm": 1.9619052410125732, "learning_rate": 1.9369708270556767e-05, "loss": 0.6154, "step": 5129 }, { "epoch": 0.14028658936775323, "grad_norm": 1.8477388620376587, "learning_rate": 1.9369398762043287e-05, "loss": 0.6449, "step": 5130 }, { "epoch": 0.14031393568147013, "grad_norm": 4.030612468719482, "learning_rate": 1.936908918002951e-05, "loss": 0.5507, "step": 5131 }, { "epoch": 0.14034128199518706, "grad_norm": 2.0113706588745117, "learning_rate": 1.9368779524517864e-05, "loss": 0.4208, "step": 5132 }, { "epoch": 0.14036862830890395, "grad_norm": 2.7258996963500977, "learning_rate": 1.9368469795510784e-05, "loss": 0.4708, "step": 5133 }, { "epoch": 0.14039597462262088, "grad_norm": 1.9100033044815063, "learning_rate": 1.936815999301069e-05, "loss": 0.5859, "step": 5134 }, { "epoch": 0.14042332093633778, "grad_norm": 1.6674509048461914, "learning_rate": 1.936785011702002e-05, "loss": 0.6022, "step": 5135 }, { "epoch": 0.1404506672500547, "grad_norm": 1.5509727001190186, "learning_rate": 1.9367540167541203e-05, "loss": 0.4204, "step": 5136 }, { "epoch": 0.1404780135637716, "grad_norm": 9.360454559326172, "learning_rate": 1.9367230144576664e-05, "loss": 0.5539, "step": 5137 }, { "epoch": 0.14050535987748852, "grad_norm": 1.3924167156219482, "learning_rate": 1.9366920048128846e-05, "loss": 0.9571, "step": 5138 }, { "epoch": 0.14053270619120542, "grad_norm": 1.5459315776824951, "learning_rate": 1.9366609878200178e-05, "loss": 0.6029, "step": 5139 }, { "epoch": 0.14056005250492234, "grad_norm": 1.9726588726043701, "learning_rate": 1.936629963479309e-05, "loss": 0.5823, "step": 5140 }, { "epoch": 0.14058739881863924, "grad_norm": 1.707688808441162, "learning_rate": 1.9365989317910016e-05, "loss": 0.6011, "step": 5141 }, { "epoch": 0.14061474513235617, "grad_norm": 2.0542235374450684, "learning_rate": 1.936567892755339e-05, "loss": 0.5758, "step": 5142 }, { "epoch": 0.14064209144607306, "grad_norm": 2.628795862197876, "learning_rate": 1.9365368463725648e-05, "loss": 0.5733, "step": 5143 }, { "epoch": 0.14066943775979, "grad_norm": 6.793057441711426, "learning_rate": 1.936505792642923e-05, "loss": 0.5635, "step": 5144 }, { "epoch": 0.1406967840735069, "grad_norm": 1.678399920463562, "learning_rate": 1.9364747315666567e-05, "loss": 0.5819, "step": 5145 }, { "epoch": 0.1407241303872238, "grad_norm": 1.4833635091781616, "learning_rate": 1.9364436631440093e-05, "loss": 0.9119, "step": 5146 }, { "epoch": 0.1407514767009407, "grad_norm": 3.0034282207489014, "learning_rate": 1.9364125873752254e-05, "loss": 0.5868, "step": 5147 }, { "epoch": 0.14077882301465763, "grad_norm": 2.330789089202881, "learning_rate": 1.936381504260548e-05, "loss": 0.5982, "step": 5148 }, { "epoch": 0.14080616932837453, "grad_norm": 1.511947512626648, "learning_rate": 1.9363504138002212e-05, "loss": 0.597, "step": 5149 }, { "epoch": 0.14083351564209146, "grad_norm": 1.469146728515625, "learning_rate": 1.936319315994489e-05, "loss": 0.561, "step": 5150 }, { "epoch": 0.14086086195580835, "grad_norm": 2.48280930519104, "learning_rate": 1.9362882108435953e-05, "loss": 0.4512, "step": 5151 }, { "epoch": 0.14088820826952528, "grad_norm": 1.4176557064056396, "learning_rate": 1.936257098347784e-05, "loss": 0.57, "step": 5152 }, { "epoch": 0.14091555458324218, "grad_norm": 1.3346682786941528, "learning_rate": 1.9362259785072996e-05, "loss": 0.9602, "step": 5153 }, { "epoch": 0.1409429008969591, "grad_norm": 2.23125958442688, "learning_rate": 1.9361948513223856e-05, "loss": 0.5921, "step": 5154 }, { "epoch": 0.140970247210676, "grad_norm": 2.114884853363037, "learning_rate": 1.9361637167932863e-05, "loss": 0.5786, "step": 5155 }, { "epoch": 0.14099759352439292, "grad_norm": 2.0201528072357178, "learning_rate": 1.9361325749202463e-05, "loss": 0.5912, "step": 5156 }, { "epoch": 0.14102493983810982, "grad_norm": 2.0298924446105957, "learning_rate": 1.9361014257035097e-05, "loss": 0.5244, "step": 5157 }, { "epoch": 0.14105228615182674, "grad_norm": 2.124066114425659, "learning_rate": 1.9360702691433206e-05, "loss": 0.6224, "step": 5158 }, { "epoch": 0.14107963246554364, "grad_norm": 1.6348334550857544, "learning_rate": 1.936039105239924e-05, "loss": 0.5938, "step": 5159 }, { "epoch": 0.14110697877926057, "grad_norm": 2.0404903888702393, "learning_rate": 1.936007933993564e-05, "loss": 0.5697, "step": 5160 }, { "epoch": 0.14113432509297746, "grad_norm": 1.8588931560516357, "learning_rate": 1.935976755404485e-05, "loss": 0.5584, "step": 5161 }, { "epoch": 0.1411616714066944, "grad_norm": 1.9805974960327148, "learning_rate": 1.935945569472932e-05, "loss": 0.5535, "step": 5162 }, { "epoch": 0.1411890177204113, "grad_norm": 1.3462305068969727, "learning_rate": 1.9359143761991493e-05, "loss": 0.5791, "step": 5163 }, { "epoch": 0.1412163640341282, "grad_norm": 1.8458279371261597, "learning_rate": 1.9358831755833814e-05, "loss": 0.566, "step": 5164 }, { "epoch": 0.1412437103478451, "grad_norm": 2.3553855419158936, "learning_rate": 1.9358519676258738e-05, "loss": 0.5531, "step": 5165 }, { "epoch": 0.14127105666156203, "grad_norm": 1.8176275491714478, "learning_rate": 1.9358207523268708e-05, "loss": 0.5874, "step": 5166 }, { "epoch": 0.14129840297527893, "grad_norm": 1.854385256767273, "learning_rate": 1.9357895296866172e-05, "loss": 0.5306, "step": 5167 }, { "epoch": 0.14132574928899586, "grad_norm": 1.684808611869812, "learning_rate": 1.9357582997053585e-05, "loss": 0.5881, "step": 5168 }, { "epoch": 0.14135309560271275, "grad_norm": 2.0411171913146973, "learning_rate": 1.9357270623833386e-05, "loss": 0.4808, "step": 5169 }, { "epoch": 0.14138044191642968, "grad_norm": 1.7784568071365356, "learning_rate": 1.9356958177208037e-05, "loss": 0.5746, "step": 5170 }, { "epoch": 0.14140778823014657, "grad_norm": 1.6826646327972412, "learning_rate": 1.9356645657179985e-05, "loss": 0.661, "step": 5171 }, { "epoch": 0.1414351345438635, "grad_norm": 2.2539830207824707, "learning_rate": 1.9356333063751677e-05, "loss": 0.8963, "step": 5172 }, { "epoch": 0.1414624808575804, "grad_norm": 1.4158481359481812, "learning_rate": 1.9356020396925572e-05, "loss": 0.5892, "step": 5173 }, { "epoch": 0.14148982717129732, "grad_norm": 1.5349719524383545, "learning_rate": 1.935570765670412e-05, "loss": 0.5923, "step": 5174 }, { "epoch": 0.14151717348501422, "grad_norm": 1.700439691543579, "learning_rate": 1.9355394843089773e-05, "loss": 0.5729, "step": 5175 }, { "epoch": 0.14154451979873114, "grad_norm": 2.003638505935669, "learning_rate": 1.935508195608499e-05, "loss": 0.5675, "step": 5176 }, { "epoch": 0.14157186611244804, "grad_norm": 2.5413730144500732, "learning_rate": 1.9354768995692216e-05, "loss": 0.973, "step": 5177 }, { "epoch": 0.14159921242616497, "grad_norm": 1.516640305519104, "learning_rate": 1.9354455961913914e-05, "loss": 0.5781, "step": 5178 }, { "epoch": 0.14162655873988186, "grad_norm": 1.7922229766845703, "learning_rate": 1.935414285475254e-05, "loss": 0.5383, "step": 5179 }, { "epoch": 0.1416539050535988, "grad_norm": 1.6741243600845337, "learning_rate": 1.9353829674210545e-05, "loss": 0.5541, "step": 5180 }, { "epoch": 0.14168125136731569, "grad_norm": 2.739976167678833, "learning_rate": 1.9353516420290394e-05, "loss": 0.6203, "step": 5181 }, { "epoch": 0.14170859768103258, "grad_norm": 1.7798049449920654, "learning_rate": 1.9353203092994532e-05, "loss": 0.5589, "step": 5182 }, { "epoch": 0.1417359439947495, "grad_norm": 1.8757188320159912, "learning_rate": 1.9352889692325428e-05, "loss": 0.5391, "step": 5183 }, { "epoch": 0.1417632903084664, "grad_norm": 2.3324594497680664, "learning_rate": 1.9352576218285536e-05, "loss": 0.5858, "step": 5184 }, { "epoch": 0.14179063662218333, "grad_norm": 1.537792682647705, "learning_rate": 1.935226267087731e-05, "loss": 0.5733, "step": 5185 }, { "epoch": 0.14181798293590023, "grad_norm": 1.6001917123794556, "learning_rate": 1.9351949050103226e-05, "loss": 0.5796, "step": 5186 }, { "epoch": 0.14184532924961715, "grad_norm": 1.8441439867019653, "learning_rate": 1.9351635355965727e-05, "loss": 0.5915, "step": 5187 }, { "epoch": 0.14187267556333405, "grad_norm": 1.7215009927749634, "learning_rate": 1.935132158846728e-05, "loss": 0.5835, "step": 5188 }, { "epoch": 0.14190002187705097, "grad_norm": 1.839896321296692, "learning_rate": 1.935100774761035e-05, "loss": 0.5695, "step": 5189 }, { "epoch": 0.14192736819076787, "grad_norm": 1.569682240486145, "learning_rate": 1.9350693833397392e-05, "loss": 0.5642, "step": 5190 }, { "epoch": 0.1419547145044848, "grad_norm": 1.4969977140426636, "learning_rate": 1.9350379845830875e-05, "loss": 0.592, "step": 5191 }, { "epoch": 0.1419820608182017, "grad_norm": 2.6024062633514404, "learning_rate": 1.9350065784913256e-05, "loss": 0.5084, "step": 5192 }, { "epoch": 0.14200940713191862, "grad_norm": 1.7208161354064941, "learning_rate": 1.9349751650647005e-05, "loss": 0.5318, "step": 5193 }, { "epoch": 0.14203675344563552, "grad_norm": 1.8552613258361816, "learning_rate": 1.9349437443034585e-05, "loss": 0.575, "step": 5194 }, { "epoch": 0.14206409975935244, "grad_norm": 1.824153184890747, "learning_rate": 1.9349123162078457e-05, "loss": 0.5605, "step": 5195 }, { "epoch": 0.14209144607306934, "grad_norm": 1.5491474866867065, "learning_rate": 1.934880880778109e-05, "loss": 0.4222, "step": 5196 }, { "epoch": 0.14211879238678626, "grad_norm": 1.5488183498382568, "learning_rate": 1.9348494380144948e-05, "loss": 0.6203, "step": 5197 }, { "epoch": 0.14214613870050316, "grad_norm": 2.0063695907592773, "learning_rate": 1.9348179879172503e-05, "loss": 0.643, "step": 5198 }, { "epoch": 0.14217348501422009, "grad_norm": 1.8005778789520264, "learning_rate": 1.934786530486621e-05, "loss": 0.5872, "step": 5199 }, { "epoch": 0.14220083132793698, "grad_norm": 1.5200982093811035, "learning_rate": 1.9347550657228548e-05, "loss": 0.5792, "step": 5200 }, { "epoch": 0.1422281776416539, "grad_norm": 1.5297293663024902, "learning_rate": 1.9347235936261986e-05, "loss": 0.5596, "step": 5201 }, { "epoch": 0.1422555239553708, "grad_norm": 1.5638467073440552, "learning_rate": 1.9346921141968983e-05, "loss": 0.935, "step": 5202 }, { "epoch": 0.14228287026908773, "grad_norm": 2.083009958267212, "learning_rate": 1.9346606274352015e-05, "loss": 0.5633, "step": 5203 }, { "epoch": 0.14231021658280463, "grad_norm": 1.4821852445602417, "learning_rate": 1.934629133341355e-05, "loss": 0.5757, "step": 5204 }, { "epoch": 0.14233756289652155, "grad_norm": 1.57541823387146, "learning_rate": 1.9345976319156063e-05, "loss": 0.5679, "step": 5205 }, { "epoch": 0.14236490921023845, "grad_norm": 1.909098744392395, "learning_rate": 1.9345661231582023e-05, "loss": 0.5738, "step": 5206 }, { "epoch": 0.14239225552395537, "grad_norm": 1.6864691972732544, "learning_rate": 1.9345346070693897e-05, "loss": 0.5669, "step": 5207 }, { "epoch": 0.14241960183767227, "grad_norm": 1.4411487579345703, "learning_rate": 1.9345030836494164e-05, "loss": 0.5393, "step": 5208 }, { "epoch": 0.1424469481513892, "grad_norm": 1.5123423337936401, "learning_rate": 1.934471552898529e-05, "loss": 0.5934, "step": 5209 }, { "epoch": 0.1424742944651061, "grad_norm": 1.9108980894088745, "learning_rate": 1.9344400148169756e-05, "loss": 0.4751, "step": 5210 }, { "epoch": 0.14250164077882302, "grad_norm": 1.6300461292266846, "learning_rate": 1.9344084694050028e-05, "loss": 0.6079, "step": 5211 }, { "epoch": 0.14252898709253992, "grad_norm": 1.3197377920150757, "learning_rate": 1.9343769166628594e-05, "loss": 0.5827, "step": 5212 }, { "epoch": 0.14255633340625684, "grad_norm": 1.7488454580307007, "learning_rate": 1.9343453565907913e-05, "loss": 0.6241, "step": 5213 }, { "epoch": 0.14258367971997374, "grad_norm": 1.356586217880249, "learning_rate": 1.934313789189047e-05, "loss": 0.5315, "step": 5214 }, { "epoch": 0.14261102603369066, "grad_norm": 1.5930887460708618, "learning_rate": 1.934282214457874e-05, "loss": 0.5628, "step": 5215 }, { "epoch": 0.14263837234740756, "grad_norm": 2.101240873336792, "learning_rate": 1.9342506323975198e-05, "loss": 0.6131, "step": 5216 }, { "epoch": 0.14266571866112449, "grad_norm": 1.6239625215530396, "learning_rate": 1.9342190430082326e-05, "loss": 0.5635, "step": 5217 }, { "epoch": 0.14269306497484138, "grad_norm": 1.7000666856765747, "learning_rate": 1.9341874462902595e-05, "loss": 0.5859, "step": 5218 }, { "epoch": 0.1427204112885583, "grad_norm": 2.054269552230835, "learning_rate": 1.9341558422438495e-05, "loss": 0.6144, "step": 5219 }, { "epoch": 0.1427477576022752, "grad_norm": 3.1924116611480713, "learning_rate": 1.9341242308692493e-05, "loss": 0.5367, "step": 5220 }, { "epoch": 0.14277510391599213, "grad_norm": 2.8355157375335693, "learning_rate": 1.9340926121667075e-05, "loss": 0.5934, "step": 5221 }, { "epoch": 0.14280245022970903, "grad_norm": 2.367147922515869, "learning_rate": 1.9340609861364718e-05, "loss": 0.5706, "step": 5222 }, { "epoch": 0.14282979654342595, "grad_norm": 1.7380956411361694, "learning_rate": 1.934029352778791e-05, "loss": 0.5758, "step": 5223 }, { "epoch": 0.14285714285714285, "grad_norm": 2.179407835006714, "learning_rate": 1.9339977120939125e-05, "loss": 0.5214, "step": 5224 }, { "epoch": 0.14288448917085977, "grad_norm": 1.5579137802124023, "learning_rate": 1.933966064082085e-05, "loss": 0.923, "step": 5225 }, { "epoch": 0.14291183548457667, "grad_norm": 3.2350757122039795, "learning_rate": 1.9339344087435562e-05, "loss": 0.606, "step": 5226 }, { "epoch": 0.1429391817982936, "grad_norm": 3.042978525161743, "learning_rate": 1.933902746078575e-05, "loss": 0.9202, "step": 5227 }, { "epoch": 0.1429665281120105, "grad_norm": 1.8829814195632935, "learning_rate": 1.9338710760873897e-05, "loss": 0.4421, "step": 5228 }, { "epoch": 0.14299387442572742, "grad_norm": 2.1874759197235107, "learning_rate": 1.9338393987702486e-05, "loss": 0.9082, "step": 5229 }, { "epoch": 0.14302122073944432, "grad_norm": 3.0307958126068115, "learning_rate": 1.9338077141274e-05, "loss": 0.5914, "step": 5230 }, { "epoch": 0.14304856705316124, "grad_norm": 1.4322099685668945, "learning_rate": 1.9337760221590928e-05, "loss": 0.5426, "step": 5231 }, { "epoch": 0.14307591336687814, "grad_norm": 1.5841529369354248, "learning_rate": 1.9337443228655755e-05, "loss": 0.5796, "step": 5232 }, { "epoch": 0.14310325968059506, "grad_norm": 2.543675422668457, "learning_rate": 1.9337126162470968e-05, "loss": 0.5931, "step": 5233 }, { "epoch": 0.14313060599431196, "grad_norm": 1.5687637329101562, "learning_rate": 1.9336809023039054e-05, "loss": 0.572, "step": 5234 }, { "epoch": 0.14315795230802889, "grad_norm": 1.7898008823394775, "learning_rate": 1.9336491810362498e-05, "loss": 0.5678, "step": 5235 }, { "epoch": 0.14318529862174578, "grad_norm": 1.361047625541687, "learning_rate": 1.9336174524443795e-05, "loss": 0.4581, "step": 5236 }, { "epoch": 0.1432126449354627, "grad_norm": 1.3180040121078491, "learning_rate": 1.9335857165285427e-05, "loss": 0.454, "step": 5237 }, { "epoch": 0.1432399912491796, "grad_norm": 1.4163626432418823, "learning_rate": 1.933553973288989e-05, "loss": 0.6074, "step": 5238 }, { "epoch": 0.14326733756289653, "grad_norm": 1.7719416618347168, "learning_rate": 1.933522222725967e-05, "loss": 0.9411, "step": 5239 }, { "epoch": 0.14329468387661343, "grad_norm": 1.7709509134292603, "learning_rate": 1.9334904648397258e-05, "loss": 0.5773, "step": 5240 }, { "epoch": 0.14332203019033035, "grad_norm": 1.7215341329574585, "learning_rate": 1.9334586996305148e-05, "loss": 0.5657, "step": 5241 }, { "epoch": 0.14334937650404725, "grad_norm": 1.8203374147415161, "learning_rate": 1.9334269270985827e-05, "loss": 0.9235, "step": 5242 }, { "epoch": 0.14337672281776417, "grad_norm": 1.2918851375579834, "learning_rate": 1.9333951472441792e-05, "loss": 0.5743, "step": 5243 }, { "epoch": 0.14340406913148107, "grad_norm": 1.8963403701782227, "learning_rate": 1.9333633600675536e-05, "loss": 0.5256, "step": 5244 }, { "epoch": 0.143431415445198, "grad_norm": 1.2953174114227295, "learning_rate": 1.9333315655689547e-05, "loss": 0.5997, "step": 5245 }, { "epoch": 0.1434587617589149, "grad_norm": 1.5023224353790283, "learning_rate": 1.9332997637486328e-05, "loss": 0.6095, "step": 5246 }, { "epoch": 0.14348610807263182, "grad_norm": 1.5598891973495483, "learning_rate": 1.9332679546068366e-05, "loss": 0.5762, "step": 5247 }, { "epoch": 0.14351345438634872, "grad_norm": 1.6092067956924438, "learning_rate": 1.9332361381438162e-05, "loss": 0.5873, "step": 5248 }, { "epoch": 0.14354080070006564, "grad_norm": 1.3137255907058716, "learning_rate": 1.933204314359821e-05, "loss": 0.5726, "step": 5249 }, { "epoch": 0.14356814701378254, "grad_norm": 1.7664128541946411, "learning_rate": 1.9331724832551e-05, "loss": 0.5505, "step": 5250 }, { "epoch": 0.14359549332749946, "grad_norm": 1.9597151279449463, "learning_rate": 1.933140644829904e-05, "loss": 0.541, "step": 5251 }, { "epoch": 0.14362283964121636, "grad_norm": 1.4610716104507446, "learning_rate": 1.933108799084482e-05, "loss": 0.5493, "step": 5252 }, { "epoch": 0.14365018595493328, "grad_norm": 1.9889508485794067, "learning_rate": 1.9330769460190845e-05, "loss": 0.5902, "step": 5253 }, { "epoch": 0.14367753226865018, "grad_norm": 1.7222900390625, "learning_rate": 1.9330450856339604e-05, "loss": 0.4756, "step": 5254 }, { "epoch": 0.1437048785823671, "grad_norm": 1.6050490140914917, "learning_rate": 1.9330132179293603e-05, "loss": 0.5732, "step": 5255 }, { "epoch": 0.143732224896084, "grad_norm": 1.725292682647705, "learning_rate": 1.9329813429055342e-05, "loss": 0.5588, "step": 5256 }, { "epoch": 0.14375957120980093, "grad_norm": 1.5654114484786987, "learning_rate": 1.9329494605627323e-05, "loss": 0.5404, "step": 5257 }, { "epoch": 0.14378691752351783, "grad_norm": 2.326768159866333, "learning_rate": 1.932917570901204e-05, "loss": 0.5876, "step": 5258 }, { "epoch": 0.14381426383723475, "grad_norm": 2.212597608566284, "learning_rate": 1.9328856739212e-05, "loss": 0.5948, "step": 5259 }, { "epoch": 0.14384161015095165, "grad_norm": 1.7855511903762817, "learning_rate": 1.9328537696229706e-05, "loss": 0.5926, "step": 5260 }, { "epoch": 0.14386895646466857, "grad_norm": 1.8182018995285034, "learning_rate": 1.9328218580067656e-05, "loss": 0.5669, "step": 5261 }, { "epoch": 0.14389630277838547, "grad_norm": 1.2895970344543457, "learning_rate": 1.9327899390728356e-05, "loss": 0.5578, "step": 5262 }, { "epoch": 0.1439236490921024, "grad_norm": 1.5288567543029785, "learning_rate": 1.9327580128214316e-05, "loss": 0.5412, "step": 5263 }, { "epoch": 0.1439509954058193, "grad_norm": 1.5451871156692505, "learning_rate": 1.932726079252803e-05, "loss": 0.5765, "step": 5264 }, { "epoch": 0.14397834171953622, "grad_norm": 1.49815034866333, "learning_rate": 1.932694138367201e-05, "loss": 0.5495, "step": 5265 }, { "epoch": 0.14400568803325312, "grad_norm": 1.6841410398483276, "learning_rate": 1.932662190164876e-05, "loss": 0.5756, "step": 5266 }, { "epoch": 0.14403303434697004, "grad_norm": 1.6875792741775513, "learning_rate": 1.9326302346460782e-05, "loss": 0.554, "step": 5267 }, { "epoch": 0.14406038066068694, "grad_norm": 1.2592530250549316, "learning_rate": 1.932598271811059e-05, "loss": 0.5407, "step": 5268 }, { "epoch": 0.14408772697440386, "grad_norm": 1.6545557975769043, "learning_rate": 1.932566301660069e-05, "loss": 0.5884, "step": 5269 }, { "epoch": 0.14411507328812076, "grad_norm": 2.216320037841797, "learning_rate": 1.9325343241933586e-05, "loss": 0.6417, "step": 5270 }, { "epoch": 0.14414241960183768, "grad_norm": 1.633235216140747, "learning_rate": 1.9325023394111788e-05, "loss": 0.5939, "step": 5271 }, { "epoch": 0.14416976591555458, "grad_norm": 1.4092254638671875, "learning_rate": 1.9324703473137808e-05, "loss": 0.5536, "step": 5272 }, { "epoch": 0.1441971122292715, "grad_norm": 1.772351622581482, "learning_rate": 1.932438347901415e-05, "loss": 0.4567, "step": 5273 }, { "epoch": 0.1442244585429884, "grad_norm": 1.6181846857070923, "learning_rate": 1.932406341174333e-05, "loss": 0.58, "step": 5274 }, { "epoch": 0.14425180485670533, "grad_norm": 2.122270345687866, "learning_rate": 1.932374327132786e-05, "loss": 0.5909, "step": 5275 }, { "epoch": 0.14427915117042223, "grad_norm": 1.9676047563552856, "learning_rate": 1.932342305777024e-05, "loss": 0.9676, "step": 5276 }, { "epoch": 0.14430649748413915, "grad_norm": 1.6733115911483765, "learning_rate": 1.9323102771072998e-05, "loss": 0.5758, "step": 5277 }, { "epoch": 0.14433384379785605, "grad_norm": 1.6039015054702759, "learning_rate": 1.9322782411238634e-05, "loss": 0.5502, "step": 5278 }, { "epoch": 0.14436119011157297, "grad_norm": 1.5821269750595093, "learning_rate": 1.9322461978269667e-05, "loss": 0.5952, "step": 5279 }, { "epoch": 0.14438853642528987, "grad_norm": 1.5555037260055542, "learning_rate": 1.9322141472168608e-05, "loss": 0.5975, "step": 5280 }, { "epoch": 0.1444158827390068, "grad_norm": 1.870831847190857, "learning_rate": 1.9321820892937972e-05, "loss": 0.5582, "step": 5281 }, { "epoch": 0.1444432290527237, "grad_norm": 1.4091408252716064, "learning_rate": 1.932150024058028e-05, "loss": 0.5826, "step": 5282 }, { "epoch": 0.1444705753664406, "grad_norm": 1.547115683555603, "learning_rate": 1.9321179515098035e-05, "loss": 0.5863, "step": 5283 }, { "epoch": 0.14449792168015752, "grad_norm": 1.5603045225143433, "learning_rate": 1.932085871649376e-05, "loss": 0.6097, "step": 5284 }, { "epoch": 0.1445252679938744, "grad_norm": 1.3830161094665527, "learning_rate": 1.9320537844769977e-05, "loss": 0.5903, "step": 5285 }, { "epoch": 0.14455261430759134, "grad_norm": 1.6696795225143433, "learning_rate": 1.932021689992919e-05, "loss": 0.9243, "step": 5286 }, { "epoch": 0.14457996062130823, "grad_norm": 1.6671425104141235, "learning_rate": 1.9319895881973935e-05, "loss": 0.5182, "step": 5287 }, { "epoch": 0.14460730693502516, "grad_norm": 1.4025756120681763, "learning_rate": 1.931957479090671e-05, "loss": 0.9238, "step": 5288 }, { "epoch": 0.14463465324874206, "grad_norm": 1.695307731628418, "learning_rate": 1.9319253626730047e-05, "loss": 0.5405, "step": 5289 }, { "epoch": 0.14466199956245898, "grad_norm": 2.4838998317718506, "learning_rate": 1.9318932389446464e-05, "loss": 0.5479, "step": 5290 }, { "epoch": 0.14468934587617588, "grad_norm": 1.6984624862670898, "learning_rate": 1.9318611079058476e-05, "loss": 0.5605, "step": 5291 }, { "epoch": 0.1447166921898928, "grad_norm": 1.6131030321121216, "learning_rate": 1.9318289695568607e-05, "loss": 0.5879, "step": 5292 }, { "epoch": 0.1447440385036097, "grad_norm": 1.7469544410705566, "learning_rate": 1.9317968238979378e-05, "loss": 0.9732, "step": 5293 }, { "epoch": 0.14477138481732663, "grad_norm": 1.4371768236160278, "learning_rate": 1.9317646709293313e-05, "loss": 0.5621, "step": 5294 }, { "epoch": 0.14479873113104352, "grad_norm": 1.673248052597046, "learning_rate": 1.931732510651293e-05, "loss": 0.6236, "step": 5295 }, { "epoch": 0.14482607744476045, "grad_norm": 1.545741081237793, "learning_rate": 1.9317003430640752e-05, "loss": 0.6325, "step": 5296 }, { "epoch": 0.14485342375847735, "grad_norm": 2.0033679008483887, "learning_rate": 1.9316681681679304e-05, "loss": 0.6097, "step": 5297 }, { "epoch": 0.14488077007219427, "grad_norm": 1.6115808486938477, "learning_rate": 1.9316359859631114e-05, "loss": 0.5808, "step": 5298 }, { "epoch": 0.14490811638591117, "grad_norm": 1.5674107074737549, "learning_rate": 1.93160379644987e-05, "loss": 0.9017, "step": 5299 }, { "epoch": 0.1449354626996281, "grad_norm": 1.829674243927002, "learning_rate": 1.931571599628459e-05, "loss": 0.566, "step": 5300 }, { "epoch": 0.144962809013345, "grad_norm": 1.705660343170166, "learning_rate": 1.931539395499131e-05, "loss": 0.5588, "step": 5301 }, { "epoch": 0.14499015532706191, "grad_norm": 1.6861696243286133, "learning_rate": 1.9315071840621387e-05, "loss": 0.5958, "step": 5302 }, { "epoch": 0.1450175016407788, "grad_norm": 1.470414161682129, "learning_rate": 1.9314749653177345e-05, "loss": 0.9463, "step": 5303 }, { "epoch": 0.14504484795449574, "grad_norm": 2.0868570804595947, "learning_rate": 1.9314427392661713e-05, "loss": 0.5527, "step": 5304 }, { "epoch": 0.14507219426821263, "grad_norm": 1.5142848491668701, "learning_rate": 1.9314105059077023e-05, "loss": 0.5818, "step": 5305 }, { "epoch": 0.14509954058192956, "grad_norm": 1.4583126306533813, "learning_rate": 1.9313782652425798e-05, "loss": 0.5799, "step": 5306 }, { "epoch": 0.14512688689564646, "grad_norm": 2.122936487197876, "learning_rate": 1.9313460172710568e-05, "loss": 0.5357, "step": 5307 }, { "epoch": 0.14515423320936338, "grad_norm": 1.425917387008667, "learning_rate": 1.9313137619933867e-05, "loss": 0.6357, "step": 5308 }, { "epoch": 0.14518157952308028, "grad_norm": 2.39925217628479, "learning_rate": 1.931281499409822e-05, "loss": 0.5904, "step": 5309 }, { "epoch": 0.1452089258367972, "grad_norm": 1.62725830078125, "learning_rate": 1.9312492295206158e-05, "loss": 0.5918, "step": 5310 }, { "epoch": 0.1452362721505141, "grad_norm": 1.6781418323516846, "learning_rate": 1.9312169523260217e-05, "loss": 0.5901, "step": 5311 }, { "epoch": 0.14526361846423103, "grad_norm": 2.295034170150757, "learning_rate": 1.9311846678262926e-05, "loss": 0.5275, "step": 5312 }, { "epoch": 0.14529096477794792, "grad_norm": 1.6074771881103516, "learning_rate": 1.9311523760216823e-05, "loss": 0.5798, "step": 5313 }, { "epoch": 0.14531831109166485, "grad_norm": 1.8748838901519775, "learning_rate": 1.931120076912443e-05, "loss": 0.6104, "step": 5314 }, { "epoch": 0.14534565740538175, "grad_norm": 1.5763866901397705, "learning_rate": 1.9310877704988292e-05, "loss": 0.6153, "step": 5315 }, { "epoch": 0.14537300371909867, "grad_norm": 1.2596551179885864, "learning_rate": 1.9310554567810937e-05, "loss": 0.4381, "step": 5316 }, { "epoch": 0.14540035003281557, "grad_norm": 1.3965849876403809, "learning_rate": 1.9310231357594903e-05, "loss": 0.5848, "step": 5317 }, { "epoch": 0.1454276963465325, "grad_norm": 1.3882962465286255, "learning_rate": 1.930990807434272e-05, "loss": 0.5581, "step": 5318 }, { "epoch": 0.1454550426602494, "grad_norm": 1.6546155214309692, "learning_rate": 1.9309584718056933e-05, "loss": 0.9389, "step": 5319 }, { "epoch": 0.14548238897396631, "grad_norm": 1.6614888906478882, "learning_rate": 1.9309261288740072e-05, "loss": 0.5772, "step": 5320 }, { "epoch": 0.1455097352876832, "grad_norm": 1.9055922031402588, "learning_rate": 1.9308937786394674e-05, "loss": 0.5849, "step": 5321 }, { "epoch": 0.14553708160140014, "grad_norm": 1.6099226474761963, "learning_rate": 1.9308614211023278e-05, "loss": 0.5687, "step": 5322 }, { "epoch": 0.14556442791511703, "grad_norm": 1.7278701066970825, "learning_rate": 1.9308290562628427e-05, "loss": 0.6184, "step": 5323 }, { "epoch": 0.14559177422883396, "grad_norm": 1.3547158241271973, "learning_rate": 1.9307966841212654e-05, "loss": 0.4412, "step": 5324 }, { "epoch": 0.14561912054255086, "grad_norm": 1.38673996925354, "learning_rate": 1.9307643046778505e-05, "loss": 0.5767, "step": 5325 }, { "epoch": 0.14564646685626778, "grad_norm": 1.6228440999984741, "learning_rate": 1.930731917932851e-05, "loss": 0.9387, "step": 5326 }, { "epoch": 0.14567381316998468, "grad_norm": 1.4815762042999268, "learning_rate": 1.930699523886522e-05, "loss": 0.9405, "step": 5327 }, { "epoch": 0.1457011594837016, "grad_norm": 1.9235212802886963, "learning_rate": 1.9306671225391168e-05, "loss": 0.5208, "step": 5328 }, { "epoch": 0.1457285057974185, "grad_norm": 1.7436277866363525, "learning_rate": 1.93063471389089e-05, "loss": 0.5163, "step": 5329 }, { "epoch": 0.14575585211113543, "grad_norm": 1.6109622716903687, "learning_rate": 1.930602297942096e-05, "loss": 0.58, "step": 5330 }, { "epoch": 0.14578319842485232, "grad_norm": 2.8263535499572754, "learning_rate": 1.9305698746929887e-05, "loss": 0.5823, "step": 5331 }, { "epoch": 0.14581054473856925, "grad_norm": 2.089931011199951, "learning_rate": 1.9305374441438226e-05, "loss": 0.9137, "step": 5332 }, { "epoch": 0.14583789105228614, "grad_norm": 1.4466091394424438, "learning_rate": 1.930505006294852e-05, "loss": 0.544, "step": 5333 }, { "epoch": 0.14586523736600307, "grad_norm": 1.5950192213058472, "learning_rate": 1.9304725611463317e-05, "loss": 0.6135, "step": 5334 }, { "epoch": 0.14589258367971997, "grad_norm": 1.614055871963501, "learning_rate": 1.9304401086985158e-05, "loss": 0.5612, "step": 5335 }, { "epoch": 0.1459199299934369, "grad_norm": 1.3336024284362793, "learning_rate": 1.9304076489516595e-05, "loss": 0.5518, "step": 5336 }, { "epoch": 0.1459472763071538, "grad_norm": 1.4491504430770874, "learning_rate": 1.9303751819060164e-05, "loss": 0.5988, "step": 5337 }, { "epoch": 0.14597462262087071, "grad_norm": 1.820648193359375, "learning_rate": 1.9303427075618425e-05, "loss": 0.5547, "step": 5338 }, { "epoch": 0.1460019689345876, "grad_norm": 1.9711741209030151, "learning_rate": 1.9303102259193912e-05, "loss": 0.564, "step": 5339 }, { "epoch": 0.14602931524830454, "grad_norm": 1.8070420026779175, "learning_rate": 1.930277736978919e-05, "loss": 0.5432, "step": 5340 }, { "epoch": 0.14605666156202143, "grad_norm": 1.5179940462112427, "learning_rate": 1.930245240740679e-05, "loss": 0.5639, "step": 5341 }, { "epoch": 0.14608400787573836, "grad_norm": 1.572796106338501, "learning_rate": 1.9302127372049268e-05, "loss": 0.618, "step": 5342 }, { "epoch": 0.14611135418945526, "grad_norm": 1.431678295135498, "learning_rate": 1.9301802263719176e-05, "loss": 0.548, "step": 5343 }, { "epoch": 0.14613870050317218, "grad_norm": 1.6554838418960571, "learning_rate": 1.9301477082419066e-05, "loss": 0.5748, "step": 5344 }, { "epoch": 0.14616604681688908, "grad_norm": 1.2868826389312744, "learning_rate": 1.9301151828151482e-05, "loss": 0.3913, "step": 5345 }, { "epoch": 0.146193393130606, "grad_norm": 1.2772136926651, "learning_rate": 1.9300826500918982e-05, "loss": 0.5571, "step": 5346 }, { "epoch": 0.1462207394443229, "grad_norm": 2.314370632171631, "learning_rate": 1.9300501100724115e-05, "loss": 0.5929, "step": 5347 }, { "epoch": 0.14624808575803983, "grad_norm": 1.551882028579712, "learning_rate": 1.930017562756943e-05, "loss": 0.5925, "step": 5348 }, { "epoch": 0.14627543207175672, "grad_norm": 1.2540996074676514, "learning_rate": 1.929985008145749e-05, "loss": 0.5867, "step": 5349 }, { "epoch": 0.14630277838547365, "grad_norm": 1.6734579801559448, "learning_rate": 1.929952446239084e-05, "loss": 0.5898, "step": 5350 }, { "epoch": 0.14633012469919054, "grad_norm": 1.3088469505310059, "learning_rate": 1.929919877037204e-05, "loss": 0.5664, "step": 5351 }, { "epoch": 0.14635747101290747, "grad_norm": 1.5123388767242432, "learning_rate": 1.9298873005403645e-05, "loss": 0.501, "step": 5352 }, { "epoch": 0.14638481732662437, "grad_norm": 1.6751025915145874, "learning_rate": 1.9298547167488206e-05, "loss": 0.963, "step": 5353 }, { "epoch": 0.1464121636403413, "grad_norm": 1.3202917575836182, "learning_rate": 1.929822125662828e-05, "loss": 0.6643, "step": 5354 }, { "epoch": 0.1464395099540582, "grad_norm": 1.7173523902893066, "learning_rate": 1.9297895272826426e-05, "loss": 0.5918, "step": 5355 }, { "epoch": 0.14646685626777511, "grad_norm": 1.6161056756973267, "learning_rate": 1.9297569216085198e-05, "loss": 0.5776, "step": 5356 }, { "epoch": 0.146494202581492, "grad_norm": 1.5182000398635864, "learning_rate": 1.929724308640716e-05, "loss": 0.5559, "step": 5357 }, { "epoch": 0.14652154889520894, "grad_norm": 2.5709261894226074, "learning_rate": 1.9296916883794864e-05, "loss": 0.411, "step": 5358 }, { "epoch": 0.14654889520892583, "grad_norm": 1.3807272911071777, "learning_rate": 1.929659060825087e-05, "loss": 0.9532, "step": 5359 }, { "epoch": 0.14657624152264276, "grad_norm": 1.518397569656372, "learning_rate": 1.9296264259777743e-05, "loss": 0.582, "step": 5360 }, { "epoch": 0.14660358783635966, "grad_norm": 1.4474272727966309, "learning_rate": 1.9295937838378033e-05, "loss": 0.9163, "step": 5361 }, { "epoch": 0.14663093415007658, "grad_norm": 1.7623133659362793, "learning_rate": 1.9295611344054313e-05, "loss": 0.5646, "step": 5362 }, { "epoch": 0.14665828046379348, "grad_norm": 1.9533116817474365, "learning_rate": 1.9295284776809136e-05, "loss": 0.6532, "step": 5363 }, { "epoch": 0.1466856267775104, "grad_norm": 1.5757973194122314, "learning_rate": 1.9294958136645065e-05, "loss": 0.579, "step": 5364 }, { "epoch": 0.1467129730912273, "grad_norm": 1.4110668897628784, "learning_rate": 1.9294631423564664e-05, "loss": 0.586, "step": 5365 }, { "epoch": 0.14674031940494422, "grad_norm": 1.3878374099731445, "learning_rate": 1.9294304637570492e-05, "loss": 0.9257, "step": 5366 }, { "epoch": 0.14676766571866112, "grad_norm": 1.398935079574585, "learning_rate": 1.929397777866512e-05, "loss": 0.5593, "step": 5367 }, { "epoch": 0.14679501203237805, "grad_norm": 1.4612324237823486, "learning_rate": 1.9293650846851107e-05, "loss": 0.5529, "step": 5368 }, { "epoch": 0.14682235834609494, "grad_norm": 1.7570953369140625, "learning_rate": 1.929332384213102e-05, "loss": 0.583, "step": 5369 }, { "epoch": 0.14684970465981187, "grad_norm": 1.8397607803344727, "learning_rate": 1.929299676450742e-05, "loss": 0.5595, "step": 5370 }, { "epoch": 0.14687705097352877, "grad_norm": 4.94801664352417, "learning_rate": 1.9292669613982875e-05, "loss": 0.9554, "step": 5371 }, { "epoch": 0.1469043972872457, "grad_norm": 1.6659256219863892, "learning_rate": 1.9292342390559956e-05, "loss": 0.5581, "step": 5372 }, { "epoch": 0.1469317436009626, "grad_norm": 2.0904366970062256, "learning_rate": 1.9292015094241223e-05, "loss": 0.8754, "step": 5373 }, { "epoch": 0.1469590899146795, "grad_norm": 1.4602097272872925, "learning_rate": 1.9291687725029247e-05, "loss": 0.5974, "step": 5374 }, { "epoch": 0.1469864362283964, "grad_norm": 1.4543311595916748, "learning_rate": 1.9291360282926596e-05, "loss": 0.5884, "step": 5375 }, { "epoch": 0.14701378254211334, "grad_norm": 1.5094765424728394, "learning_rate": 1.9291032767935838e-05, "loss": 0.522, "step": 5376 }, { "epoch": 0.14704112885583023, "grad_norm": 2.366511583328247, "learning_rate": 1.9290705180059545e-05, "loss": 0.6097, "step": 5377 }, { "epoch": 0.14706847516954716, "grad_norm": 1.6925387382507324, "learning_rate": 1.9290377519300282e-05, "loss": 0.5718, "step": 5378 }, { "epoch": 0.14709582148326406, "grad_norm": 1.2410833835601807, "learning_rate": 1.929004978566062e-05, "loss": 0.5707, "step": 5379 }, { "epoch": 0.14712316779698098, "grad_norm": 1.3146705627441406, "learning_rate": 1.9289721979143138e-05, "loss": 0.5841, "step": 5380 }, { "epoch": 0.14715051411069788, "grad_norm": 1.6038818359375, "learning_rate": 1.9289394099750397e-05, "loss": 0.5635, "step": 5381 }, { "epoch": 0.1471778604244148, "grad_norm": 1.3795417547225952, "learning_rate": 1.9289066147484975e-05, "loss": 0.5677, "step": 5382 }, { "epoch": 0.1472052067381317, "grad_norm": 1.5612614154815674, "learning_rate": 1.928873812234944e-05, "loss": 0.5423, "step": 5383 }, { "epoch": 0.1472325530518486, "grad_norm": 1.3153342008590698, "learning_rate": 1.9288410024346374e-05, "loss": 0.5576, "step": 5384 }, { "epoch": 0.14725989936556552, "grad_norm": 1.4566384553909302, "learning_rate": 1.928808185347834e-05, "loss": 0.5592, "step": 5385 }, { "epoch": 0.14728724567928242, "grad_norm": 2.3215491771698, "learning_rate": 1.928775360974792e-05, "loss": 0.5819, "step": 5386 }, { "epoch": 0.14731459199299934, "grad_norm": 2.8422067165374756, "learning_rate": 1.928742529315769e-05, "loss": 0.5363, "step": 5387 }, { "epoch": 0.14734193830671624, "grad_norm": 1.458428144454956, "learning_rate": 1.9287096903710215e-05, "loss": 0.5727, "step": 5388 }, { "epoch": 0.14736928462043317, "grad_norm": 1.4128559827804565, "learning_rate": 1.9286768441408085e-05, "loss": 0.5762, "step": 5389 }, { "epoch": 0.14739663093415006, "grad_norm": 2.3439488410949707, "learning_rate": 1.9286439906253868e-05, "loss": 0.5869, "step": 5390 }, { "epoch": 0.147423977247867, "grad_norm": 1.4290310144424438, "learning_rate": 1.928611129825014e-05, "loss": 0.5551, "step": 5391 }, { "epoch": 0.14745132356158389, "grad_norm": 1.4844474792480469, "learning_rate": 1.9285782617399488e-05, "loss": 0.5435, "step": 5392 }, { "epoch": 0.1474786698753008, "grad_norm": 1.3924989700317383, "learning_rate": 1.928545386370448e-05, "loss": 0.592, "step": 5393 }, { "epoch": 0.1475060161890177, "grad_norm": 1.3494765758514404, "learning_rate": 1.9285125037167702e-05, "loss": 0.5737, "step": 5394 }, { "epoch": 0.14753336250273463, "grad_norm": 1.3825030326843262, "learning_rate": 1.9284796137791733e-05, "loss": 0.5966, "step": 5395 }, { "epoch": 0.14756070881645153, "grad_norm": 1.8630632162094116, "learning_rate": 1.9284467165579147e-05, "loss": 0.5052, "step": 5396 }, { "epoch": 0.14758805513016846, "grad_norm": 1.2382621765136719, "learning_rate": 1.9284138120532532e-05, "loss": 0.548, "step": 5397 }, { "epoch": 0.14761540144388535, "grad_norm": 1.4239037036895752, "learning_rate": 1.9283809002654464e-05, "loss": 0.5995, "step": 5398 }, { "epoch": 0.14764274775760228, "grad_norm": 1.3265224695205688, "learning_rate": 1.9283479811947526e-05, "loss": 0.5972, "step": 5399 }, { "epoch": 0.14767009407131917, "grad_norm": 1.2286173105239868, "learning_rate": 1.9283150548414306e-05, "loss": 0.5701, "step": 5400 }, { "epoch": 0.1476974403850361, "grad_norm": 1.3883849382400513, "learning_rate": 1.928282121205738e-05, "loss": 0.5685, "step": 5401 }, { "epoch": 0.147724786698753, "grad_norm": 1.287400484085083, "learning_rate": 1.928249180287933e-05, "loss": 0.5124, "step": 5402 }, { "epoch": 0.14775213301246992, "grad_norm": 1.3735764026641846, "learning_rate": 1.928216232088275e-05, "loss": 0.5626, "step": 5403 }, { "epoch": 0.14777947932618682, "grad_norm": 1.4425691366195679, "learning_rate": 1.9281832766070218e-05, "loss": 0.5849, "step": 5404 }, { "epoch": 0.14780682563990374, "grad_norm": 1.0814980268478394, "learning_rate": 1.928150313844432e-05, "loss": 0.54, "step": 5405 }, { "epoch": 0.14783417195362064, "grad_norm": 1.5702005624771118, "learning_rate": 1.9281173438007643e-05, "loss": 0.5301, "step": 5406 }, { "epoch": 0.14786151826733757, "grad_norm": 1.5085351467132568, "learning_rate": 1.9280843664762768e-05, "loss": 0.5776, "step": 5407 }, { "epoch": 0.14788886458105446, "grad_norm": 1.7637289762496948, "learning_rate": 1.928051381871229e-05, "loss": 0.5937, "step": 5408 }, { "epoch": 0.1479162108947714, "grad_norm": 1.4626926183700562, "learning_rate": 1.928018389985879e-05, "loss": 0.6013, "step": 5409 }, { "epoch": 0.14794355720848829, "grad_norm": 1.8012480735778809, "learning_rate": 1.927985390820486e-05, "loss": 0.9587, "step": 5410 }, { "epoch": 0.1479709035222052, "grad_norm": 1.9625558853149414, "learning_rate": 1.927952384375309e-05, "loss": 0.6038, "step": 5411 }, { "epoch": 0.1479982498359221, "grad_norm": 1.5080959796905518, "learning_rate": 1.9279193706506066e-05, "loss": 0.5602, "step": 5412 }, { "epoch": 0.14802559614963903, "grad_norm": 1.533024549484253, "learning_rate": 1.9278863496466375e-05, "loss": 0.5513, "step": 5413 }, { "epoch": 0.14805294246335593, "grad_norm": 1.6171997785568237, "learning_rate": 1.9278533213636616e-05, "loss": 0.5721, "step": 5414 }, { "epoch": 0.14808028877707285, "grad_norm": 1.2667405605316162, "learning_rate": 1.927820285801937e-05, "loss": 0.5512, "step": 5415 }, { "epoch": 0.14810763509078975, "grad_norm": 1.684256672859192, "learning_rate": 1.9277872429617236e-05, "loss": 0.5823, "step": 5416 }, { "epoch": 0.14813498140450668, "grad_norm": 1.4645870923995972, "learning_rate": 1.927754192843281e-05, "loss": 0.5884, "step": 5417 }, { "epoch": 0.14816232771822357, "grad_norm": 1.3010214567184448, "learning_rate": 1.927721135446867e-05, "loss": 0.5914, "step": 5418 }, { "epoch": 0.1481896740319405, "grad_norm": 2.43686580657959, "learning_rate": 1.9276880707727422e-05, "loss": 0.4329, "step": 5419 }, { "epoch": 0.1482170203456574, "grad_norm": 1.3722976446151733, "learning_rate": 1.9276549988211654e-05, "loss": 0.5703, "step": 5420 }, { "epoch": 0.14824436665937432, "grad_norm": 1.5235545635223389, "learning_rate": 1.927621919592396e-05, "loss": 0.5331, "step": 5421 }, { "epoch": 0.14827171297309122, "grad_norm": 1.281468391418457, "learning_rate": 1.927588833086694e-05, "loss": 0.5499, "step": 5422 }, { "epoch": 0.14829905928680814, "grad_norm": 1.618735432624817, "learning_rate": 1.9275557393043187e-05, "loss": 0.5512, "step": 5423 }, { "epoch": 0.14832640560052504, "grad_norm": 1.5313876867294312, "learning_rate": 1.9275226382455297e-05, "loss": 0.5712, "step": 5424 }, { "epoch": 0.14835375191424197, "grad_norm": 1.1158157587051392, "learning_rate": 1.9274895299105864e-05, "loss": 0.5852, "step": 5425 }, { "epoch": 0.14838109822795886, "grad_norm": 1.5124777555465698, "learning_rate": 1.9274564142997486e-05, "loss": 0.5931, "step": 5426 }, { "epoch": 0.1484084445416758, "grad_norm": 2.196155309677124, "learning_rate": 1.9274232914132766e-05, "loss": 0.5368, "step": 5427 }, { "epoch": 0.14843579085539269, "grad_norm": 1.9498229026794434, "learning_rate": 1.92739016125143e-05, "loss": 0.4107, "step": 5428 }, { "epoch": 0.1484631371691096, "grad_norm": 1.494011402130127, "learning_rate": 1.927357023814468e-05, "loss": 0.5447, "step": 5429 }, { "epoch": 0.1484904834828265, "grad_norm": 1.8501849174499512, "learning_rate": 1.9273238791026517e-05, "loss": 0.6095, "step": 5430 }, { "epoch": 0.14851782979654343, "grad_norm": 1.63454008102417, "learning_rate": 1.92729072711624e-05, "loss": 0.5766, "step": 5431 }, { "epoch": 0.14854517611026033, "grad_norm": 1.3806360960006714, "learning_rate": 1.927257567855494e-05, "loss": 0.5943, "step": 5432 }, { "epoch": 0.14857252242397725, "grad_norm": 4.561904430389404, "learning_rate": 1.927224401320673e-05, "loss": 0.6075, "step": 5433 }, { "epoch": 0.14859986873769415, "grad_norm": 1.3692286014556885, "learning_rate": 1.9271912275120378e-05, "loss": 0.5999, "step": 5434 }, { "epoch": 0.14862721505141108, "grad_norm": 1.6885403394699097, "learning_rate": 1.9271580464298483e-05, "loss": 0.5952, "step": 5435 }, { "epoch": 0.14865456136512797, "grad_norm": 1.3538764715194702, "learning_rate": 1.9271248580743648e-05, "loss": 0.5893, "step": 5436 }, { "epoch": 0.1486819076788449, "grad_norm": 1.5538384914398193, "learning_rate": 1.9270916624458475e-05, "loss": 0.5878, "step": 5437 }, { "epoch": 0.1487092539925618, "grad_norm": 1.9460680484771729, "learning_rate": 1.9270584595445572e-05, "loss": 0.5452, "step": 5438 }, { "epoch": 0.14873660030627872, "grad_norm": 1.36024808883667, "learning_rate": 1.9270252493707546e-05, "loss": 0.6034, "step": 5439 }, { "epoch": 0.14876394661999562, "grad_norm": 1.2796289920806885, "learning_rate": 1.926992031924699e-05, "loss": 0.6009, "step": 5440 }, { "epoch": 0.14879129293371254, "grad_norm": 1.4648326635360718, "learning_rate": 1.9269588072066522e-05, "loss": 0.5619, "step": 5441 }, { "epoch": 0.14881863924742944, "grad_norm": 1.319098949432373, "learning_rate": 1.9269255752168746e-05, "loss": 0.5587, "step": 5442 }, { "epoch": 0.14884598556114637, "grad_norm": 1.630630373954773, "learning_rate": 1.926892335955626e-05, "loss": 0.9444, "step": 5443 }, { "epoch": 0.14887333187486326, "grad_norm": 1.2954474687576294, "learning_rate": 1.9268590894231686e-05, "loss": 0.5502, "step": 5444 }, { "epoch": 0.1489006781885802, "grad_norm": 1.750186800956726, "learning_rate": 1.9268258356197624e-05, "loss": 0.9691, "step": 5445 }, { "epoch": 0.14892802450229708, "grad_norm": 1.2866177558898926, "learning_rate": 1.9267925745456682e-05, "loss": 0.5699, "step": 5446 }, { "epoch": 0.148955370816014, "grad_norm": 1.5889530181884766, "learning_rate": 1.926759306201147e-05, "loss": 0.589, "step": 5447 }, { "epoch": 0.1489827171297309, "grad_norm": 1.9288504123687744, "learning_rate": 1.9267260305864602e-05, "loss": 0.5753, "step": 5448 }, { "epoch": 0.14901006344344783, "grad_norm": 1.4881751537322998, "learning_rate": 1.926692747701868e-05, "loss": 0.5715, "step": 5449 }, { "epoch": 0.14903740975716473, "grad_norm": 2.4276862144470215, "learning_rate": 1.9266594575476322e-05, "loss": 0.5659, "step": 5450 }, { "epoch": 0.14906475607088165, "grad_norm": 2.2691328525543213, "learning_rate": 1.926626160124014e-05, "loss": 0.5572, "step": 5451 }, { "epoch": 0.14909210238459855, "grad_norm": 1.8992189168930054, "learning_rate": 1.9265928554312738e-05, "loss": 0.6197, "step": 5452 }, { "epoch": 0.14911944869831548, "grad_norm": 1.454654574394226, "learning_rate": 1.926559543469674e-05, "loss": 0.5752, "step": 5453 }, { "epoch": 0.14914679501203237, "grad_norm": 1.326054573059082, "learning_rate": 1.926526224239475e-05, "loss": 0.5701, "step": 5454 }, { "epoch": 0.1491741413257493, "grad_norm": 1.3378359079360962, "learning_rate": 1.9264928977409386e-05, "loss": 0.5767, "step": 5455 }, { "epoch": 0.1492014876394662, "grad_norm": 1.5066494941711426, "learning_rate": 1.926459563974326e-05, "loss": 0.5261, "step": 5456 }, { "epoch": 0.14922883395318312, "grad_norm": 1.4955990314483643, "learning_rate": 1.926426222939899e-05, "loss": 0.5299, "step": 5457 }, { "epoch": 0.14925618026690002, "grad_norm": 1.4248284101486206, "learning_rate": 1.926392874637919e-05, "loss": 0.5907, "step": 5458 }, { "epoch": 0.14928352658061694, "grad_norm": 1.4316850900650024, "learning_rate": 1.9263595190686476e-05, "loss": 0.5616, "step": 5459 }, { "epoch": 0.14931087289433384, "grad_norm": 1.5834006071090698, "learning_rate": 1.9263261562323465e-05, "loss": 0.575, "step": 5460 }, { "epoch": 0.14933821920805077, "grad_norm": 1.2343932390213013, "learning_rate": 1.9262927861292776e-05, "loss": 0.5343, "step": 5461 }, { "epoch": 0.14936556552176766, "grad_norm": 1.2508573532104492, "learning_rate": 1.926259408759702e-05, "loss": 0.5526, "step": 5462 }, { "epoch": 0.1493929118354846, "grad_norm": 1.4498745203018188, "learning_rate": 1.9262260241238823e-05, "loss": 0.5616, "step": 5463 }, { "epoch": 0.14942025814920148, "grad_norm": 1.5420904159545898, "learning_rate": 1.92619263222208e-05, "loss": 0.5374, "step": 5464 }, { "epoch": 0.1494476044629184, "grad_norm": 1.3352818489074707, "learning_rate": 1.926159233054557e-05, "loss": 0.5427, "step": 5465 }, { "epoch": 0.1494749507766353, "grad_norm": 2.425734519958496, "learning_rate": 1.926125826621576e-05, "loss": 0.9549, "step": 5466 }, { "epoch": 0.14950229709035223, "grad_norm": 1.4114092588424683, "learning_rate": 1.926092412923398e-05, "loss": 0.5515, "step": 5467 }, { "epoch": 0.14952964340406913, "grad_norm": 1.7032910585403442, "learning_rate": 1.9260589919602857e-05, "loss": 0.9268, "step": 5468 }, { "epoch": 0.14955698971778605, "grad_norm": 1.388841152191162, "learning_rate": 1.9260255637325018e-05, "loss": 0.6229, "step": 5469 }, { "epoch": 0.14958433603150295, "grad_norm": 1.5741466283798218, "learning_rate": 1.9259921282403074e-05, "loss": 0.5647, "step": 5470 }, { "epoch": 0.14961168234521988, "grad_norm": 1.6841233968734741, "learning_rate": 1.9259586854839653e-05, "loss": 0.5558, "step": 5471 }, { "epoch": 0.14963902865893677, "grad_norm": 2.102388620376587, "learning_rate": 1.925925235463738e-05, "loss": 0.9461, "step": 5472 }, { "epoch": 0.1496663749726537, "grad_norm": 1.306473970413208, "learning_rate": 1.925891778179888e-05, "loss": 0.5568, "step": 5473 }, { "epoch": 0.1496937212863706, "grad_norm": 1.9221563339233398, "learning_rate": 1.9258583136326777e-05, "loss": 0.9046, "step": 5474 }, { "epoch": 0.14972106760008752, "grad_norm": 2.5876262187957764, "learning_rate": 1.925824841822369e-05, "loss": 0.6039, "step": 5475 }, { "epoch": 0.14974841391380442, "grad_norm": 1.5443450212478638, "learning_rate": 1.9257913627492253e-05, "loss": 0.5792, "step": 5476 }, { "epoch": 0.14977576022752134, "grad_norm": 1.4147846698760986, "learning_rate": 1.9257578764135086e-05, "loss": 0.5923, "step": 5477 }, { "epoch": 0.14980310654123824, "grad_norm": 1.3551419973373413, "learning_rate": 1.9257243828154823e-05, "loss": 0.5169, "step": 5478 }, { "epoch": 0.14983045285495517, "grad_norm": 1.4630377292633057, "learning_rate": 1.9256908819554084e-05, "loss": 0.9615, "step": 5479 }, { "epoch": 0.14985779916867206, "grad_norm": 1.943724513053894, "learning_rate": 1.9256573738335504e-05, "loss": 0.5734, "step": 5480 }, { "epoch": 0.149885145482389, "grad_norm": 1.2583225965499878, "learning_rate": 1.9256238584501703e-05, "loss": 0.568, "step": 5481 }, { "epoch": 0.14991249179610588, "grad_norm": 1.4060170650482178, "learning_rate": 1.925590335805532e-05, "loss": 0.5579, "step": 5482 }, { "epoch": 0.1499398381098228, "grad_norm": 1.6132665872573853, "learning_rate": 1.9255568058998977e-05, "loss": 0.573, "step": 5483 }, { "epoch": 0.1499671844235397, "grad_norm": 1.4373162984848022, "learning_rate": 1.925523268733531e-05, "loss": 0.4372, "step": 5484 }, { "epoch": 0.14999453073725663, "grad_norm": 1.4534159898757935, "learning_rate": 1.9254897243066942e-05, "loss": 0.5828, "step": 5485 }, { "epoch": 0.15002187705097353, "grad_norm": 1.463220238685608, "learning_rate": 1.9254561726196515e-05, "loss": 0.5575, "step": 5486 }, { "epoch": 0.15004922336469043, "grad_norm": 1.3469916582107544, "learning_rate": 1.9254226136726656e-05, "loss": 0.5756, "step": 5487 }, { "epoch": 0.15007656967840735, "grad_norm": 1.3284670114517212, "learning_rate": 1.9253890474659992e-05, "loss": 0.5521, "step": 5488 }, { "epoch": 0.15010391599212425, "grad_norm": 1.6436138153076172, "learning_rate": 1.9253554739999166e-05, "loss": 0.9108, "step": 5489 }, { "epoch": 0.15013126230584117, "grad_norm": 1.514699101448059, "learning_rate": 1.9253218932746803e-05, "loss": 0.5647, "step": 5490 }, { "epoch": 0.15015860861955807, "grad_norm": 1.4063737392425537, "learning_rate": 1.9252883052905546e-05, "loss": 0.5744, "step": 5491 }, { "epoch": 0.150185954933275, "grad_norm": 1.5665920972824097, "learning_rate": 1.9252547100478024e-05, "loss": 0.5355, "step": 5492 }, { "epoch": 0.1502133012469919, "grad_norm": 1.3297017812728882, "learning_rate": 1.9252211075466873e-05, "loss": 0.9007, "step": 5493 }, { "epoch": 0.15024064756070882, "grad_norm": 1.5344611406326294, "learning_rate": 1.9251874977874733e-05, "loss": 0.6097, "step": 5494 }, { "epoch": 0.15026799387442571, "grad_norm": 1.360954761505127, "learning_rate": 1.9251538807704233e-05, "loss": 0.5706, "step": 5495 }, { "epoch": 0.15029534018814264, "grad_norm": 1.3473278284072876, "learning_rate": 1.9251202564958018e-05, "loss": 0.5869, "step": 5496 }, { "epoch": 0.15032268650185954, "grad_norm": 1.438353180885315, "learning_rate": 1.9250866249638722e-05, "loss": 0.5979, "step": 5497 }, { "epoch": 0.15035003281557646, "grad_norm": 1.464123249053955, "learning_rate": 1.9250529861748983e-05, "loss": 0.9048, "step": 5498 }, { "epoch": 0.15037737912929336, "grad_norm": 1.655395269393921, "learning_rate": 1.925019340129144e-05, "loss": 0.5764, "step": 5499 }, { "epoch": 0.15040472544301028, "grad_norm": 1.6429270505905151, "learning_rate": 1.9249856868268734e-05, "loss": 0.5667, "step": 5500 }, { "epoch": 0.15043207175672718, "grad_norm": 1.4504914283752441, "learning_rate": 1.92495202626835e-05, "loss": 0.5529, "step": 5501 }, { "epoch": 0.1504594180704441, "grad_norm": 1.4265631437301636, "learning_rate": 1.924918358453839e-05, "loss": 0.5559, "step": 5502 }, { "epoch": 0.150486764384161, "grad_norm": 1.6924453973770142, "learning_rate": 1.9248846833836032e-05, "loss": 0.5629, "step": 5503 }, { "epoch": 0.15051411069787793, "grad_norm": 1.4956700801849365, "learning_rate": 1.9248510010579077e-05, "loss": 0.61, "step": 5504 }, { "epoch": 0.15054145701159483, "grad_norm": 1.4237592220306396, "learning_rate": 1.9248173114770165e-05, "loss": 0.6066, "step": 5505 }, { "epoch": 0.15056880332531175, "grad_norm": 2.3670647144317627, "learning_rate": 1.9247836146411933e-05, "loss": 0.6706, "step": 5506 }, { "epoch": 0.15059614963902865, "grad_norm": 1.4826436042785645, "learning_rate": 1.924749910550703e-05, "loss": 0.5548, "step": 5507 }, { "epoch": 0.15062349595274557, "grad_norm": 1.402891755104065, "learning_rate": 1.9247161992058097e-05, "loss": 0.9134, "step": 5508 }, { "epoch": 0.15065084226646247, "grad_norm": 1.5007818937301636, "learning_rate": 1.9246824806067787e-05, "loss": 0.5956, "step": 5509 }, { "epoch": 0.1506781885801794, "grad_norm": 1.223707914352417, "learning_rate": 1.9246487547538733e-05, "loss": 0.5894, "step": 5510 }, { "epoch": 0.1507055348938963, "grad_norm": 1.2820919752120972, "learning_rate": 1.924615021647359e-05, "loss": 0.575, "step": 5511 }, { "epoch": 0.15073288120761322, "grad_norm": 1.055742621421814, "learning_rate": 1.9245812812874996e-05, "loss": 0.5799, "step": 5512 }, { "epoch": 0.15076022752133011, "grad_norm": 1.7998305559158325, "learning_rate": 1.9245475336745608e-05, "loss": 0.5746, "step": 5513 }, { "epoch": 0.15078757383504704, "grad_norm": 1.5027823448181152, "learning_rate": 1.924513778808806e-05, "loss": 0.5609, "step": 5514 }, { "epoch": 0.15081492014876394, "grad_norm": 1.3462673425674438, "learning_rate": 1.9244800166905016e-05, "loss": 0.5239, "step": 5515 }, { "epoch": 0.15084226646248086, "grad_norm": 1.1843901872634888, "learning_rate": 1.9244462473199114e-05, "loss": 0.5552, "step": 5516 }, { "epoch": 0.15086961277619776, "grad_norm": 1.3196933269500732, "learning_rate": 1.9244124706973007e-05, "loss": 0.5435, "step": 5517 }, { "epoch": 0.15089695908991468, "grad_norm": 1.5290471315383911, "learning_rate": 1.924378686822934e-05, "loss": 0.5698, "step": 5518 }, { "epoch": 0.15092430540363158, "grad_norm": 1.342262864112854, "learning_rate": 1.9243448956970767e-05, "loss": 0.9052, "step": 5519 }, { "epoch": 0.1509516517173485, "grad_norm": 1.6024402379989624, "learning_rate": 1.9243110973199937e-05, "loss": 0.605, "step": 5520 }, { "epoch": 0.1509789980310654, "grad_norm": 1.448170781135559, "learning_rate": 1.9242772916919506e-05, "loss": 0.5826, "step": 5521 }, { "epoch": 0.15100634434478233, "grad_norm": 1.7070484161376953, "learning_rate": 1.924243478813212e-05, "loss": 0.6262, "step": 5522 }, { "epoch": 0.15103369065849923, "grad_norm": 1.5504885911941528, "learning_rate": 1.9242096586840437e-05, "loss": 0.5571, "step": 5523 }, { "epoch": 0.15106103697221615, "grad_norm": 3.2668845653533936, "learning_rate": 1.9241758313047105e-05, "loss": 0.384, "step": 5524 }, { "epoch": 0.15108838328593305, "grad_norm": 1.2822232246398926, "learning_rate": 1.924141996675478e-05, "loss": 0.5727, "step": 5525 }, { "epoch": 0.15111572959964997, "grad_norm": 1.4257866144180298, "learning_rate": 1.9241081547966118e-05, "loss": 0.9301, "step": 5526 }, { "epoch": 0.15114307591336687, "grad_norm": 2.2079405784606934, "learning_rate": 1.924074305668377e-05, "loss": 0.5615, "step": 5527 }, { "epoch": 0.1511704222270838, "grad_norm": 2.0609419345855713, "learning_rate": 1.9240404492910394e-05, "loss": 0.5642, "step": 5528 }, { "epoch": 0.1511977685408007, "grad_norm": 1.2814005613327026, "learning_rate": 1.9240065856648645e-05, "loss": 0.5827, "step": 5529 }, { "epoch": 0.15122511485451762, "grad_norm": 1.8811759948730469, "learning_rate": 1.923972714790118e-05, "loss": 0.5639, "step": 5530 }, { "epoch": 0.15125246116823451, "grad_norm": 1.2955336570739746, "learning_rate": 1.9239388366670654e-05, "loss": 0.568, "step": 5531 }, { "epoch": 0.15127980748195144, "grad_norm": 1.3955345153808594, "learning_rate": 1.9239049512959728e-05, "loss": 0.934, "step": 5532 }, { "epoch": 0.15130715379566834, "grad_norm": 2.3504786491394043, "learning_rate": 1.923871058677106e-05, "loss": 0.6183, "step": 5533 }, { "epoch": 0.15133450010938526, "grad_norm": 1.8456215858459473, "learning_rate": 1.9238371588107304e-05, "loss": 0.566, "step": 5534 }, { "epoch": 0.15136184642310216, "grad_norm": 2.045562982559204, "learning_rate": 1.9238032516971125e-05, "loss": 0.6302, "step": 5535 }, { "epoch": 0.15138919273681908, "grad_norm": 1.879423975944519, "learning_rate": 1.923769337336518e-05, "loss": 0.9529, "step": 5536 }, { "epoch": 0.15141653905053598, "grad_norm": 1.6518337726593018, "learning_rate": 1.9237354157292132e-05, "loss": 0.5589, "step": 5537 }, { "epoch": 0.1514438853642529, "grad_norm": 1.2627949714660645, "learning_rate": 1.923701486875464e-05, "loss": 0.5788, "step": 5538 }, { "epoch": 0.1514712316779698, "grad_norm": 1.7526463270187378, "learning_rate": 1.9236675507755364e-05, "loss": 0.5746, "step": 5539 }, { "epoch": 0.15149857799168673, "grad_norm": 1.545576810836792, "learning_rate": 1.923633607429697e-05, "loss": 0.601, "step": 5540 }, { "epoch": 0.15152592430540363, "grad_norm": 1.9710990190505981, "learning_rate": 1.923599656838212e-05, "loss": 0.4403, "step": 5541 }, { "epoch": 0.15155327061912055, "grad_norm": 3.705040454864502, "learning_rate": 1.923565699001347e-05, "loss": 0.9033, "step": 5542 }, { "epoch": 0.15158061693283745, "grad_norm": 1.4151549339294434, "learning_rate": 1.9235317339193695e-05, "loss": 0.604, "step": 5543 }, { "epoch": 0.15160796324655437, "grad_norm": 1.5136045217514038, "learning_rate": 1.9234977615925454e-05, "loss": 0.5188, "step": 5544 }, { "epoch": 0.15163530956027127, "grad_norm": 1.7100940942764282, "learning_rate": 1.9234637820211414e-05, "loss": 0.5735, "step": 5545 }, { "epoch": 0.1516626558739882, "grad_norm": 1.4646164178848267, "learning_rate": 1.9234297952054236e-05, "loss": 0.6152, "step": 5546 }, { "epoch": 0.1516900021877051, "grad_norm": 1.3311771154403687, "learning_rate": 1.923395801145659e-05, "loss": 0.5693, "step": 5547 }, { "epoch": 0.15171734850142202, "grad_norm": 1.3742748498916626, "learning_rate": 1.923361799842114e-05, "loss": 0.5681, "step": 5548 }, { "epoch": 0.15174469481513891, "grad_norm": 1.3083912134170532, "learning_rate": 1.923327791295056e-05, "loss": 0.5787, "step": 5549 }, { "epoch": 0.15177204112885584, "grad_norm": 1.3953211307525635, "learning_rate": 1.923293775504751e-05, "loss": 0.5757, "step": 5550 }, { "epoch": 0.15179938744257274, "grad_norm": 1.2915676832199097, "learning_rate": 1.9232597524714666e-05, "loss": 0.605, "step": 5551 }, { "epoch": 0.15182673375628966, "grad_norm": 1.4950803518295288, "learning_rate": 1.923225722195469e-05, "loss": 0.5693, "step": 5552 }, { "epoch": 0.15185408007000656, "grad_norm": 1.1652194261550903, "learning_rate": 1.923191684677025e-05, "loss": 0.5597, "step": 5553 }, { "epoch": 0.15188142638372348, "grad_norm": 1.1397751569747925, "learning_rate": 1.9231576399164024e-05, "loss": 0.5479, "step": 5554 }, { "epoch": 0.15190877269744038, "grad_norm": 1.5630905628204346, "learning_rate": 1.923123587913868e-05, "loss": 0.5523, "step": 5555 }, { "epoch": 0.1519361190111573, "grad_norm": 1.4255644083023071, "learning_rate": 1.923089528669689e-05, "loss": 0.5413, "step": 5556 }, { "epoch": 0.1519634653248742, "grad_norm": 1.3772910833358765, "learning_rate": 1.9230554621841323e-05, "loss": 0.5511, "step": 5557 }, { "epoch": 0.15199081163859113, "grad_norm": 1.418728232383728, "learning_rate": 1.923021388457465e-05, "loss": 0.5638, "step": 5558 }, { "epoch": 0.15201815795230803, "grad_norm": 1.5918960571289062, "learning_rate": 1.922987307489955e-05, "loss": 0.5553, "step": 5559 }, { "epoch": 0.15204550426602495, "grad_norm": 1.5670067071914673, "learning_rate": 1.922953219281869e-05, "loss": 0.5789, "step": 5560 }, { "epoch": 0.15207285057974185, "grad_norm": 1.411693811416626, "learning_rate": 1.922919123833475e-05, "loss": 0.6003, "step": 5561 }, { "epoch": 0.15210019689345877, "grad_norm": 1.2276453971862793, "learning_rate": 1.92288502114504e-05, "loss": 0.4916, "step": 5562 }, { "epoch": 0.15212754320717567, "grad_norm": 2.665038585662842, "learning_rate": 1.922850911216832e-05, "loss": 0.4558, "step": 5563 }, { "epoch": 0.1521548895208926, "grad_norm": 1.3979004621505737, "learning_rate": 1.922816794049118e-05, "loss": 0.5704, "step": 5564 }, { "epoch": 0.1521822358346095, "grad_norm": 1.4590660333633423, "learning_rate": 1.922782669642166e-05, "loss": 0.5842, "step": 5565 }, { "epoch": 0.15220958214832642, "grad_norm": 1.2731434106826782, "learning_rate": 1.922748537996244e-05, "loss": 0.5371, "step": 5566 }, { "epoch": 0.1522369284620433, "grad_norm": 1.2827738523483276, "learning_rate": 1.922714399111619e-05, "loss": 0.5966, "step": 5567 }, { "epoch": 0.15226427477576024, "grad_norm": 1.7335845232009888, "learning_rate": 1.9226802529885593e-05, "loss": 0.5751, "step": 5568 }, { "epoch": 0.15229162108947714, "grad_norm": 1.4411535263061523, "learning_rate": 1.922646099627333e-05, "loss": 0.5871, "step": 5569 }, { "epoch": 0.15231896740319406, "grad_norm": 1.2652095556259155, "learning_rate": 1.9226119390282073e-05, "loss": 0.5675, "step": 5570 }, { "epoch": 0.15234631371691096, "grad_norm": 1.1859372854232788, "learning_rate": 1.9225777711914508e-05, "loss": 0.5481, "step": 5571 }, { "epoch": 0.15237366003062788, "grad_norm": 1.3333523273468018, "learning_rate": 1.9225435961173313e-05, "loss": 0.58, "step": 5572 }, { "epoch": 0.15240100634434478, "grad_norm": 1.4189344644546509, "learning_rate": 1.922509413806117e-05, "loss": 0.6462, "step": 5573 }, { "epoch": 0.1524283526580617, "grad_norm": 2.2690088748931885, "learning_rate": 1.9224752242580757e-05, "loss": 0.9096, "step": 5574 }, { "epoch": 0.1524556989717786, "grad_norm": 1.6357841491699219, "learning_rate": 1.9224410274734763e-05, "loss": 0.5137, "step": 5575 }, { "epoch": 0.15248304528549553, "grad_norm": 1.49314546585083, "learning_rate": 1.9224068234525863e-05, "loss": 0.5587, "step": 5576 }, { "epoch": 0.15251039159921242, "grad_norm": 1.6931087970733643, "learning_rate": 1.9223726121956744e-05, "loss": 0.5845, "step": 5577 }, { "epoch": 0.15253773791292935, "grad_norm": 1.5430445671081543, "learning_rate": 1.922338393703009e-05, "loss": 0.564, "step": 5578 }, { "epoch": 0.15256508422664625, "grad_norm": 1.4191662073135376, "learning_rate": 1.9223041679748583e-05, "loss": 0.5923, "step": 5579 }, { "epoch": 0.15259243054036317, "grad_norm": 1.1397641897201538, "learning_rate": 1.922269935011491e-05, "loss": 0.481, "step": 5580 }, { "epoch": 0.15261977685408007, "grad_norm": 1.2800824642181396, "learning_rate": 1.9222356948131758e-05, "loss": 0.6053, "step": 5581 }, { "epoch": 0.152647123167797, "grad_norm": 1.6351689100265503, "learning_rate": 1.922201447380181e-05, "loss": 0.653, "step": 5582 }, { "epoch": 0.1526744694815139, "grad_norm": 1.7432273626327515, "learning_rate": 1.9221671927127753e-05, "loss": 0.5674, "step": 5583 }, { "epoch": 0.15270181579523082, "grad_norm": 1.543531894683838, "learning_rate": 1.9221329308112274e-05, "loss": 0.5323, "step": 5584 }, { "epoch": 0.1527291621089477, "grad_norm": 1.3100374937057495, "learning_rate": 1.9220986616758065e-05, "loss": 0.5915, "step": 5585 }, { "epoch": 0.15275650842266464, "grad_norm": 1.08279550075531, "learning_rate": 1.922064385306781e-05, "loss": 0.5713, "step": 5586 }, { "epoch": 0.15278385473638154, "grad_norm": 1.1154553890228271, "learning_rate": 1.9220301017044196e-05, "loss": 0.4782, "step": 5587 }, { "epoch": 0.15281120105009843, "grad_norm": 1.3413770198822021, "learning_rate": 1.921995810868992e-05, "loss": 0.5683, "step": 5588 }, { "epoch": 0.15283854736381536, "grad_norm": 1.5643408298492432, "learning_rate": 1.9219615128007664e-05, "loss": 0.5702, "step": 5589 }, { "epoch": 0.15286589367753226, "grad_norm": 1.655130386352539, "learning_rate": 1.9219272075000123e-05, "loss": 0.9709, "step": 5590 }, { "epoch": 0.15289323999124918, "grad_norm": 1.7786258459091187, "learning_rate": 1.921892894966999e-05, "loss": 0.5832, "step": 5591 }, { "epoch": 0.15292058630496608, "grad_norm": 1.3797718286514282, "learning_rate": 1.921858575201995e-05, "loss": 0.5823, "step": 5592 }, { "epoch": 0.152947932618683, "grad_norm": 1.37274169921875, "learning_rate": 1.9218242482052698e-05, "loss": 0.5693, "step": 5593 }, { "epoch": 0.1529752789323999, "grad_norm": 1.1913446187973022, "learning_rate": 1.921789913977093e-05, "loss": 0.9189, "step": 5594 }, { "epoch": 0.15300262524611682, "grad_norm": 1.3451001644134521, "learning_rate": 1.9217555725177338e-05, "loss": 0.5669, "step": 5595 }, { "epoch": 0.15302997155983372, "grad_norm": 1.3611352443695068, "learning_rate": 1.9217212238274612e-05, "loss": 0.5726, "step": 5596 }, { "epoch": 0.15305731787355065, "grad_norm": 1.4850770235061646, "learning_rate": 1.9216868679065455e-05, "loss": 0.5732, "step": 5597 }, { "epoch": 0.15308466418726754, "grad_norm": 1.4101057052612305, "learning_rate": 1.9216525047552555e-05, "loss": 0.5321, "step": 5598 }, { "epoch": 0.15311201050098447, "grad_norm": 1.2506036758422852, "learning_rate": 1.921618134373861e-05, "loss": 0.6081, "step": 5599 }, { "epoch": 0.15313935681470137, "grad_norm": 1.0360263586044312, "learning_rate": 1.9215837567626313e-05, "loss": 0.5643, "step": 5600 }, { "epoch": 0.1531667031284183, "grad_norm": 1.8309849500656128, "learning_rate": 1.921549371921837e-05, "loss": 0.5419, "step": 5601 }, { "epoch": 0.1531940494421352, "grad_norm": 1.1535552740097046, "learning_rate": 1.9215149798517467e-05, "loss": 0.5955, "step": 5602 }, { "epoch": 0.1532213957558521, "grad_norm": 1.8235435485839844, "learning_rate": 1.9214805805526308e-05, "loss": 0.9028, "step": 5603 }, { "epoch": 0.153248742069569, "grad_norm": 1.239871621131897, "learning_rate": 1.9214461740247592e-05, "loss": 0.5558, "step": 5604 }, { "epoch": 0.15327608838328594, "grad_norm": 1.2093397378921509, "learning_rate": 1.9214117602684016e-05, "loss": 0.5898, "step": 5605 }, { "epoch": 0.15330343469700283, "grad_norm": 1.491896152496338, "learning_rate": 1.921377339283828e-05, "loss": 0.583, "step": 5606 }, { "epoch": 0.15333078101071976, "grad_norm": 1.461036205291748, "learning_rate": 1.9213429110713085e-05, "loss": 0.9069, "step": 5607 }, { "epoch": 0.15335812732443665, "grad_norm": 1.560626745223999, "learning_rate": 1.9213084756311135e-05, "loss": 0.5802, "step": 5608 }, { "epoch": 0.15338547363815358, "grad_norm": 1.2834430932998657, "learning_rate": 1.9212740329635126e-05, "loss": 0.579, "step": 5609 }, { "epoch": 0.15341281995187048, "grad_norm": 1.9015882015228271, "learning_rate": 1.921239583068776e-05, "loss": 0.5085, "step": 5610 }, { "epoch": 0.1534401662655874, "grad_norm": 1.7088615894317627, "learning_rate": 1.9212051259471743e-05, "loss": 0.5272, "step": 5611 }, { "epoch": 0.1534675125793043, "grad_norm": 1.35890531539917, "learning_rate": 1.921170661598977e-05, "loss": 0.5657, "step": 5612 }, { "epoch": 0.15349485889302122, "grad_norm": 1.2806974649429321, "learning_rate": 1.921136190024456e-05, "loss": 0.5449, "step": 5613 }, { "epoch": 0.15352220520673812, "grad_norm": 1.4444806575775146, "learning_rate": 1.9211017112238804e-05, "loss": 0.5257, "step": 5614 }, { "epoch": 0.15354955152045505, "grad_norm": 1.1824934482574463, "learning_rate": 1.921067225197521e-05, "loss": 0.5552, "step": 5615 }, { "epoch": 0.15357689783417194, "grad_norm": 1.4437932968139648, "learning_rate": 1.921032731945649e-05, "loss": 0.6188, "step": 5616 }, { "epoch": 0.15360424414788887, "grad_norm": 1.3428728580474854, "learning_rate": 1.920998231468534e-05, "loss": 0.5361, "step": 5617 }, { "epoch": 0.15363159046160577, "grad_norm": 1.4412965774536133, "learning_rate": 1.920963723766447e-05, "loss": 0.5755, "step": 5618 }, { "epoch": 0.1536589367753227, "grad_norm": 1.6694999933242798, "learning_rate": 1.9209292088396586e-05, "loss": 0.5192, "step": 5619 }, { "epoch": 0.1536862830890396, "grad_norm": 1.3997554779052734, "learning_rate": 1.9208946866884402e-05, "loss": 0.6035, "step": 5620 }, { "epoch": 0.1537136294027565, "grad_norm": 1.3209728002548218, "learning_rate": 1.9208601573130617e-05, "loss": 0.5448, "step": 5621 }, { "epoch": 0.1537409757164734, "grad_norm": 1.4688234329223633, "learning_rate": 1.9208256207137947e-05, "loss": 0.6432, "step": 5622 }, { "epoch": 0.15376832203019034, "grad_norm": 1.4159367084503174, "learning_rate": 1.9207910768909096e-05, "loss": 0.5856, "step": 5623 }, { "epoch": 0.15379566834390723, "grad_norm": 1.2399449348449707, "learning_rate": 1.920756525844678e-05, "loss": 0.545, "step": 5624 }, { "epoch": 0.15382301465762416, "grad_norm": 2.5315394401550293, "learning_rate": 1.9207219675753703e-05, "loss": 0.9254, "step": 5625 }, { "epoch": 0.15385036097134105, "grad_norm": 1.5095795392990112, "learning_rate": 1.920687402083258e-05, "loss": 0.5622, "step": 5626 }, { "epoch": 0.15387770728505798, "grad_norm": 1.4128050804138184, "learning_rate": 1.9206528293686123e-05, "loss": 0.4256, "step": 5627 }, { "epoch": 0.15390505359877488, "grad_norm": 1.2472808361053467, "learning_rate": 1.9206182494317037e-05, "loss": 0.5789, "step": 5628 }, { "epoch": 0.1539323999124918, "grad_norm": 1.1216950416564941, "learning_rate": 1.9205836622728045e-05, "loss": 0.5782, "step": 5629 }, { "epoch": 0.1539597462262087, "grad_norm": 1.5210133790969849, "learning_rate": 1.9205490678921854e-05, "loss": 0.5531, "step": 5630 }, { "epoch": 0.15398709253992562, "grad_norm": 1.2078750133514404, "learning_rate": 1.9205144662901182e-05, "loss": 0.5452, "step": 5631 }, { "epoch": 0.15401443885364252, "grad_norm": 1.3387720584869385, "learning_rate": 1.9204798574668735e-05, "loss": 0.5902, "step": 5632 }, { "epoch": 0.15404178516735945, "grad_norm": 1.1470335721969604, "learning_rate": 1.9204452414227237e-05, "loss": 0.5527, "step": 5633 }, { "epoch": 0.15406913148107634, "grad_norm": 1.3312612771987915, "learning_rate": 1.92041061815794e-05, "loss": 0.609, "step": 5634 }, { "epoch": 0.15409647779479327, "grad_norm": 2.3859376907348633, "learning_rate": 1.9203759876727942e-05, "loss": 0.9535, "step": 5635 }, { "epoch": 0.15412382410851017, "grad_norm": 1.6495716571807861, "learning_rate": 1.9203413499675573e-05, "loss": 0.5324, "step": 5636 }, { "epoch": 0.1541511704222271, "grad_norm": 1.6191349029541016, "learning_rate": 1.920306705042502e-05, "loss": 0.4455, "step": 5637 }, { "epoch": 0.154178516735944, "grad_norm": 1.4315450191497803, "learning_rate": 1.9202720528978994e-05, "loss": 0.5552, "step": 5638 }, { "epoch": 0.1542058630496609, "grad_norm": 1.4861207008361816, "learning_rate": 1.9202373935340215e-05, "loss": 0.5447, "step": 5639 }, { "epoch": 0.1542332093633778, "grad_norm": 1.4144694805145264, "learning_rate": 1.9202027269511404e-05, "loss": 0.5867, "step": 5640 }, { "epoch": 0.15426055567709474, "grad_norm": 1.7318776845932007, "learning_rate": 1.9201680531495276e-05, "loss": 0.5113, "step": 5641 }, { "epoch": 0.15428790199081163, "grad_norm": 1.4742337465286255, "learning_rate": 1.9201333721294558e-05, "loss": 0.5907, "step": 5642 }, { "epoch": 0.15431524830452856, "grad_norm": 1.410884976387024, "learning_rate": 1.920098683891196e-05, "loss": 0.5389, "step": 5643 }, { "epoch": 0.15434259461824545, "grad_norm": 1.9386554956436157, "learning_rate": 1.9200639884350214e-05, "loss": 0.5705, "step": 5644 }, { "epoch": 0.15436994093196238, "grad_norm": 1.5882773399353027, "learning_rate": 1.9200292857612038e-05, "loss": 0.5501, "step": 5645 }, { "epoch": 0.15439728724567928, "grad_norm": 1.8065953254699707, "learning_rate": 1.9199945758700152e-05, "loss": 0.5176, "step": 5646 }, { "epoch": 0.1544246335593962, "grad_norm": 1.4033271074295044, "learning_rate": 1.9199598587617282e-05, "loss": 0.5921, "step": 5647 }, { "epoch": 0.1544519798731131, "grad_norm": 1.3906663656234741, "learning_rate": 1.919925134436615e-05, "loss": 0.5197, "step": 5648 }, { "epoch": 0.15447932618683002, "grad_norm": 1.487816333770752, "learning_rate": 1.919890402894948e-05, "loss": 0.615, "step": 5649 }, { "epoch": 0.15450667250054692, "grad_norm": 1.5208098888397217, "learning_rate": 1.9198556641369996e-05, "loss": 0.5555, "step": 5650 }, { "epoch": 0.15453401881426385, "grad_norm": 1.2032870054244995, "learning_rate": 1.9198209181630423e-05, "loss": 0.5537, "step": 5651 }, { "epoch": 0.15456136512798074, "grad_norm": 1.3640366792678833, "learning_rate": 1.9197861649733486e-05, "loss": 0.5437, "step": 5652 }, { "epoch": 0.15458871144169767, "grad_norm": 1.2570446729660034, "learning_rate": 1.919751404568192e-05, "loss": 0.5196, "step": 5653 }, { "epoch": 0.15461605775541457, "grad_norm": 1.6273270845413208, "learning_rate": 1.9197166369478438e-05, "loss": 0.5631, "step": 5654 }, { "epoch": 0.1546434040691315, "grad_norm": 2.617706537246704, "learning_rate": 1.9196818621125775e-05, "loss": 0.9545, "step": 5655 }, { "epoch": 0.1546707503828484, "grad_norm": 1.6843100786209106, "learning_rate": 1.9196470800626658e-05, "loss": 0.5917, "step": 5656 }, { "epoch": 0.1546980966965653, "grad_norm": 1.3367993831634521, "learning_rate": 1.919612290798382e-05, "loss": 0.5846, "step": 5657 }, { "epoch": 0.1547254430102822, "grad_norm": 1.246848702430725, "learning_rate": 1.9195774943199978e-05, "loss": 0.555, "step": 5658 }, { "epoch": 0.15475278932399913, "grad_norm": 1.4954495429992676, "learning_rate": 1.9195426906277874e-05, "loss": 0.5643, "step": 5659 }, { "epoch": 0.15478013563771603, "grad_norm": 1.1744964122772217, "learning_rate": 1.9195078797220236e-05, "loss": 0.5613, "step": 5660 }, { "epoch": 0.15480748195143296, "grad_norm": 1.4701911211013794, "learning_rate": 1.919473061602979e-05, "loss": 0.5033, "step": 5661 }, { "epoch": 0.15483482826514985, "grad_norm": 6.181900501251221, "learning_rate": 1.919438236270927e-05, "loss": 0.5668, "step": 5662 }, { "epoch": 0.15486217457886678, "grad_norm": 1.1901708841323853, "learning_rate": 1.919403403726141e-05, "loss": 0.5509, "step": 5663 }, { "epoch": 0.15488952089258368, "grad_norm": 1.3749490976333618, "learning_rate": 1.9193685639688936e-05, "loss": 0.5373, "step": 5664 }, { "epoch": 0.1549168672063006, "grad_norm": 1.6224032640457153, "learning_rate": 1.919333716999459e-05, "loss": 0.5746, "step": 5665 }, { "epoch": 0.1549442135200175, "grad_norm": 1.7133337259292603, "learning_rate": 1.91929886281811e-05, "loss": 0.529, "step": 5666 }, { "epoch": 0.15497155983373442, "grad_norm": 1.493621826171875, "learning_rate": 1.91926400142512e-05, "loss": 0.4466, "step": 5667 }, { "epoch": 0.15499890614745132, "grad_norm": 1.127432107925415, "learning_rate": 1.9192291328207626e-05, "loss": 0.5525, "step": 5668 }, { "epoch": 0.15502625246116825, "grad_norm": 2.6487224102020264, "learning_rate": 1.9191942570053116e-05, "loss": 0.5678, "step": 5669 }, { "epoch": 0.15505359877488514, "grad_norm": 1.355241298675537, "learning_rate": 1.91915937397904e-05, "loss": 0.5723, "step": 5670 }, { "epoch": 0.15508094508860207, "grad_norm": 2.5943901538848877, "learning_rate": 1.919124483742222e-05, "loss": 0.9595, "step": 5671 }, { "epoch": 0.15510829140231897, "grad_norm": 1.6690356731414795, "learning_rate": 1.9190895862951308e-05, "loss": 0.5974, "step": 5672 }, { "epoch": 0.1551356377160359, "grad_norm": 1.6742933988571167, "learning_rate": 1.919054681638041e-05, "loss": 0.5448, "step": 5673 }, { "epoch": 0.1551629840297528, "grad_norm": 1.4305791854858398, "learning_rate": 1.9190197697712253e-05, "loss": 0.5786, "step": 5674 }, { "epoch": 0.1551903303434697, "grad_norm": 1.458768606185913, "learning_rate": 1.9189848506949587e-05, "loss": 0.5468, "step": 5675 }, { "epoch": 0.1552176766571866, "grad_norm": 1.1705098152160645, "learning_rate": 1.918949924409514e-05, "loss": 0.5809, "step": 5676 }, { "epoch": 0.15524502297090353, "grad_norm": 1.4347072839736938, "learning_rate": 1.918914990915166e-05, "loss": 0.5639, "step": 5677 }, { "epoch": 0.15527236928462043, "grad_norm": 1.5016522407531738, "learning_rate": 1.918880050212189e-05, "loss": 0.9672, "step": 5678 }, { "epoch": 0.15529971559833736, "grad_norm": 1.6820554733276367, "learning_rate": 1.9188451023008556e-05, "loss": 0.6275, "step": 5679 }, { "epoch": 0.15532706191205425, "grad_norm": 1.7657166719436646, "learning_rate": 1.9188101471814414e-05, "loss": 0.5215, "step": 5680 }, { "epoch": 0.15535440822577118, "grad_norm": 1.52340829372406, "learning_rate": 1.9187751848542206e-05, "loss": 0.9008, "step": 5681 }, { "epoch": 0.15538175453948808, "grad_norm": 1.7517139911651611, "learning_rate": 1.9187402153194664e-05, "loss": 0.9412, "step": 5682 }, { "epoch": 0.155409100853205, "grad_norm": 1.6557687520980835, "learning_rate": 1.9187052385774542e-05, "loss": 0.5701, "step": 5683 }, { "epoch": 0.1554364471669219, "grad_norm": 3.7282395362854004, "learning_rate": 1.9186702546284578e-05, "loss": 0.9171, "step": 5684 }, { "epoch": 0.15546379348063882, "grad_norm": 1.7507271766662598, "learning_rate": 1.918635263472752e-05, "loss": 0.5634, "step": 5685 }, { "epoch": 0.15549113979435572, "grad_norm": 1.575781226158142, "learning_rate": 1.9186002651106106e-05, "loss": 0.9094, "step": 5686 }, { "epoch": 0.15551848610807265, "grad_norm": 1.8738152980804443, "learning_rate": 1.918565259542309e-05, "loss": 0.5916, "step": 5687 }, { "epoch": 0.15554583242178954, "grad_norm": 1.2607938051223755, "learning_rate": 1.918530246768121e-05, "loss": 0.5731, "step": 5688 }, { "epoch": 0.15557317873550644, "grad_norm": 1.3437080383300781, "learning_rate": 1.9184952267883225e-05, "loss": 0.5542, "step": 5689 }, { "epoch": 0.15560052504922336, "grad_norm": 1.3356457948684692, "learning_rate": 1.9184601996031872e-05, "loss": 0.5706, "step": 5690 }, { "epoch": 0.15562787136294026, "grad_norm": 1.4866236448287964, "learning_rate": 1.9184251652129898e-05, "loss": 0.591, "step": 5691 }, { "epoch": 0.1556552176766572, "grad_norm": 1.6390999555587769, "learning_rate": 1.9183901236180054e-05, "loss": 0.5833, "step": 5692 }, { "epoch": 0.15568256399037408, "grad_norm": 1.2312111854553223, "learning_rate": 1.918355074818509e-05, "loss": 0.5705, "step": 5693 }, { "epoch": 0.155709910304091, "grad_norm": 1.5378506183624268, "learning_rate": 1.9183200188147755e-05, "loss": 0.5619, "step": 5694 }, { "epoch": 0.1557372566178079, "grad_norm": 1.1923096179962158, "learning_rate": 1.91828495560708e-05, "loss": 0.5504, "step": 5695 }, { "epoch": 0.15576460293152483, "grad_norm": 1.4120097160339355, "learning_rate": 1.9182498851956976e-05, "loss": 0.543, "step": 5696 }, { "epoch": 0.15579194924524173, "grad_norm": 1.4952715635299683, "learning_rate": 1.918214807580903e-05, "loss": 0.61, "step": 5697 }, { "epoch": 0.15581929555895865, "grad_norm": 1.6683037281036377, "learning_rate": 1.9181797227629718e-05, "loss": 0.589, "step": 5698 }, { "epoch": 0.15584664187267555, "grad_norm": 1.1996601819992065, "learning_rate": 1.918144630742179e-05, "loss": 0.556, "step": 5699 }, { "epoch": 0.15587398818639248, "grad_norm": 1.385370135307312, "learning_rate": 1.9181095315188e-05, "loss": 0.5795, "step": 5700 }, { "epoch": 0.15590133450010937, "grad_norm": 7.650031089782715, "learning_rate": 1.91807442509311e-05, "loss": 0.3979, "step": 5701 }, { "epoch": 0.1559286808138263, "grad_norm": 1.9244415760040283, "learning_rate": 1.918039311465385e-05, "loss": 0.6223, "step": 5702 }, { "epoch": 0.1559560271275432, "grad_norm": 1.3526803255081177, "learning_rate": 1.9180041906358994e-05, "loss": 0.5792, "step": 5703 }, { "epoch": 0.15598337344126012, "grad_norm": 1.3986155986785889, "learning_rate": 1.9179690626049293e-05, "loss": 0.5555, "step": 5704 }, { "epoch": 0.15601071975497702, "grad_norm": 1.7256839275360107, "learning_rate": 1.9179339273727504e-05, "loss": 0.5744, "step": 5705 }, { "epoch": 0.15603806606869394, "grad_norm": 1.629111647605896, "learning_rate": 1.917898784939638e-05, "loss": 0.6033, "step": 5706 }, { "epoch": 0.15606541238241084, "grad_norm": 1.7485089302062988, "learning_rate": 1.9178636353058682e-05, "loss": 0.5472, "step": 5707 }, { "epoch": 0.15609275869612776, "grad_norm": 1.36929452419281, "learning_rate": 1.9178284784717162e-05, "loss": 0.5301, "step": 5708 }, { "epoch": 0.15612010500984466, "grad_norm": 1.8010553121566772, "learning_rate": 1.9177933144374586e-05, "loss": 0.57, "step": 5709 }, { "epoch": 0.1561474513235616, "grad_norm": 1.7281283140182495, "learning_rate": 1.9177581432033704e-05, "loss": 0.615, "step": 5710 }, { "epoch": 0.15617479763727848, "grad_norm": 1.428843379020691, "learning_rate": 1.9177229647697278e-05, "loss": 0.5721, "step": 5711 }, { "epoch": 0.1562021439509954, "grad_norm": 1.537006139755249, "learning_rate": 1.9176877791368066e-05, "loss": 0.5915, "step": 5712 }, { "epoch": 0.1562294902647123, "grad_norm": 1.465321660041809, "learning_rate": 1.9176525863048833e-05, "loss": 0.5694, "step": 5713 }, { "epoch": 0.15625683657842923, "grad_norm": 1.5770989656448364, "learning_rate": 1.917617386274234e-05, "loss": 0.5475, "step": 5714 }, { "epoch": 0.15628418289214613, "grad_norm": 1.5160658359527588, "learning_rate": 1.9175821790451337e-05, "loss": 0.575, "step": 5715 }, { "epoch": 0.15631152920586305, "grad_norm": 1.3766729831695557, "learning_rate": 1.91754696461786e-05, "loss": 0.5331, "step": 5716 }, { "epoch": 0.15633887551957995, "grad_norm": 1.229068398475647, "learning_rate": 1.917511742992689e-05, "loss": 0.5618, "step": 5717 }, { "epoch": 0.15636622183329688, "grad_norm": 2.6687369346618652, "learning_rate": 1.917476514169896e-05, "loss": 0.5735, "step": 5718 }, { "epoch": 0.15639356814701377, "grad_norm": 1.8193873167037964, "learning_rate": 1.917441278149758e-05, "loss": 0.5814, "step": 5719 }, { "epoch": 0.1564209144607307, "grad_norm": 1.774932861328125, "learning_rate": 1.9174060349325516e-05, "loss": 0.58, "step": 5720 }, { "epoch": 0.1564482607744476, "grad_norm": 1.9581795930862427, "learning_rate": 1.9173707845185526e-05, "loss": 0.4377, "step": 5721 }, { "epoch": 0.15647560708816452, "grad_norm": 1.7361159324645996, "learning_rate": 1.9173355269080384e-05, "loss": 0.6112, "step": 5722 }, { "epoch": 0.15650295340188142, "grad_norm": 1.3370115756988525, "learning_rate": 1.9173002621012847e-05, "loss": 0.5813, "step": 5723 }, { "epoch": 0.15653029971559834, "grad_norm": 1.4799761772155762, "learning_rate": 1.917264990098569e-05, "loss": 0.6, "step": 5724 }, { "epoch": 0.15655764602931524, "grad_norm": 1.9944223165512085, "learning_rate": 1.9172297109001676e-05, "loss": 0.6374, "step": 5725 }, { "epoch": 0.15658499234303216, "grad_norm": 1.560867190361023, "learning_rate": 1.917194424506357e-05, "loss": 0.5793, "step": 5726 }, { "epoch": 0.15661233865674906, "grad_norm": 1.3042032718658447, "learning_rate": 1.9171591309174148e-05, "loss": 0.5673, "step": 5727 }, { "epoch": 0.156639684970466, "grad_norm": 1.511881947517395, "learning_rate": 1.9171238301336166e-05, "loss": 0.5607, "step": 5728 }, { "epoch": 0.15666703128418288, "grad_norm": 1.3939425945281982, "learning_rate": 1.9170885221552406e-05, "loss": 0.5776, "step": 5729 }, { "epoch": 0.1566943775978998, "grad_norm": 1.997222900390625, "learning_rate": 1.9170532069825628e-05, "loss": 0.5873, "step": 5730 }, { "epoch": 0.1567217239116167, "grad_norm": 2.269104480743408, "learning_rate": 1.917017884615861e-05, "loss": 0.9327, "step": 5731 }, { "epoch": 0.15674907022533363, "grad_norm": 1.5024054050445557, "learning_rate": 1.916982555055412e-05, "loss": 0.5674, "step": 5732 }, { "epoch": 0.15677641653905053, "grad_norm": 2.983767509460449, "learning_rate": 1.9169472183014926e-05, "loss": 0.5368, "step": 5733 }, { "epoch": 0.15680376285276745, "grad_norm": 1.7480272054672241, "learning_rate": 1.9169118743543805e-05, "loss": 0.595, "step": 5734 }, { "epoch": 0.15683110916648435, "grad_norm": 1.6331202983856201, "learning_rate": 1.9168765232143527e-05, "loss": 0.5911, "step": 5735 }, { "epoch": 0.15685845548020128, "grad_norm": 1.805498719215393, "learning_rate": 1.9168411648816868e-05, "loss": 0.5875, "step": 5736 }, { "epoch": 0.15688580179391817, "grad_norm": 1.360238790512085, "learning_rate": 1.9168057993566597e-05, "loss": 0.5856, "step": 5737 }, { "epoch": 0.1569131481076351, "grad_norm": 1.6469765901565552, "learning_rate": 1.9167704266395494e-05, "loss": 0.6251, "step": 5738 }, { "epoch": 0.156940494421352, "grad_norm": 1.1853781938552856, "learning_rate": 1.916735046730633e-05, "loss": 0.576, "step": 5739 }, { "epoch": 0.15696784073506892, "grad_norm": 1.5686265230178833, "learning_rate": 1.9166996596301885e-05, "loss": 0.5553, "step": 5740 }, { "epoch": 0.15699518704878582, "grad_norm": 1.2105225324630737, "learning_rate": 1.9166642653384927e-05, "loss": 0.5524, "step": 5741 }, { "epoch": 0.15702253336250274, "grad_norm": 2.236370325088501, "learning_rate": 1.9166288638558237e-05, "loss": 0.9209, "step": 5742 }, { "epoch": 0.15704987967621964, "grad_norm": 1.2165718078613281, "learning_rate": 1.9165934551824593e-05, "loss": 0.5898, "step": 5743 }, { "epoch": 0.15707722598993656, "grad_norm": 1.3618000745773315, "learning_rate": 1.9165580393186772e-05, "loss": 0.5544, "step": 5744 }, { "epoch": 0.15710457230365346, "grad_norm": 1.6635875701904297, "learning_rate": 1.9165226162647553e-05, "loss": 0.6561, "step": 5745 }, { "epoch": 0.1571319186173704, "grad_norm": 1.652456521987915, "learning_rate": 1.9164871860209714e-05, "loss": 0.952, "step": 5746 }, { "epoch": 0.15715926493108728, "grad_norm": 1.5372816324234009, "learning_rate": 1.9164517485876034e-05, "loss": 0.5553, "step": 5747 }, { "epoch": 0.1571866112448042, "grad_norm": 1.5889544486999512, "learning_rate": 1.9164163039649294e-05, "loss": 0.5932, "step": 5748 }, { "epoch": 0.1572139575585211, "grad_norm": 1.4082685708999634, "learning_rate": 1.9163808521532273e-05, "loss": 0.5687, "step": 5749 }, { "epoch": 0.15724130387223803, "grad_norm": 1.4248981475830078, "learning_rate": 1.9163453931527752e-05, "loss": 0.5971, "step": 5750 }, { "epoch": 0.15726865018595493, "grad_norm": 1.5885748863220215, "learning_rate": 1.9163099269638516e-05, "loss": 0.5764, "step": 5751 }, { "epoch": 0.15729599649967185, "grad_norm": 1.4628961086273193, "learning_rate": 1.9162744535867343e-05, "loss": 0.5831, "step": 5752 }, { "epoch": 0.15732334281338875, "grad_norm": 1.6732107400894165, "learning_rate": 1.916238973021702e-05, "loss": 0.6194, "step": 5753 }, { "epoch": 0.15735068912710568, "grad_norm": 1.3620120286941528, "learning_rate": 1.9162034852690325e-05, "loss": 0.4948, "step": 5754 }, { "epoch": 0.15737803544082257, "grad_norm": 1.2156927585601807, "learning_rate": 1.916167990329005e-05, "loss": 0.5964, "step": 5755 }, { "epoch": 0.1574053817545395, "grad_norm": 1.3569985628128052, "learning_rate": 1.916132488201897e-05, "loss": 0.583, "step": 5756 }, { "epoch": 0.1574327280682564, "grad_norm": 1.2438260316848755, "learning_rate": 1.9160969788879877e-05, "loss": 0.5612, "step": 5757 }, { "epoch": 0.15746007438197332, "grad_norm": 1.7057826519012451, "learning_rate": 1.916061462387555e-05, "loss": 0.6371, "step": 5758 }, { "epoch": 0.15748742069569022, "grad_norm": 1.4513875246047974, "learning_rate": 1.9160259387008782e-05, "loss": 0.5567, "step": 5759 }, { "epoch": 0.15751476700940714, "grad_norm": 1.6879568099975586, "learning_rate": 1.9159904078282362e-05, "loss": 0.4458, "step": 5760 }, { "epoch": 0.15754211332312404, "grad_norm": 1.568389892578125, "learning_rate": 1.9159548697699067e-05, "loss": 0.5766, "step": 5761 }, { "epoch": 0.15756945963684096, "grad_norm": 1.5697615146636963, "learning_rate": 1.915919324526169e-05, "loss": 0.5686, "step": 5762 }, { "epoch": 0.15759680595055786, "grad_norm": 1.7411065101623535, "learning_rate": 1.915883772097302e-05, "loss": 0.5976, "step": 5763 }, { "epoch": 0.15762415226427479, "grad_norm": 1.4996216297149658, "learning_rate": 1.9158482124835845e-05, "loss": 0.572, "step": 5764 }, { "epoch": 0.15765149857799168, "grad_norm": 1.6788623332977295, "learning_rate": 1.915812645685296e-05, "loss": 0.6242, "step": 5765 }, { "epoch": 0.1576788448917086, "grad_norm": 1.9555492401123047, "learning_rate": 1.9157770717027145e-05, "loss": 0.3819, "step": 5766 }, { "epoch": 0.1577061912054255, "grad_norm": 1.4814903736114502, "learning_rate": 1.9157414905361197e-05, "loss": 0.5557, "step": 5767 }, { "epoch": 0.15773353751914243, "grad_norm": 1.774131178855896, "learning_rate": 1.9157059021857907e-05, "loss": 0.5184, "step": 5768 }, { "epoch": 0.15776088383285933, "grad_norm": 1.33989679813385, "learning_rate": 1.9156703066520066e-05, "loss": 0.5675, "step": 5769 }, { "epoch": 0.15778823014657625, "grad_norm": 1.3040138483047485, "learning_rate": 1.9156347039350467e-05, "loss": 0.517, "step": 5770 }, { "epoch": 0.15781557646029315, "grad_norm": 1.5642726421356201, "learning_rate": 1.91559909403519e-05, "loss": 0.5437, "step": 5771 }, { "epoch": 0.15784292277401007, "grad_norm": 2.985473155975342, "learning_rate": 1.9155634769527162e-05, "loss": 0.4781, "step": 5772 }, { "epoch": 0.15787026908772697, "grad_norm": 2.32778000831604, "learning_rate": 1.9155278526879046e-05, "loss": 0.6203, "step": 5773 }, { "epoch": 0.1578976154014439, "grad_norm": 1.2893884181976318, "learning_rate": 1.9154922212410346e-05, "loss": 0.5888, "step": 5774 }, { "epoch": 0.1579249617151608, "grad_norm": 1.3784356117248535, "learning_rate": 1.915456582612386e-05, "loss": 0.6298, "step": 5775 }, { "epoch": 0.15795230802887772, "grad_norm": 1.856230616569519, "learning_rate": 1.915420936802238e-05, "loss": 0.5455, "step": 5776 }, { "epoch": 0.15797965434259462, "grad_norm": 1.7885411977767944, "learning_rate": 1.91538528381087e-05, "loss": 0.5668, "step": 5777 }, { "epoch": 0.15800700065631154, "grad_norm": 1.2089463472366333, "learning_rate": 1.9153496236385626e-05, "loss": 0.5918, "step": 5778 }, { "epoch": 0.15803434697002844, "grad_norm": 1.6158522367477417, "learning_rate": 1.9153139562855946e-05, "loss": 0.549, "step": 5779 }, { "epoch": 0.15806169328374536, "grad_norm": 1.731022834777832, "learning_rate": 1.9152782817522463e-05, "loss": 0.5685, "step": 5780 }, { "epoch": 0.15808903959746226, "grad_norm": 1.2404756546020508, "learning_rate": 1.915242600038798e-05, "loss": 0.5442, "step": 5781 }, { "epoch": 0.15811638591117919, "grad_norm": 1.4996414184570312, "learning_rate": 1.9152069111455286e-05, "loss": 0.5711, "step": 5782 }, { "epoch": 0.15814373222489608, "grad_norm": 1.5094102621078491, "learning_rate": 1.9151712150727183e-05, "loss": 0.5205, "step": 5783 }, { "epoch": 0.158171078538613, "grad_norm": 1.6976183652877808, "learning_rate": 1.9151355118206478e-05, "loss": 0.5631, "step": 5784 }, { "epoch": 0.1581984248523299, "grad_norm": 1.7031632661819458, "learning_rate": 1.9150998013895966e-05, "loss": 0.5418, "step": 5785 }, { "epoch": 0.15822577116604683, "grad_norm": 1.4733871221542358, "learning_rate": 1.915064083779845e-05, "loss": 0.6055, "step": 5786 }, { "epoch": 0.15825311747976373, "grad_norm": 2.7581119537353516, "learning_rate": 1.9150283589916732e-05, "loss": 0.5682, "step": 5787 }, { "epoch": 0.15828046379348065, "grad_norm": 1.4172419309616089, "learning_rate": 1.9149926270253613e-05, "loss": 0.5906, "step": 5788 }, { "epoch": 0.15830781010719755, "grad_norm": 1.7426307201385498, "learning_rate": 1.91495688788119e-05, "loss": 0.5971, "step": 5789 }, { "epoch": 0.15833515642091445, "grad_norm": 1.3836778402328491, "learning_rate": 1.914921141559439e-05, "loss": 0.5659, "step": 5790 }, { "epoch": 0.15836250273463137, "grad_norm": 1.874449372291565, "learning_rate": 1.9148853880603896e-05, "loss": 0.6485, "step": 5791 }, { "epoch": 0.15838984904834827, "grad_norm": 2.0650312900543213, "learning_rate": 1.9148496273843215e-05, "loss": 0.5763, "step": 5792 }, { "epoch": 0.1584171953620652, "grad_norm": 1.742451786994934, "learning_rate": 1.9148138595315156e-05, "loss": 0.5385, "step": 5793 }, { "epoch": 0.1584445416757821, "grad_norm": 1.3505213260650635, "learning_rate": 1.9147780845022527e-05, "loss": 0.5698, "step": 5794 }, { "epoch": 0.15847188798949902, "grad_norm": 1.7043043375015259, "learning_rate": 1.9147423022968127e-05, "loss": 0.5887, "step": 5795 }, { "epoch": 0.1584992343032159, "grad_norm": 1.5682021379470825, "learning_rate": 1.914706512915477e-05, "loss": 0.5536, "step": 5796 }, { "epoch": 0.15852658061693284, "grad_norm": 2.0443828105926514, "learning_rate": 1.914670716358526e-05, "loss": 0.5655, "step": 5797 }, { "epoch": 0.15855392693064974, "grad_norm": 1.6903347969055176, "learning_rate": 1.9146349126262408e-05, "loss": 0.5707, "step": 5798 }, { "epoch": 0.15858127324436666, "grad_norm": 1.872300386428833, "learning_rate": 1.914599101718902e-05, "loss": 0.4269, "step": 5799 }, { "epoch": 0.15860861955808356, "grad_norm": 1.327730417251587, "learning_rate": 1.9145632836367907e-05, "loss": 0.597, "step": 5800 }, { "epoch": 0.15863596587180048, "grad_norm": 1.5458743572235107, "learning_rate": 1.9145274583801877e-05, "loss": 0.5742, "step": 5801 }, { "epoch": 0.15866331218551738, "grad_norm": 1.4496265649795532, "learning_rate": 1.9144916259493745e-05, "loss": 0.5289, "step": 5802 }, { "epoch": 0.1586906584992343, "grad_norm": 1.4354214668273926, "learning_rate": 1.9144557863446317e-05, "loss": 0.5683, "step": 5803 }, { "epoch": 0.1587180048129512, "grad_norm": 6.4729437828063965, "learning_rate": 1.9144199395662404e-05, "loss": 0.6784, "step": 5804 }, { "epoch": 0.15874535112666813, "grad_norm": 1.449876308441162, "learning_rate": 1.914384085614482e-05, "loss": 0.5522, "step": 5805 }, { "epoch": 0.15877269744038502, "grad_norm": 1.3817435503005981, "learning_rate": 1.914348224489638e-05, "loss": 0.5653, "step": 5806 }, { "epoch": 0.15880004375410195, "grad_norm": 1.468421220779419, "learning_rate": 1.9143123561919895e-05, "loss": 0.5502, "step": 5807 }, { "epoch": 0.15882739006781885, "grad_norm": 1.8986326456069946, "learning_rate": 1.9142764807218178e-05, "loss": 0.9157, "step": 5808 }, { "epoch": 0.15885473638153577, "grad_norm": 1.493749737739563, "learning_rate": 1.9142405980794044e-05, "loss": 0.5922, "step": 5809 }, { "epoch": 0.15888208269525267, "grad_norm": 1.5388332605361938, "learning_rate": 1.9142047082650306e-05, "loss": 0.5827, "step": 5810 }, { "epoch": 0.1589094290089696, "grad_norm": 1.61527419090271, "learning_rate": 1.9141688112789786e-05, "loss": 0.5539, "step": 5811 }, { "epoch": 0.1589367753226865, "grad_norm": 2.5269625186920166, "learning_rate": 1.914132907121529e-05, "loss": 0.5728, "step": 5812 }, { "epoch": 0.15896412163640342, "grad_norm": 1.9082632064819336, "learning_rate": 1.9140969957929645e-05, "loss": 0.4347, "step": 5813 }, { "epoch": 0.1589914679501203, "grad_norm": 1.4157823324203491, "learning_rate": 1.9140610772935662e-05, "loss": 0.5658, "step": 5814 }, { "epoch": 0.15901881426383724, "grad_norm": 1.542240858078003, "learning_rate": 1.9140251516236156e-05, "loss": 0.5618, "step": 5815 }, { "epoch": 0.15904616057755414, "grad_norm": 1.427032709121704, "learning_rate": 1.9139892187833955e-05, "loss": 0.577, "step": 5816 }, { "epoch": 0.15907350689127106, "grad_norm": 2.9883224964141846, "learning_rate": 1.913953278773187e-05, "loss": 0.4341, "step": 5817 }, { "epoch": 0.15910085320498796, "grad_norm": 1.332640290260315, "learning_rate": 1.913917331593272e-05, "loss": 0.5722, "step": 5818 }, { "epoch": 0.15912819951870488, "grad_norm": 1.661673903465271, "learning_rate": 1.9138813772439332e-05, "loss": 0.5436, "step": 5819 }, { "epoch": 0.15915554583242178, "grad_norm": 1.6803784370422363, "learning_rate": 1.9138454157254517e-05, "loss": 0.5264, "step": 5820 }, { "epoch": 0.1591828921461387, "grad_norm": 2.386070966720581, "learning_rate": 1.91380944703811e-05, "loss": 0.5924, "step": 5821 }, { "epoch": 0.1592102384598556, "grad_norm": 1.562057375907898, "learning_rate": 1.9137734711821906e-05, "loss": 0.4197, "step": 5822 }, { "epoch": 0.15923758477357253, "grad_norm": 1.43235445022583, "learning_rate": 1.9137374881579758e-05, "loss": 0.5581, "step": 5823 }, { "epoch": 0.15926493108728942, "grad_norm": 1.7579199075698853, "learning_rate": 1.913701497965747e-05, "loss": 0.4116, "step": 5824 }, { "epoch": 0.15929227740100635, "grad_norm": 1.8347464799880981, "learning_rate": 1.9136655006057874e-05, "loss": 0.9759, "step": 5825 }, { "epoch": 0.15931962371472325, "grad_norm": 1.7323137521743774, "learning_rate": 1.913629496078379e-05, "loss": 0.5927, "step": 5826 }, { "epoch": 0.15934697002844017, "grad_norm": 1.846290111541748, "learning_rate": 1.9135934843838045e-05, "loss": 0.5742, "step": 5827 }, { "epoch": 0.15937431634215707, "grad_norm": 1.5634983777999878, "learning_rate": 1.913557465522346e-05, "loss": 0.4458, "step": 5828 }, { "epoch": 0.159401662655874, "grad_norm": 1.7053509950637817, "learning_rate": 1.9135214394942862e-05, "loss": 0.5715, "step": 5829 }, { "epoch": 0.1594290089695909, "grad_norm": 1.702885627746582, "learning_rate": 1.913485406299908e-05, "loss": 0.6235, "step": 5830 }, { "epoch": 0.15945635528330782, "grad_norm": 1.7769356966018677, "learning_rate": 1.9134493659394938e-05, "loss": 0.6039, "step": 5831 }, { "epoch": 0.1594837015970247, "grad_norm": 1.9139186143875122, "learning_rate": 1.9134133184133264e-05, "loss": 0.5617, "step": 5832 }, { "epoch": 0.15951104791074164, "grad_norm": 1.495072364807129, "learning_rate": 1.9133772637216882e-05, "loss": 0.5346, "step": 5833 }, { "epoch": 0.15953839422445854, "grad_norm": 2.0151898860931396, "learning_rate": 1.913341201864863e-05, "loss": 0.926, "step": 5834 }, { "epoch": 0.15956574053817546, "grad_norm": 1.4314031600952148, "learning_rate": 1.913305132843133e-05, "loss": 0.5817, "step": 5835 }, { "epoch": 0.15959308685189236, "grad_norm": 1.2578392028808594, "learning_rate": 1.913269056656781e-05, "loss": 0.5773, "step": 5836 }, { "epoch": 0.15962043316560928, "grad_norm": 1.403548240661621, "learning_rate": 1.9132329733060902e-05, "loss": 0.5935, "step": 5837 }, { "epoch": 0.15964777947932618, "grad_norm": 1.315849781036377, "learning_rate": 1.9131968827913443e-05, "loss": 0.5822, "step": 5838 }, { "epoch": 0.1596751257930431, "grad_norm": 1.3604671955108643, "learning_rate": 1.9131607851128254e-05, "loss": 0.5794, "step": 5839 }, { "epoch": 0.15970247210676, "grad_norm": 1.5044904947280884, "learning_rate": 1.913124680270817e-05, "loss": 0.5517, "step": 5840 }, { "epoch": 0.15972981842047693, "grad_norm": 1.3326467275619507, "learning_rate": 1.913088568265603e-05, "loss": 0.6003, "step": 5841 }, { "epoch": 0.15975716473419382, "grad_norm": 1.4001133441925049, "learning_rate": 1.9130524490974657e-05, "loss": 0.9574, "step": 5842 }, { "epoch": 0.15978451104791075, "grad_norm": 1.6198713779449463, "learning_rate": 1.913016322766689e-05, "loss": 0.4054, "step": 5843 }, { "epoch": 0.15981185736162765, "grad_norm": 10.161724090576172, "learning_rate": 1.9129801892735562e-05, "loss": 0.4326, "step": 5844 }, { "epoch": 0.15983920367534457, "grad_norm": 1.3517106771469116, "learning_rate": 1.9129440486183508e-05, "loss": 0.9149, "step": 5845 }, { "epoch": 0.15986654998906147, "grad_norm": 1.5963572263717651, "learning_rate": 1.912907900801356e-05, "loss": 0.6119, "step": 5846 }, { "epoch": 0.1598938963027784, "grad_norm": 2.204911470413208, "learning_rate": 1.912871745822856e-05, "loss": 0.4331, "step": 5847 }, { "epoch": 0.1599212426164953, "grad_norm": 1.7601341009140015, "learning_rate": 1.9128355836831338e-05, "loss": 0.5729, "step": 5848 }, { "epoch": 0.15994858893021222, "grad_norm": 1.8244593143463135, "learning_rate": 1.9127994143824735e-05, "loss": 0.5775, "step": 5849 }, { "epoch": 0.1599759352439291, "grad_norm": 1.7031700611114502, "learning_rate": 1.9127632379211586e-05, "loss": 0.6132, "step": 5850 }, { "epoch": 0.16000328155764604, "grad_norm": 1.521064043045044, "learning_rate": 1.912727054299473e-05, "loss": 0.5357, "step": 5851 }, { "epoch": 0.16003062787136293, "grad_norm": 1.6325217485427856, "learning_rate": 1.9126908635177007e-05, "loss": 0.5594, "step": 5852 }, { "epoch": 0.16005797418507986, "grad_norm": 1.7430044412612915, "learning_rate": 1.912654665576125e-05, "loss": 0.9476, "step": 5853 }, { "epoch": 0.16008532049879676, "grad_norm": 1.2307089567184448, "learning_rate": 1.9126184604750306e-05, "loss": 0.5449, "step": 5854 }, { "epoch": 0.16011266681251368, "grad_norm": 1.9687373638153076, "learning_rate": 1.9125822482147013e-05, "loss": 0.5937, "step": 5855 }, { "epoch": 0.16014001312623058, "grad_norm": 1.9292676448822021, "learning_rate": 1.9125460287954206e-05, "loss": 0.5988, "step": 5856 }, { "epoch": 0.1601673594399475, "grad_norm": 1.990572452545166, "learning_rate": 1.9125098022174735e-05, "loss": 0.592, "step": 5857 }, { "epoch": 0.1601947057536644, "grad_norm": 1.5259263515472412, "learning_rate": 1.912473568481144e-05, "loss": 0.5638, "step": 5858 }, { "epoch": 0.16022205206738133, "grad_norm": 1.2828214168548584, "learning_rate": 1.9124373275867158e-05, "loss": 0.5773, "step": 5859 }, { "epoch": 0.16024939838109822, "grad_norm": 1.7201439142227173, "learning_rate": 1.9124010795344736e-05, "loss": 0.6595, "step": 5860 }, { "epoch": 0.16027674469481515, "grad_norm": 1.8059754371643066, "learning_rate": 1.9123648243247018e-05, "loss": 0.6148, "step": 5861 }, { "epoch": 0.16030409100853205, "grad_norm": 1.3908857107162476, "learning_rate": 1.9123285619576845e-05, "loss": 0.5528, "step": 5862 }, { "epoch": 0.16033143732224897, "grad_norm": 1.6384505033493042, "learning_rate": 1.9122922924337067e-05, "loss": 0.5003, "step": 5863 }, { "epoch": 0.16035878363596587, "grad_norm": 1.5335718393325806, "learning_rate": 1.9122560157530526e-05, "loss": 0.6117, "step": 5864 }, { "epoch": 0.1603861299496828, "grad_norm": 1.806977391242981, "learning_rate": 1.9122197319160066e-05, "loss": 0.5526, "step": 5865 }, { "epoch": 0.1604134762633997, "grad_norm": 2.2009198665618896, "learning_rate": 1.9121834409228536e-05, "loss": 0.573, "step": 5866 }, { "epoch": 0.16044082257711662, "grad_norm": 1.469893455505371, "learning_rate": 1.912147142773878e-05, "loss": 0.5064, "step": 5867 }, { "epoch": 0.1604681688908335, "grad_norm": 2.5140464305877686, "learning_rate": 1.9121108374693648e-05, "loss": 0.5738, "step": 5868 }, { "epoch": 0.16049551520455044, "grad_norm": 1.9851056337356567, "learning_rate": 1.9120745250095993e-05, "loss": 0.5998, "step": 5869 }, { "epoch": 0.16052286151826733, "grad_norm": 3.5260555744171143, "learning_rate": 1.912038205394865e-05, "loss": 0.4574, "step": 5870 }, { "epoch": 0.16055020783198426, "grad_norm": 2.2795305252075195, "learning_rate": 1.9120018786254483e-05, "loss": 0.5838, "step": 5871 }, { "epoch": 0.16057755414570116, "grad_norm": 1.4804822206497192, "learning_rate": 1.9119655447016335e-05, "loss": 0.5606, "step": 5872 }, { "epoch": 0.16060490045941808, "grad_norm": 1.6934726238250732, "learning_rate": 1.9119292036237055e-05, "loss": 0.5432, "step": 5873 }, { "epoch": 0.16063224677313498, "grad_norm": 1.6044574975967407, "learning_rate": 1.9118928553919494e-05, "loss": 0.583, "step": 5874 }, { "epoch": 0.1606595930868519, "grad_norm": 1.4211465120315552, "learning_rate": 1.9118565000066507e-05, "loss": 0.5829, "step": 5875 }, { "epoch": 0.1606869394005688, "grad_norm": 1.633518099784851, "learning_rate": 1.911820137468094e-05, "loss": 0.5098, "step": 5876 }, { "epoch": 0.16071428571428573, "grad_norm": 1.6909327507019043, "learning_rate": 1.9117837677765654e-05, "loss": 0.4781, "step": 5877 }, { "epoch": 0.16074163202800262, "grad_norm": 1.477824091911316, "learning_rate": 1.9117473909323493e-05, "loss": 0.5668, "step": 5878 }, { "epoch": 0.16076897834171955, "grad_norm": 1.443052053451538, "learning_rate": 1.911711006935732e-05, "loss": 0.5242, "step": 5879 }, { "epoch": 0.16079632465543645, "grad_norm": 1.7722641229629517, "learning_rate": 1.9116746157869982e-05, "loss": 0.5852, "step": 5880 }, { "epoch": 0.16082367096915337, "grad_norm": 1.6022887229919434, "learning_rate": 1.9116382174864334e-05, "loss": 0.578, "step": 5881 }, { "epoch": 0.16085101728287027, "grad_norm": 1.4117108583450317, "learning_rate": 1.9116018120343236e-05, "loss": 0.5914, "step": 5882 }, { "epoch": 0.1608783635965872, "grad_norm": 1.2905250787734985, "learning_rate": 1.911565399430954e-05, "loss": 0.5696, "step": 5883 }, { "epoch": 0.1609057099103041, "grad_norm": 1.4937732219696045, "learning_rate": 1.9115289796766102e-05, "loss": 0.9369, "step": 5884 }, { "epoch": 0.16093305622402101, "grad_norm": 3.75714373588562, "learning_rate": 1.9114925527715784e-05, "loss": 0.5524, "step": 5885 }, { "epoch": 0.1609604025377379, "grad_norm": 1.4424800872802734, "learning_rate": 1.911456118716144e-05, "loss": 0.5383, "step": 5886 }, { "epoch": 0.16098774885145484, "grad_norm": 1.321887493133545, "learning_rate": 1.9114196775105928e-05, "loss": 0.5623, "step": 5887 }, { "epoch": 0.16101509516517173, "grad_norm": 1.4673031568527222, "learning_rate": 1.911383229155211e-05, "loss": 0.5829, "step": 5888 }, { "epoch": 0.16104244147888866, "grad_norm": 1.4682812690734863, "learning_rate": 1.9113467736502838e-05, "loss": 0.58, "step": 5889 }, { "epoch": 0.16106978779260556, "grad_norm": 1.5210522413253784, "learning_rate": 1.9113103109960977e-05, "loss": 0.5847, "step": 5890 }, { "epoch": 0.16109713410632248, "grad_norm": 1.7313460111618042, "learning_rate": 1.911273841192939e-05, "loss": 0.5406, "step": 5891 }, { "epoch": 0.16112448042003938, "grad_norm": 1.4973262548446655, "learning_rate": 1.9112373642410932e-05, "loss": 0.5724, "step": 5892 }, { "epoch": 0.16115182673375628, "grad_norm": 1.4794939756393433, "learning_rate": 1.9112008801408467e-05, "loss": 0.5662, "step": 5893 }, { "epoch": 0.1611791730474732, "grad_norm": 1.6825395822525024, "learning_rate": 1.911164388892486e-05, "loss": 0.5969, "step": 5894 }, { "epoch": 0.1612065193611901, "grad_norm": 1.7226237058639526, "learning_rate": 1.9111278904962968e-05, "loss": 0.5601, "step": 5895 }, { "epoch": 0.16123386567490702, "grad_norm": 1.513088345527649, "learning_rate": 1.911091384952566e-05, "loss": 0.5672, "step": 5896 }, { "epoch": 0.16126121198862392, "grad_norm": 1.6130213737487793, "learning_rate": 1.9110548722615792e-05, "loss": 0.586, "step": 5897 }, { "epoch": 0.16128855830234085, "grad_norm": 1.7178916931152344, "learning_rate": 1.9110183524236235e-05, "loss": 0.6739, "step": 5898 }, { "epoch": 0.16131590461605774, "grad_norm": 1.191756010055542, "learning_rate": 1.9109818254389856e-05, "loss": 0.5547, "step": 5899 }, { "epoch": 0.16134325092977467, "grad_norm": 1.4050014019012451, "learning_rate": 1.9109452913079514e-05, "loss": 0.5759, "step": 5900 }, { "epoch": 0.16137059724349156, "grad_norm": 1.3319107294082642, "learning_rate": 1.9109087500308078e-05, "loss": 0.5791, "step": 5901 }, { "epoch": 0.1613979435572085, "grad_norm": 1.9396660327911377, "learning_rate": 1.9108722016078412e-05, "loss": 0.6159, "step": 5902 }, { "epoch": 0.1614252898709254, "grad_norm": 1.2952054738998413, "learning_rate": 1.9108356460393386e-05, "loss": 0.5622, "step": 5903 }, { "epoch": 0.1614526361846423, "grad_norm": 1.3083281517028809, "learning_rate": 1.9107990833255868e-05, "loss": 0.5841, "step": 5904 }, { "epoch": 0.1614799824983592, "grad_norm": 1.4483917951583862, "learning_rate": 1.9107625134668723e-05, "loss": 0.5571, "step": 5905 }, { "epoch": 0.16150732881207613, "grad_norm": 1.7860476970672607, "learning_rate": 1.9107259364634826e-05, "loss": 0.4618, "step": 5906 }, { "epoch": 0.16153467512579303, "grad_norm": 1.434381127357483, "learning_rate": 1.910689352315704e-05, "loss": 0.5914, "step": 5907 }, { "epoch": 0.16156202143950996, "grad_norm": 1.9929224252700806, "learning_rate": 1.9106527610238237e-05, "loss": 0.9096, "step": 5908 }, { "epoch": 0.16158936775322685, "grad_norm": 2.1066055297851562, "learning_rate": 1.9106161625881288e-05, "loss": 0.5889, "step": 5909 }, { "epoch": 0.16161671406694378, "grad_norm": 1.779572606086731, "learning_rate": 1.9105795570089063e-05, "loss": 0.5704, "step": 5910 }, { "epoch": 0.16164406038066068, "grad_norm": 1.7021349668502808, "learning_rate": 1.9105429442864435e-05, "loss": 0.5403, "step": 5911 }, { "epoch": 0.1616714066943776, "grad_norm": 1.5556271076202393, "learning_rate": 1.9105063244210275e-05, "loss": 0.6124, "step": 5912 }, { "epoch": 0.1616987530080945, "grad_norm": 1.7249205112457275, "learning_rate": 1.9104696974129458e-05, "loss": 0.5451, "step": 5913 }, { "epoch": 0.16172609932181142, "grad_norm": 1.6438672542572021, "learning_rate": 1.910433063262485e-05, "loss": 0.5634, "step": 5914 }, { "epoch": 0.16175344563552832, "grad_norm": 1.2588365077972412, "learning_rate": 1.9103964219699337e-05, "loss": 0.5276, "step": 5915 }, { "epoch": 0.16178079194924525, "grad_norm": 1.8671029806137085, "learning_rate": 1.9103597735355786e-05, "loss": 0.5993, "step": 5916 }, { "epoch": 0.16180813826296214, "grad_norm": 1.3877201080322266, "learning_rate": 1.9103231179597072e-05, "loss": 0.573, "step": 5917 }, { "epoch": 0.16183548457667907, "grad_norm": 1.9864274263381958, "learning_rate": 1.9102864552426067e-05, "loss": 0.9383, "step": 5918 }, { "epoch": 0.16186283089039596, "grad_norm": 1.3458489179611206, "learning_rate": 1.9102497853845654e-05, "loss": 0.4007, "step": 5919 }, { "epoch": 0.1618901772041129, "grad_norm": 1.8560184240341187, "learning_rate": 1.9102131083858707e-05, "loss": 0.5668, "step": 5920 }, { "epoch": 0.1619175235178298, "grad_norm": 1.6226067543029785, "learning_rate": 1.9101764242468106e-05, "loss": 0.6197, "step": 5921 }, { "epoch": 0.1619448698315467, "grad_norm": 1.6449170112609863, "learning_rate": 1.9101397329676724e-05, "loss": 0.5942, "step": 5922 }, { "epoch": 0.1619722161452636, "grad_norm": 1.3739486932754517, "learning_rate": 1.910103034548744e-05, "loss": 0.578, "step": 5923 }, { "epoch": 0.16199956245898053, "grad_norm": 2.149041175842285, "learning_rate": 1.9100663289903135e-05, "loss": 0.5493, "step": 5924 }, { "epoch": 0.16202690877269743, "grad_norm": 1.64407479763031, "learning_rate": 1.910029616292669e-05, "loss": 0.5728, "step": 5925 }, { "epoch": 0.16205425508641436, "grad_norm": 1.9790648221969604, "learning_rate": 1.9099928964560978e-05, "loss": 0.5572, "step": 5926 }, { "epoch": 0.16208160140013125, "grad_norm": 1.4697716236114502, "learning_rate": 1.9099561694808888e-05, "loss": 0.667, "step": 5927 }, { "epoch": 0.16210894771384818, "grad_norm": 1.6202853918075562, "learning_rate": 1.90991943536733e-05, "loss": 0.56, "step": 5928 }, { "epoch": 0.16213629402756508, "grad_norm": 1.6233277320861816, "learning_rate": 1.9098826941157087e-05, "loss": 0.5709, "step": 5929 }, { "epoch": 0.162163640341282, "grad_norm": 1.6729228496551514, "learning_rate": 1.909845945726314e-05, "loss": 0.5205, "step": 5930 }, { "epoch": 0.1621909866549989, "grad_norm": 1.3368992805480957, "learning_rate": 1.9098091901994338e-05, "loss": 0.5853, "step": 5931 }, { "epoch": 0.16221833296871582, "grad_norm": 1.6139073371887207, "learning_rate": 1.9097724275353563e-05, "loss": 0.6261, "step": 5932 }, { "epoch": 0.16224567928243272, "grad_norm": 1.6695085763931274, "learning_rate": 1.9097356577343705e-05, "loss": 0.5546, "step": 5933 }, { "epoch": 0.16227302559614964, "grad_norm": 1.7174208164215088, "learning_rate": 1.9096988807967646e-05, "loss": 0.6101, "step": 5934 }, { "epoch": 0.16230037190986654, "grad_norm": 1.5532305240631104, "learning_rate": 1.909662096722827e-05, "loss": 0.5809, "step": 5935 }, { "epoch": 0.16232771822358347, "grad_norm": 1.5599278211593628, "learning_rate": 1.909625305512846e-05, "loss": 0.4816, "step": 5936 }, { "epoch": 0.16235506453730036, "grad_norm": 1.6557341814041138, "learning_rate": 1.9095885071671107e-05, "loss": 0.5623, "step": 5937 }, { "epoch": 0.1623824108510173, "grad_norm": 1.2658238410949707, "learning_rate": 1.909551701685909e-05, "loss": 0.5747, "step": 5938 }, { "epoch": 0.1624097571647342, "grad_norm": 1.459391474723816, "learning_rate": 1.9095148890695308e-05, "loss": 0.5541, "step": 5939 }, { "epoch": 0.1624371034784511, "grad_norm": 1.75478196144104, "learning_rate": 1.9094780693182637e-05, "loss": 0.6088, "step": 5940 }, { "epoch": 0.162464449792168, "grad_norm": 1.2400273084640503, "learning_rate": 1.9094412424323975e-05, "loss": 0.5526, "step": 5941 }, { "epoch": 0.16249179610588493, "grad_norm": 1.629898190498352, "learning_rate": 1.9094044084122204e-05, "loss": 0.581, "step": 5942 }, { "epoch": 0.16251914241960183, "grad_norm": 1.4919790029525757, "learning_rate": 1.909367567258022e-05, "loss": 0.5528, "step": 5943 }, { "epoch": 0.16254648873331876, "grad_norm": 1.747833490371704, "learning_rate": 1.909330718970091e-05, "loss": 0.5161, "step": 5944 }, { "epoch": 0.16257383504703565, "grad_norm": 1.4839898347854614, "learning_rate": 1.9092938635487162e-05, "loss": 0.55, "step": 5945 }, { "epoch": 0.16260118136075258, "grad_norm": 1.6623209714889526, "learning_rate": 1.909257000994187e-05, "loss": 0.5619, "step": 5946 }, { "epoch": 0.16262852767446948, "grad_norm": 1.4757063388824463, "learning_rate": 1.909220131306792e-05, "loss": 0.5954, "step": 5947 }, { "epoch": 0.1626558739881864, "grad_norm": 1.7919610738754272, "learning_rate": 1.9091832544868217e-05, "loss": 0.5844, "step": 5948 }, { "epoch": 0.1626832203019033, "grad_norm": 1.490363597869873, "learning_rate": 1.9091463705345644e-05, "loss": 0.5648, "step": 5949 }, { "epoch": 0.16271056661562022, "grad_norm": 1.4852337837219238, "learning_rate": 1.9091094794503096e-05, "loss": 0.5772, "step": 5950 }, { "epoch": 0.16273791292933712, "grad_norm": 1.3336135149002075, "learning_rate": 1.9090725812343467e-05, "loss": 0.5297, "step": 5951 }, { "epoch": 0.16276525924305404, "grad_norm": 1.6766141653060913, "learning_rate": 1.9090356758869654e-05, "loss": 0.4594, "step": 5952 }, { "epoch": 0.16279260555677094, "grad_norm": 2.6204893589019775, "learning_rate": 1.9089987634084547e-05, "loss": 0.9496, "step": 5953 }, { "epoch": 0.16281995187048787, "grad_norm": 1.3601505756378174, "learning_rate": 1.908961843799105e-05, "loss": 0.5793, "step": 5954 }, { "epoch": 0.16284729818420476, "grad_norm": 1.8198190927505493, "learning_rate": 1.9089249170592054e-05, "loss": 0.6078, "step": 5955 }, { "epoch": 0.1628746444979217, "grad_norm": 1.5182653665542603, "learning_rate": 1.9088879831890453e-05, "loss": 0.5798, "step": 5956 }, { "epoch": 0.1629019908116386, "grad_norm": 1.4886289834976196, "learning_rate": 1.908851042188915e-05, "loss": 0.5647, "step": 5957 }, { "epoch": 0.1629293371253555, "grad_norm": 1.4125040769577026, "learning_rate": 1.9088140940591037e-05, "loss": 0.5323, "step": 5958 }, { "epoch": 0.1629566834390724, "grad_norm": 1.455556035041809, "learning_rate": 1.908777138799902e-05, "loss": 0.5424, "step": 5959 }, { "epoch": 0.16298402975278933, "grad_norm": 1.4054362773895264, "learning_rate": 1.9087401764115993e-05, "loss": 0.5804, "step": 5960 }, { "epoch": 0.16301137606650623, "grad_norm": 1.922641396522522, "learning_rate": 1.9087032068944856e-05, "loss": 0.5952, "step": 5961 }, { "epoch": 0.16303872238022316, "grad_norm": 1.4280818700790405, "learning_rate": 1.908666230248851e-05, "loss": 0.5859, "step": 5962 }, { "epoch": 0.16306606869394005, "grad_norm": 3.1162965297698975, "learning_rate": 1.9086292464749857e-05, "loss": 0.9611, "step": 5963 }, { "epoch": 0.16309341500765698, "grad_norm": 1.5108541250228882, "learning_rate": 1.9085922555731794e-05, "loss": 0.5825, "step": 5964 }, { "epoch": 0.16312076132137387, "grad_norm": 1.6159263849258423, "learning_rate": 1.9085552575437228e-05, "loss": 0.5441, "step": 5965 }, { "epoch": 0.1631481076350908, "grad_norm": 1.2063313722610474, "learning_rate": 1.9085182523869057e-05, "loss": 0.5482, "step": 5966 }, { "epoch": 0.1631754539488077, "grad_norm": 1.3294498920440674, "learning_rate": 1.9084812401030185e-05, "loss": 0.5339, "step": 5967 }, { "epoch": 0.16320280026252462, "grad_norm": 1.1699198484420776, "learning_rate": 1.908444220692352e-05, "loss": 0.5691, "step": 5968 }, { "epoch": 0.16323014657624152, "grad_norm": 1.4496681690216064, "learning_rate": 1.9084071941551958e-05, "loss": 0.5881, "step": 5969 }, { "epoch": 0.16325749288995844, "grad_norm": 1.4212297201156616, "learning_rate": 1.908370160491841e-05, "loss": 0.5758, "step": 5970 }, { "epoch": 0.16328483920367534, "grad_norm": 1.9804880619049072, "learning_rate": 1.908333119702578e-05, "loss": 0.5166, "step": 5971 }, { "epoch": 0.16331218551739227, "grad_norm": 1.7440909147262573, "learning_rate": 1.908296071787697e-05, "loss": 0.4596, "step": 5972 }, { "epoch": 0.16333953183110916, "grad_norm": 1.4725481271743774, "learning_rate": 1.9082590167474895e-05, "loss": 0.5759, "step": 5973 }, { "epoch": 0.1633668781448261, "grad_norm": 1.5257575511932373, "learning_rate": 1.908221954582245e-05, "loss": 0.5661, "step": 5974 }, { "epoch": 0.16339422445854299, "grad_norm": 1.6753621101379395, "learning_rate": 1.9081848852922556e-05, "loss": 0.5658, "step": 5975 }, { "epoch": 0.1634215707722599, "grad_norm": 1.2661535739898682, "learning_rate": 1.9081478088778106e-05, "loss": 0.5979, "step": 5976 }, { "epoch": 0.1634489170859768, "grad_norm": 1.4703426361083984, "learning_rate": 1.9081107253392022e-05, "loss": 0.4414, "step": 5977 }, { "epoch": 0.16347626339969373, "grad_norm": 1.3017597198486328, "learning_rate": 1.9080736346767205e-05, "loss": 0.5908, "step": 5978 }, { "epoch": 0.16350360971341063, "grad_norm": 1.574306845664978, "learning_rate": 1.9080365368906568e-05, "loss": 0.5614, "step": 5979 }, { "epoch": 0.16353095602712756, "grad_norm": 1.4585732221603394, "learning_rate": 1.9079994319813017e-05, "loss": 0.5782, "step": 5980 }, { "epoch": 0.16355830234084445, "grad_norm": 1.477374792098999, "learning_rate": 1.907962319948947e-05, "loss": 0.5395, "step": 5981 }, { "epoch": 0.16358564865456138, "grad_norm": 1.7722983360290527, "learning_rate": 1.9079252007938827e-05, "loss": 0.5595, "step": 5982 }, { "epoch": 0.16361299496827827, "grad_norm": 1.26539146900177, "learning_rate": 1.9078880745164015e-05, "loss": 0.411, "step": 5983 }, { "epoch": 0.1636403412819952, "grad_norm": 1.956960678100586, "learning_rate": 1.9078509411167934e-05, "loss": 0.5812, "step": 5984 }, { "epoch": 0.1636676875957121, "grad_norm": 1.4716401100158691, "learning_rate": 1.9078138005953503e-05, "loss": 0.5919, "step": 5985 }, { "epoch": 0.16369503390942902, "grad_norm": 1.575512409210205, "learning_rate": 1.9077766529523635e-05, "loss": 0.6109, "step": 5986 }, { "epoch": 0.16372238022314592, "grad_norm": 1.4095252752304077, "learning_rate": 1.9077394981881245e-05, "loss": 0.5893, "step": 5987 }, { "epoch": 0.16374972653686284, "grad_norm": 1.3005425930023193, "learning_rate": 1.9077023363029244e-05, "loss": 0.5485, "step": 5988 }, { "epoch": 0.16377707285057974, "grad_norm": 2.1835479736328125, "learning_rate": 1.907665167297055e-05, "loss": 0.462, "step": 5989 }, { "epoch": 0.16380441916429667, "grad_norm": 1.5841833353042603, "learning_rate": 1.9076279911708075e-05, "loss": 0.5648, "step": 5990 }, { "epoch": 0.16383176547801356, "grad_norm": 2.4592204093933105, "learning_rate": 1.907590807924474e-05, "loss": 0.605, "step": 5991 }, { "epoch": 0.1638591117917305, "grad_norm": 3.5790419578552246, "learning_rate": 1.907553617558346e-05, "loss": 0.5577, "step": 5992 }, { "epoch": 0.16388645810544739, "grad_norm": 1.3889654874801636, "learning_rate": 1.9075164200727152e-05, "loss": 0.5855, "step": 5993 }, { "epoch": 0.16391380441916428, "grad_norm": 1.3940953016281128, "learning_rate": 1.9074792154678737e-05, "loss": 0.5594, "step": 5994 }, { "epoch": 0.1639411507328812, "grad_norm": 1.8820431232452393, "learning_rate": 1.907442003744113e-05, "loss": 0.562, "step": 5995 }, { "epoch": 0.1639684970465981, "grad_norm": 1.4122668504714966, "learning_rate": 1.9074047849017253e-05, "loss": 0.5743, "step": 5996 }, { "epoch": 0.16399584336031503, "grad_norm": 1.583655834197998, "learning_rate": 1.9073675589410023e-05, "loss": 0.5674, "step": 5997 }, { "epoch": 0.16402318967403193, "grad_norm": 1.404109239578247, "learning_rate": 1.907330325862236e-05, "loss": 0.5723, "step": 5998 }, { "epoch": 0.16405053598774885, "grad_norm": 1.4042441844940186, "learning_rate": 1.907293085665719e-05, "loss": 0.5528, "step": 5999 }, { "epoch": 0.16407788230146575, "grad_norm": 1.3616712093353271, "learning_rate": 1.907255838351743e-05, "loss": 0.3984, "step": 6000 }, { "epoch": 0.16410522861518267, "grad_norm": 1.331070065498352, "learning_rate": 1.9072185839206003e-05, "loss": 0.6063, "step": 6001 }, { "epoch": 0.16413257492889957, "grad_norm": 1.3965795040130615, "learning_rate": 1.9071813223725828e-05, "loss": 0.5959, "step": 6002 }, { "epoch": 0.1641599212426165, "grad_norm": 1.9251363277435303, "learning_rate": 1.9071440537079832e-05, "loss": 0.9503, "step": 6003 }, { "epoch": 0.1641872675563334, "grad_norm": 1.3410736322402954, "learning_rate": 1.9071067779270942e-05, "loss": 0.5067, "step": 6004 }, { "epoch": 0.16421461387005032, "grad_norm": 1.6448924541473389, "learning_rate": 1.9070694950302076e-05, "loss": 0.4012, "step": 6005 }, { "epoch": 0.16424196018376722, "grad_norm": 1.4135973453521729, "learning_rate": 1.9070322050176158e-05, "loss": 0.5527, "step": 6006 }, { "epoch": 0.16426930649748414, "grad_norm": 1.1668424606323242, "learning_rate": 1.9069949078896117e-05, "loss": 0.3694, "step": 6007 }, { "epoch": 0.16429665281120104, "grad_norm": 1.662306547164917, "learning_rate": 1.9069576036464882e-05, "loss": 0.5615, "step": 6008 }, { "epoch": 0.16432399912491796, "grad_norm": 1.933127999305725, "learning_rate": 1.906920292288537e-05, "loss": 0.5563, "step": 6009 }, { "epoch": 0.16435134543863486, "grad_norm": 1.4449806213378906, "learning_rate": 1.9068829738160518e-05, "loss": 0.5734, "step": 6010 }, { "epoch": 0.16437869175235179, "grad_norm": 1.159064769744873, "learning_rate": 1.9068456482293247e-05, "loss": 0.5654, "step": 6011 }, { "epoch": 0.16440603806606868, "grad_norm": 1.6751267910003662, "learning_rate": 1.9068083155286484e-05, "loss": 0.451, "step": 6012 }, { "epoch": 0.1644333843797856, "grad_norm": 1.1809359788894653, "learning_rate": 1.9067709757143163e-05, "loss": 0.5314, "step": 6013 }, { "epoch": 0.1644607306935025, "grad_norm": 1.5227651596069336, "learning_rate": 1.9067336287866215e-05, "loss": 0.5773, "step": 6014 }, { "epoch": 0.16448807700721943, "grad_norm": 1.7311632633209229, "learning_rate": 1.906696274745856e-05, "loss": 0.5665, "step": 6015 }, { "epoch": 0.16451542332093633, "grad_norm": 1.6487936973571777, "learning_rate": 1.9066589135923138e-05, "loss": 0.5737, "step": 6016 }, { "epoch": 0.16454276963465325, "grad_norm": 2.0278046131134033, "learning_rate": 1.9066215453262873e-05, "loss": 0.5677, "step": 6017 }, { "epoch": 0.16457011594837015, "grad_norm": 1.1929653882980347, "learning_rate": 1.90658416994807e-05, "loss": 0.5522, "step": 6018 }, { "epoch": 0.16459746226208707, "grad_norm": 15.086193084716797, "learning_rate": 1.9065467874579552e-05, "loss": 0.5494, "step": 6019 }, { "epoch": 0.16462480857580397, "grad_norm": 1.3976272344589233, "learning_rate": 1.9065093978562353e-05, "loss": 0.5393, "step": 6020 }, { "epoch": 0.1646521548895209, "grad_norm": 1.3803311586380005, "learning_rate": 1.906472001143205e-05, "loss": 0.5975, "step": 6021 }, { "epoch": 0.1646795012032378, "grad_norm": 1.3419415950775146, "learning_rate": 1.9064345973191565e-05, "loss": 0.5906, "step": 6022 }, { "epoch": 0.16470684751695472, "grad_norm": 1.1682783365249634, "learning_rate": 1.906397186384384e-05, "loss": 0.382, "step": 6023 }, { "epoch": 0.16473419383067162, "grad_norm": 1.751073956489563, "learning_rate": 1.9063597683391806e-05, "loss": 0.5885, "step": 6024 }, { "epoch": 0.16476154014438854, "grad_norm": 1.7998114824295044, "learning_rate": 1.90632234318384e-05, "loss": 0.583, "step": 6025 }, { "epoch": 0.16478888645810544, "grad_norm": 1.375634789466858, "learning_rate": 1.9062849109186556e-05, "loss": 0.5017, "step": 6026 }, { "epoch": 0.16481623277182236, "grad_norm": 1.345881700515747, "learning_rate": 1.906247471543921e-05, "loss": 0.5918, "step": 6027 }, { "epoch": 0.16484357908553926, "grad_norm": 2.429323196411133, "learning_rate": 1.90621002505993e-05, "loss": 0.5868, "step": 6028 }, { "epoch": 0.16487092539925619, "grad_norm": 1.654751181602478, "learning_rate": 1.9061725714669764e-05, "loss": 0.9123, "step": 6029 }, { "epoch": 0.16489827171297308, "grad_norm": 1.7129228115081787, "learning_rate": 1.906135110765354e-05, "loss": 0.5559, "step": 6030 }, { "epoch": 0.16492561802669, "grad_norm": 1.3718233108520508, "learning_rate": 1.906097642955357e-05, "loss": 0.9269, "step": 6031 }, { "epoch": 0.1649529643404069, "grad_norm": 1.4245206117630005, "learning_rate": 1.9060601680372785e-05, "loss": 0.5779, "step": 6032 }, { "epoch": 0.16498031065412383, "grad_norm": 1.3719795942306519, "learning_rate": 1.9060226860114133e-05, "loss": 0.8995, "step": 6033 }, { "epoch": 0.16500765696784073, "grad_norm": 1.4060263633728027, "learning_rate": 1.9059851968780546e-05, "loss": 0.5773, "step": 6034 }, { "epoch": 0.16503500328155765, "grad_norm": 1.3514962196350098, "learning_rate": 1.9059477006374976e-05, "loss": 0.8995, "step": 6035 }, { "epoch": 0.16506234959527455, "grad_norm": 1.332848072052002, "learning_rate": 1.9059101972900357e-05, "loss": 0.8809, "step": 6036 }, { "epoch": 0.16508969590899147, "grad_norm": 1.4075953960418701, "learning_rate": 1.9058726868359633e-05, "loss": 0.574, "step": 6037 }, { "epoch": 0.16511704222270837, "grad_norm": 1.3667340278625488, "learning_rate": 1.9058351692755745e-05, "loss": 0.5766, "step": 6038 }, { "epoch": 0.1651443885364253, "grad_norm": 1.2772613763809204, "learning_rate": 1.9057976446091637e-05, "loss": 0.5907, "step": 6039 }, { "epoch": 0.1651717348501422, "grad_norm": 1.1593668460845947, "learning_rate": 1.905760112837025e-05, "loss": 0.5456, "step": 6040 }, { "epoch": 0.16519908116385912, "grad_norm": 1.2368206977844238, "learning_rate": 1.9057225739594536e-05, "loss": 0.5945, "step": 6041 }, { "epoch": 0.16522642747757602, "grad_norm": 1.570327639579773, "learning_rate": 1.9056850279767434e-05, "loss": 0.5789, "step": 6042 }, { "epoch": 0.16525377379129294, "grad_norm": 1.4103370904922485, "learning_rate": 1.9056474748891887e-05, "loss": 0.5332, "step": 6043 }, { "epoch": 0.16528112010500984, "grad_norm": 1.5514276027679443, "learning_rate": 1.9056099146970848e-05, "loss": 0.9604, "step": 6044 }, { "epoch": 0.16530846641872676, "grad_norm": 1.8051424026489258, "learning_rate": 1.9055723474007258e-05, "loss": 0.5731, "step": 6045 }, { "epoch": 0.16533581273244366, "grad_norm": 1.2750720977783203, "learning_rate": 1.905534773000407e-05, "loss": 0.6237, "step": 6046 }, { "epoch": 0.16536315904616058, "grad_norm": 1.620413064956665, "learning_rate": 1.905497191496422e-05, "loss": 0.5692, "step": 6047 }, { "epoch": 0.16539050535987748, "grad_norm": 1.1685595512390137, "learning_rate": 1.905459602889067e-05, "loss": 0.6139, "step": 6048 }, { "epoch": 0.1654178516735944, "grad_norm": 1.1214159727096558, "learning_rate": 1.9054220071786357e-05, "loss": 0.4408, "step": 6049 }, { "epoch": 0.1654451979873113, "grad_norm": 1.429620623588562, "learning_rate": 1.905384404365424e-05, "loss": 0.6243, "step": 6050 }, { "epoch": 0.16547254430102823, "grad_norm": 1.4117066860198975, "learning_rate": 1.9053467944497264e-05, "loss": 0.569, "step": 6051 }, { "epoch": 0.16549989061474513, "grad_norm": 1.3761757612228394, "learning_rate": 1.9053091774318378e-05, "loss": 0.6117, "step": 6052 }, { "epoch": 0.16552723692846205, "grad_norm": 1.3525804281234741, "learning_rate": 1.9052715533120537e-05, "loss": 0.5898, "step": 6053 }, { "epoch": 0.16555458324217895, "grad_norm": 1.699837565422058, "learning_rate": 1.9052339220906684e-05, "loss": 0.5686, "step": 6054 }, { "epoch": 0.16558192955589587, "grad_norm": 1.4261404275894165, "learning_rate": 1.905196283767978e-05, "loss": 0.5662, "step": 6055 }, { "epoch": 0.16560927586961277, "grad_norm": 1.2737091779708862, "learning_rate": 1.9051586383442783e-05, "loss": 0.5456, "step": 6056 }, { "epoch": 0.1656366221833297, "grad_norm": 1.6154223680496216, "learning_rate": 1.905120985819863e-05, "loss": 0.5739, "step": 6057 }, { "epoch": 0.1656639684970466, "grad_norm": 1.6131689548492432, "learning_rate": 1.9050833261950286e-05, "loss": 0.9323, "step": 6058 }, { "epoch": 0.16569131481076352, "grad_norm": 1.209761142730713, "learning_rate": 1.90504565947007e-05, "loss": 0.5688, "step": 6059 }, { "epoch": 0.16571866112448042, "grad_norm": 1.3519271612167358, "learning_rate": 1.9050079856452833e-05, "loss": 0.9366, "step": 6060 }, { "epoch": 0.16574600743819734, "grad_norm": 1.5231635570526123, "learning_rate": 1.9049703047209632e-05, "loss": 0.578, "step": 6061 }, { "epoch": 0.16577335375191424, "grad_norm": 1.4186490774154663, "learning_rate": 1.9049326166974062e-05, "loss": 0.5917, "step": 6062 }, { "epoch": 0.16580070006563116, "grad_norm": 1.323387622833252, "learning_rate": 1.9048949215749067e-05, "loss": 0.9151, "step": 6063 }, { "epoch": 0.16582804637934806, "grad_norm": 1.5847368240356445, "learning_rate": 1.904857219353762e-05, "loss": 0.5978, "step": 6064 }, { "epoch": 0.16585539269306498, "grad_norm": 1.5066441297531128, "learning_rate": 1.904819510034267e-05, "loss": 0.5891, "step": 6065 }, { "epoch": 0.16588273900678188, "grad_norm": 1.2625740766525269, "learning_rate": 1.9047817936167174e-05, "loss": 0.5653, "step": 6066 }, { "epoch": 0.1659100853204988, "grad_norm": 1.3055005073547363, "learning_rate": 1.904744070101409e-05, "loss": 0.5868, "step": 6067 }, { "epoch": 0.1659374316342157, "grad_norm": 1.5168925523757935, "learning_rate": 1.9047063394886382e-05, "loss": 0.5236, "step": 6068 }, { "epoch": 0.16596477794793263, "grad_norm": 1.5125995874404907, "learning_rate": 1.9046686017787008e-05, "loss": 0.5609, "step": 6069 }, { "epoch": 0.16599212426164953, "grad_norm": 1.234078288078308, "learning_rate": 1.9046308569718927e-05, "loss": 0.5768, "step": 6070 }, { "epoch": 0.16601947057536645, "grad_norm": 1.6435903310775757, "learning_rate": 1.90459310506851e-05, "loss": 0.5577, "step": 6071 }, { "epoch": 0.16604681688908335, "grad_norm": 1.4610583782196045, "learning_rate": 1.9045553460688495e-05, "loss": 0.5667, "step": 6072 }, { "epoch": 0.16607416320280027, "grad_norm": 1.525671124458313, "learning_rate": 1.9045175799732066e-05, "loss": 0.5961, "step": 6073 }, { "epoch": 0.16610150951651717, "grad_norm": 1.557851791381836, "learning_rate": 1.9044798067818776e-05, "loss": 0.9217, "step": 6074 }, { "epoch": 0.1661288558302341, "grad_norm": 1.4025598764419556, "learning_rate": 1.9044420264951596e-05, "loss": 0.9272, "step": 6075 }, { "epoch": 0.166156202143951, "grad_norm": 1.8898730278015137, "learning_rate": 1.904404239113348e-05, "loss": 0.933, "step": 6076 }, { "epoch": 0.16618354845766792, "grad_norm": 1.6395480632781982, "learning_rate": 1.9043664446367397e-05, "loss": 0.5674, "step": 6077 }, { "epoch": 0.16621089477138482, "grad_norm": 1.6475000381469727, "learning_rate": 1.904328643065631e-05, "loss": 0.5955, "step": 6078 }, { "epoch": 0.16623824108510174, "grad_norm": 1.4589987993240356, "learning_rate": 1.904290834400319e-05, "loss": 0.588, "step": 6079 }, { "epoch": 0.16626558739881864, "grad_norm": 1.5629559755325317, "learning_rate": 1.9042530186410998e-05, "loss": 0.5416, "step": 6080 }, { "epoch": 0.16629293371253556, "grad_norm": 1.5716333389282227, "learning_rate": 1.9042151957882697e-05, "loss": 0.6017, "step": 6081 }, { "epoch": 0.16632028002625246, "grad_norm": 1.4167860746383667, "learning_rate": 1.9041773658421262e-05, "loss": 0.5921, "step": 6082 }, { "epoch": 0.16634762633996938, "grad_norm": 1.3837387561798096, "learning_rate": 1.9041395288029657e-05, "loss": 0.5806, "step": 6083 }, { "epoch": 0.16637497265368628, "grad_norm": 1.515162467956543, "learning_rate": 1.904101684671085e-05, "loss": 0.589, "step": 6084 }, { "epoch": 0.1664023189674032, "grad_norm": 1.6479600667953491, "learning_rate": 1.9040638334467814e-05, "loss": 0.5647, "step": 6085 }, { "epoch": 0.1664296652811201, "grad_norm": 1.2435076236724854, "learning_rate": 1.904025975130351e-05, "loss": 0.5564, "step": 6086 }, { "epoch": 0.16645701159483703, "grad_norm": 1.5946931838989258, "learning_rate": 1.9039881097220912e-05, "loss": 0.6104, "step": 6087 }, { "epoch": 0.16648435790855393, "grad_norm": 1.4410210847854614, "learning_rate": 1.9039502372222993e-05, "loss": 0.5863, "step": 6088 }, { "epoch": 0.16651170422227085, "grad_norm": 2.064537525177002, "learning_rate": 1.903912357631272e-05, "loss": 0.5108, "step": 6089 }, { "epoch": 0.16653905053598775, "grad_norm": 2.0964410305023193, "learning_rate": 1.903874470949307e-05, "loss": 0.5465, "step": 6090 }, { "epoch": 0.16656639684970467, "grad_norm": 1.501317024230957, "learning_rate": 1.903836577176701e-05, "loss": 0.5878, "step": 6091 }, { "epoch": 0.16659374316342157, "grad_norm": 1.29203462600708, "learning_rate": 1.9037986763137513e-05, "loss": 0.6052, "step": 6092 }, { "epoch": 0.1666210894771385, "grad_norm": 1.2106839418411255, "learning_rate": 1.9037607683607554e-05, "loss": 0.5733, "step": 6093 }, { "epoch": 0.1666484357908554, "grad_norm": 1.699949860572815, "learning_rate": 1.9037228533180103e-05, "loss": 0.5675, "step": 6094 }, { "epoch": 0.1666757821045723, "grad_norm": 2.5812058448791504, "learning_rate": 1.9036849311858142e-05, "loss": 0.5707, "step": 6095 }, { "epoch": 0.16670312841828921, "grad_norm": 1.3711482286453247, "learning_rate": 1.903647001964464e-05, "loss": 0.5864, "step": 6096 }, { "epoch": 0.1667304747320061, "grad_norm": 1.526296854019165, "learning_rate": 1.903609065654257e-05, "loss": 0.5735, "step": 6097 }, { "epoch": 0.16675782104572304, "grad_norm": 1.3887284994125366, "learning_rate": 1.9035711222554917e-05, "loss": 0.5915, "step": 6098 }, { "epoch": 0.16678516735943993, "grad_norm": 1.69767427444458, "learning_rate": 1.9035331717684652e-05, "loss": 0.5209, "step": 6099 }, { "epoch": 0.16681251367315686, "grad_norm": 2.070401906967163, "learning_rate": 1.903495214193475e-05, "loss": 0.5595, "step": 6100 }, { "epoch": 0.16683985998687376, "grad_norm": 1.3899173736572266, "learning_rate": 1.9034572495308193e-05, "loss": 0.5776, "step": 6101 }, { "epoch": 0.16686720630059068, "grad_norm": 1.5966598987579346, "learning_rate": 1.9034192777807956e-05, "loss": 0.5362, "step": 6102 }, { "epoch": 0.16689455261430758, "grad_norm": 1.2930610179901123, "learning_rate": 1.903381298943702e-05, "loss": 0.5606, "step": 6103 }, { "epoch": 0.1669218989280245, "grad_norm": 1.9457296133041382, "learning_rate": 1.9033433130198363e-05, "loss": 0.5808, "step": 6104 }, { "epoch": 0.1669492452417414, "grad_norm": 1.404354214668274, "learning_rate": 1.9033053200094963e-05, "loss": 0.6349, "step": 6105 }, { "epoch": 0.16697659155545833, "grad_norm": 1.5899556875228882, "learning_rate": 1.9032673199129807e-05, "loss": 0.5558, "step": 6106 }, { "epoch": 0.16700393786917522, "grad_norm": 1.7018629312515259, "learning_rate": 1.903229312730587e-05, "loss": 0.5865, "step": 6107 }, { "epoch": 0.16703128418289215, "grad_norm": 1.8377056121826172, "learning_rate": 1.9031912984626134e-05, "loss": 0.5557, "step": 6108 }, { "epoch": 0.16705863049660905, "grad_norm": 1.4210432767868042, "learning_rate": 1.9031532771093584e-05, "loss": 0.5576, "step": 6109 }, { "epoch": 0.16708597681032597, "grad_norm": 1.6864979267120361, "learning_rate": 1.90311524867112e-05, "loss": 0.5805, "step": 6110 }, { "epoch": 0.16711332312404287, "grad_norm": 1.2484848499298096, "learning_rate": 1.9030772131481967e-05, "loss": 0.5637, "step": 6111 }, { "epoch": 0.1671406694377598, "grad_norm": 1.5247217416763306, "learning_rate": 1.9030391705408868e-05, "loss": 0.4644, "step": 6112 }, { "epoch": 0.1671680157514767, "grad_norm": 1.479942798614502, "learning_rate": 1.9030011208494886e-05, "loss": 0.582, "step": 6113 }, { "epoch": 0.16719536206519361, "grad_norm": 1.4005379676818848, "learning_rate": 1.902963064074301e-05, "loss": 0.5911, "step": 6114 }, { "epoch": 0.1672227083789105, "grad_norm": 1.2843431234359741, "learning_rate": 1.902925000215622e-05, "loss": 0.5653, "step": 6115 }, { "epoch": 0.16725005469262744, "grad_norm": 1.5450503826141357, "learning_rate": 1.902886929273751e-05, "loss": 0.5537, "step": 6116 }, { "epoch": 0.16727740100634433, "grad_norm": 1.6106483936309814, "learning_rate": 1.9028488512489854e-05, "loss": 0.5519, "step": 6117 }, { "epoch": 0.16730474732006126, "grad_norm": 1.6565196514129639, "learning_rate": 1.9028107661416252e-05, "loss": 0.5388, "step": 6118 }, { "epoch": 0.16733209363377816, "grad_norm": 1.4087148904800415, "learning_rate": 1.9027726739519684e-05, "loss": 0.5836, "step": 6119 }, { "epoch": 0.16735943994749508, "grad_norm": 1.8900467157363892, "learning_rate": 1.9027345746803142e-05, "loss": 0.568, "step": 6120 }, { "epoch": 0.16738678626121198, "grad_norm": 1.340457797050476, "learning_rate": 1.902696468326961e-05, "loss": 0.598, "step": 6121 }, { "epoch": 0.1674141325749289, "grad_norm": 1.3126920461654663, "learning_rate": 1.9026583548922085e-05, "loss": 0.5821, "step": 6122 }, { "epoch": 0.1674414788886458, "grad_norm": 1.4092514514923096, "learning_rate": 1.9026202343763547e-05, "loss": 0.5589, "step": 6123 }, { "epoch": 0.16746882520236273, "grad_norm": 1.5479365587234497, "learning_rate": 1.9025821067796996e-05, "loss": 0.5126, "step": 6124 }, { "epoch": 0.16749617151607962, "grad_norm": 1.4540059566497803, "learning_rate": 1.9025439721025415e-05, "loss": 0.3999, "step": 6125 }, { "epoch": 0.16752351782979655, "grad_norm": 1.3998565673828125, "learning_rate": 1.9025058303451803e-05, "loss": 0.5729, "step": 6126 }, { "epoch": 0.16755086414351344, "grad_norm": 1.415713906288147, "learning_rate": 1.902467681507915e-05, "loss": 0.554, "step": 6127 }, { "epoch": 0.16757821045723037, "grad_norm": 1.448199987411499, "learning_rate": 1.9024295255910442e-05, "loss": 0.5753, "step": 6128 }, { "epoch": 0.16760555677094727, "grad_norm": 1.6118266582489014, "learning_rate": 1.902391362594868e-05, "loss": 0.6018, "step": 6129 }, { "epoch": 0.1676329030846642, "grad_norm": 1.7612580060958862, "learning_rate": 1.9023531925196853e-05, "loss": 0.5652, "step": 6130 }, { "epoch": 0.1676602493983811, "grad_norm": 1.3130831718444824, "learning_rate": 1.902315015365796e-05, "loss": 0.551, "step": 6131 }, { "epoch": 0.16768759571209801, "grad_norm": 1.6227455139160156, "learning_rate": 1.9022768311334992e-05, "loss": 0.6652, "step": 6132 }, { "epoch": 0.1677149420258149, "grad_norm": 1.4043352603912354, "learning_rate": 1.902238639823095e-05, "loss": 0.567, "step": 6133 }, { "epoch": 0.16774228833953184, "grad_norm": 1.6011364459991455, "learning_rate": 1.902200441434882e-05, "loss": 0.6001, "step": 6134 }, { "epoch": 0.16776963465324873, "grad_norm": 1.7553035020828247, "learning_rate": 1.9021622359691604e-05, "loss": 0.5174, "step": 6135 }, { "epoch": 0.16779698096696566, "grad_norm": 2.1634037494659424, "learning_rate": 1.9021240234262303e-05, "loss": 0.9637, "step": 6136 }, { "epoch": 0.16782432728068256, "grad_norm": 1.9181995391845703, "learning_rate": 1.902085803806391e-05, "loss": 0.9535, "step": 6137 }, { "epoch": 0.16785167359439948, "grad_norm": 1.5417628288269043, "learning_rate": 1.902047577109943e-05, "loss": 0.5343, "step": 6138 }, { "epoch": 0.16787901990811638, "grad_norm": 1.2965587377548218, "learning_rate": 1.9020093433371846e-05, "loss": 0.5977, "step": 6139 }, { "epoch": 0.1679063662218333, "grad_norm": 1.4865905046463013, "learning_rate": 1.9019711024884175e-05, "loss": 0.582, "step": 6140 }, { "epoch": 0.1679337125355502, "grad_norm": 1.6428812742233276, "learning_rate": 1.9019328545639406e-05, "loss": 0.5767, "step": 6141 }, { "epoch": 0.16796105884926713, "grad_norm": 1.5469087362289429, "learning_rate": 1.901894599564054e-05, "loss": 0.6025, "step": 6142 }, { "epoch": 0.16798840516298402, "grad_norm": 1.7939987182617188, "learning_rate": 1.9018563374890586e-05, "loss": 0.5689, "step": 6143 }, { "epoch": 0.16801575147670095, "grad_norm": 1.38558828830719, "learning_rate": 1.901818068339254e-05, "loss": 0.5389, "step": 6144 }, { "epoch": 0.16804309779041784, "grad_norm": 1.2833114862442017, "learning_rate": 1.9017797921149402e-05, "loss": 0.5873, "step": 6145 }, { "epoch": 0.16807044410413477, "grad_norm": 1.550028681755066, "learning_rate": 1.901741508816418e-05, "loss": 0.5493, "step": 6146 }, { "epoch": 0.16809779041785167, "grad_norm": 1.2921415567398071, "learning_rate": 1.901703218443987e-05, "loss": 0.5398, "step": 6147 }, { "epoch": 0.1681251367315686, "grad_norm": 2.1466469764709473, "learning_rate": 1.9016649209979485e-05, "loss": 0.5855, "step": 6148 }, { "epoch": 0.1681524830452855, "grad_norm": 1.653169870376587, "learning_rate": 1.901626616478602e-05, "loss": 0.4359, "step": 6149 }, { "epoch": 0.16817982935900241, "grad_norm": 1.4668138027191162, "learning_rate": 1.9015883048862487e-05, "loss": 0.5655, "step": 6150 }, { "epoch": 0.1682071756727193, "grad_norm": 1.3227835893630981, "learning_rate": 1.901549986221189e-05, "loss": 0.5615, "step": 6151 }, { "epoch": 0.16823452198643624, "grad_norm": 1.2678608894348145, "learning_rate": 1.9015116604837233e-05, "loss": 0.6045, "step": 6152 }, { "epoch": 0.16826186830015313, "grad_norm": 1.4208260774612427, "learning_rate": 1.901473327674152e-05, "loss": 0.6101, "step": 6153 }, { "epoch": 0.16828921461387006, "grad_norm": 1.4124637842178345, "learning_rate": 1.9014349877927765e-05, "loss": 0.5742, "step": 6154 }, { "epoch": 0.16831656092758696, "grad_norm": 1.8975305557250977, "learning_rate": 1.901396640839897e-05, "loss": 0.5526, "step": 6155 }, { "epoch": 0.16834390724130388, "grad_norm": 1.596906304359436, "learning_rate": 1.9013582868158147e-05, "loss": 0.5736, "step": 6156 }, { "epoch": 0.16837125355502078, "grad_norm": 1.607408881187439, "learning_rate": 1.9013199257208304e-05, "loss": 0.5423, "step": 6157 }, { "epoch": 0.1683985998687377, "grad_norm": 1.6723685264587402, "learning_rate": 1.9012815575552446e-05, "loss": 0.5828, "step": 6158 }, { "epoch": 0.1684259461824546, "grad_norm": 1.3913836479187012, "learning_rate": 1.9012431823193587e-05, "loss": 0.5838, "step": 6159 }, { "epoch": 0.16845329249617153, "grad_norm": 1.5477497577667236, "learning_rate": 1.9012048000134737e-05, "loss": 0.5691, "step": 6160 }, { "epoch": 0.16848063880988842, "grad_norm": 1.3014339208602905, "learning_rate": 1.901166410637891e-05, "loss": 0.5652, "step": 6161 }, { "epoch": 0.16850798512360535, "grad_norm": 1.4657334089279175, "learning_rate": 1.901128014192911e-05, "loss": 0.5502, "step": 6162 }, { "epoch": 0.16853533143732224, "grad_norm": 1.3517602682113647, "learning_rate": 1.9010896106788354e-05, "loss": 0.5583, "step": 6163 }, { "epoch": 0.16856267775103917, "grad_norm": 1.5279947519302368, "learning_rate": 1.9010512000959654e-05, "loss": 0.5782, "step": 6164 }, { "epoch": 0.16859002406475607, "grad_norm": 1.5650750398635864, "learning_rate": 1.9010127824446024e-05, "loss": 0.5802, "step": 6165 }, { "epoch": 0.168617370378473, "grad_norm": 1.4422359466552734, "learning_rate": 1.9009743577250476e-05, "loss": 0.543, "step": 6166 }, { "epoch": 0.1686447166921899, "grad_norm": 1.3658474683761597, "learning_rate": 1.9009359259376026e-05, "loss": 0.5798, "step": 6167 }, { "epoch": 0.1686720630059068, "grad_norm": 1.3371881246566772, "learning_rate": 1.900897487082569e-05, "loss": 0.5369, "step": 6168 }, { "epoch": 0.1686994093196237, "grad_norm": 1.1649154424667358, "learning_rate": 1.9008590411602476e-05, "loss": 0.5948, "step": 6169 }, { "epoch": 0.16872675563334064, "grad_norm": 1.4769741296768188, "learning_rate": 1.9008205881709408e-05, "loss": 0.5751, "step": 6170 }, { "epoch": 0.16875410194705753, "grad_norm": 3.169163227081299, "learning_rate": 1.9007821281149502e-05, "loss": 1.0848, "step": 6171 }, { "epoch": 0.16878144826077446, "grad_norm": 2.731264352798462, "learning_rate": 1.900743660992577e-05, "loss": 1.0506, "step": 6172 }, { "epoch": 0.16880879457449136, "grad_norm": 1.8353147506713867, "learning_rate": 1.9007051868041237e-05, "loss": 0.5335, "step": 6173 }, { "epoch": 0.16883614088820828, "grad_norm": 1.3112109899520874, "learning_rate": 1.9006667055498912e-05, "loss": 0.6042, "step": 6174 }, { "epoch": 0.16886348720192518, "grad_norm": 1.613484263420105, "learning_rate": 1.900628217230182e-05, "loss": 0.5378, "step": 6175 }, { "epoch": 0.1688908335156421, "grad_norm": 1.414391040802002, "learning_rate": 1.9005897218452984e-05, "loss": 0.5641, "step": 6176 }, { "epoch": 0.168918179829359, "grad_norm": 1.331538200378418, "learning_rate": 1.9005512193955416e-05, "loss": 0.5618, "step": 6177 }, { "epoch": 0.16894552614307592, "grad_norm": 1.2352654933929443, "learning_rate": 1.9005127098812135e-05, "loss": 0.4451, "step": 6178 }, { "epoch": 0.16897287245679282, "grad_norm": 1.5986511707305908, "learning_rate": 1.9004741933026168e-05, "loss": 0.5742, "step": 6179 }, { "epoch": 0.16900021877050975, "grad_norm": 1.6457135677337646, "learning_rate": 1.9004356696600536e-05, "loss": 0.6107, "step": 6180 }, { "epoch": 0.16902756508422664, "grad_norm": 1.3890410661697388, "learning_rate": 1.9003971389538262e-05, "loss": 0.5705, "step": 6181 }, { "epoch": 0.16905491139794357, "grad_norm": 1.245865821838379, "learning_rate": 1.9003586011842365e-05, "loss": 0.5473, "step": 6182 }, { "epoch": 0.16908225771166047, "grad_norm": 2.173617362976074, "learning_rate": 1.9003200563515866e-05, "loss": 0.5611, "step": 6183 }, { "epoch": 0.1691096040253774, "grad_norm": 1.2523101568222046, "learning_rate": 1.90028150445618e-05, "loss": 0.5982, "step": 6184 }, { "epoch": 0.1691369503390943, "grad_norm": 1.7921473979949951, "learning_rate": 1.9002429454983177e-05, "loss": 0.5311, "step": 6185 }, { "epoch": 0.1691642966528112, "grad_norm": 1.5596387386322021, "learning_rate": 1.900204379478303e-05, "loss": 0.5368, "step": 6186 }, { "epoch": 0.1691916429665281, "grad_norm": 1.9755162000656128, "learning_rate": 1.900165806396438e-05, "loss": 0.5741, "step": 6187 }, { "epoch": 0.16921898928024504, "grad_norm": 1.4918041229248047, "learning_rate": 1.9001272262530256e-05, "loss": 0.5684, "step": 6188 }, { "epoch": 0.16924633559396193, "grad_norm": 1.0772664546966553, "learning_rate": 1.9000886390483688e-05, "loss": 0.4196, "step": 6189 }, { "epoch": 0.16927368190767886, "grad_norm": 1.6888048648834229, "learning_rate": 1.90005004478277e-05, "loss": 0.5425, "step": 6190 }, { "epoch": 0.16930102822139576, "grad_norm": 2.1794626712799072, "learning_rate": 1.9000114434565317e-05, "loss": 0.5816, "step": 6191 }, { "epoch": 0.16932837453511268, "grad_norm": 1.4879512786865234, "learning_rate": 1.899972835069957e-05, "loss": 0.6197, "step": 6192 }, { "epoch": 0.16935572084882958, "grad_norm": 1.5200344324111938, "learning_rate": 1.8999342196233486e-05, "loss": 0.5167, "step": 6193 }, { "epoch": 0.1693830671625465, "grad_norm": 1.3017405271530151, "learning_rate": 1.8998955971170094e-05, "loss": 0.5577, "step": 6194 }, { "epoch": 0.1694104134762634, "grad_norm": 1.397629737854004, "learning_rate": 1.8998569675512428e-05, "loss": 0.574, "step": 6195 }, { "epoch": 0.16943775978998032, "grad_norm": 1.4156246185302734, "learning_rate": 1.899818330926351e-05, "loss": 0.5513, "step": 6196 }, { "epoch": 0.16946510610369722, "grad_norm": 2.1756646633148193, "learning_rate": 1.899779687242638e-05, "loss": 0.6495, "step": 6197 }, { "epoch": 0.16949245241741412, "grad_norm": 1.4147863388061523, "learning_rate": 1.8997410365004064e-05, "loss": 0.3984, "step": 6198 }, { "epoch": 0.16951979873113104, "grad_norm": 3.4600391387939453, "learning_rate": 1.89970237869996e-05, "loss": 1.1243, "step": 6199 }, { "epoch": 0.16954714504484794, "grad_norm": 1.9329159259796143, "learning_rate": 1.8996637138416014e-05, "loss": 0.5771, "step": 6200 }, { "epoch": 0.16957449135856487, "grad_norm": 1.345317006111145, "learning_rate": 1.8996250419256338e-05, "loss": 0.5651, "step": 6201 }, { "epoch": 0.16960183767228176, "grad_norm": 1.8164734840393066, "learning_rate": 1.8995863629523615e-05, "loss": 0.6138, "step": 6202 }, { "epoch": 0.1696291839859987, "grad_norm": 2.095306634902954, "learning_rate": 1.899547676922087e-05, "loss": 0.5118, "step": 6203 }, { "epoch": 0.16965653029971559, "grad_norm": 1.4295923709869385, "learning_rate": 1.899508983835114e-05, "loss": 0.5612, "step": 6204 }, { "epoch": 0.1696838766134325, "grad_norm": 2.044917106628418, "learning_rate": 1.8994702836917465e-05, "loss": 0.6423, "step": 6205 }, { "epoch": 0.1697112229271494, "grad_norm": 1.418408751487732, "learning_rate": 1.8994315764922878e-05, "loss": 0.5619, "step": 6206 }, { "epoch": 0.16973856924086633, "grad_norm": 2.8712329864501953, "learning_rate": 1.8993928622370412e-05, "loss": 0.5823, "step": 6207 }, { "epoch": 0.16976591555458323, "grad_norm": 1.7295876741409302, "learning_rate": 1.8993541409263107e-05, "loss": 0.5643, "step": 6208 }, { "epoch": 0.16979326186830015, "grad_norm": 1.3058992624282837, "learning_rate": 1.8993154125604002e-05, "loss": 0.5693, "step": 6209 }, { "epoch": 0.16982060818201705, "grad_norm": 1.8222839832305908, "learning_rate": 1.8992766771396133e-05, "loss": 0.5413, "step": 6210 }, { "epoch": 0.16984795449573398, "grad_norm": 1.6695866584777832, "learning_rate": 1.8992379346642542e-05, "loss": 0.5571, "step": 6211 }, { "epoch": 0.16987530080945087, "grad_norm": 1.7994050979614258, "learning_rate": 1.8991991851346262e-05, "loss": 0.5404, "step": 6212 }, { "epoch": 0.1699026471231678, "grad_norm": 1.6921017169952393, "learning_rate": 1.899160428551034e-05, "loss": 0.5991, "step": 6213 }, { "epoch": 0.1699299934368847, "grad_norm": 1.675964593887329, "learning_rate": 1.899121664913781e-05, "loss": 0.5825, "step": 6214 }, { "epoch": 0.16995733975060162, "grad_norm": 1.5928826332092285, "learning_rate": 1.899082894223172e-05, "loss": 0.6003, "step": 6215 }, { "epoch": 0.16998468606431852, "grad_norm": 1.9130430221557617, "learning_rate": 1.89904411647951e-05, "loss": 0.4558, "step": 6216 }, { "epoch": 0.17001203237803544, "grad_norm": 1.5076018571853638, "learning_rate": 1.8990053316831005e-05, "loss": 0.5393, "step": 6217 }, { "epoch": 0.17003937869175234, "grad_norm": 1.8079112768173218, "learning_rate": 1.8989665398342468e-05, "loss": 0.5283, "step": 6218 }, { "epoch": 0.17006672500546927, "grad_norm": 2.1881985664367676, "learning_rate": 1.898927740933254e-05, "loss": 0.4284, "step": 6219 }, { "epoch": 0.17009407131918616, "grad_norm": 1.7037032842636108, "learning_rate": 1.8988889349804262e-05, "loss": 0.5282, "step": 6220 }, { "epoch": 0.1701214176329031, "grad_norm": 1.470991611480713, "learning_rate": 1.898850121976067e-05, "loss": 0.5886, "step": 6221 }, { "epoch": 0.17014876394661999, "grad_norm": 1.606184482574463, "learning_rate": 1.898811301920482e-05, "loss": 0.5432, "step": 6222 }, { "epoch": 0.1701761102603369, "grad_norm": 1.2501052618026733, "learning_rate": 1.898772474813975e-05, "loss": 0.5711, "step": 6223 }, { "epoch": 0.1702034565740538, "grad_norm": 1.475935459136963, "learning_rate": 1.8987336406568512e-05, "loss": 0.5448, "step": 6224 }, { "epoch": 0.17023080288777073, "grad_norm": 1.9982218742370605, "learning_rate": 1.8986947994494148e-05, "loss": 0.51, "step": 6225 }, { "epoch": 0.17025814920148763, "grad_norm": 1.8602123260498047, "learning_rate": 1.8986559511919703e-05, "loss": 0.5608, "step": 6226 }, { "epoch": 0.17028549551520455, "grad_norm": 2.0228092670440674, "learning_rate": 1.898617095884823e-05, "loss": 0.5327, "step": 6227 }, { "epoch": 0.17031284182892145, "grad_norm": 1.8303050994873047, "learning_rate": 1.8985782335282777e-05, "loss": 0.5762, "step": 6228 }, { "epoch": 0.17034018814263838, "grad_norm": 1.4688196182250977, "learning_rate": 1.8985393641226388e-05, "loss": 0.5939, "step": 6229 }, { "epoch": 0.17036753445635527, "grad_norm": 1.8678911924362183, "learning_rate": 1.8985004876682115e-05, "loss": 0.5752, "step": 6230 }, { "epoch": 0.1703948807700722, "grad_norm": 1.546636700630188, "learning_rate": 1.898461604165301e-05, "loss": 0.5882, "step": 6231 }, { "epoch": 0.1704222270837891, "grad_norm": 1.6376911401748657, "learning_rate": 1.8984227136142116e-05, "loss": 0.5678, "step": 6232 }, { "epoch": 0.17044957339750602, "grad_norm": 2.8274002075195312, "learning_rate": 1.8983838160152487e-05, "loss": 1.0077, "step": 6233 }, { "epoch": 0.17047691971122292, "grad_norm": 1.800192952156067, "learning_rate": 1.8983449113687183e-05, "loss": 0.5771, "step": 6234 }, { "epoch": 0.17050426602493984, "grad_norm": 1.9624398946762085, "learning_rate": 1.8983059996749245e-05, "loss": 0.5306, "step": 6235 }, { "epoch": 0.17053161233865674, "grad_norm": 1.5834966897964478, "learning_rate": 1.898267080934173e-05, "loss": 0.9827, "step": 6236 }, { "epoch": 0.17055895865237367, "grad_norm": 1.5010597705841064, "learning_rate": 1.898228155146769e-05, "loss": 0.5995, "step": 6237 }, { "epoch": 0.17058630496609056, "grad_norm": 2.6418282985687256, "learning_rate": 1.898189222313018e-05, "loss": 0.5532, "step": 6238 }, { "epoch": 0.1706136512798075, "grad_norm": 1.8839010000228882, "learning_rate": 1.898150282433225e-05, "loss": 0.546, "step": 6239 }, { "epoch": 0.17064099759352439, "grad_norm": 1.6931978464126587, "learning_rate": 1.898111335507696e-05, "loss": 0.5892, "step": 6240 }, { "epoch": 0.1706683439072413, "grad_norm": 1.4782710075378418, "learning_rate": 1.8980723815367365e-05, "loss": 0.5637, "step": 6241 }, { "epoch": 0.1706956902209582, "grad_norm": 1.8811078071594238, "learning_rate": 1.8980334205206516e-05, "loss": 0.5913, "step": 6242 }, { "epoch": 0.17072303653467513, "grad_norm": 1.4014846086502075, "learning_rate": 1.8979944524597474e-05, "loss": 0.5869, "step": 6243 }, { "epoch": 0.17075038284839203, "grad_norm": 1.7941431999206543, "learning_rate": 1.8979554773543296e-05, "loss": 0.6546, "step": 6244 }, { "epoch": 0.17077772916210895, "grad_norm": 1.5169878005981445, "learning_rate": 1.8979164952047036e-05, "loss": 0.5406, "step": 6245 }, { "epoch": 0.17080507547582585, "grad_norm": 1.8575628995895386, "learning_rate": 1.8978775060111753e-05, "loss": 0.5508, "step": 6246 }, { "epoch": 0.17083242178954278, "grad_norm": 1.403059959411621, "learning_rate": 1.8978385097740507e-05, "loss": 0.5652, "step": 6247 }, { "epoch": 0.17085976810325967, "grad_norm": 1.3988600969314575, "learning_rate": 1.8977995064936358e-05, "loss": 0.5286, "step": 6248 }, { "epoch": 0.1708871144169766, "grad_norm": 1.6466301679611206, "learning_rate": 1.8977604961702364e-05, "loss": 0.5229, "step": 6249 }, { "epoch": 0.1709144607306935, "grad_norm": 1.4040025472640991, "learning_rate": 1.8977214788041585e-05, "loss": 0.5809, "step": 6250 }, { "epoch": 0.17094180704441042, "grad_norm": 1.838731288909912, "learning_rate": 1.8976824543957084e-05, "loss": 0.5691, "step": 6251 }, { "epoch": 0.17096915335812732, "grad_norm": 2.4740817546844482, "learning_rate": 1.8976434229451918e-05, "loss": 0.546, "step": 6252 }, { "epoch": 0.17099649967184424, "grad_norm": 1.5936561822891235, "learning_rate": 1.8976043844529152e-05, "loss": 0.5568, "step": 6253 }, { "epoch": 0.17102384598556114, "grad_norm": 1.3012912273406982, "learning_rate": 1.897565338919185e-05, "loss": 0.571, "step": 6254 }, { "epoch": 0.17105119229927807, "grad_norm": 1.309544563293457, "learning_rate": 1.8975262863443073e-05, "loss": 0.5493, "step": 6255 }, { "epoch": 0.17107853861299496, "grad_norm": 2.1626553535461426, "learning_rate": 1.8974872267285885e-05, "loss": 0.5394, "step": 6256 }, { "epoch": 0.1711058849267119, "grad_norm": 1.9145413637161255, "learning_rate": 1.897448160072335e-05, "loss": 0.5273, "step": 6257 }, { "epoch": 0.17113323124042878, "grad_norm": 1.3816322088241577, "learning_rate": 1.897409086375853e-05, "loss": 0.5587, "step": 6258 }, { "epoch": 0.1711605775541457, "grad_norm": 1.9116036891937256, "learning_rate": 1.8973700056394492e-05, "loss": 0.5879, "step": 6259 }, { "epoch": 0.1711879238678626, "grad_norm": 2.341172695159912, "learning_rate": 1.8973309178634303e-05, "loss": 0.5606, "step": 6260 }, { "epoch": 0.17121527018157953, "grad_norm": 2.8340675830841064, "learning_rate": 1.897291823048103e-05, "loss": 0.9713, "step": 6261 }, { "epoch": 0.17124261649529643, "grad_norm": 1.363476037979126, "learning_rate": 1.8972527211937742e-05, "loss": 0.5886, "step": 6262 }, { "epoch": 0.17126996280901335, "grad_norm": 1.2748569250106812, "learning_rate": 1.89721361230075e-05, "loss": 0.5609, "step": 6263 }, { "epoch": 0.17129730912273025, "grad_norm": 2.0120537281036377, "learning_rate": 1.8971744963693376e-05, "loss": 0.9542, "step": 6264 }, { "epoch": 0.17132465543644718, "grad_norm": 1.9354504346847534, "learning_rate": 1.8971353733998437e-05, "loss": 0.5754, "step": 6265 }, { "epoch": 0.17135200175016407, "grad_norm": 1.5918192863464355, "learning_rate": 1.897096243392575e-05, "loss": 0.5539, "step": 6266 }, { "epoch": 0.171379348063881, "grad_norm": 1.3481309413909912, "learning_rate": 1.8970571063478392e-05, "loss": 0.534, "step": 6267 }, { "epoch": 0.1714066943775979, "grad_norm": 1.1480416059494019, "learning_rate": 1.8970179622659425e-05, "loss": 0.5469, "step": 6268 }, { "epoch": 0.17143404069131482, "grad_norm": 1.5100921392440796, "learning_rate": 1.8969788111471924e-05, "loss": 0.5481, "step": 6269 }, { "epoch": 0.17146138700503172, "grad_norm": 1.6408636569976807, "learning_rate": 1.896939652991896e-05, "loss": 0.534, "step": 6270 }, { "epoch": 0.17148873331874864, "grad_norm": 3.8817625045776367, "learning_rate": 1.8969004878003608e-05, "loss": 0.5608, "step": 6271 }, { "epoch": 0.17151607963246554, "grad_norm": 1.5031622648239136, "learning_rate": 1.896861315572893e-05, "loss": 0.5579, "step": 6272 }, { "epoch": 0.17154342594618247, "grad_norm": 2.3015737533569336, "learning_rate": 1.8968221363098013e-05, "loss": 0.5905, "step": 6273 }, { "epoch": 0.17157077225989936, "grad_norm": 1.4170136451721191, "learning_rate": 1.8967829500113918e-05, "loss": 0.5653, "step": 6274 }, { "epoch": 0.1715981185736163, "grad_norm": 1.3851735591888428, "learning_rate": 1.896743756677973e-05, "loss": 0.5706, "step": 6275 }, { "epoch": 0.17162546488733318, "grad_norm": 1.588178038597107, "learning_rate": 1.896704556309851e-05, "loss": 0.5056, "step": 6276 }, { "epoch": 0.1716528112010501, "grad_norm": 1.7126461267471313, "learning_rate": 1.8966653489073345e-05, "loss": 0.5851, "step": 6277 }, { "epoch": 0.171680157514767, "grad_norm": 2.1750450134277344, "learning_rate": 1.8966261344707307e-05, "loss": 0.5588, "step": 6278 }, { "epoch": 0.17170750382848393, "grad_norm": 1.4929760694503784, "learning_rate": 1.896586913000347e-05, "loss": 0.5745, "step": 6279 }, { "epoch": 0.17173485014220083, "grad_norm": 1.375241994857788, "learning_rate": 1.8965476844964917e-05, "loss": 0.571, "step": 6280 }, { "epoch": 0.17176219645591775, "grad_norm": 6.842548370361328, "learning_rate": 1.896508448959472e-05, "loss": 0.9895, "step": 6281 }, { "epoch": 0.17178954276963465, "grad_norm": 1.783475637435913, "learning_rate": 1.8964692063895954e-05, "loss": 0.5652, "step": 6282 }, { "epoch": 0.17181688908335158, "grad_norm": 1.179213523864746, "learning_rate": 1.8964299567871706e-05, "loss": 0.5758, "step": 6283 }, { "epoch": 0.17184423539706847, "grad_norm": 1.1636919975280762, "learning_rate": 1.8963907001525048e-05, "loss": 0.5478, "step": 6284 }, { "epoch": 0.1718715817107854, "grad_norm": 1.8387413024902344, "learning_rate": 1.8963514364859067e-05, "loss": 0.5705, "step": 6285 }, { "epoch": 0.1718989280245023, "grad_norm": 1.5786139965057373, "learning_rate": 1.896312165787683e-05, "loss": 0.5939, "step": 6286 }, { "epoch": 0.17192627433821922, "grad_norm": 1.5332192182540894, "learning_rate": 1.8962728880581433e-05, "loss": 0.532, "step": 6287 }, { "epoch": 0.17195362065193612, "grad_norm": 1.532041311264038, "learning_rate": 1.896233603297595e-05, "loss": 0.602, "step": 6288 }, { "epoch": 0.17198096696565304, "grad_norm": 1.5540976524353027, "learning_rate": 1.8961943115063463e-05, "loss": 0.5604, "step": 6289 }, { "epoch": 0.17200831327936994, "grad_norm": 1.5435521602630615, "learning_rate": 1.896155012684705e-05, "loss": 0.6086, "step": 6290 }, { "epoch": 0.17203565959308686, "grad_norm": 1.6551491022109985, "learning_rate": 1.8961157068329802e-05, "loss": 0.5613, "step": 6291 }, { "epoch": 0.17206300590680376, "grad_norm": 1.943263292312622, "learning_rate": 1.8960763939514798e-05, "loss": 0.5743, "step": 6292 }, { "epoch": 0.1720903522205207, "grad_norm": 1.358614206314087, "learning_rate": 1.8960370740405123e-05, "loss": 0.5696, "step": 6293 }, { "epoch": 0.17211769853423758, "grad_norm": 1.2888195514678955, "learning_rate": 1.895997747100386e-05, "loss": 0.5499, "step": 6294 }, { "epoch": 0.1721450448479545, "grad_norm": 1.7124882936477661, "learning_rate": 1.8959584131314095e-05, "loss": 0.4643, "step": 6295 }, { "epoch": 0.1721723911616714, "grad_norm": 1.4860702753067017, "learning_rate": 1.8959190721338915e-05, "loss": 0.5665, "step": 6296 }, { "epoch": 0.17219973747538833, "grad_norm": 1.479506492614746, "learning_rate": 1.8958797241081407e-05, "loss": 0.4338, "step": 6297 }, { "epoch": 0.17222708378910523, "grad_norm": 1.9639496803283691, "learning_rate": 1.895840369054465e-05, "loss": 0.6181, "step": 6298 }, { "epoch": 0.17225443010282213, "grad_norm": 1.9871208667755127, "learning_rate": 1.895801006973174e-05, "loss": 0.9139, "step": 6299 }, { "epoch": 0.17228177641653905, "grad_norm": 1.4175121784210205, "learning_rate": 1.8957616378645767e-05, "loss": 0.6444, "step": 6300 }, { "epoch": 0.17230912273025595, "grad_norm": 2.3895320892333984, "learning_rate": 1.895722261728981e-05, "loss": 0.5359, "step": 6301 }, { "epoch": 0.17233646904397287, "grad_norm": 1.402251124382019, "learning_rate": 1.8956828785666963e-05, "loss": 0.5564, "step": 6302 }, { "epoch": 0.17236381535768977, "grad_norm": 1.4609794616699219, "learning_rate": 1.8956434883780316e-05, "loss": 0.9713, "step": 6303 }, { "epoch": 0.1723911616714067, "grad_norm": 1.659480333328247, "learning_rate": 1.8956040911632954e-05, "loss": 0.6219, "step": 6304 }, { "epoch": 0.1724185079851236, "grad_norm": 2.231015205383301, "learning_rate": 1.8955646869227976e-05, "loss": 0.4387, "step": 6305 }, { "epoch": 0.17244585429884052, "grad_norm": 1.2839665412902832, "learning_rate": 1.8955252756568466e-05, "loss": 0.5589, "step": 6306 }, { "epoch": 0.17247320061255741, "grad_norm": 1.4535303115844727, "learning_rate": 1.8954858573657518e-05, "loss": 0.5993, "step": 6307 }, { "epoch": 0.17250054692627434, "grad_norm": 1.4608312845230103, "learning_rate": 1.8954464320498224e-05, "loss": 0.563, "step": 6308 }, { "epoch": 0.17252789323999124, "grad_norm": 2.062023162841797, "learning_rate": 1.8954069997093684e-05, "loss": 0.5396, "step": 6309 }, { "epoch": 0.17255523955370816, "grad_norm": 1.7904595136642456, "learning_rate": 1.8953675603446976e-05, "loss": 0.9218, "step": 6310 }, { "epoch": 0.17258258586742506, "grad_norm": 1.3247294425964355, "learning_rate": 1.895328113956121e-05, "loss": 0.5931, "step": 6311 }, { "epoch": 0.17260993218114198, "grad_norm": 1.2830810546875, "learning_rate": 1.8952886605439467e-05, "loss": 0.5681, "step": 6312 }, { "epoch": 0.17263727849485888, "grad_norm": 1.7971054315567017, "learning_rate": 1.8952492001084853e-05, "loss": 0.625, "step": 6313 }, { "epoch": 0.1726646248085758, "grad_norm": 1.3839915990829468, "learning_rate": 1.8952097326500453e-05, "loss": 0.5924, "step": 6314 }, { "epoch": 0.1726919711222927, "grad_norm": 1.4468868970870972, "learning_rate": 1.8951702581689372e-05, "loss": 0.5401, "step": 6315 }, { "epoch": 0.17271931743600963, "grad_norm": 1.443250060081482, "learning_rate": 1.8951307766654706e-05, "loss": 0.5703, "step": 6316 }, { "epoch": 0.17274666374972653, "grad_norm": 1.5698559284210205, "learning_rate": 1.8950912881399545e-05, "loss": 0.5737, "step": 6317 }, { "epoch": 0.17277401006344345, "grad_norm": 1.4486700296401978, "learning_rate": 1.8950517925926996e-05, "loss": 0.5046, "step": 6318 }, { "epoch": 0.17280135637716035, "grad_norm": 1.4096121788024902, "learning_rate": 1.8950122900240152e-05, "loss": 0.6066, "step": 6319 }, { "epoch": 0.17282870269087727, "grad_norm": 1.3083986043930054, "learning_rate": 1.8949727804342107e-05, "loss": 0.5568, "step": 6320 }, { "epoch": 0.17285604900459417, "grad_norm": 2.2788381576538086, "learning_rate": 1.8949332638235974e-05, "loss": 0.4811, "step": 6321 }, { "epoch": 0.1728833953183111, "grad_norm": 1.3394187688827515, "learning_rate": 1.894893740192484e-05, "loss": 0.5447, "step": 6322 }, { "epoch": 0.172910741632028, "grad_norm": 1.1353516578674316, "learning_rate": 1.8948542095411812e-05, "loss": 0.554, "step": 6323 }, { "epoch": 0.17293808794574492, "grad_norm": 1.397352695465088, "learning_rate": 1.894814671869999e-05, "loss": 0.5589, "step": 6324 }, { "epoch": 0.17296543425946181, "grad_norm": 1.5220918655395508, "learning_rate": 1.8947751271792474e-05, "loss": 0.9309, "step": 6325 }, { "epoch": 0.17299278057317874, "grad_norm": 1.4734280109405518, "learning_rate": 1.8947355754692368e-05, "loss": 0.4255, "step": 6326 }, { "epoch": 0.17302012688689564, "grad_norm": 1.954423427581787, "learning_rate": 1.8946960167402774e-05, "loss": 0.4434, "step": 6327 }, { "epoch": 0.17304747320061256, "grad_norm": 1.712472915649414, "learning_rate": 1.8946564509926795e-05, "loss": 0.5546, "step": 6328 }, { "epoch": 0.17307481951432946, "grad_norm": 1.3215124607086182, "learning_rate": 1.8946168782267535e-05, "loss": 0.5465, "step": 6329 }, { "epoch": 0.17310216582804638, "grad_norm": 1.325217843055725, "learning_rate": 1.8945772984428103e-05, "loss": 0.5399, "step": 6330 }, { "epoch": 0.17312951214176328, "grad_norm": 1.2513346672058105, "learning_rate": 1.894537711641159e-05, "loss": 0.5621, "step": 6331 }, { "epoch": 0.1731568584554802, "grad_norm": 1.5660983324050903, "learning_rate": 1.894498117822112e-05, "loss": 0.57, "step": 6332 }, { "epoch": 0.1731842047691971, "grad_norm": 3.0049171447753906, "learning_rate": 1.8944585169859786e-05, "loss": 0.5726, "step": 6333 }, { "epoch": 0.17321155108291403, "grad_norm": 1.8459951877593994, "learning_rate": 1.8944189091330698e-05, "loss": 0.5669, "step": 6334 }, { "epoch": 0.17323889739663093, "grad_norm": 1.7408353090286255, "learning_rate": 1.8943792942636964e-05, "loss": 0.3927, "step": 6335 }, { "epoch": 0.17326624371034785, "grad_norm": 1.4283136129379272, "learning_rate": 1.8943396723781696e-05, "loss": 0.5543, "step": 6336 }, { "epoch": 0.17329359002406475, "grad_norm": 1.3371773958206177, "learning_rate": 1.8943000434767993e-05, "loss": 0.5875, "step": 6337 }, { "epoch": 0.17332093633778167, "grad_norm": 1.564188003540039, "learning_rate": 1.894260407559897e-05, "loss": 0.5123, "step": 6338 }, { "epoch": 0.17334828265149857, "grad_norm": 1.790382981300354, "learning_rate": 1.8942207646277735e-05, "loss": 0.5671, "step": 6339 }, { "epoch": 0.1733756289652155, "grad_norm": 1.5564138889312744, "learning_rate": 1.8941811146807394e-05, "loss": 0.5815, "step": 6340 }, { "epoch": 0.1734029752789324, "grad_norm": 1.408049464225769, "learning_rate": 1.8941414577191066e-05, "loss": 0.5405, "step": 6341 }, { "epoch": 0.17343032159264932, "grad_norm": 2.640242576599121, "learning_rate": 1.894101793743185e-05, "loss": 0.5264, "step": 6342 }, { "epoch": 0.17345766790636621, "grad_norm": 1.5132555961608887, "learning_rate": 1.894062122753287e-05, "loss": 0.5613, "step": 6343 }, { "epoch": 0.17348501422008314, "grad_norm": 1.4097753763198853, "learning_rate": 1.8940224447497234e-05, "loss": 0.5728, "step": 6344 }, { "epoch": 0.17351236053380004, "grad_norm": 1.4701858758926392, "learning_rate": 1.893982759732805e-05, "loss": 0.5583, "step": 6345 }, { "epoch": 0.17353970684751696, "grad_norm": 1.5251719951629639, "learning_rate": 1.8939430677028436e-05, "loss": 0.5796, "step": 6346 }, { "epoch": 0.17356705316123386, "grad_norm": 2.1778504848480225, "learning_rate": 1.8939033686601505e-05, "loss": 0.6157, "step": 6347 }, { "epoch": 0.17359439947495078, "grad_norm": 1.4126056432724, "learning_rate": 1.893863662605037e-05, "loss": 0.5537, "step": 6348 }, { "epoch": 0.17362174578866768, "grad_norm": 1.7567099332809448, "learning_rate": 1.893823949537814e-05, "loss": 0.5444, "step": 6349 }, { "epoch": 0.1736490921023846, "grad_norm": 1.3486711978912354, "learning_rate": 1.8937842294587944e-05, "loss": 0.5709, "step": 6350 }, { "epoch": 0.1736764384161015, "grad_norm": 2.276731252670288, "learning_rate": 1.8937445023682888e-05, "loss": 0.5667, "step": 6351 }, { "epoch": 0.17370378472981843, "grad_norm": 1.684315800666809, "learning_rate": 1.893704768266609e-05, "loss": 0.4493, "step": 6352 }, { "epoch": 0.17373113104353533, "grad_norm": 1.4035305976867676, "learning_rate": 1.893665027154067e-05, "loss": 0.5384, "step": 6353 }, { "epoch": 0.17375847735725225, "grad_norm": 2.0408568382263184, "learning_rate": 1.893625279030974e-05, "loss": 0.9448, "step": 6354 }, { "epoch": 0.17378582367096915, "grad_norm": 1.4483503103256226, "learning_rate": 1.8935855238976427e-05, "loss": 0.5905, "step": 6355 }, { "epoch": 0.17381316998468607, "grad_norm": 2.2712697982788086, "learning_rate": 1.8935457617543842e-05, "loss": 0.5442, "step": 6356 }, { "epoch": 0.17384051629840297, "grad_norm": 1.417853593826294, "learning_rate": 1.8935059926015106e-05, "loss": 0.5748, "step": 6357 }, { "epoch": 0.1738678626121199, "grad_norm": 1.4123871326446533, "learning_rate": 1.893466216439334e-05, "loss": 0.5749, "step": 6358 }, { "epoch": 0.1738952089258368, "grad_norm": 1.798945665359497, "learning_rate": 1.8934264332681664e-05, "loss": 0.4153, "step": 6359 }, { "epoch": 0.17392255523955372, "grad_norm": 2.130784273147583, "learning_rate": 1.8933866430883196e-05, "loss": 0.5832, "step": 6360 }, { "epoch": 0.1739499015532706, "grad_norm": 1.6456966400146484, "learning_rate": 1.8933468459001062e-05, "loss": 0.5702, "step": 6361 }, { "epoch": 0.17397724786698754, "grad_norm": 1.836440086364746, "learning_rate": 1.8933070417038384e-05, "loss": 0.5437, "step": 6362 }, { "epoch": 0.17400459418070444, "grad_norm": 1.6709266901016235, "learning_rate": 1.893267230499828e-05, "loss": 0.5798, "step": 6363 }, { "epoch": 0.17403194049442136, "grad_norm": 1.6621677875518799, "learning_rate": 1.8932274122883878e-05, "loss": 0.5669, "step": 6364 }, { "epoch": 0.17405928680813826, "grad_norm": 1.2638781070709229, "learning_rate": 1.8931875870698298e-05, "loss": 0.555, "step": 6365 }, { "epoch": 0.17408663312185518, "grad_norm": 1.5175381898880005, "learning_rate": 1.8931477548444665e-05, "loss": 0.5965, "step": 6366 }, { "epoch": 0.17411397943557208, "grad_norm": 1.9702483415603638, "learning_rate": 1.8931079156126104e-05, "loss": 0.5909, "step": 6367 }, { "epoch": 0.174141325749289, "grad_norm": 1.392630696296692, "learning_rate": 1.893068069374574e-05, "loss": 0.5615, "step": 6368 }, { "epoch": 0.1741686720630059, "grad_norm": 1.3377234935760498, "learning_rate": 1.8930282161306703e-05, "loss": 0.5725, "step": 6369 }, { "epoch": 0.17419601837672283, "grad_norm": 1.2986445426940918, "learning_rate": 1.892988355881211e-05, "loss": 0.5777, "step": 6370 }, { "epoch": 0.17422336469043972, "grad_norm": 1.3863368034362793, "learning_rate": 1.89294848862651e-05, "loss": 0.5662, "step": 6371 }, { "epoch": 0.17425071100415665, "grad_norm": 1.494829773902893, "learning_rate": 1.892908614366879e-05, "loss": 0.5255, "step": 6372 }, { "epoch": 0.17427805731787355, "grad_norm": 1.7097681760787964, "learning_rate": 1.8928687331026312e-05, "loss": 0.5563, "step": 6373 }, { "epoch": 0.17430540363159047, "grad_norm": 1.332131028175354, "learning_rate": 1.8928288448340794e-05, "loss": 0.5747, "step": 6374 }, { "epoch": 0.17433274994530737, "grad_norm": 1.30059814453125, "learning_rate": 1.8927889495615368e-05, "loss": 0.5658, "step": 6375 }, { "epoch": 0.1743600962590243, "grad_norm": 1.5114381313323975, "learning_rate": 1.8927490472853162e-05, "loss": 0.5706, "step": 6376 }, { "epoch": 0.1743874425727412, "grad_norm": 1.433994174003601, "learning_rate": 1.89270913800573e-05, "loss": 0.6015, "step": 6377 }, { "epoch": 0.17441478888645812, "grad_norm": 1.3597370386123657, "learning_rate": 1.8926692217230924e-05, "loss": 0.556, "step": 6378 }, { "epoch": 0.174442135200175, "grad_norm": 1.4294565916061401, "learning_rate": 1.8926292984377158e-05, "loss": 0.5116, "step": 6379 }, { "epoch": 0.17446948151389194, "grad_norm": 1.8441762924194336, "learning_rate": 1.8925893681499137e-05, "loss": 0.5571, "step": 6380 }, { "epoch": 0.17449682782760884, "grad_norm": 2.106092929840088, "learning_rate": 1.8925494308599992e-05, "loss": 0.6446, "step": 6381 }, { "epoch": 0.17452417414132576, "grad_norm": 1.557881474494934, "learning_rate": 1.8925094865682854e-05, "loss": 0.4644, "step": 6382 }, { "epoch": 0.17455152045504266, "grad_norm": 1.4164842367172241, "learning_rate": 1.892469535275086e-05, "loss": 0.5314, "step": 6383 }, { "epoch": 0.17457886676875958, "grad_norm": 3.2638938426971436, "learning_rate": 1.8924295769807138e-05, "loss": 0.4628, "step": 6384 }, { "epoch": 0.17460621308247648, "grad_norm": 1.9941984415054321, "learning_rate": 1.8923896116854835e-05, "loss": 0.9413, "step": 6385 }, { "epoch": 0.1746335593961934, "grad_norm": 1.2146010398864746, "learning_rate": 1.8923496393897075e-05, "loss": 0.6126, "step": 6386 }, { "epoch": 0.1746609057099103, "grad_norm": 1.1827929019927979, "learning_rate": 1.8923096600936995e-05, "loss": 0.5644, "step": 6387 }, { "epoch": 0.17468825202362723, "grad_norm": 1.3912047147750854, "learning_rate": 1.8922696737977737e-05, "loss": 0.5589, "step": 6388 }, { "epoch": 0.17471559833734412, "grad_norm": 1.6966626644134521, "learning_rate": 1.892229680502243e-05, "loss": 0.5098, "step": 6389 }, { "epoch": 0.17474294465106105, "grad_norm": 1.4880130290985107, "learning_rate": 1.892189680207422e-05, "loss": 0.5545, "step": 6390 }, { "epoch": 0.17477029096477795, "grad_norm": 1.5739713907241821, "learning_rate": 1.8921496729136235e-05, "loss": 0.6061, "step": 6391 }, { "epoch": 0.17479763727849487, "grad_norm": 1.1998823881149292, "learning_rate": 1.892109658621162e-05, "loss": 0.5273, "step": 6392 }, { "epoch": 0.17482498359221177, "grad_norm": 2.207989454269409, "learning_rate": 1.892069637330352e-05, "loss": 0.9664, "step": 6393 }, { "epoch": 0.1748523299059287, "grad_norm": 1.7652959823608398, "learning_rate": 1.8920296090415056e-05, "loss": 0.5333, "step": 6394 }, { "epoch": 0.1748796762196456, "grad_norm": 1.902224063873291, "learning_rate": 1.891989573754939e-05, "loss": 0.9248, "step": 6395 }, { "epoch": 0.17490702253336252, "grad_norm": 1.7888487577438354, "learning_rate": 1.8919495314709647e-05, "loss": 0.613, "step": 6396 }, { "epoch": 0.1749343688470794, "grad_norm": 2.535278081893921, "learning_rate": 1.8919094821898976e-05, "loss": 0.5209, "step": 6397 }, { "epoch": 0.17496171516079634, "grad_norm": 1.4726872444152832, "learning_rate": 1.8918694259120514e-05, "loss": 0.5795, "step": 6398 }, { "epoch": 0.17498906147451324, "grad_norm": 1.5616813898086548, "learning_rate": 1.8918293626377404e-05, "loss": 0.5567, "step": 6399 }, { "epoch": 0.17501640778823013, "grad_norm": 5.4451093673706055, "learning_rate": 1.8917892923672795e-05, "loss": 0.552, "step": 6400 }, { "epoch": 0.17504375410194706, "grad_norm": 1.36739981174469, "learning_rate": 1.891749215100982e-05, "loss": 0.6539, "step": 6401 }, { "epoch": 0.17507110041566395, "grad_norm": 1.4473507404327393, "learning_rate": 1.8917091308391632e-05, "loss": 0.5405, "step": 6402 }, { "epoch": 0.17509844672938088, "grad_norm": 1.524725079536438, "learning_rate": 1.8916690395821373e-05, "loss": 0.5865, "step": 6403 }, { "epoch": 0.17512579304309778, "grad_norm": 1.6128382682800293, "learning_rate": 1.891628941330219e-05, "loss": 0.5657, "step": 6404 }, { "epoch": 0.1751531393568147, "grad_norm": 1.3168330192565918, "learning_rate": 1.891588836083722e-05, "loss": 0.5279, "step": 6405 }, { "epoch": 0.1751804856705316, "grad_norm": 3.086009979248047, "learning_rate": 1.8915487238429613e-05, "loss": 0.4416, "step": 6406 }, { "epoch": 0.17520783198424852, "grad_norm": 1.1921271085739136, "learning_rate": 1.8915086046082526e-05, "loss": 0.5362, "step": 6407 }, { "epoch": 0.17523517829796542, "grad_norm": 1.9979534149169922, "learning_rate": 1.891468478379909e-05, "loss": 0.6363, "step": 6408 }, { "epoch": 0.17526252461168235, "grad_norm": 1.7724947929382324, "learning_rate": 1.8914283451582466e-05, "loss": 0.585, "step": 6409 }, { "epoch": 0.17528987092539924, "grad_norm": 1.5458319187164307, "learning_rate": 1.8913882049435797e-05, "loss": 0.5326, "step": 6410 }, { "epoch": 0.17531721723911617, "grad_norm": 1.8954206705093384, "learning_rate": 1.8913480577362227e-05, "loss": 0.5736, "step": 6411 }, { "epoch": 0.17534456355283307, "grad_norm": 1.4407949447631836, "learning_rate": 1.8913079035364912e-05, "loss": 0.5689, "step": 6412 }, { "epoch": 0.17537190986655, "grad_norm": 1.2227798700332642, "learning_rate": 1.8912677423447007e-05, "loss": 0.5626, "step": 6413 }, { "epoch": 0.1753992561802669, "grad_norm": 1.1992334127426147, "learning_rate": 1.8912275741611647e-05, "loss": 0.5538, "step": 6414 }, { "epoch": 0.1754266024939838, "grad_norm": 3.8662168979644775, "learning_rate": 1.8911873989861998e-05, "loss": 0.9782, "step": 6415 }, { "epoch": 0.1754539488077007, "grad_norm": 1.8467860221862793, "learning_rate": 1.89114721682012e-05, "loss": 0.5944, "step": 6416 }, { "epoch": 0.17548129512141764, "grad_norm": 1.7285007238388062, "learning_rate": 1.8911070276632416e-05, "loss": 0.5667, "step": 6417 }, { "epoch": 0.17550864143513453, "grad_norm": 2.0987775325775146, "learning_rate": 1.8910668315158792e-05, "loss": 0.5846, "step": 6418 }, { "epoch": 0.17553598774885146, "grad_norm": 1.705978512763977, "learning_rate": 1.8910266283783482e-05, "loss": 0.5177, "step": 6419 }, { "epoch": 0.17556333406256835, "grad_norm": 1.236372709274292, "learning_rate": 1.8909864182509642e-05, "loss": 0.5715, "step": 6420 }, { "epoch": 0.17559068037628528, "grad_norm": 1.8214631080627441, "learning_rate": 1.890946201134042e-05, "loss": 0.4301, "step": 6421 }, { "epoch": 0.17561802669000218, "grad_norm": 1.7326743602752686, "learning_rate": 1.890905977027898e-05, "loss": 0.5882, "step": 6422 }, { "epoch": 0.1756453730037191, "grad_norm": 1.6957634687423706, "learning_rate": 1.8908657459328474e-05, "loss": 0.5793, "step": 6423 }, { "epoch": 0.175672719317436, "grad_norm": 1.822123408317566, "learning_rate": 1.8908255078492056e-05, "loss": 0.5495, "step": 6424 }, { "epoch": 0.17570006563115292, "grad_norm": 1.681833267211914, "learning_rate": 1.8907852627772886e-05, "loss": 0.5563, "step": 6425 }, { "epoch": 0.17572741194486982, "grad_norm": 1.5484036207199097, "learning_rate": 1.8907450107174115e-05, "loss": 0.5701, "step": 6426 }, { "epoch": 0.17575475825858675, "grad_norm": 2.071624517440796, "learning_rate": 1.890704751669891e-05, "loss": 0.6314, "step": 6427 }, { "epoch": 0.17578210457230364, "grad_norm": 1.3513315916061401, "learning_rate": 1.8906644856350418e-05, "loss": 0.5549, "step": 6428 }, { "epoch": 0.17580945088602057, "grad_norm": 1.2021806240081787, "learning_rate": 1.8906242126131805e-05, "loss": 0.5847, "step": 6429 }, { "epoch": 0.17583679719973747, "grad_norm": 3.0967612266540527, "learning_rate": 1.8905839326046232e-05, "loss": 0.9443, "step": 6430 }, { "epoch": 0.1758641435134544, "grad_norm": 1.4240081310272217, "learning_rate": 1.8905436456096856e-05, "loss": 0.5928, "step": 6431 }, { "epoch": 0.1758914898271713, "grad_norm": 1.3087078332901, "learning_rate": 1.8905033516286832e-05, "loss": 0.5677, "step": 6432 }, { "epoch": 0.1759188361408882, "grad_norm": 1.4480856657028198, "learning_rate": 1.890463050661933e-05, "loss": 0.5471, "step": 6433 }, { "epoch": 0.1759461824546051, "grad_norm": 1.2740319967269897, "learning_rate": 1.8904227427097507e-05, "loss": 0.5647, "step": 6434 }, { "epoch": 0.17597352876832204, "grad_norm": 1.7205419540405273, "learning_rate": 1.8903824277724525e-05, "loss": 0.5687, "step": 6435 }, { "epoch": 0.17600087508203893, "grad_norm": 1.531630277633667, "learning_rate": 1.890342105850355e-05, "loss": 0.6101, "step": 6436 }, { "epoch": 0.17602822139575586, "grad_norm": 1.7275742292404175, "learning_rate": 1.8903017769437738e-05, "loss": 0.5461, "step": 6437 }, { "epoch": 0.17605556770947275, "grad_norm": 1.6441570520401, "learning_rate": 1.8902614410530262e-05, "loss": 0.5241, "step": 6438 }, { "epoch": 0.17608291402318968, "grad_norm": 1.4649701118469238, "learning_rate": 1.8902210981784275e-05, "loss": 0.9695, "step": 6439 }, { "epoch": 0.17611026033690658, "grad_norm": 1.522686243057251, "learning_rate": 1.8901807483202957e-05, "loss": 0.5732, "step": 6440 }, { "epoch": 0.1761376066506235, "grad_norm": 1.4163835048675537, "learning_rate": 1.8901403914789457e-05, "loss": 0.5652, "step": 6441 }, { "epoch": 0.1761649529643404, "grad_norm": 1.5764445066452026, "learning_rate": 1.890100027654695e-05, "loss": 0.5796, "step": 6442 }, { "epoch": 0.17619229927805732, "grad_norm": 1.9606984853744507, "learning_rate": 1.8900596568478603e-05, "loss": 0.5828, "step": 6443 }, { "epoch": 0.17621964559177422, "grad_norm": 1.8722620010375977, "learning_rate": 1.890019279058758e-05, "loss": 0.5399, "step": 6444 }, { "epoch": 0.17624699190549115, "grad_norm": 1.4804296493530273, "learning_rate": 1.889978894287705e-05, "loss": 0.6007, "step": 6445 }, { "epoch": 0.17627433821920804, "grad_norm": 1.5240809917449951, "learning_rate": 1.8899385025350178e-05, "loss": 0.5737, "step": 6446 }, { "epoch": 0.17630168453292497, "grad_norm": 1.6490195989608765, "learning_rate": 1.8898981038010137e-05, "loss": 0.9032, "step": 6447 }, { "epoch": 0.17632903084664187, "grad_norm": 1.4638643264770508, "learning_rate": 1.8898576980860094e-05, "loss": 0.558, "step": 6448 }, { "epoch": 0.1763563771603588, "grad_norm": 1.4180362224578857, "learning_rate": 1.8898172853903216e-05, "loss": 0.5744, "step": 6449 }, { "epoch": 0.1763837234740757, "grad_norm": 1.2552744150161743, "learning_rate": 1.889776865714268e-05, "loss": 0.5489, "step": 6450 }, { "epoch": 0.1764110697877926, "grad_norm": 1.176415205001831, "learning_rate": 1.8897364390581652e-05, "loss": 0.497, "step": 6451 }, { "epoch": 0.1764384161015095, "grad_norm": 1.107589840888977, "learning_rate": 1.88969600542233e-05, "loss": 0.6177, "step": 6452 }, { "epoch": 0.17646576241522643, "grad_norm": 1.3442835807800293, "learning_rate": 1.8896555648070808e-05, "loss": 0.5645, "step": 6453 }, { "epoch": 0.17649310872894333, "grad_norm": 1.7538172006607056, "learning_rate": 1.8896151172127334e-05, "loss": 0.5724, "step": 6454 }, { "epoch": 0.17652045504266026, "grad_norm": 1.7202644348144531, "learning_rate": 1.8895746626396062e-05, "loss": 0.9302, "step": 6455 }, { "epoch": 0.17654780135637715, "grad_norm": 1.3555526733398438, "learning_rate": 1.889534201088016e-05, "loss": 0.5798, "step": 6456 }, { "epoch": 0.17657514767009408, "grad_norm": 1.3597160577774048, "learning_rate": 1.88949373255828e-05, "loss": 0.5889, "step": 6457 }, { "epoch": 0.17660249398381098, "grad_norm": 1.3723320960998535, "learning_rate": 1.8894532570507164e-05, "loss": 0.575, "step": 6458 }, { "epoch": 0.1766298402975279, "grad_norm": 1.6680877208709717, "learning_rate": 1.8894127745656422e-05, "loss": 0.5788, "step": 6459 }, { "epoch": 0.1766571866112448, "grad_norm": 1.6208875179290771, "learning_rate": 1.889372285103375e-05, "loss": 0.5935, "step": 6460 }, { "epoch": 0.17668453292496172, "grad_norm": 1.9133085012435913, "learning_rate": 1.8893317886642326e-05, "loss": 0.526, "step": 6461 }, { "epoch": 0.17671187923867862, "grad_norm": 1.8223352432250977, "learning_rate": 1.8892912852485325e-05, "loss": 0.5747, "step": 6462 }, { "epoch": 0.17673922555239555, "grad_norm": 1.954354166984558, "learning_rate": 1.8892507748565924e-05, "loss": 0.5518, "step": 6463 }, { "epoch": 0.17676657186611244, "grad_norm": 1.5344938039779663, "learning_rate": 1.8892102574887306e-05, "loss": 0.5919, "step": 6464 }, { "epoch": 0.17679391817982937, "grad_norm": 1.8953204154968262, "learning_rate": 1.8891697331452645e-05, "loss": 0.576, "step": 6465 }, { "epoch": 0.17682126449354627, "grad_norm": 1.2680494785308838, "learning_rate": 1.8891292018265118e-05, "loss": 0.5658, "step": 6466 }, { "epoch": 0.1768486108072632, "grad_norm": 1.9310425519943237, "learning_rate": 1.8890886635327908e-05, "loss": 0.5579, "step": 6467 }, { "epoch": 0.1768759571209801, "grad_norm": 1.655800223350525, "learning_rate": 1.8890481182644196e-05, "loss": 0.9132, "step": 6468 }, { "epoch": 0.176903303434697, "grad_norm": 3.037078619003296, "learning_rate": 1.8890075660217163e-05, "loss": 0.5598, "step": 6469 }, { "epoch": 0.1769306497484139, "grad_norm": 1.819353461265564, "learning_rate": 1.8889670068049984e-05, "loss": 0.5484, "step": 6470 }, { "epoch": 0.17695799606213083, "grad_norm": 1.7858548164367676, "learning_rate": 1.8889264406145848e-05, "loss": 0.5404, "step": 6471 }, { "epoch": 0.17698534237584773, "grad_norm": 1.2967720031738281, "learning_rate": 1.888885867450793e-05, "loss": 0.5529, "step": 6472 }, { "epoch": 0.17701268868956466, "grad_norm": 1.3268851041793823, "learning_rate": 1.8888452873139423e-05, "loss": 0.5506, "step": 6473 }, { "epoch": 0.17704003500328155, "grad_norm": 1.7363522052764893, "learning_rate": 1.8888047002043502e-05, "loss": 0.5815, "step": 6474 }, { "epoch": 0.17706738131699848, "grad_norm": 1.645527720451355, "learning_rate": 1.8887641061223356e-05, "loss": 0.5738, "step": 6475 }, { "epoch": 0.17709472763071538, "grad_norm": 1.4489468336105347, "learning_rate": 1.8887235050682166e-05, "loss": 0.5705, "step": 6476 }, { "epoch": 0.1771220739444323, "grad_norm": 1.5197947025299072, "learning_rate": 1.8886828970423113e-05, "loss": 0.5486, "step": 6477 }, { "epoch": 0.1771494202581492, "grad_norm": 1.6245648860931396, "learning_rate": 1.8886422820449394e-05, "loss": 0.6186, "step": 6478 }, { "epoch": 0.17717676657186612, "grad_norm": 1.829901933670044, "learning_rate": 1.8886016600764185e-05, "loss": 0.9007, "step": 6479 }, { "epoch": 0.17720411288558302, "grad_norm": 2.0205938816070557, "learning_rate": 1.888561031137068e-05, "loss": 0.5556, "step": 6480 }, { "epoch": 0.17723145919929995, "grad_norm": 1.7710742950439453, "learning_rate": 1.888520395227206e-05, "loss": 0.5714, "step": 6481 }, { "epoch": 0.17725880551301684, "grad_norm": 1.533097743988037, "learning_rate": 1.8884797523471516e-05, "loss": 0.5082, "step": 6482 }, { "epoch": 0.17728615182673377, "grad_norm": 1.5424447059631348, "learning_rate": 1.8884391024972236e-05, "loss": 0.8799, "step": 6483 }, { "epoch": 0.17731349814045066, "grad_norm": 1.3280961513519287, "learning_rate": 1.888398445677741e-05, "loss": 0.5419, "step": 6484 }, { "epoch": 0.1773408444541676, "grad_norm": 1.472970724105835, "learning_rate": 1.8883577818890224e-05, "loss": 0.5032, "step": 6485 }, { "epoch": 0.1773681907678845, "grad_norm": 1.3711597919464111, "learning_rate": 1.888317111131387e-05, "loss": 0.564, "step": 6486 }, { "epoch": 0.1773955370816014, "grad_norm": 1.7470813989639282, "learning_rate": 1.888276433405154e-05, "loss": 0.5555, "step": 6487 }, { "epoch": 0.1774228833953183, "grad_norm": 1.750678300857544, "learning_rate": 1.8882357487106425e-05, "loss": 0.4741, "step": 6488 }, { "epoch": 0.17745022970903523, "grad_norm": 1.5213013887405396, "learning_rate": 1.8881950570481714e-05, "loss": 0.5574, "step": 6489 }, { "epoch": 0.17747757602275213, "grad_norm": 1.7165814638137817, "learning_rate": 1.88815435841806e-05, "loss": 0.6308, "step": 6490 }, { "epoch": 0.17750492233646906, "grad_norm": 1.8162249326705933, "learning_rate": 1.8881136528206273e-05, "loss": 0.9395, "step": 6491 }, { "epoch": 0.17753226865018595, "grad_norm": 1.2081414461135864, "learning_rate": 1.8880729402561932e-05, "loss": 0.5403, "step": 6492 }, { "epoch": 0.17755961496390288, "grad_norm": 1.7310354709625244, "learning_rate": 1.888032220725077e-05, "loss": 0.5607, "step": 6493 }, { "epoch": 0.17758696127761978, "grad_norm": 1.9197930097579956, "learning_rate": 1.8879914942275975e-05, "loss": 0.5661, "step": 6494 }, { "epoch": 0.1776143075913367, "grad_norm": 1.2575863599777222, "learning_rate": 1.887950760764075e-05, "loss": 0.5599, "step": 6495 }, { "epoch": 0.1776416539050536, "grad_norm": 1.610710859298706, "learning_rate": 1.8879100203348287e-05, "loss": 0.5322, "step": 6496 }, { "epoch": 0.17766900021877052, "grad_norm": 1.5966362953186035, "learning_rate": 1.8878692729401783e-05, "loss": 0.593, "step": 6497 }, { "epoch": 0.17769634653248742, "grad_norm": 1.4389384984970093, "learning_rate": 1.887828518580443e-05, "loss": 0.5289, "step": 6498 }, { "epoch": 0.17772369284620435, "grad_norm": 1.6981775760650635, "learning_rate": 1.887787757255943e-05, "loss": 0.6036, "step": 6499 }, { "epoch": 0.17775103915992124, "grad_norm": 1.673026204109192, "learning_rate": 1.8877469889669976e-05, "loss": 0.5636, "step": 6500 }, { "epoch": 0.17777838547363814, "grad_norm": 1.596771478652954, "learning_rate": 1.887706213713927e-05, "loss": 0.6683, "step": 6501 }, { "epoch": 0.17780573178735506, "grad_norm": 1.6176342964172363, "learning_rate": 1.8876654314970512e-05, "loss": 0.5128, "step": 6502 }, { "epoch": 0.17783307810107196, "grad_norm": 1.2236156463623047, "learning_rate": 1.8876246423166896e-05, "loss": 0.5558, "step": 6503 }, { "epoch": 0.1778604244147889, "grad_norm": 1.5488102436065674, "learning_rate": 1.8875838461731626e-05, "loss": 0.5383, "step": 6504 }, { "epoch": 0.17788777072850578, "grad_norm": 3.9320924282073975, "learning_rate": 1.8875430430667903e-05, "loss": 0.5788, "step": 6505 }, { "epoch": 0.1779151170422227, "grad_norm": 1.500805139541626, "learning_rate": 1.8875022329978925e-05, "loss": 0.5441, "step": 6506 }, { "epoch": 0.1779424633559396, "grad_norm": 1.4926629066467285, "learning_rate": 1.8874614159667895e-05, "loss": 0.5995, "step": 6507 }, { "epoch": 0.17796980966965653, "grad_norm": 1.3502686023712158, "learning_rate": 1.8874205919738013e-05, "loss": 0.5837, "step": 6508 }, { "epoch": 0.17799715598337343, "grad_norm": 2.518733024597168, "learning_rate": 1.8873797610192483e-05, "loss": 0.6505, "step": 6509 }, { "epoch": 0.17802450229709035, "grad_norm": 1.5270471572875977, "learning_rate": 1.887338923103451e-05, "loss": 0.5169, "step": 6510 }, { "epoch": 0.17805184861080725, "grad_norm": 1.514316201210022, "learning_rate": 1.8872980782267293e-05, "loss": 0.5621, "step": 6511 }, { "epoch": 0.17807919492452418, "grad_norm": 1.4941445589065552, "learning_rate": 1.887257226389404e-05, "loss": 0.5542, "step": 6512 }, { "epoch": 0.17810654123824107, "grad_norm": 1.8984228372573853, "learning_rate": 1.8872163675917954e-05, "loss": 0.6209, "step": 6513 }, { "epoch": 0.178133887551958, "grad_norm": 2.2546849250793457, "learning_rate": 1.8871755018342242e-05, "loss": 0.9124, "step": 6514 }, { "epoch": 0.1781612338656749, "grad_norm": 2.1207284927368164, "learning_rate": 1.8871346291170106e-05, "loss": 0.5414, "step": 6515 }, { "epoch": 0.17818858017939182, "grad_norm": 2.0636680126190186, "learning_rate": 1.887093749440476e-05, "loss": 0.5403, "step": 6516 }, { "epoch": 0.17821592649310872, "grad_norm": 1.4293142557144165, "learning_rate": 1.8870528628049402e-05, "loss": 0.5382, "step": 6517 }, { "epoch": 0.17824327280682564, "grad_norm": 1.8442844152450562, "learning_rate": 1.8870119692107246e-05, "loss": 0.5303, "step": 6518 }, { "epoch": 0.17827061912054254, "grad_norm": 1.8300657272338867, "learning_rate": 1.8869710686581496e-05, "loss": 0.5812, "step": 6519 }, { "epoch": 0.17829796543425946, "grad_norm": 1.4135818481445312, "learning_rate": 1.886930161147536e-05, "loss": 0.4739, "step": 6520 }, { "epoch": 0.17832531174797636, "grad_norm": 1.2493635416030884, "learning_rate": 1.8868892466792047e-05, "loss": 0.5411, "step": 6521 }, { "epoch": 0.1783526580616933, "grad_norm": 1.7007259130477905, "learning_rate": 1.8868483252534773e-05, "loss": 0.6093, "step": 6522 }, { "epoch": 0.17838000437541018, "grad_norm": 1.8459534645080566, "learning_rate": 1.8868073968706743e-05, "loss": 0.5554, "step": 6523 }, { "epoch": 0.1784073506891271, "grad_norm": 2.4771480560302734, "learning_rate": 1.8867664615311168e-05, "loss": 0.5357, "step": 6524 }, { "epoch": 0.178434697002844, "grad_norm": 1.6992658376693726, "learning_rate": 1.886725519235126e-05, "loss": 0.9781, "step": 6525 }, { "epoch": 0.17846204331656093, "grad_norm": 1.5531359910964966, "learning_rate": 1.886684569983023e-05, "loss": 0.9012, "step": 6526 }, { "epoch": 0.17848938963027783, "grad_norm": 1.534932255744934, "learning_rate": 1.886643613775129e-05, "loss": 0.6129, "step": 6527 }, { "epoch": 0.17851673594399475, "grad_norm": 1.4306412935256958, "learning_rate": 1.8866026506117653e-05, "loss": 0.5641, "step": 6528 }, { "epoch": 0.17854408225771165, "grad_norm": 1.642998456954956, "learning_rate": 1.8865616804932535e-05, "loss": 0.5991, "step": 6529 }, { "epoch": 0.17857142857142858, "grad_norm": 1.4654467105865479, "learning_rate": 1.8865207034199148e-05, "loss": 0.5601, "step": 6530 }, { "epoch": 0.17859877488514547, "grad_norm": 1.536401629447937, "learning_rate": 1.8864797193920707e-05, "loss": 0.9356, "step": 6531 }, { "epoch": 0.1786261211988624, "grad_norm": 1.3255956172943115, "learning_rate": 1.8864387284100427e-05, "loss": 0.5622, "step": 6532 }, { "epoch": 0.1786534675125793, "grad_norm": 1.2869675159454346, "learning_rate": 1.8863977304741524e-05, "loss": 0.5837, "step": 6533 }, { "epoch": 0.17868081382629622, "grad_norm": 1.7445106506347656, "learning_rate": 1.886356725584721e-05, "loss": 0.4929, "step": 6534 }, { "epoch": 0.17870816014001312, "grad_norm": 6.462890625, "learning_rate": 1.886315713742071e-05, "loss": 0.431, "step": 6535 }, { "epoch": 0.17873550645373004, "grad_norm": 1.503901720046997, "learning_rate": 1.8862746949465235e-05, "loss": 0.5794, "step": 6536 }, { "epoch": 0.17876285276744694, "grad_norm": 2.110747814178467, "learning_rate": 1.8862336691984002e-05, "loss": 0.6573, "step": 6537 }, { "epoch": 0.17879019908116386, "grad_norm": 1.6644177436828613, "learning_rate": 1.8861926364980233e-05, "loss": 0.5893, "step": 6538 }, { "epoch": 0.17881754539488076, "grad_norm": 2.0666651725769043, "learning_rate": 1.8861515968457146e-05, "loss": 0.5954, "step": 6539 }, { "epoch": 0.1788448917085977, "grad_norm": 1.7111783027648926, "learning_rate": 1.886110550241796e-05, "loss": 0.5884, "step": 6540 }, { "epoch": 0.17887223802231458, "grad_norm": 1.4998446702957153, "learning_rate": 1.8860694966865894e-05, "loss": 0.5887, "step": 6541 }, { "epoch": 0.1788995843360315, "grad_norm": 2.1775519847869873, "learning_rate": 1.8860284361804173e-05, "loss": 0.4934, "step": 6542 }, { "epoch": 0.1789269306497484, "grad_norm": 1.5905972719192505, "learning_rate": 1.8859873687236013e-05, "loss": 0.6059, "step": 6543 }, { "epoch": 0.17895427696346533, "grad_norm": 1.2062658071517944, "learning_rate": 1.8859462943164635e-05, "loss": 0.5675, "step": 6544 }, { "epoch": 0.17898162327718223, "grad_norm": 1.5169564485549927, "learning_rate": 1.8859052129593265e-05, "loss": 0.5312, "step": 6545 }, { "epoch": 0.17900896959089915, "grad_norm": 1.506451964378357, "learning_rate": 1.8858641246525126e-05, "loss": 0.5835, "step": 6546 }, { "epoch": 0.17903631590461605, "grad_norm": 2.8904054164886475, "learning_rate": 1.8858230293963435e-05, "loss": 0.5958, "step": 6547 }, { "epoch": 0.17906366221833298, "grad_norm": 1.512031078338623, "learning_rate": 1.8857819271911423e-05, "loss": 0.5767, "step": 6548 }, { "epoch": 0.17909100853204987, "grad_norm": 1.6156086921691895, "learning_rate": 1.885740818037231e-05, "loss": 0.5707, "step": 6549 }, { "epoch": 0.1791183548457668, "grad_norm": 6.489860534667969, "learning_rate": 1.8856997019349327e-05, "loss": 0.5445, "step": 6550 }, { "epoch": 0.1791457011594837, "grad_norm": 1.7298073768615723, "learning_rate": 1.885658578884569e-05, "loss": 0.5806, "step": 6551 }, { "epoch": 0.17917304747320062, "grad_norm": 1.7808908224105835, "learning_rate": 1.8856174488864635e-05, "loss": 0.5726, "step": 6552 }, { "epoch": 0.17920039378691752, "grad_norm": 2.307589292526245, "learning_rate": 1.885576311940938e-05, "loss": 0.6025, "step": 6553 }, { "epoch": 0.17922774010063444, "grad_norm": 1.3479074239730835, "learning_rate": 1.8855351680483154e-05, "loss": 0.537, "step": 6554 }, { "epoch": 0.17925508641435134, "grad_norm": 1.3666988611221313, "learning_rate": 1.8854940172089188e-05, "loss": 0.5622, "step": 6555 }, { "epoch": 0.17928243272806826, "grad_norm": 1.3095544576644897, "learning_rate": 1.885452859423071e-05, "loss": 0.5843, "step": 6556 }, { "epoch": 0.17930977904178516, "grad_norm": 13.032122611999512, "learning_rate": 1.8854116946910946e-05, "loss": 0.5694, "step": 6557 }, { "epoch": 0.17933712535550209, "grad_norm": 2.6312661170959473, "learning_rate": 1.8853705230133128e-05, "loss": 0.6181, "step": 6558 }, { "epoch": 0.17936447166921898, "grad_norm": 1.6059489250183105, "learning_rate": 1.8853293443900483e-05, "loss": 0.5952, "step": 6559 }, { "epoch": 0.1793918179829359, "grad_norm": 1.6439318656921387, "learning_rate": 1.8852881588216238e-05, "loss": 0.5666, "step": 6560 }, { "epoch": 0.1794191642966528, "grad_norm": 1.3876569271087646, "learning_rate": 1.8852469663083634e-05, "loss": 0.5511, "step": 6561 }, { "epoch": 0.17944651061036973, "grad_norm": 1.3583534955978394, "learning_rate": 1.8852057668505894e-05, "loss": 0.5642, "step": 6562 }, { "epoch": 0.17947385692408663, "grad_norm": 1.6019052267074585, "learning_rate": 1.8851645604486253e-05, "loss": 0.4582, "step": 6563 }, { "epoch": 0.17950120323780355, "grad_norm": 1.7231489419937134, "learning_rate": 1.8851233471027944e-05, "loss": 0.9496, "step": 6564 }, { "epoch": 0.17952854955152045, "grad_norm": 1.5575939416885376, "learning_rate": 1.8850821268134202e-05, "loss": 0.5885, "step": 6565 }, { "epoch": 0.17955589586523737, "grad_norm": 1.7983852624893188, "learning_rate": 1.885040899580825e-05, "loss": 0.5813, "step": 6566 }, { "epoch": 0.17958324217895427, "grad_norm": 2.0067687034606934, "learning_rate": 1.8849996654053336e-05, "loss": 0.5819, "step": 6567 }, { "epoch": 0.1796105884926712, "grad_norm": 1.637542486190796, "learning_rate": 1.884958424287269e-05, "loss": 0.5201, "step": 6568 }, { "epoch": 0.1796379348063881, "grad_norm": 1.5387628078460693, "learning_rate": 1.8849171762269543e-05, "loss": 0.9155, "step": 6569 }, { "epoch": 0.17966528112010502, "grad_norm": 1.4873583316802979, "learning_rate": 1.8848759212247137e-05, "loss": 0.543, "step": 6570 }, { "epoch": 0.17969262743382192, "grad_norm": 2.1012964248657227, "learning_rate": 1.8848346592808703e-05, "loss": 0.586, "step": 6571 }, { "epoch": 0.17971997374753884, "grad_norm": 1.2579892873764038, "learning_rate": 1.884793390395748e-05, "loss": 0.5904, "step": 6572 }, { "epoch": 0.17974732006125574, "grad_norm": 1.3738831281661987, "learning_rate": 1.8847521145696707e-05, "loss": 0.5677, "step": 6573 }, { "epoch": 0.17977466637497266, "grad_norm": 1.7869775295257568, "learning_rate": 1.884710831802962e-05, "loss": 0.5641, "step": 6574 }, { "epoch": 0.17980201268868956, "grad_norm": 1.455359935760498, "learning_rate": 1.8846695420959457e-05, "loss": 0.5905, "step": 6575 }, { "epoch": 0.17982935900240649, "grad_norm": 2.1114931106567383, "learning_rate": 1.884628245448946e-05, "loss": 0.4323, "step": 6576 }, { "epoch": 0.17985670531612338, "grad_norm": 1.48849618434906, "learning_rate": 1.8845869418622863e-05, "loss": 0.5327, "step": 6577 }, { "epoch": 0.1798840516298403, "grad_norm": 1.7995115518569946, "learning_rate": 1.8845456313362913e-05, "loss": 0.6438, "step": 6578 }, { "epoch": 0.1799113979435572, "grad_norm": 1.3255071640014648, "learning_rate": 1.8845043138712847e-05, "loss": 0.5548, "step": 6579 }, { "epoch": 0.17993874425727413, "grad_norm": 1.3238226175308228, "learning_rate": 1.8844629894675906e-05, "loss": 0.604, "step": 6580 }, { "epoch": 0.17996609057099103, "grad_norm": 1.5980106592178345, "learning_rate": 1.8844216581255335e-05, "loss": 0.6184, "step": 6581 }, { "epoch": 0.17999343688470795, "grad_norm": 1.4740593433380127, "learning_rate": 1.884380319845437e-05, "loss": 0.5941, "step": 6582 }, { "epoch": 0.18002078319842485, "grad_norm": 1.7985420227050781, "learning_rate": 1.884338974627626e-05, "loss": 0.6624, "step": 6583 }, { "epoch": 0.18004812951214177, "grad_norm": 1.311131477355957, "learning_rate": 1.8842976224724244e-05, "loss": 0.5597, "step": 6584 }, { "epoch": 0.18007547582585867, "grad_norm": 1.3502769470214844, "learning_rate": 1.8842562633801572e-05, "loss": 0.5834, "step": 6585 }, { "epoch": 0.1801028221395756, "grad_norm": 1.5924917459487915, "learning_rate": 1.8842148973511485e-05, "loss": 0.6135, "step": 6586 }, { "epoch": 0.1801301684532925, "grad_norm": 2.0173346996307373, "learning_rate": 1.8841735243857223e-05, "loss": 0.5547, "step": 6587 }, { "epoch": 0.18015751476700942, "grad_norm": 1.3550652265548706, "learning_rate": 1.884132144484204e-05, "loss": 0.5736, "step": 6588 }, { "epoch": 0.18018486108072632, "grad_norm": 1.5583394765853882, "learning_rate": 1.8840907576469177e-05, "loss": 0.5616, "step": 6589 }, { "epoch": 0.18021220739444324, "grad_norm": 1.5495260953903198, "learning_rate": 1.8840493638741885e-05, "loss": 0.939, "step": 6590 }, { "epoch": 0.18023955370816014, "grad_norm": 1.09207284450531, "learning_rate": 1.8840079631663404e-05, "loss": 0.5987, "step": 6591 }, { "epoch": 0.18026690002187706, "grad_norm": 1.494566798210144, "learning_rate": 1.8839665555236988e-05, "loss": 0.5599, "step": 6592 }, { "epoch": 0.18029424633559396, "grad_norm": 1.4736219644546509, "learning_rate": 1.8839251409465885e-05, "loss": 0.5988, "step": 6593 }, { "epoch": 0.18032159264931089, "grad_norm": 1.8272483348846436, "learning_rate": 1.8838837194353337e-05, "loss": 0.5471, "step": 6594 }, { "epoch": 0.18034893896302778, "grad_norm": 1.4274706840515137, "learning_rate": 1.8838422909902605e-05, "loss": 0.5311, "step": 6595 }, { "epoch": 0.1803762852767447, "grad_norm": 1.2248594760894775, "learning_rate": 1.883800855611693e-05, "loss": 0.5503, "step": 6596 }, { "epoch": 0.1804036315904616, "grad_norm": 1.368789553642273, "learning_rate": 1.8837594132999563e-05, "loss": 0.5631, "step": 6597 }, { "epoch": 0.18043097790417853, "grad_norm": 1.4888954162597656, "learning_rate": 1.883717964055376e-05, "loss": 0.5427, "step": 6598 }, { "epoch": 0.18045832421789543, "grad_norm": 1.3605865240097046, "learning_rate": 1.8836765078782768e-05, "loss": 0.4242, "step": 6599 }, { "epoch": 0.18048567053161235, "grad_norm": 1.4719438552856445, "learning_rate": 1.8836350447689843e-05, "loss": 0.5782, "step": 6600 }, { "epoch": 0.18051301684532925, "grad_norm": 1.6000919342041016, "learning_rate": 1.8835935747278232e-05, "loss": 0.5403, "step": 6601 }, { "epoch": 0.18054036315904617, "grad_norm": 1.3555302619934082, "learning_rate": 1.8835520977551194e-05, "loss": 0.5493, "step": 6602 }, { "epoch": 0.18056770947276307, "grad_norm": 1.1512385606765747, "learning_rate": 1.8835106138511983e-05, "loss": 0.4292, "step": 6603 }, { "epoch": 0.18059505578647997, "grad_norm": 3.6139774322509766, "learning_rate": 1.8834691230163846e-05, "loss": 0.5538, "step": 6604 }, { "epoch": 0.1806224021001969, "grad_norm": 1.3640581369400024, "learning_rate": 1.8834276252510047e-05, "loss": 0.5657, "step": 6605 }, { "epoch": 0.1806497484139138, "grad_norm": 1.5189077854156494, "learning_rate": 1.8833861205553835e-05, "loss": 0.5862, "step": 6606 }, { "epoch": 0.18067709472763072, "grad_norm": 1.9982562065124512, "learning_rate": 1.8833446089298463e-05, "loss": 0.5385, "step": 6607 }, { "epoch": 0.1807044410413476, "grad_norm": 1.6871373653411865, "learning_rate": 1.88330309037472e-05, "loss": 0.5818, "step": 6608 }, { "epoch": 0.18073178735506454, "grad_norm": 1.9566518068313599, "learning_rate": 1.883261564890329e-05, "loss": 0.6584, "step": 6609 }, { "epoch": 0.18075913366878144, "grad_norm": 1.429352879524231, "learning_rate": 1.8832200324770002e-05, "loss": 0.5556, "step": 6610 }, { "epoch": 0.18078647998249836, "grad_norm": 1.4619096517562866, "learning_rate": 1.8831784931350582e-05, "loss": 0.9371, "step": 6611 }, { "epoch": 0.18081382629621526, "grad_norm": 1.67318856716156, "learning_rate": 1.8831369468648296e-05, "loss": 0.6225, "step": 6612 }, { "epoch": 0.18084117260993218, "grad_norm": 1.5108381509780884, "learning_rate": 1.8830953936666402e-05, "loss": 0.4178, "step": 6613 }, { "epoch": 0.18086851892364908, "grad_norm": 1.3996472358703613, "learning_rate": 1.8830538335408163e-05, "loss": 0.5598, "step": 6614 }, { "epoch": 0.180895865237366, "grad_norm": 1.247941255569458, "learning_rate": 1.8830122664876833e-05, "loss": 0.5496, "step": 6615 }, { "epoch": 0.1809232115510829, "grad_norm": 1.1829676628112793, "learning_rate": 1.8829706925075676e-05, "loss": 0.4022, "step": 6616 }, { "epoch": 0.18095055786479983, "grad_norm": 1.4330005645751953, "learning_rate": 1.8829291116007953e-05, "loss": 0.9511, "step": 6617 }, { "epoch": 0.18097790417851672, "grad_norm": 1.6798608303070068, "learning_rate": 1.8828875237676927e-05, "loss": 0.575, "step": 6618 }, { "epoch": 0.18100525049223365, "grad_norm": 1.3503479957580566, "learning_rate": 1.882845929008586e-05, "loss": 0.5857, "step": 6619 }, { "epoch": 0.18103259680595055, "grad_norm": 1.4176559448242188, "learning_rate": 1.882804327323801e-05, "loss": 0.5623, "step": 6620 }, { "epoch": 0.18105994311966747, "grad_norm": 1.618519902229309, "learning_rate": 1.882762718713665e-05, "loss": 0.5434, "step": 6621 }, { "epoch": 0.18108728943338437, "grad_norm": 2.8239734172821045, "learning_rate": 1.8827211031785035e-05, "loss": 0.949, "step": 6622 }, { "epoch": 0.1811146357471013, "grad_norm": 1.4415346384048462, "learning_rate": 1.8826794807186438e-05, "loss": 0.5829, "step": 6623 }, { "epoch": 0.1811419820608182, "grad_norm": 1.74053156375885, "learning_rate": 1.8826378513344114e-05, "loss": 0.6247, "step": 6624 }, { "epoch": 0.18116932837453512, "grad_norm": 2.027350902557373, "learning_rate": 1.882596215026134e-05, "loss": 0.8884, "step": 6625 }, { "epoch": 0.181196674688252, "grad_norm": 1.303127408027649, "learning_rate": 1.8825545717941373e-05, "loss": 0.6137, "step": 6626 }, { "epoch": 0.18122402100196894, "grad_norm": 1.3771913051605225, "learning_rate": 1.8825129216387485e-05, "loss": 0.6218, "step": 6627 }, { "epoch": 0.18125136731568584, "grad_norm": 1.4631644487380981, "learning_rate": 1.8824712645602945e-05, "loss": 0.5433, "step": 6628 }, { "epoch": 0.18127871362940276, "grad_norm": 1.3104779720306396, "learning_rate": 1.8824296005591015e-05, "loss": 0.9065, "step": 6629 }, { "epoch": 0.18130605994311966, "grad_norm": 1.6825299263000488, "learning_rate": 1.882387929635496e-05, "loss": 0.6261, "step": 6630 }, { "epoch": 0.18133340625683658, "grad_norm": 1.7274090051651, "learning_rate": 1.8823462517898065e-05, "loss": 0.5759, "step": 6631 }, { "epoch": 0.18136075257055348, "grad_norm": 2.19067120552063, "learning_rate": 1.8823045670223587e-05, "loss": 0.5852, "step": 6632 }, { "epoch": 0.1813880988842704, "grad_norm": 1.5633933544158936, "learning_rate": 1.8822628753334798e-05, "loss": 0.5512, "step": 6633 }, { "epoch": 0.1814154451979873, "grad_norm": 1.2808334827423096, "learning_rate": 1.8822211767234967e-05, "loss": 0.5774, "step": 6634 }, { "epoch": 0.18144279151170423, "grad_norm": 1.455427885055542, "learning_rate": 1.8821794711927374e-05, "loss": 0.5964, "step": 6635 }, { "epoch": 0.18147013782542112, "grad_norm": 1.5206574201583862, "learning_rate": 1.8821377587415278e-05, "loss": 0.5648, "step": 6636 }, { "epoch": 0.18149748413913805, "grad_norm": 1.7912094593048096, "learning_rate": 1.882096039370196e-05, "loss": 0.6059, "step": 6637 }, { "epoch": 0.18152483045285495, "grad_norm": 1.1794195175170898, "learning_rate": 1.8820543130790687e-05, "loss": 0.5744, "step": 6638 }, { "epoch": 0.18155217676657187, "grad_norm": 1.2893725633621216, "learning_rate": 1.8820125798684738e-05, "loss": 0.5493, "step": 6639 }, { "epoch": 0.18157952308028877, "grad_norm": 1.233163833618164, "learning_rate": 1.8819708397387385e-05, "loss": 0.6081, "step": 6640 }, { "epoch": 0.1816068693940057, "grad_norm": 1.8318376541137695, "learning_rate": 1.88192909269019e-05, "loss": 0.6123, "step": 6641 }, { "epoch": 0.1816342157077226, "grad_norm": 1.7350493669509888, "learning_rate": 1.881887338723156e-05, "loss": 0.5576, "step": 6642 }, { "epoch": 0.18166156202143952, "grad_norm": 1.4080978631973267, "learning_rate": 1.881845577837964e-05, "loss": 0.5715, "step": 6643 }, { "epoch": 0.1816889083351564, "grad_norm": 1.3455826044082642, "learning_rate": 1.8818038100349415e-05, "loss": 0.5655, "step": 6644 }, { "epoch": 0.18171625464887334, "grad_norm": 1.5101693868637085, "learning_rate": 1.8817620353144166e-05, "loss": 0.5603, "step": 6645 }, { "epoch": 0.18174360096259023, "grad_norm": 1.317084789276123, "learning_rate": 1.8817202536767166e-05, "loss": 0.5753, "step": 6646 }, { "epoch": 0.18177094727630716, "grad_norm": 2.2177648544311523, "learning_rate": 1.881678465122169e-05, "loss": 0.5155, "step": 6647 }, { "epoch": 0.18179829359002406, "grad_norm": 1.449056625366211, "learning_rate": 1.8816366696511024e-05, "loss": 0.5672, "step": 6648 }, { "epoch": 0.18182563990374098, "grad_norm": 1.4473414421081543, "learning_rate": 1.8815948672638444e-05, "loss": 0.4082, "step": 6649 }, { "epoch": 0.18185298621745788, "grad_norm": 1.390061378479004, "learning_rate": 1.881553057960722e-05, "loss": 0.5739, "step": 6650 }, { "epoch": 0.1818803325311748, "grad_norm": 2.0337395668029785, "learning_rate": 1.8815112417420643e-05, "loss": 0.5794, "step": 6651 }, { "epoch": 0.1819076788448917, "grad_norm": 1.553628921508789, "learning_rate": 1.8814694186081993e-05, "loss": 0.5727, "step": 6652 }, { "epoch": 0.18193502515860863, "grad_norm": 1.2756682634353638, "learning_rate": 1.8814275885594548e-05, "loss": 0.5612, "step": 6653 }, { "epoch": 0.18196237147232552, "grad_norm": 1.5342237949371338, "learning_rate": 1.8813857515961584e-05, "loss": 0.5747, "step": 6654 }, { "epoch": 0.18198971778604245, "grad_norm": 1.228333592414856, "learning_rate": 1.881343907718639e-05, "loss": 0.5431, "step": 6655 }, { "epoch": 0.18201706409975935, "grad_norm": 1.5853230953216553, "learning_rate": 1.881302056927225e-05, "loss": 0.5783, "step": 6656 }, { "epoch": 0.18204441041347627, "grad_norm": 1.713706612586975, "learning_rate": 1.8812601992222437e-05, "loss": 0.5864, "step": 6657 }, { "epoch": 0.18207175672719317, "grad_norm": 1.2667380571365356, "learning_rate": 1.881218334604025e-05, "loss": 0.579, "step": 6658 }, { "epoch": 0.1820991030409101, "grad_norm": 1.536957025527954, "learning_rate": 1.881176463072896e-05, "loss": 0.5706, "step": 6659 }, { "epoch": 0.182126449354627, "grad_norm": 1.3527731895446777, "learning_rate": 1.8811345846291855e-05, "loss": 0.5673, "step": 6660 }, { "epoch": 0.18215379566834392, "grad_norm": 1.5203309059143066, "learning_rate": 1.8810926992732224e-05, "loss": 0.5577, "step": 6661 }, { "epoch": 0.1821811419820608, "grad_norm": 1.4855269193649292, "learning_rate": 1.881050807005335e-05, "loss": 0.5768, "step": 6662 }, { "epoch": 0.18220848829577774, "grad_norm": 1.4660018682479858, "learning_rate": 1.881008907825852e-05, "loss": 0.58, "step": 6663 }, { "epoch": 0.18223583460949463, "grad_norm": 1.169986367225647, "learning_rate": 1.880967001735102e-05, "loss": 0.5515, "step": 6664 }, { "epoch": 0.18226318092321156, "grad_norm": 1.1625367403030396, "learning_rate": 1.8809250887334137e-05, "loss": 0.4328, "step": 6665 }, { "epoch": 0.18229052723692846, "grad_norm": 1.4867396354675293, "learning_rate": 1.8808831688211163e-05, "loss": 0.5417, "step": 6666 }, { "epoch": 0.18231787355064538, "grad_norm": 1.4419025182724, "learning_rate": 1.880841241998538e-05, "loss": 0.6227, "step": 6667 }, { "epoch": 0.18234521986436228, "grad_norm": 1.5970275402069092, "learning_rate": 1.8807993082660083e-05, "loss": 0.5256, "step": 6668 }, { "epoch": 0.1823725661780792, "grad_norm": 1.4436756372451782, "learning_rate": 1.8807573676238557e-05, "loss": 0.9489, "step": 6669 }, { "epoch": 0.1823999124917961, "grad_norm": 1.7741297483444214, "learning_rate": 1.8807154200724097e-05, "loss": 0.6079, "step": 6670 }, { "epoch": 0.18242725880551303, "grad_norm": 1.2807929515838623, "learning_rate": 1.880673465611999e-05, "loss": 0.5636, "step": 6671 }, { "epoch": 0.18245460511922992, "grad_norm": 1.4975343942642212, "learning_rate": 1.8806315042429524e-05, "loss": 0.5777, "step": 6672 }, { "epoch": 0.18248195143294685, "grad_norm": 1.9652220010757446, "learning_rate": 1.8805895359656e-05, "loss": 0.5745, "step": 6673 }, { "epoch": 0.18250929774666375, "grad_norm": 1.2637478113174438, "learning_rate": 1.88054756078027e-05, "loss": 0.5704, "step": 6674 }, { "epoch": 0.18253664406038067, "grad_norm": 1.4664047956466675, "learning_rate": 1.8805055786872928e-05, "loss": 0.5877, "step": 6675 }, { "epoch": 0.18256399037409757, "grad_norm": 1.2945570945739746, "learning_rate": 1.8804635896869966e-05, "loss": 0.556, "step": 6676 }, { "epoch": 0.1825913366878145, "grad_norm": 1.7240418195724487, "learning_rate": 1.8804215937797115e-05, "loss": 0.6154, "step": 6677 }, { "epoch": 0.1826186830015314, "grad_norm": 1.4030705690383911, "learning_rate": 1.8803795909657668e-05, "loss": 0.5322, "step": 6678 }, { "epoch": 0.18264602931524831, "grad_norm": 1.431789755821228, "learning_rate": 1.880337581245492e-05, "loss": 0.593, "step": 6679 }, { "epoch": 0.1826733756289652, "grad_norm": 1.8536734580993652, "learning_rate": 1.880295564619217e-05, "loss": 0.5822, "step": 6680 }, { "epoch": 0.18270072194268214, "grad_norm": 1.448650598526001, "learning_rate": 1.8802535410872705e-05, "loss": 0.5721, "step": 6681 }, { "epoch": 0.18272806825639903, "grad_norm": 1.3075836896896362, "learning_rate": 1.8802115106499826e-05, "loss": 0.5896, "step": 6682 }, { "epoch": 0.18275541457011596, "grad_norm": 1.5928499698638916, "learning_rate": 1.8801694733076835e-05, "loss": 0.9056, "step": 6683 }, { "epoch": 0.18278276088383286, "grad_norm": 1.1796824932098389, "learning_rate": 1.8801274290607024e-05, "loss": 0.557, "step": 6684 }, { "epoch": 0.18281010719754978, "grad_norm": 2.5151498317718506, "learning_rate": 1.8800853779093695e-05, "loss": 0.4638, "step": 6685 }, { "epoch": 0.18283745351126668, "grad_norm": 1.270519733428955, "learning_rate": 1.880043319854014e-05, "loss": 0.5867, "step": 6686 }, { "epoch": 0.1828647998249836, "grad_norm": 1.590200662612915, "learning_rate": 1.8800012548949665e-05, "loss": 0.5845, "step": 6687 }, { "epoch": 0.1828921461387005, "grad_norm": 1.493408441543579, "learning_rate": 1.8799591830325572e-05, "loss": 0.5613, "step": 6688 }, { "epoch": 0.18291949245241743, "grad_norm": 1.4979526996612549, "learning_rate": 1.8799171042671153e-05, "loss": 0.56, "step": 6689 }, { "epoch": 0.18294683876613432, "grad_norm": 1.1692733764648438, "learning_rate": 1.8798750185989716e-05, "loss": 0.5798, "step": 6690 }, { "epoch": 0.18297418507985125, "grad_norm": 1.515505075454712, "learning_rate": 1.879832926028456e-05, "loss": 0.5675, "step": 6691 }, { "epoch": 0.18300153139356815, "grad_norm": 1.2115602493286133, "learning_rate": 1.8797908265558985e-05, "loss": 0.5615, "step": 6692 }, { "epoch": 0.18302887770728507, "grad_norm": 1.5575871467590332, "learning_rate": 1.8797487201816298e-05, "loss": 0.5597, "step": 6693 }, { "epoch": 0.18305622402100197, "grad_norm": 1.3660551309585571, "learning_rate": 1.87970660690598e-05, "loss": 0.5591, "step": 6694 }, { "epoch": 0.1830835703347189, "grad_norm": 1.570923924446106, "learning_rate": 1.8796644867292792e-05, "loss": 0.5564, "step": 6695 }, { "epoch": 0.1831109166484358, "grad_norm": 1.3274178504943848, "learning_rate": 1.879622359651858e-05, "loss": 0.5586, "step": 6696 }, { "epoch": 0.18313826296215271, "grad_norm": 1.694492220878601, "learning_rate": 1.8795802256740473e-05, "loss": 0.5645, "step": 6697 }, { "epoch": 0.1831656092758696, "grad_norm": 1.2936009168624878, "learning_rate": 1.879538084796177e-05, "loss": 0.5621, "step": 6698 }, { "epoch": 0.18319295558958654, "grad_norm": 1.6744465827941895, "learning_rate": 1.879495937018578e-05, "loss": 0.5394, "step": 6699 }, { "epoch": 0.18322030190330343, "grad_norm": 1.731614351272583, "learning_rate": 1.879453782341581e-05, "loss": 0.4914, "step": 6700 }, { "epoch": 0.18324764821702036, "grad_norm": 1.4499481916427612, "learning_rate": 1.8794116207655166e-05, "loss": 0.6124, "step": 6701 }, { "epoch": 0.18327499453073726, "grad_norm": 2.058617353439331, "learning_rate": 1.8793694522907153e-05, "loss": 0.5381, "step": 6702 }, { "epoch": 0.18330234084445418, "grad_norm": 1.4248723983764648, "learning_rate": 1.879327276917508e-05, "loss": 0.5651, "step": 6703 }, { "epoch": 0.18332968715817108, "grad_norm": 1.8461412191390991, "learning_rate": 1.8792850946462263e-05, "loss": 0.6063, "step": 6704 }, { "epoch": 0.18335703347188798, "grad_norm": 1.3187721967697144, "learning_rate": 1.8792429054772002e-05, "loss": 0.5088, "step": 6705 }, { "epoch": 0.1833843797856049, "grad_norm": 1.3232147693634033, "learning_rate": 1.8792007094107605e-05, "loss": 0.5507, "step": 6706 }, { "epoch": 0.1834117260993218, "grad_norm": 1.6963601112365723, "learning_rate": 1.8791585064472394e-05, "loss": 0.5614, "step": 6707 }, { "epoch": 0.18343907241303872, "grad_norm": 3.2381062507629395, "learning_rate": 1.8791162965869664e-05, "loss": 0.9401, "step": 6708 }, { "epoch": 0.18346641872675562, "grad_norm": 1.3918393850326538, "learning_rate": 1.879074079830274e-05, "loss": 0.5688, "step": 6709 }, { "epoch": 0.18349376504047255, "grad_norm": 1.4274344444274902, "learning_rate": 1.879031856177493e-05, "loss": 0.9334, "step": 6710 }, { "epoch": 0.18352111135418944, "grad_norm": 1.7034871578216553, "learning_rate": 1.878989625628954e-05, "loss": 0.5533, "step": 6711 }, { "epoch": 0.18354845766790637, "grad_norm": 1.6384233236312866, "learning_rate": 1.8789473881849887e-05, "loss": 0.4511, "step": 6712 }, { "epoch": 0.18357580398162326, "grad_norm": 1.40592360496521, "learning_rate": 1.8789051438459292e-05, "loss": 0.5482, "step": 6713 }, { "epoch": 0.1836031502953402, "grad_norm": 1.2633460760116577, "learning_rate": 1.8788628926121056e-05, "loss": 0.562, "step": 6714 }, { "epoch": 0.1836304966090571, "grad_norm": 1.5756909847259521, "learning_rate": 1.87882063448385e-05, "loss": 0.6092, "step": 6715 }, { "epoch": 0.183657842922774, "grad_norm": 1.7862796783447266, "learning_rate": 1.8787783694614942e-05, "loss": 0.5708, "step": 6716 }, { "epoch": 0.1836851892364909, "grad_norm": 3.2395875453948975, "learning_rate": 1.878736097545369e-05, "loss": 0.4156, "step": 6717 }, { "epoch": 0.18371253555020783, "grad_norm": 1.1633872985839844, "learning_rate": 1.8786938187358066e-05, "loss": 0.5785, "step": 6718 }, { "epoch": 0.18373988186392473, "grad_norm": 1.7541943788528442, "learning_rate": 1.8786515330331382e-05, "loss": 0.6214, "step": 6719 }, { "epoch": 0.18376722817764166, "grad_norm": 1.6517754793167114, "learning_rate": 1.8786092404376962e-05, "loss": 0.6047, "step": 6720 }, { "epoch": 0.18379457449135855, "grad_norm": 1.3653175830841064, "learning_rate": 1.8785669409498116e-05, "loss": 0.5489, "step": 6721 }, { "epoch": 0.18382192080507548, "grad_norm": 1.5949957370758057, "learning_rate": 1.8785246345698168e-05, "loss": 0.5639, "step": 6722 }, { "epoch": 0.18384926711879238, "grad_norm": 1.8976484537124634, "learning_rate": 1.8784823212980437e-05, "loss": 0.4701, "step": 6723 }, { "epoch": 0.1838766134325093, "grad_norm": 1.672500729560852, "learning_rate": 1.878440001134824e-05, "loss": 0.5552, "step": 6724 }, { "epoch": 0.1839039597462262, "grad_norm": 1.1208598613739014, "learning_rate": 1.878397674080489e-05, "loss": 0.5721, "step": 6725 }, { "epoch": 0.18393130605994312, "grad_norm": 1.38301420211792, "learning_rate": 1.878355340135372e-05, "loss": 0.6189, "step": 6726 }, { "epoch": 0.18395865237366002, "grad_norm": 1.282584547996521, "learning_rate": 1.8783129992998045e-05, "loss": 0.5394, "step": 6727 }, { "epoch": 0.18398599868737694, "grad_norm": 1.5017645359039307, "learning_rate": 1.878270651574119e-05, "loss": 0.5325, "step": 6728 }, { "epoch": 0.18401334500109384, "grad_norm": 1.3136876821517944, "learning_rate": 1.878228296958647e-05, "loss": 0.5866, "step": 6729 }, { "epoch": 0.18404069131481077, "grad_norm": 1.5555380582809448, "learning_rate": 1.8781859354537212e-05, "loss": 0.6183, "step": 6730 }, { "epoch": 0.18406803762852766, "grad_norm": 1.3858524560928345, "learning_rate": 1.878143567059674e-05, "loss": 0.5768, "step": 6731 }, { "epoch": 0.1840953839422446, "grad_norm": 1.5338740348815918, "learning_rate": 1.8781011917768376e-05, "loss": 0.5593, "step": 6732 }, { "epoch": 0.1841227302559615, "grad_norm": 1.243596076965332, "learning_rate": 1.8780588096055442e-05, "loss": 0.5831, "step": 6733 }, { "epoch": 0.1841500765696784, "grad_norm": 1.847570776939392, "learning_rate": 1.878016420546127e-05, "loss": 0.5372, "step": 6734 }, { "epoch": 0.1841774228833953, "grad_norm": 1.6483114957809448, "learning_rate": 1.8779740245989178e-05, "loss": 0.5522, "step": 6735 }, { "epoch": 0.18420476919711223, "grad_norm": 1.5848509073257446, "learning_rate": 1.8779316217642495e-05, "loss": 0.5816, "step": 6736 }, { "epoch": 0.18423211551082913, "grad_norm": 1.1817010641098022, "learning_rate": 1.877889212042455e-05, "loss": 0.542, "step": 6737 }, { "epoch": 0.18425946182454606, "grad_norm": 1.6104902029037476, "learning_rate": 1.8778467954338665e-05, "loss": 0.5703, "step": 6738 }, { "epoch": 0.18428680813826295, "grad_norm": 1.383262038230896, "learning_rate": 1.8778043719388165e-05, "loss": 0.5177, "step": 6739 }, { "epoch": 0.18431415445197988, "grad_norm": 2.797398090362549, "learning_rate": 1.8777619415576387e-05, "loss": 0.9709, "step": 6740 }, { "epoch": 0.18434150076569678, "grad_norm": 1.3417129516601562, "learning_rate": 1.8777195042906654e-05, "loss": 0.5938, "step": 6741 }, { "epoch": 0.1843688470794137, "grad_norm": 1.4780713319778442, "learning_rate": 1.8776770601382294e-05, "loss": 0.5485, "step": 6742 }, { "epoch": 0.1843961933931306, "grad_norm": 1.40680992603302, "learning_rate": 1.8776346091006644e-05, "loss": 0.5483, "step": 6743 }, { "epoch": 0.18442353970684752, "grad_norm": 1.493699312210083, "learning_rate": 1.8775921511783024e-05, "loss": 0.5691, "step": 6744 }, { "epoch": 0.18445088602056442, "grad_norm": 1.305040717124939, "learning_rate": 1.877549686371477e-05, "loss": 0.5641, "step": 6745 }, { "epoch": 0.18447823233428134, "grad_norm": 1.617942214012146, "learning_rate": 1.8775072146805215e-05, "loss": 0.566, "step": 6746 }, { "epoch": 0.18450557864799824, "grad_norm": 1.343756079673767, "learning_rate": 1.8774647361057687e-05, "loss": 0.5703, "step": 6747 }, { "epoch": 0.18453292496171517, "grad_norm": 1.9136008024215698, "learning_rate": 1.8774222506475518e-05, "loss": 0.5687, "step": 6748 }, { "epoch": 0.18456027127543206, "grad_norm": 1.328913688659668, "learning_rate": 1.8773797583062046e-05, "loss": 0.6011, "step": 6749 }, { "epoch": 0.184587617589149, "grad_norm": 1.8087249994277954, "learning_rate": 1.87733725908206e-05, "loss": 0.569, "step": 6750 }, { "epoch": 0.1846149639028659, "grad_norm": 1.3399152755737305, "learning_rate": 1.8772947529754513e-05, "loss": 0.571, "step": 6751 }, { "epoch": 0.1846423102165828, "grad_norm": 1.3268101215362549, "learning_rate": 1.8772522399867124e-05, "loss": 0.5746, "step": 6752 }, { "epoch": 0.1846696565302997, "grad_norm": 1.4235897064208984, "learning_rate": 1.8772097201161768e-05, "loss": 0.5762, "step": 6753 }, { "epoch": 0.18469700284401663, "grad_norm": 1.6557905673980713, "learning_rate": 1.8771671933641774e-05, "loss": 0.4741, "step": 6754 }, { "epoch": 0.18472434915773353, "grad_norm": 1.17949640750885, "learning_rate": 1.8771246597310484e-05, "loss": 0.5472, "step": 6755 }, { "epoch": 0.18475169547145046, "grad_norm": 2.279095411300659, "learning_rate": 1.877082119217123e-05, "loss": 0.558, "step": 6756 }, { "epoch": 0.18477904178516735, "grad_norm": 3.2261509895324707, "learning_rate": 1.8770395718227353e-05, "loss": 0.6238, "step": 6757 }, { "epoch": 0.18480638809888428, "grad_norm": 1.440782904624939, "learning_rate": 1.8769970175482194e-05, "loss": 0.5876, "step": 6758 }, { "epoch": 0.18483373441260117, "grad_norm": 1.1718604564666748, "learning_rate": 1.8769544563939084e-05, "loss": 0.5819, "step": 6759 }, { "epoch": 0.1848610807263181, "grad_norm": 2.940514087677002, "learning_rate": 1.8769118883601364e-05, "loss": 0.945, "step": 6760 }, { "epoch": 0.184888427040035, "grad_norm": 1.2604886293411255, "learning_rate": 1.876869313447238e-05, "loss": 0.5354, "step": 6761 }, { "epoch": 0.18491577335375192, "grad_norm": 1.6093868017196655, "learning_rate": 1.876826731655546e-05, "loss": 0.5917, "step": 6762 }, { "epoch": 0.18494311966746882, "grad_norm": 1.3236230611801147, "learning_rate": 1.876784142985395e-05, "loss": 0.5265, "step": 6763 }, { "epoch": 0.18497046598118574, "grad_norm": 1.3410475254058838, "learning_rate": 1.8767415474371194e-05, "loss": 0.5713, "step": 6764 }, { "epoch": 0.18499781229490264, "grad_norm": 1.6339941024780273, "learning_rate": 1.8766989450110528e-05, "loss": 0.561, "step": 6765 }, { "epoch": 0.18502515860861957, "grad_norm": 1.2598379850387573, "learning_rate": 1.87665633570753e-05, "loss": 0.5995, "step": 6766 }, { "epoch": 0.18505250492233646, "grad_norm": 1.8515616655349731, "learning_rate": 1.876613719526885e-05, "loss": 0.6172, "step": 6767 }, { "epoch": 0.1850798512360534, "grad_norm": 1.271409511566162, "learning_rate": 1.876571096469452e-05, "loss": 0.5551, "step": 6768 }, { "epoch": 0.18510719754977029, "grad_norm": 1.534395694732666, "learning_rate": 1.8765284665355653e-05, "loss": 0.6154, "step": 6769 }, { "epoch": 0.1851345438634872, "grad_norm": 1.237345576286316, "learning_rate": 1.8764858297255597e-05, "loss": 0.5426, "step": 6770 }, { "epoch": 0.1851618901772041, "grad_norm": 1.1323097944259644, "learning_rate": 1.876443186039769e-05, "loss": 0.5757, "step": 6771 }, { "epoch": 0.18518923649092103, "grad_norm": 1.3683537244796753, "learning_rate": 1.8764005354785285e-05, "loss": 0.5932, "step": 6772 }, { "epoch": 0.18521658280463793, "grad_norm": 1.3665964603424072, "learning_rate": 1.8763578780421725e-05, "loss": 0.5599, "step": 6773 }, { "epoch": 0.18524392911835486, "grad_norm": 1.3480547666549683, "learning_rate": 1.8763152137310355e-05, "loss": 0.593, "step": 6774 }, { "epoch": 0.18527127543207175, "grad_norm": 1.2268248796463013, "learning_rate": 1.876272542545452e-05, "loss": 0.3787, "step": 6775 }, { "epoch": 0.18529862174578868, "grad_norm": 3.1235158443450928, "learning_rate": 1.8762298644857573e-05, "loss": 0.9815, "step": 6776 }, { "epoch": 0.18532596805950557, "grad_norm": 2.321254253387451, "learning_rate": 1.876187179552286e-05, "loss": 0.947, "step": 6777 }, { "epoch": 0.1853533143732225, "grad_norm": 1.3880975246429443, "learning_rate": 1.8761444877453726e-05, "loss": 0.5853, "step": 6778 }, { "epoch": 0.1853806606869394, "grad_norm": 1.4269977807998657, "learning_rate": 1.8761017890653523e-05, "loss": 0.5927, "step": 6779 }, { "epoch": 0.18540800700065632, "grad_norm": 1.4157582521438599, "learning_rate": 1.8760590835125603e-05, "loss": 0.5831, "step": 6780 }, { "epoch": 0.18543535331437322, "grad_norm": 1.2722973823547363, "learning_rate": 1.8760163710873308e-05, "loss": 0.591, "step": 6781 }, { "epoch": 0.18546269962809014, "grad_norm": 1.2934917211532593, "learning_rate": 1.87597365179e-05, "loss": 0.6076, "step": 6782 }, { "epoch": 0.18549004594180704, "grad_norm": 1.9477099180221558, "learning_rate": 1.8759309256209018e-05, "loss": 0.6288, "step": 6783 }, { "epoch": 0.18551739225552397, "grad_norm": 1.3349121809005737, "learning_rate": 1.8758881925803724e-05, "loss": 0.5954, "step": 6784 }, { "epoch": 0.18554473856924086, "grad_norm": 1.23383367061615, "learning_rate": 1.8758454526687466e-05, "loss": 0.5602, "step": 6785 }, { "epoch": 0.1855720848829578, "grad_norm": 1.6042879819869995, "learning_rate": 1.87580270588636e-05, "loss": 0.5166, "step": 6786 }, { "epoch": 0.18559943119667469, "grad_norm": 1.476684808731079, "learning_rate": 1.875759952233547e-05, "loss": 0.5865, "step": 6787 }, { "epoch": 0.1856267775103916, "grad_norm": 1.3101447820663452, "learning_rate": 1.875717191710644e-05, "loss": 0.5848, "step": 6788 }, { "epoch": 0.1856541238241085, "grad_norm": 1.102612853050232, "learning_rate": 1.875674424317986e-05, "loss": 0.585, "step": 6789 }, { "epoch": 0.18568147013782543, "grad_norm": 1.2195029258728027, "learning_rate": 1.8756316500559086e-05, "loss": 0.5924, "step": 6790 }, { "epoch": 0.18570881645154233, "grad_norm": 1.4518860578536987, "learning_rate": 1.8755888689247475e-05, "loss": 0.5659, "step": 6791 }, { "epoch": 0.18573616276525926, "grad_norm": 1.2635084390640259, "learning_rate": 1.8755460809248377e-05, "loss": 0.5735, "step": 6792 }, { "epoch": 0.18576350907897615, "grad_norm": 1.266180157661438, "learning_rate": 1.8755032860565156e-05, "loss": 0.5697, "step": 6793 }, { "epoch": 0.18579085539269308, "grad_norm": 1.1463507413864136, "learning_rate": 1.8754604843201166e-05, "loss": 0.5613, "step": 6794 }, { "epoch": 0.18581820170640997, "grad_norm": 1.4081188440322876, "learning_rate": 1.875417675715976e-05, "loss": 0.5543, "step": 6795 }, { "epoch": 0.1858455480201269, "grad_norm": 1.649065375328064, "learning_rate": 1.875374860244431e-05, "loss": 0.5677, "step": 6796 }, { "epoch": 0.1858728943338438, "grad_norm": 1.3365932703018188, "learning_rate": 1.875332037905816e-05, "loss": 0.5662, "step": 6797 }, { "epoch": 0.18590024064756072, "grad_norm": 1.5347710847854614, "learning_rate": 1.8752892087004676e-05, "loss": 0.5613, "step": 6798 }, { "epoch": 0.18592758696127762, "grad_norm": 2.02496075630188, "learning_rate": 1.8752463726287217e-05, "loss": 0.5622, "step": 6799 }, { "epoch": 0.18595493327499454, "grad_norm": 1.6712374687194824, "learning_rate": 1.8752035296909143e-05, "loss": 0.558, "step": 6800 }, { "epoch": 0.18598227958871144, "grad_norm": 6.164422988891602, "learning_rate": 1.8751606798873817e-05, "loss": 1.1761, "step": 6801 }, { "epoch": 0.18600962590242837, "grad_norm": 1.3330074548721313, "learning_rate": 1.8751178232184596e-05, "loss": 0.5559, "step": 6802 }, { "epoch": 0.18603697221614526, "grad_norm": 1.3439908027648926, "learning_rate": 1.8750749596844847e-05, "loss": 0.5481, "step": 6803 }, { "epoch": 0.1860643185298622, "grad_norm": 1.2278376817703247, "learning_rate": 1.8750320892857928e-05, "loss": 0.5295, "step": 6804 }, { "epoch": 0.18609166484357909, "grad_norm": 1.7704063653945923, "learning_rate": 1.8749892120227206e-05, "loss": 0.5924, "step": 6805 }, { "epoch": 0.18611901115729598, "grad_norm": 1.2624561786651611, "learning_rate": 1.874946327895604e-05, "loss": 0.5497, "step": 6806 }, { "epoch": 0.1861463574710129, "grad_norm": 1.960202693939209, "learning_rate": 1.87490343690478e-05, "loss": 0.4315, "step": 6807 }, { "epoch": 0.1861737037847298, "grad_norm": 1.0551893711090088, "learning_rate": 1.8748605390505844e-05, "loss": 0.563, "step": 6808 }, { "epoch": 0.18620105009844673, "grad_norm": 1.3733640909194946, "learning_rate": 1.8748176343333542e-05, "loss": 0.5808, "step": 6809 }, { "epoch": 0.18622839641216363, "grad_norm": 1.8229728937149048, "learning_rate": 1.874774722753426e-05, "loss": 0.6125, "step": 6810 }, { "epoch": 0.18625574272588055, "grad_norm": 1.6871392726898193, "learning_rate": 1.8747318043111363e-05, "loss": 0.5765, "step": 6811 }, { "epoch": 0.18628308903959745, "grad_norm": 1.2550169229507446, "learning_rate": 1.8746888790068215e-05, "loss": 0.5458, "step": 6812 }, { "epoch": 0.18631043535331437, "grad_norm": 1.5051013231277466, "learning_rate": 1.874645946840819e-05, "loss": 0.5933, "step": 6813 }, { "epoch": 0.18633778166703127, "grad_norm": 1.405914306640625, "learning_rate": 1.874603007813465e-05, "loss": 0.5833, "step": 6814 }, { "epoch": 0.1863651279807482, "grad_norm": 1.3635305166244507, "learning_rate": 1.8745600619250967e-05, "loss": 0.5125, "step": 6815 }, { "epoch": 0.1863924742944651, "grad_norm": 1.4384201765060425, "learning_rate": 1.8745171091760504e-05, "loss": 0.5446, "step": 6816 }, { "epoch": 0.18641982060818202, "grad_norm": 1.4586536884307861, "learning_rate": 1.8744741495666638e-05, "loss": 0.5789, "step": 6817 }, { "epoch": 0.18644716692189892, "grad_norm": 1.2178847789764404, "learning_rate": 1.8744311830972735e-05, "loss": 0.5657, "step": 6818 }, { "epoch": 0.18647451323561584, "grad_norm": 1.82508385181427, "learning_rate": 1.8743882097682163e-05, "loss": 0.5765, "step": 6819 }, { "epoch": 0.18650185954933274, "grad_norm": 1.334838628768921, "learning_rate": 1.8743452295798304e-05, "loss": 0.5631, "step": 6820 }, { "epoch": 0.18652920586304966, "grad_norm": 1.1954822540283203, "learning_rate": 1.8743022425324516e-05, "loss": 0.5498, "step": 6821 }, { "epoch": 0.18655655217676656, "grad_norm": 1.0961788892745972, "learning_rate": 1.874259248626418e-05, "loss": 0.5628, "step": 6822 }, { "epoch": 0.18658389849048349, "grad_norm": 1.4066017866134644, "learning_rate": 1.874216247862067e-05, "loss": 0.6174, "step": 6823 }, { "epoch": 0.18661124480420038, "grad_norm": 1.3964911699295044, "learning_rate": 1.8741732402397352e-05, "loss": 0.6026, "step": 6824 }, { "epoch": 0.1866385911179173, "grad_norm": 1.281927227973938, "learning_rate": 1.8741302257597605e-05, "loss": 0.5169, "step": 6825 }, { "epoch": 0.1866659374316342, "grad_norm": 2.1072964668273926, "learning_rate": 1.87408720442248e-05, "loss": 0.5763, "step": 6826 }, { "epoch": 0.18669328374535113, "grad_norm": 1.2096632719039917, "learning_rate": 1.8740441762282313e-05, "loss": 0.5586, "step": 6827 }, { "epoch": 0.18672063005906803, "grad_norm": 1.3659087419509888, "learning_rate": 1.874001141177352e-05, "loss": 0.5299, "step": 6828 }, { "epoch": 0.18674797637278495, "grad_norm": 1.5057580471038818, "learning_rate": 1.8739580992701802e-05, "loss": 0.5668, "step": 6829 }, { "epoch": 0.18677532268650185, "grad_norm": 4.586731433868408, "learning_rate": 1.8739150505070525e-05, "loss": 1.0588, "step": 6830 }, { "epoch": 0.18680266900021877, "grad_norm": 3.7732255458831787, "learning_rate": 1.8738719948883078e-05, "loss": 1.0631, "step": 6831 }, { "epoch": 0.18683001531393567, "grad_norm": 1.3983083963394165, "learning_rate": 1.8738289324142825e-05, "loss": 0.6218, "step": 6832 }, { "epoch": 0.1868573616276526, "grad_norm": 1.8496381044387817, "learning_rate": 1.8737858630853155e-05, "loss": 0.9511, "step": 6833 }, { "epoch": 0.1868847079413695, "grad_norm": 1.6417350769042969, "learning_rate": 1.8737427869017445e-05, "loss": 0.6018, "step": 6834 }, { "epoch": 0.18691205425508642, "grad_norm": 1.2857314348220825, "learning_rate": 1.873699703863907e-05, "loss": 0.5414, "step": 6835 }, { "epoch": 0.18693940056880332, "grad_norm": 1.7878752946853638, "learning_rate": 1.873656613972141e-05, "loss": 0.6066, "step": 6836 }, { "epoch": 0.18696674688252024, "grad_norm": 1.3133668899536133, "learning_rate": 1.8736135172267847e-05, "loss": 0.5693, "step": 6837 }, { "epoch": 0.18699409319623714, "grad_norm": 1.4395761489868164, "learning_rate": 1.873570413628177e-05, "loss": 0.5411, "step": 6838 }, { "epoch": 0.18702143950995406, "grad_norm": 1.5458723306655884, "learning_rate": 1.8735273031766544e-05, "loss": 0.5668, "step": 6839 }, { "epoch": 0.18704878582367096, "grad_norm": 5.233407497406006, "learning_rate": 1.8734841858725564e-05, "loss": 1.1835, "step": 6840 }, { "epoch": 0.18707613213738788, "grad_norm": 1.5518935918807983, "learning_rate": 1.8734410617162204e-05, "loss": 0.4957, "step": 6841 }, { "epoch": 0.18710347845110478, "grad_norm": 1.49580717086792, "learning_rate": 1.8733979307079855e-05, "loss": 0.5341, "step": 6842 }, { "epoch": 0.1871308247648217, "grad_norm": 1.1996582746505737, "learning_rate": 1.8733547928481895e-05, "loss": 0.5491, "step": 6843 }, { "epoch": 0.1871581710785386, "grad_norm": 1.2428133487701416, "learning_rate": 1.8733116481371706e-05, "loss": 0.5385, "step": 6844 }, { "epoch": 0.18718551739225553, "grad_norm": 1.6805862188339233, "learning_rate": 1.8732684965752683e-05, "loss": 0.5625, "step": 6845 }, { "epoch": 0.18721286370597243, "grad_norm": 1.5276552438735962, "learning_rate": 1.8732253381628196e-05, "loss": 0.4438, "step": 6846 }, { "epoch": 0.18724021001968935, "grad_norm": 1.7083038091659546, "learning_rate": 1.8731821729001644e-05, "loss": 0.5644, "step": 6847 }, { "epoch": 0.18726755633340625, "grad_norm": 1.5697370767593384, "learning_rate": 1.8731390007876404e-05, "loss": 0.5894, "step": 6848 }, { "epoch": 0.18729490264712317, "grad_norm": 1.2610015869140625, "learning_rate": 1.873095821825587e-05, "loss": 0.5528, "step": 6849 }, { "epoch": 0.18732224896084007, "grad_norm": 1.3703498840332031, "learning_rate": 1.8730526360143428e-05, "loss": 0.5863, "step": 6850 }, { "epoch": 0.187349595274557, "grad_norm": 2.153261661529541, "learning_rate": 1.8730094433542456e-05, "loss": 0.5669, "step": 6851 }, { "epoch": 0.1873769415882739, "grad_norm": 1.498310923576355, "learning_rate": 1.8729662438456357e-05, "loss": 0.5577, "step": 6852 }, { "epoch": 0.18740428790199082, "grad_norm": 1.326974630355835, "learning_rate": 1.872923037488851e-05, "loss": 0.5623, "step": 6853 }, { "epoch": 0.18743163421570772, "grad_norm": 1.1330475807189941, "learning_rate": 1.8728798242842307e-05, "loss": 0.5846, "step": 6854 }, { "epoch": 0.18745898052942464, "grad_norm": 1.5518590211868286, "learning_rate": 1.872836604232114e-05, "loss": 0.5294, "step": 6855 }, { "epoch": 0.18748632684314154, "grad_norm": 1.2902841567993164, "learning_rate": 1.8727933773328397e-05, "loss": 0.5842, "step": 6856 }, { "epoch": 0.18751367315685846, "grad_norm": 1.309882640838623, "learning_rate": 1.8727501435867467e-05, "loss": 0.5873, "step": 6857 }, { "epoch": 0.18754101947057536, "grad_norm": 1.3768644332885742, "learning_rate": 1.8727069029941748e-05, "loss": 0.5584, "step": 6858 }, { "epoch": 0.18756836578429228, "grad_norm": 2.8419673442840576, "learning_rate": 1.8726636555554625e-05, "loss": 0.9985, "step": 6859 }, { "epoch": 0.18759571209800918, "grad_norm": 1.4555608034133911, "learning_rate": 1.87262040127095e-05, "loss": 0.5649, "step": 6860 }, { "epoch": 0.1876230584117261, "grad_norm": 2.659169912338257, "learning_rate": 1.8725771401409754e-05, "loss": 0.6282, "step": 6861 }, { "epoch": 0.187650404725443, "grad_norm": 1.6892163753509521, "learning_rate": 1.872533872165879e-05, "loss": 0.5733, "step": 6862 }, { "epoch": 0.18767775103915993, "grad_norm": 1.463912844657898, "learning_rate": 1.872490597346e-05, "loss": 0.5009, "step": 6863 }, { "epoch": 0.18770509735287683, "grad_norm": 2.055295944213867, "learning_rate": 1.8724473156816776e-05, "loss": 0.997, "step": 6864 }, { "epoch": 0.18773244366659375, "grad_norm": 1.5609279870986938, "learning_rate": 1.8724040271732518e-05, "loss": 0.5908, "step": 6865 }, { "epoch": 0.18775978998031065, "grad_norm": 1.7478644847869873, "learning_rate": 1.8723607318210618e-05, "loss": 0.5698, "step": 6866 }, { "epoch": 0.18778713629402757, "grad_norm": 1.6300532817840576, "learning_rate": 1.8723174296254472e-05, "loss": 0.549, "step": 6867 }, { "epoch": 0.18781448260774447, "grad_norm": 1.096957802772522, "learning_rate": 1.872274120586748e-05, "loss": 0.5865, "step": 6868 }, { "epoch": 0.1878418289214614, "grad_norm": 1.5156198740005493, "learning_rate": 1.8722308047053036e-05, "loss": 0.5813, "step": 6869 }, { "epoch": 0.1878691752351783, "grad_norm": 1.4491684436798096, "learning_rate": 1.8721874819814543e-05, "loss": 0.6009, "step": 6870 }, { "epoch": 0.18789652154889522, "grad_norm": 1.335797667503357, "learning_rate": 1.8721441524155394e-05, "loss": 0.5415, "step": 6871 }, { "epoch": 0.18792386786261212, "grad_norm": 1.6375360488891602, "learning_rate": 1.8721008160078993e-05, "loss": 0.5246, "step": 6872 }, { "epoch": 0.18795121417632904, "grad_norm": 1.477883219718933, "learning_rate": 1.872057472758874e-05, "loss": 0.6054, "step": 6873 }, { "epoch": 0.18797856049004594, "grad_norm": 1.745774745941162, "learning_rate": 1.8720141226688025e-05, "loss": 0.5646, "step": 6874 }, { "epoch": 0.18800590680376286, "grad_norm": 1.3524863719940186, "learning_rate": 1.871970765738026e-05, "loss": 0.5694, "step": 6875 }, { "epoch": 0.18803325311747976, "grad_norm": 1.083444595336914, "learning_rate": 1.871927401966884e-05, "loss": 0.5683, "step": 6876 }, { "epoch": 0.18806059943119668, "grad_norm": 1.4881439208984375, "learning_rate": 1.8718840313557174e-05, "loss": 0.5431, "step": 6877 }, { "epoch": 0.18808794574491358, "grad_norm": 1.439160704612732, "learning_rate": 1.8718406539048657e-05, "loss": 0.5589, "step": 6878 }, { "epoch": 0.1881152920586305, "grad_norm": 1.290928602218628, "learning_rate": 1.8717972696146692e-05, "loss": 0.5519, "step": 6879 }, { "epoch": 0.1881426383723474, "grad_norm": 2.3115627765655518, "learning_rate": 1.871753878485469e-05, "loss": 0.9368, "step": 6880 }, { "epoch": 0.18816998468606433, "grad_norm": 1.4185336828231812, "learning_rate": 1.8717104805176047e-05, "loss": 0.5579, "step": 6881 }, { "epoch": 0.18819733099978123, "grad_norm": 1.325974702835083, "learning_rate": 1.8716670757114167e-05, "loss": 0.5645, "step": 6882 }, { "epoch": 0.18822467731349815, "grad_norm": 1.5731505155563354, "learning_rate": 1.8716236640672462e-05, "loss": 0.4926, "step": 6883 }, { "epoch": 0.18825202362721505, "grad_norm": 1.3651355504989624, "learning_rate": 1.871580245585433e-05, "loss": 0.5619, "step": 6884 }, { "epoch": 0.18827936994093197, "grad_norm": 1.357419490814209, "learning_rate": 1.8715368202663183e-05, "loss": 0.5704, "step": 6885 }, { "epoch": 0.18830671625464887, "grad_norm": 1.2619845867156982, "learning_rate": 1.8714933881102425e-05, "loss": 0.5643, "step": 6886 }, { "epoch": 0.1883340625683658, "grad_norm": 1.36001455783844, "learning_rate": 1.8714499491175466e-05, "loss": 0.5649, "step": 6887 }, { "epoch": 0.1883614088820827, "grad_norm": 1.3325854539871216, "learning_rate": 1.8714065032885704e-05, "loss": 0.5801, "step": 6888 }, { "epoch": 0.18838875519579962, "grad_norm": 1.4132628440856934, "learning_rate": 1.871363050623656e-05, "loss": 0.5343, "step": 6889 }, { "epoch": 0.18841610150951651, "grad_norm": 1.4699573516845703, "learning_rate": 1.8713195911231436e-05, "loss": 0.5627, "step": 6890 }, { "epoch": 0.18844344782323344, "grad_norm": 1.3427618741989136, "learning_rate": 1.8712761247873738e-05, "loss": 0.5723, "step": 6891 }, { "epoch": 0.18847079413695034, "grad_norm": 1.346195101737976, "learning_rate": 1.8712326516166887e-05, "loss": 0.5653, "step": 6892 }, { "epoch": 0.18849814045066726, "grad_norm": 1.3693326711654663, "learning_rate": 1.871189171611428e-05, "loss": 0.5915, "step": 6893 }, { "epoch": 0.18852548676438416, "grad_norm": 1.275709629058838, "learning_rate": 1.8711456847719337e-05, "loss": 0.5557, "step": 6894 }, { "epoch": 0.18855283307810108, "grad_norm": 1.0903009176254272, "learning_rate": 1.8711021910985468e-05, "loss": 0.5384, "step": 6895 }, { "epoch": 0.18858017939181798, "grad_norm": 1.2517932653427124, "learning_rate": 1.871058690591608e-05, "loss": 0.5978, "step": 6896 }, { "epoch": 0.1886075257055349, "grad_norm": 1.440351963043213, "learning_rate": 1.8710151832514594e-05, "loss": 0.4866, "step": 6897 }, { "epoch": 0.1886348720192518, "grad_norm": 1.4187103509902954, "learning_rate": 1.8709716690784413e-05, "loss": 0.5687, "step": 6898 }, { "epoch": 0.18866221833296873, "grad_norm": 1.4275870323181152, "learning_rate": 1.8709281480728957e-05, "loss": 0.4395, "step": 6899 }, { "epoch": 0.18868956464668563, "grad_norm": 1.492915391921997, "learning_rate": 1.870884620235164e-05, "loss": 0.5919, "step": 6900 }, { "epoch": 0.18871691096040255, "grad_norm": 1.3673092126846313, "learning_rate": 1.8708410855655878e-05, "loss": 0.9201, "step": 6901 }, { "epoch": 0.18874425727411945, "grad_norm": 1.2595683336257935, "learning_rate": 1.870797544064508e-05, "loss": 0.5691, "step": 6902 }, { "epoch": 0.18877160358783637, "grad_norm": 1.5476206541061401, "learning_rate": 1.8707539957322665e-05, "loss": 0.4929, "step": 6903 }, { "epoch": 0.18879894990155327, "grad_norm": 1.1080517768859863, "learning_rate": 1.870710440569205e-05, "loss": 0.3799, "step": 6904 }, { "epoch": 0.1888262962152702, "grad_norm": 1.289863109588623, "learning_rate": 1.8706668785756653e-05, "loss": 0.5957, "step": 6905 }, { "epoch": 0.1888536425289871, "grad_norm": 2.3087666034698486, "learning_rate": 1.8706233097519887e-05, "loss": 0.5569, "step": 6906 }, { "epoch": 0.18888098884270402, "grad_norm": 1.4807419776916504, "learning_rate": 1.8705797340985176e-05, "loss": 0.6122, "step": 6907 }, { "epoch": 0.18890833515642091, "grad_norm": 1.5787461996078491, "learning_rate": 1.870536151615593e-05, "loss": 0.5323, "step": 6908 }, { "epoch": 0.1889356814701378, "grad_norm": 1.1527657508850098, "learning_rate": 1.870492562303558e-05, "loss": 0.5613, "step": 6909 }, { "epoch": 0.18896302778385474, "grad_norm": 1.4697325229644775, "learning_rate": 1.8704489661627532e-05, "loss": 0.5552, "step": 6910 }, { "epoch": 0.18899037409757163, "grad_norm": 1.9160048961639404, "learning_rate": 1.8704053631935216e-05, "loss": 0.5848, "step": 6911 }, { "epoch": 0.18901772041128856, "grad_norm": 1.382253885269165, "learning_rate": 1.8703617533962046e-05, "loss": 0.6154, "step": 6912 }, { "epoch": 0.18904506672500546, "grad_norm": 1.595456838607788, "learning_rate": 1.8703181367711445e-05, "loss": 0.5815, "step": 6913 }, { "epoch": 0.18907241303872238, "grad_norm": 1.4910399913787842, "learning_rate": 1.8702745133186837e-05, "loss": 0.5858, "step": 6914 }, { "epoch": 0.18909975935243928, "grad_norm": 1.586256742477417, "learning_rate": 1.8702308830391644e-05, "loss": 0.5169, "step": 6915 }, { "epoch": 0.1891271056661562, "grad_norm": 1.295937180519104, "learning_rate": 1.8701872459329286e-05, "loss": 0.5967, "step": 6916 }, { "epoch": 0.1891544519798731, "grad_norm": 1.4056915044784546, "learning_rate": 1.8701436020003187e-05, "loss": 0.5898, "step": 6917 }, { "epoch": 0.18918179829359003, "grad_norm": 1.4737459421157837, "learning_rate": 1.8700999512416773e-05, "loss": 0.5608, "step": 6918 }, { "epoch": 0.18920914460730692, "grad_norm": 1.5457861423492432, "learning_rate": 1.8700562936573465e-05, "loss": 0.5699, "step": 6919 }, { "epoch": 0.18923649092102385, "grad_norm": 1.1731928586959839, "learning_rate": 1.870012629247669e-05, "loss": 0.4544, "step": 6920 }, { "epoch": 0.18926383723474074, "grad_norm": 1.0612196922302246, "learning_rate": 1.869968958012987e-05, "loss": 0.4182, "step": 6921 }, { "epoch": 0.18929118354845767, "grad_norm": 1.4266464710235596, "learning_rate": 1.869925279953644e-05, "loss": 0.5835, "step": 6922 }, { "epoch": 0.18931852986217457, "grad_norm": 1.259750247001648, "learning_rate": 1.8698815950699814e-05, "loss": 0.3818, "step": 6923 }, { "epoch": 0.1893458761758915, "grad_norm": 1.2474004030227661, "learning_rate": 1.8698379033623426e-05, "loss": 0.5458, "step": 6924 }, { "epoch": 0.1893732224896084, "grad_norm": 1.7730135917663574, "learning_rate": 1.8697942048310705e-05, "loss": 0.5668, "step": 6925 }, { "epoch": 0.18940056880332531, "grad_norm": 1.3614405393600464, "learning_rate": 1.869750499476507e-05, "loss": 0.5703, "step": 6926 }, { "epoch": 0.1894279151170422, "grad_norm": 1.2205969095230103, "learning_rate": 1.869706787298996e-05, "loss": 0.5555, "step": 6927 }, { "epoch": 0.18945526143075914, "grad_norm": 1.214812159538269, "learning_rate": 1.8696630682988807e-05, "loss": 0.5743, "step": 6928 }, { "epoch": 0.18948260774447603, "grad_norm": 1.3666181564331055, "learning_rate": 1.8696193424765027e-05, "loss": 0.5499, "step": 6929 }, { "epoch": 0.18950995405819296, "grad_norm": 1.0455000400543213, "learning_rate": 1.8695756098322054e-05, "loss": 0.5473, "step": 6930 }, { "epoch": 0.18953730037190986, "grad_norm": 1.4024320840835571, "learning_rate": 1.8695318703663327e-05, "loss": 0.5412, "step": 6931 }, { "epoch": 0.18956464668562678, "grad_norm": 1.6850228309631348, "learning_rate": 1.869488124079227e-05, "loss": 0.5733, "step": 6932 }, { "epoch": 0.18959199299934368, "grad_norm": 1.3540375232696533, "learning_rate": 1.8694443709712318e-05, "loss": 0.585, "step": 6933 }, { "epoch": 0.1896193393130606, "grad_norm": 1.7103774547576904, "learning_rate": 1.86940061104269e-05, "loss": 0.5222, "step": 6934 }, { "epoch": 0.1896466856267775, "grad_norm": 1.3107984066009521, "learning_rate": 1.869356844293945e-05, "loss": 0.916, "step": 6935 }, { "epoch": 0.18967403194049443, "grad_norm": 1.5142842531204224, "learning_rate": 1.8693130707253403e-05, "loss": 0.46, "step": 6936 }, { "epoch": 0.18970137825421132, "grad_norm": 1.6419181823730469, "learning_rate": 1.8692692903372195e-05, "loss": 0.5619, "step": 6937 }, { "epoch": 0.18972872456792825, "grad_norm": 1.4769048690795898, "learning_rate": 1.869225503129925e-05, "loss": 0.5702, "step": 6938 }, { "epoch": 0.18975607088164514, "grad_norm": 1.3453168869018555, "learning_rate": 1.869181709103802e-05, "loss": 0.5678, "step": 6939 }, { "epoch": 0.18978341719536207, "grad_norm": 1.2459660768508911, "learning_rate": 1.8691379082591924e-05, "loss": 0.5492, "step": 6940 }, { "epoch": 0.18981076350907897, "grad_norm": 1.8106942176818848, "learning_rate": 1.8690941005964406e-05, "loss": 0.5405, "step": 6941 }, { "epoch": 0.1898381098227959, "grad_norm": 1.281906247138977, "learning_rate": 1.8690502861158904e-05, "loss": 0.5798, "step": 6942 }, { "epoch": 0.1898654561365128, "grad_norm": 1.422476053237915, "learning_rate": 1.8690064648178855e-05, "loss": 0.6128, "step": 6943 }, { "epoch": 0.18989280245022971, "grad_norm": 1.4006903171539307, "learning_rate": 1.868962636702769e-05, "loss": 0.5886, "step": 6944 }, { "epoch": 0.1899201487639466, "grad_norm": 1.4060540199279785, "learning_rate": 1.868918801770885e-05, "loss": 0.5796, "step": 6945 }, { "epoch": 0.18994749507766354, "grad_norm": 1.413561224937439, "learning_rate": 1.8688749600225778e-05, "loss": 0.61, "step": 6946 }, { "epoch": 0.18997484139138043, "grad_norm": 1.4536373615264893, "learning_rate": 1.8688311114581913e-05, "loss": 0.5815, "step": 6947 }, { "epoch": 0.19000218770509736, "grad_norm": 1.6957144737243652, "learning_rate": 1.868787256078069e-05, "loss": 0.5678, "step": 6948 }, { "epoch": 0.19002953401881426, "grad_norm": 1.5658409595489502, "learning_rate": 1.8687433938825552e-05, "loss": 0.5842, "step": 6949 }, { "epoch": 0.19005688033253118, "grad_norm": 1.666212558746338, "learning_rate": 1.8686995248719938e-05, "loss": 0.6003, "step": 6950 }, { "epoch": 0.19008422664624808, "grad_norm": 1.4840102195739746, "learning_rate": 1.8686556490467294e-05, "loss": 0.5593, "step": 6951 }, { "epoch": 0.190111572959965, "grad_norm": 1.3809300661087036, "learning_rate": 1.868611766407106e-05, "loss": 0.4542, "step": 6952 }, { "epoch": 0.1901389192736819, "grad_norm": 1.2720781564712524, "learning_rate": 1.8685678769534674e-05, "loss": 0.5266, "step": 6953 }, { "epoch": 0.19016626558739883, "grad_norm": 1.3364537954330444, "learning_rate": 1.8685239806861585e-05, "loss": 0.5683, "step": 6954 }, { "epoch": 0.19019361190111572, "grad_norm": 1.500600814819336, "learning_rate": 1.868480077605523e-05, "loss": 0.5954, "step": 6955 }, { "epoch": 0.19022095821483265, "grad_norm": 1.6782695055007935, "learning_rate": 1.868436167711906e-05, "loss": 0.5809, "step": 6956 }, { "epoch": 0.19024830452854954, "grad_norm": 1.3025603294372559, "learning_rate": 1.8683922510056515e-05, "loss": 0.5778, "step": 6957 }, { "epoch": 0.19027565084226647, "grad_norm": 1.916951298713684, "learning_rate": 1.8683483274871043e-05, "loss": 0.5469, "step": 6958 }, { "epoch": 0.19030299715598337, "grad_norm": 1.523413896560669, "learning_rate": 1.8683043971566087e-05, "loss": 0.6089, "step": 6959 }, { "epoch": 0.1903303434697003, "grad_norm": 1.1949540376663208, "learning_rate": 1.8682604600145097e-05, "loss": 0.5418, "step": 6960 }, { "epoch": 0.1903576897834172, "grad_norm": 1.4195183515548706, "learning_rate": 1.8682165160611517e-05, "loss": 0.8957, "step": 6961 }, { "epoch": 0.1903850360971341, "grad_norm": 1.6975624561309814, "learning_rate": 1.8681725652968794e-05, "loss": 0.5747, "step": 6962 }, { "epoch": 0.190412382410851, "grad_norm": 1.3362339735031128, "learning_rate": 1.8681286077220377e-05, "loss": 0.5796, "step": 6963 }, { "epoch": 0.19043972872456794, "grad_norm": 1.4871609210968018, "learning_rate": 1.868084643336971e-05, "loss": 0.5163, "step": 6964 }, { "epoch": 0.19046707503828483, "grad_norm": 1.3731489181518555, "learning_rate": 1.8680406721420253e-05, "loss": 0.5415, "step": 6965 }, { "epoch": 0.19049442135200176, "grad_norm": 1.4438486099243164, "learning_rate": 1.8679966941375444e-05, "loss": 0.5366, "step": 6966 }, { "epoch": 0.19052176766571866, "grad_norm": 1.5530610084533691, "learning_rate": 1.8679527093238737e-05, "loss": 0.5804, "step": 6967 }, { "epoch": 0.19054911397943558, "grad_norm": 1.5527241230010986, "learning_rate": 1.8679087177013586e-05, "loss": 0.5464, "step": 6968 }, { "epoch": 0.19057646029315248, "grad_norm": 1.3302032947540283, "learning_rate": 1.867864719270343e-05, "loss": 0.5709, "step": 6969 }, { "epoch": 0.1906038066068694, "grad_norm": 1.4303644895553589, "learning_rate": 1.8678207140311735e-05, "loss": 0.5529, "step": 6970 }, { "epoch": 0.1906311529205863, "grad_norm": 1.2715791463851929, "learning_rate": 1.867776701984195e-05, "loss": 0.5369, "step": 6971 }, { "epoch": 0.19065849923430322, "grad_norm": 1.3884304761886597, "learning_rate": 1.867732683129752e-05, "loss": 0.5838, "step": 6972 }, { "epoch": 0.19068584554802012, "grad_norm": 1.199676275253296, "learning_rate": 1.8676886574681906e-05, "loss": 0.5412, "step": 6973 }, { "epoch": 0.19071319186173705, "grad_norm": 1.521162509918213, "learning_rate": 1.8676446249998558e-05, "loss": 0.5268, "step": 6974 }, { "epoch": 0.19074053817545394, "grad_norm": 1.4230318069458008, "learning_rate": 1.867600585725093e-05, "loss": 0.5813, "step": 6975 }, { "epoch": 0.19076788448917087, "grad_norm": 1.3991718292236328, "learning_rate": 1.8675565396442477e-05, "loss": 0.5221, "step": 6976 }, { "epoch": 0.19079523080288777, "grad_norm": 1.2889888286590576, "learning_rate": 1.8675124867576656e-05, "loss": 0.413, "step": 6977 }, { "epoch": 0.1908225771166047, "grad_norm": 1.507317066192627, "learning_rate": 1.8674684270656926e-05, "loss": 0.5761, "step": 6978 }, { "epoch": 0.1908499234303216, "grad_norm": 1.3328890800476074, "learning_rate": 1.867424360568673e-05, "loss": 0.5927, "step": 6979 }, { "epoch": 0.1908772697440385, "grad_norm": 1.6808801889419556, "learning_rate": 1.867380287266954e-05, "loss": 0.5268, "step": 6980 }, { "epoch": 0.1909046160577554, "grad_norm": 1.5206693410873413, "learning_rate": 1.8673362071608808e-05, "loss": 0.5786, "step": 6981 }, { "epoch": 0.19093196237147234, "grad_norm": 1.4005632400512695, "learning_rate": 1.867292120250799e-05, "loss": 0.5355, "step": 6982 }, { "epoch": 0.19095930868518923, "grad_norm": 1.5509029626846313, "learning_rate": 1.8672480265370547e-05, "loss": 0.5529, "step": 6983 }, { "epoch": 0.19098665499890616, "grad_norm": 1.7611150741577148, "learning_rate": 1.8672039260199936e-05, "loss": 0.5415, "step": 6984 }, { "epoch": 0.19101400131262306, "grad_norm": 1.4509239196777344, "learning_rate": 1.8671598186999617e-05, "loss": 0.5291, "step": 6985 }, { "epoch": 0.19104134762633998, "grad_norm": 1.5850869417190552, "learning_rate": 1.867115704577305e-05, "loss": 0.5726, "step": 6986 }, { "epoch": 0.19106869394005688, "grad_norm": 1.441702127456665, "learning_rate": 1.86707158365237e-05, "loss": 0.606, "step": 6987 }, { "epoch": 0.1910960402537738, "grad_norm": 1.7310339212417603, "learning_rate": 1.867027455925502e-05, "loss": 0.544, "step": 6988 }, { "epoch": 0.1911233865674907, "grad_norm": 1.6146645545959473, "learning_rate": 1.8669833213970476e-05, "loss": 0.5563, "step": 6989 }, { "epoch": 0.19115073288120762, "grad_norm": 1.265537977218628, "learning_rate": 1.8669391800673536e-05, "loss": 0.5254, "step": 6990 }, { "epoch": 0.19117807919492452, "grad_norm": 1.3490040302276611, "learning_rate": 1.866895031936765e-05, "loss": 0.9259, "step": 6991 }, { "epoch": 0.19120542550864145, "grad_norm": 2.1449673175811768, "learning_rate": 1.866850877005629e-05, "loss": 0.5665, "step": 6992 }, { "epoch": 0.19123277182235834, "grad_norm": 1.8538533449172974, "learning_rate": 1.8668067152742922e-05, "loss": 0.5534, "step": 6993 }, { "epoch": 0.19126011813607527, "grad_norm": 1.2696932554244995, "learning_rate": 1.8667625467431004e-05, "loss": 0.5329, "step": 6994 }, { "epoch": 0.19128746444979217, "grad_norm": 1.1451443433761597, "learning_rate": 1.8667183714124008e-05, "loss": 0.3954, "step": 6995 }, { "epoch": 0.1913148107635091, "grad_norm": 1.3858436346054077, "learning_rate": 1.866674189282539e-05, "loss": 0.5338, "step": 6996 }, { "epoch": 0.191342157077226, "grad_norm": 1.6818244457244873, "learning_rate": 1.866630000353862e-05, "loss": 0.6081, "step": 6997 }, { "epoch": 0.1913695033909429, "grad_norm": 1.6278761625289917, "learning_rate": 1.8665858046267168e-05, "loss": 0.5191, "step": 6998 }, { "epoch": 0.1913968497046598, "grad_norm": 1.4650131464004517, "learning_rate": 1.8665416021014497e-05, "loss": 0.5528, "step": 6999 }, { "epoch": 0.19142419601837674, "grad_norm": 1.6536903381347656, "learning_rate": 1.8664973927784076e-05, "loss": 0.5504, "step": 7000 }, { "epoch": 0.19145154233209363, "grad_norm": 1.4550467729568481, "learning_rate": 1.8664531766579375e-05, "loss": 0.5768, "step": 7001 }, { "epoch": 0.19147888864581056, "grad_norm": 1.392207145690918, "learning_rate": 1.8664089537403858e-05, "loss": 0.6077, "step": 7002 }, { "epoch": 0.19150623495952745, "grad_norm": 1.4099252223968506, "learning_rate": 1.8663647240261e-05, "loss": 0.5802, "step": 7003 }, { "epoch": 0.19153358127324438, "grad_norm": 1.5294188261032104, "learning_rate": 1.8663204875154264e-05, "loss": 0.5454, "step": 7004 }, { "epoch": 0.19156092758696128, "grad_norm": 1.3299742937088013, "learning_rate": 1.8662762442087125e-05, "loss": 0.9456, "step": 7005 }, { "epoch": 0.1915882739006782, "grad_norm": 1.1148045063018799, "learning_rate": 1.8662319941063055e-05, "loss": 0.5164, "step": 7006 }, { "epoch": 0.1916156202143951, "grad_norm": 1.3237895965576172, "learning_rate": 1.8661877372085523e-05, "loss": 0.59, "step": 7007 }, { "epoch": 0.19164296652811202, "grad_norm": 1.214264154434204, "learning_rate": 1.8661434735157998e-05, "loss": 0.5597, "step": 7008 }, { "epoch": 0.19167031284182892, "grad_norm": 1.435497522354126, "learning_rate": 1.8660992030283953e-05, "loss": 0.5955, "step": 7009 }, { "epoch": 0.19169765915554582, "grad_norm": 1.4330912828445435, "learning_rate": 1.8660549257466868e-05, "loss": 0.5361, "step": 7010 }, { "epoch": 0.19172500546926274, "grad_norm": 1.5487444400787354, "learning_rate": 1.866010641671021e-05, "loss": 0.5277, "step": 7011 }, { "epoch": 0.19175235178297964, "grad_norm": 1.3142060041427612, "learning_rate": 1.8659663508017452e-05, "loss": 0.5535, "step": 7012 }, { "epoch": 0.19177969809669657, "grad_norm": 1.4187530279159546, "learning_rate": 1.8659220531392073e-05, "loss": 0.5472, "step": 7013 }, { "epoch": 0.19180704441041346, "grad_norm": 1.5100706815719604, "learning_rate": 1.8658777486837545e-05, "loss": 0.5278, "step": 7014 }, { "epoch": 0.1918343907241304, "grad_norm": 1.93104088306427, "learning_rate": 1.8658334374357345e-05, "loss": 0.5794, "step": 7015 }, { "epoch": 0.19186173703784729, "grad_norm": 1.4175384044647217, "learning_rate": 1.8657891193954948e-05, "loss": 0.426, "step": 7016 }, { "epoch": 0.1918890833515642, "grad_norm": 1.5819132328033447, "learning_rate": 1.865744794563383e-05, "loss": 0.6093, "step": 7017 }, { "epoch": 0.1919164296652811, "grad_norm": 1.131942868232727, "learning_rate": 1.865700462939747e-05, "loss": 0.5564, "step": 7018 }, { "epoch": 0.19194377597899803, "grad_norm": 1.2965047359466553, "learning_rate": 1.8656561245249344e-05, "loss": 0.5932, "step": 7019 }, { "epoch": 0.19197112229271493, "grad_norm": 1.2727165222167969, "learning_rate": 1.8656117793192933e-05, "loss": 0.604, "step": 7020 }, { "epoch": 0.19199846860643185, "grad_norm": 1.2201882600784302, "learning_rate": 1.8655674273231714e-05, "loss": 0.5695, "step": 7021 }, { "epoch": 0.19202581492014875, "grad_norm": 1.3651788234710693, "learning_rate": 1.8655230685369167e-05, "loss": 0.5095, "step": 7022 }, { "epoch": 0.19205316123386568, "grad_norm": 1.4802958965301514, "learning_rate": 1.8654787029608766e-05, "loss": 0.5491, "step": 7023 }, { "epoch": 0.19208050754758257, "grad_norm": 1.345573902130127, "learning_rate": 1.8654343305954002e-05, "loss": 0.5844, "step": 7024 }, { "epoch": 0.1921078538612995, "grad_norm": 2.2344858646392822, "learning_rate": 1.8653899514408352e-05, "loss": 0.5721, "step": 7025 }, { "epoch": 0.1921352001750164, "grad_norm": 1.3050365447998047, "learning_rate": 1.865345565497529e-05, "loss": 0.5554, "step": 7026 }, { "epoch": 0.19216254648873332, "grad_norm": 1.2243719100952148, "learning_rate": 1.8653011727658306e-05, "loss": 0.4237, "step": 7027 }, { "epoch": 0.19218989280245022, "grad_norm": 1.6951161623001099, "learning_rate": 1.865256773246088e-05, "loss": 0.5939, "step": 7028 }, { "epoch": 0.19221723911616714, "grad_norm": 1.1994681358337402, "learning_rate": 1.8652123669386496e-05, "loss": 0.5214, "step": 7029 }, { "epoch": 0.19224458542988404, "grad_norm": 1.4379876852035522, "learning_rate": 1.8651679538438637e-05, "loss": 0.578, "step": 7030 }, { "epoch": 0.19227193174360097, "grad_norm": 1.4942522048950195, "learning_rate": 1.8651235339620785e-05, "loss": 0.5031, "step": 7031 }, { "epoch": 0.19229927805731786, "grad_norm": 1.2949448823928833, "learning_rate": 1.8650791072936428e-05, "loss": 0.5689, "step": 7032 }, { "epoch": 0.1923266243710348, "grad_norm": 1.7332777976989746, "learning_rate": 1.8650346738389048e-05, "loss": 0.5237, "step": 7033 }, { "epoch": 0.19235397068475169, "grad_norm": 2.488445281982422, "learning_rate": 1.8649902335982132e-05, "loss": 0.9031, "step": 7034 }, { "epoch": 0.1923813169984686, "grad_norm": 1.8381911516189575, "learning_rate": 1.8649457865719166e-05, "loss": 0.4918, "step": 7035 }, { "epoch": 0.1924086633121855, "grad_norm": 2.4151103496551514, "learning_rate": 1.864901332760364e-05, "loss": 0.547, "step": 7036 }, { "epoch": 0.19243600962590243, "grad_norm": 1.4895100593566895, "learning_rate": 1.864856872163904e-05, "loss": 0.5935, "step": 7037 }, { "epoch": 0.19246335593961933, "grad_norm": 1.6027417182922363, "learning_rate": 1.8648124047828848e-05, "loss": 0.5782, "step": 7038 }, { "epoch": 0.19249070225333625, "grad_norm": 1.4924806356430054, "learning_rate": 1.8647679306176556e-05, "loss": 0.5671, "step": 7039 }, { "epoch": 0.19251804856705315, "grad_norm": 1.2847710847854614, "learning_rate": 1.864723449668566e-05, "loss": 0.5668, "step": 7040 }, { "epoch": 0.19254539488077008, "grad_norm": 1.3966853618621826, "learning_rate": 1.8646789619359635e-05, "loss": 0.551, "step": 7041 }, { "epoch": 0.19257274119448697, "grad_norm": 1.2451266050338745, "learning_rate": 1.8646344674201985e-05, "loss": 0.5725, "step": 7042 }, { "epoch": 0.1926000875082039, "grad_norm": 1.5038795471191406, "learning_rate": 1.864589966121619e-05, "loss": 0.9118, "step": 7043 }, { "epoch": 0.1926274338219208, "grad_norm": 1.7433289289474487, "learning_rate": 1.8645454580405746e-05, "loss": 0.5283, "step": 7044 }, { "epoch": 0.19265478013563772, "grad_norm": 1.3156322240829468, "learning_rate": 1.8645009431774147e-05, "loss": 0.9254, "step": 7045 }, { "epoch": 0.19268212644935462, "grad_norm": 1.457800269126892, "learning_rate": 1.8644564215324878e-05, "loss": 0.5488, "step": 7046 }, { "epoch": 0.19270947276307154, "grad_norm": 1.6007237434387207, "learning_rate": 1.864411893106144e-05, "loss": 0.5651, "step": 7047 }, { "epoch": 0.19273681907678844, "grad_norm": 1.8480689525604248, "learning_rate": 1.8643673578987316e-05, "loss": 0.5154, "step": 7048 }, { "epoch": 0.19276416539050537, "grad_norm": 3.741408586502075, "learning_rate": 1.864322815910601e-05, "loss": 0.5927, "step": 7049 }, { "epoch": 0.19279151170422226, "grad_norm": 1.3700071573257446, "learning_rate": 1.864278267142101e-05, "loss": 0.5552, "step": 7050 }, { "epoch": 0.1928188580179392, "grad_norm": 1.8773919343948364, "learning_rate": 1.8642337115935812e-05, "loss": 0.6269, "step": 7051 }, { "epoch": 0.19284620433165608, "grad_norm": 1.3927462100982666, "learning_rate": 1.8641891492653912e-05, "loss": 0.5815, "step": 7052 }, { "epoch": 0.192873550645373, "grad_norm": 1.3221533298492432, "learning_rate": 1.8641445801578805e-05, "loss": 0.5442, "step": 7053 }, { "epoch": 0.1929008969590899, "grad_norm": 1.4897403717041016, "learning_rate": 1.8641000042713984e-05, "loss": 0.5952, "step": 7054 }, { "epoch": 0.19292824327280683, "grad_norm": 1.705040693283081, "learning_rate": 1.8640554216062956e-05, "loss": 0.5588, "step": 7055 }, { "epoch": 0.19295558958652373, "grad_norm": 1.4566742181777954, "learning_rate": 1.8640108321629208e-05, "loss": 0.5389, "step": 7056 }, { "epoch": 0.19298293590024065, "grad_norm": 1.1305437088012695, "learning_rate": 1.8639662359416242e-05, "loss": 0.5472, "step": 7057 }, { "epoch": 0.19301028221395755, "grad_norm": 1.3026347160339355, "learning_rate": 1.8639216329427556e-05, "loss": 0.5603, "step": 7058 }, { "epoch": 0.19303762852767448, "grad_norm": 1.3671469688415527, "learning_rate": 1.863877023166665e-05, "loss": 0.5262, "step": 7059 }, { "epoch": 0.19306497484139137, "grad_norm": 1.5764697790145874, "learning_rate": 1.8638324066137023e-05, "loss": 0.5331, "step": 7060 }, { "epoch": 0.1930923211551083, "grad_norm": 1.4437850713729858, "learning_rate": 1.8637877832842174e-05, "loss": 0.5471, "step": 7061 }, { "epoch": 0.1931196674688252, "grad_norm": 1.6926283836364746, "learning_rate": 1.8637431531785604e-05, "loss": 0.5724, "step": 7062 }, { "epoch": 0.19314701378254212, "grad_norm": 2.606842041015625, "learning_rate": 1.8636985162970816e-05, "loss": 0.6055, "step": 7063 }, { "epoch": 0.19317436009625902, "grad_norm": 1.663405418395996, "learning_rate": 1.863653872640131e-05, "loss": 0.5935, "step": 7064 }, { "epoch": 0.19320170640997594, "grad_norm": 1.6513535976409912, "learning_rate": 1.8636092222080588e-05, "loss": 0.5488, "step": 7065 }, { "epoch": 0.19322905272369284, "grad_norm": 2.0089194774627686, "learning_rate": 1.863564565001215e-05, "loss": 0.4007, "step": 7066 }, { "epoch": 0.19325639903740977, "grad_norm": 1.4403071403503418, "learning_rate": 1.8635199010199505e-05, "loss": 0.4001, "step": 7067 }, { "epoch": 0.19328374535112666, "grad_norm": 1.532913088798523, "learning_rate": 1.863475230264615e-05, "loss": 0.5653, "step": 7068 }, { "epoch": 0.1933110916648436, "grad_norm": 1.4108333587646484, "learning_rate": 1.8634305527355604e-05, "loss": 0.6019, "step": 7069 }, { "epoch": 0.19333843797856048, "grad_norm": 1.3433290719985962, "learning_rate": 1.863385868433135e-05, "loss": 0.5698, "step": 7070 }, { "epoch": 0.1933657842922774, "grad_norm": 1.3768699169158936, "learning_rate": 1.8633411773576912e-05, "loss": 0.5731, "step": 7071 }, { "epoch": 0.1933931306059943, "grad_norm": 1.3636008501052856, "learning_rate": 1.863296479509578e-05, "loss": 0.5193, "step": 7072 }, { "epoch": 0.19342047691971123, "grad_norm": 1.6174496412277222, "learning_rate": 1.8632517748891473e-05, "loss": 0.576, "step": 7073 }, { "epoch": 0.19344782323342813, "grad_norm": 1.565616488456726, "learning_rate": 1.8632070634967494e-05, "loss": 0.4059, "step": 7074 }, { "epoch": 0.19347516954714505, "grad_norm": 1.266108751296997, "learning_rate": 1.863162345332735e-05, "loss": 0.5443, "step": 7075 }, { "epoch": 0.19350251586086195, "grad_norm": 1.4408656358718872, "learning_rate": 1.863117620397455e-05, "loss": 0.5458, "step": 7076 }, { "epoch": 0.19352986217457888, "grad_norm": 1.2436244487762451, "learning_rate": 1.86307288869126e-05, "loss": 0.5572, "step": 7077 }, { "epoch": 0.19355720848829577, "grad_norm": 1.5578523874282837, "learning_rate": 1.8630281502145012e-05, "loss": 0.5603, "step": 7078 }, { "epoch": 0.1935845548020127, "grad_norm": 1.4213143587112427, "learning_rate": 1.8629834049675293e-05, "loss": 0.9268, "step": 7079 }, { "epoch": 0.1936119011157296, "grad_norm": 1.486272931098938, "learning_rate": 1.8629386529506956e-05, "loss": 0.5585, "step": 7080 }, { "epoch": 0.19363924742944652, "grad_norm": 1.1306308507919312, "learning_rate": 1.862893894164351e-05, "loss": 0.4198, "step": 7081 }, { "epoch": 0.19366659374316342, "grad_norm": 1.3289825916290283, "learning_rate": 1.8628491286088466e-05, "loss": 0.5752, "step": 7082 }, { "epoch": 0.19369394005688034, "grad_norm": 1.2530986070632935, "learning_rate": 1.8628043562845336e-05, "loss": 0.5427, "step": 7083 }, { "epoch": 0.19372128637059724, "grad_norm": 1.7075389623641968, "learning_rate": 1.862759577191763e-05, "loss": 0.5864, "step": 7084 }, { "epoch": 0.19374863268431416, "grad_norm": 1.3486990928649902, "learning_rate": 1.8627147913308866e-05, "loss": 0.3918, "step": 7085 }, { "epoch": 0.19377597899803106, "grad_norm": 1.5101215839385986, "learning_rate": 1.8626699987022556e-05, "loss": 0.5469, "step": 7086 }, { "epoch": 0.193803325311748, "grad_norm": 1.445603609085083, "learning_rate": 1.862625199306221e-05, "loss": 0.5034, "step": 7087 }, { "epoch": 0.19383067162546488, "grad_norm": 1.248890995979309, "learning_rate": 1.8625803931431346e-05, "loss": 0.5467, "step": 7088 }, { "epoch": 0.1938580179391818, "grad_norm": 1.6047592163085938, "learning_rate": 1.8625355802133475e-05, "loss": 0.579, "step": 7089 }, { "epoch": 0.1938853642528987, "grad_norm": 1.4636849164962769, "learning_rate": 1.862490760517212e-05, "loss": 0.5749, "step": 7090 }, { "epoch": 0.19391271056661563, "grad_norm": 1.836958885192871, "learning_rate": 1.8624459340550788e-05, "loss": 0.6067, "step": 7091 }, { "epoch": 0.19394005688033253, "grad_norm": 1.7445811033248901, "learning_rate": 1.8624011008273e-05, "loss": 0.5819, "step": 7092 }, { "epoch": 0.19396740319404945, "grad_norm": 1.3526160717010498, "learning_rate": 1.8623562608342273e-05, "loss": 0.573, "step": 7093 }, { "epoch": 0.19399474950776635, "grad_norm": 1.5413753986358643, "learning_rate": 1.8623114140762127e-05, "loss": 0.5355, "step": 7094 }, { "epoch": 0.19402209582148328, "grad_norm": 1.21514093875885, "learning_rate": 1.8622665605536073e-05, "loss": 0.5695, "step": 7095 }, { "epoch": 0.19404944213520017, "grad_norm": 1.4742951393127441, "learning_rate": 1.8622217002667638e-05, "loss": 0.5804, "step": 7096 }, { "epoch": 0.1940767884489171, "grad_norm": 1.4085109233856201, "learning_rate": 1.8621768332160336e-05, "loss": 0.5344, "step": 7097 }, { "epoch": 0.194104134762634, "grad_norm": 1.3038628101348877, "learning_rate": 1.8621319594017687e-05, "loss": 0.5481, "step": 7098 }, { "epoch": 0.19413148107635092, "grad_norm": 1.7112700939178467, "learning_rate": 1.8620870788243207e-05, "loss": 0.588, "step": 7099 }, { "epoch": 0.19415882739006782, "grad_norm": 1.5582302808761597, "learning_rate": 1.8620421914840428e-05, "loss": 0.5361, "step": 7100 }, { "epoch": 0.19418617370378474, "grad_norm": 1.8042250871658325, "learning_rate": 1.8619972973812866e-05, "loss": 0.4052, "step": 7101 }, { "epoch": 0.19421352001750164, "grad_norm": 1.259426474571228, "learning_rate": 1.8619523965164038e-05, "loss": 0.5559, "step": 7102 }, { "epoch": 0.19424086633121856, "grad_norm": 1.4868178367614746, "learning_rate": 1.861907488889747e-05, "loss": 0.5713, "step": 7103 }, { "epoch": 0.19426821264493546, "grad_norm": 1.613161563873291, "learning_rate": 1.861862574501669e-05, "loss": 0.5871, "step": 7104 }, { "epoch": 0.1942955589586524, "grad_norm": 1.514193058013916, "learning_rate": 1.8618176533525215e-05, "loss": 0.6015, "step": 7105 }, { "epoch": 0.19432290527236928, "grad_norm": 1.5843509435653687, "learning_rate": 1.8617727254426566e-05, "loss": 0.5732, "step": 7106 }, { "epoch": 0.1943502515860862, "grad_norm": 1.2286792993545532, "learning_rate": 1.8617277907724273e-05, "loss": 0.5491, "step": 7107 }, { "epoch": 0.1943775978998031, "grad_norm": 1.4411232471466064, "learning_rate": 1.8616828493421867e-05, "loss": 0.5282, "step": 7108 }, { "epoch": 0.19440494421352003, "grad_norm": 1.3564890623092651, "learning_rate": 1.861637901152286e-05, "loss": 0.4873, "step": 7109 }, { "epoch": 0.19443229052723693, "grad_norm": 1.4561347961425781, "learning_rate": 1.8615929462030782e-05, "loss": 0.915, "step": 7110 }, { "epoch": 0.19445963684095383, "grad_norm": 1.2854090929031372, "learning_rate": 1.8615479844949166e-05, "loss": 0.5784, "step": 7111 }, { "epoch": 0.19448698315467075, "grad_norm": 1.0631508827209473, "learning_rate": 1.8615030160281537e-05, "loss": 0.411, "step": 7112 }, { "epoch": 0.19451432946838765, "grad_norm": 1.5825468301773071, "learning_rate": 1.861458040803142e-05, "loss": 0.5063, "step": 7113 }, { "epoch": 0.19454167578210457, "grad_norm": 1.5351566076278687, "learning_rate": 1.8614130588202342e-05, "loss": 0.5723, "step": 7114 }, { "epoch": 0.19456902209582147, "grad_norm": 1.340565800666809, "learning_rate": 1.8613680700797835e-05, "loss": 0.584, "step": 7115 }, { "epoch": 0.1945963684095384, "grad_norm": 1.3569645881652832, "learning_rate": 1.8613230745821426e-05, "loss": 0.5079, "step": 7116 }, { "epoch": 0.1946237147232553, "grad_norm": 1.3210844993591309, "learning_rate": 1.8612780723276647e-05, "loss": 0.6025, "step": 7117 }, { "epoch": 0.19465106103697222, "grad_norm": 3.43098783493042, "learning_rate": 1.8612330633167025e-05, "loss": 0.5695, "step": 7118 }, { "epoch": 0.19467840735068911, "grad_norm": 1.486929178237915, "learning_rate": 1.8611880475496096e-05, "loss": 0.5627, "step": 7119 }, { "epoch": 0.19470575366440604, "grad_norm": 1.3026925325393677, "learning_rate": 1.861143025026739e-05, "loss": 0.5751, "step": 7120 }, { "epoch": 0.19473309997812294, "grad_norm": 1.2411812543869019, "learning_rate": 1.8610979957484432e-05, "loss": 0.5787, "step": 7121 }, { "epoch": 0.19476044629183986, "grad_norm": 1.4306696653366089, "learning_rate": 1.861052959715076e-05, "loss": 0.6095, "step": 7122 }, { "epoch": 0.19478779260555676, "grad_norm": 1.3054537773132324, "learning_rate": 1.8610079169269908e-05, "loss": 0.5807, "step": 7123 }, { "epoch": 0.19481513891927368, "grad_norm": 1.9574997425079346, "learning_rate": 1.860962867384541e-05, "loss": 0.9183, "step": 7124 }, { "epoch": 0.19484248523299058, "grad_norm": 1.7081782817840576, "learning_rate": 1.8609178110880798e-05, "loss": 0.9252, "step": 7125 }, { "epoch": 0.1948698315467075, "grad_norm": 1.7805434465408325, "learning_rate": 1.8608727480379602e-05, "loss": 0.5784, "step": 7126 }, { "epoch": 0.1948971778604244, "grad_norm": 1.3581640720367432, "learning_rate": 1.8608276782345363e-05, "loss": 0.6054, "step": 7127 }, { "epoch": 0.19492452417414133, "grad_norm": 1.2275453805923462, "learning_rate": 1.8607826016781616e-05, "loss": 0.5723, "step": 7128 }, { "epoch": 0.19495187048785823, "grad_norm": 1.4274661540985107, "learning_rate": 1.8607375183691897e-05, "loss": 0.5591, "step": 7129 }, { "epoch": 0.19497921680157515, "grad_norm": 1.6079325675964355, "learning_rate": 1.8606924283079745e-05, "loss": 0.637, "step": 7130 }, { "epoch": 0.19500656311529205, "grad_norm": 1.4974197149276733, "learning_rate": 1.8606473314948688e-05, "loss": 0.6162, "step": 7131 }, { "epoch": 0.19503390942900897, "grad_norm": 1.40716552734375, "learning_rate": 1.8606022279302275e-05, "loss": 0.5684, "step": 7132 }, { "epoch": 0.19506125574272587, "grad_norm": 1.879902720451355, "learning_rate": 1.8605571176144036e-05, "loss": 0.9325, "step": 7133 }, { "epoch": 0.1950886020564428, "grad_norm": 1.3262115716934204, "learning_rate": 1.8605120005477512e-05, "loss": 0.5562, "step": 7134 }, { "epoch": 0.1951159483701597, "grad_norm": 2.9954159259796143, "learning_rate": 1.860466876730625e-05, "loss": 0.5739, "step": 7135 }, { "epoch": 0.19514329468387662, "grad_norm": 1.2246090173721313, "learning_rate": 1.8604217461633777e-05, "loss": 0.5768, "step": 7136 }, { "epoch": 0.19517064099759351, "grad_norm": 1.4658337831497192, "learning_rate": 1.860376608846364e-05, "loss": 0.5687, "step": 7137 }, { "epoch": 0.19519798731131044, "grad_norm": 1.5238128900527954, "learning_rate": 1.8603314647799377e-05, "loss": 0.5937, "step": 7138 }, { "epoch": 0.19522533362502734, "grad_norm": 1.9146100282669067, "learning_rate": 1.860286313964454e-05, "loss": 0.4068, "step": 7139 }, { "epoch": 0.19525267993874426, "grad_norm": 1.2296006679534912, "learning_rate": 1.8602411564002655e-05, "loss": 0.5523, "step": 7140 }, { "epoch": 0.19528002625246116, "grad_norm": 1.637813687324524, "learning_rate": 1.8601959920877277e-05, "loss": 0.5717, "step": 7141 }, { "epoch": 0.19530737256617808, "grad_norm": 1.505617618560791, "learning_rate": 1.8601508210271938e-05, "loss": 0.5916, "step": 7142 }, { "epoch": 0.19533471887989498, "grad_norm": 1.4903196096420288, "learning_rate": 1.8601056432190193e-05, "loss": 0.5581, "step": 7143 }, { "epoch": 0.1953620651936119, "grad_norm": 1.3379613161087036, "learning_rate": 1.8600604586635582e-05, "loss": 0.5348, "step": 7144 }, { "epoch": 0.1953894115073288, "grad_norm": 1.4930709600448608, "learning_rate": 1.8600152673611646e-05, "loss": 0.5543, "step": 7145 }, { "epoch": 0.19541675782104573, "grad_norm": 1.7166180610656738, "learning_rate": 1.8599700693121932e-05, "loss": 0.9496, "step": 7146 }, { "epoch": 0.19544410413476263, "grad_norm": 1.6764066219329834, "learning_rate": 1.8599248645169984e-05, "loss": 0.5247, "step": 7147 }, { "epoch": 0.19547145044847955, "grad_norm": 1.1870059967041016, "learning_rate": 1.8598796529759357e-05, "loss": 0.5442, "step": 7148 }, { "epoch": 0.19549879676219645, "grad_norm": 1.3886051177978516, "learning_rate": 1.8598344346893587e-05, "loss": 0.5647, "step": 7149 }, { "epoch": 0.19552614307591337, "grad_norm": 1.803235411643982, "learning_rate": 1.8597892096576225e-05, "loss": 0.5334, "step": 7150 }, { "epoch": 0.19555348938963027, "grad_norm": 1.4122915267944336, "learning_rate": 1.859743977881082e-05, "loss": 0.5423, "step": 7151 }, { "epoch": 0.1955808357033472, "grad_norm": 1.5577280521392822, "learning_rate": 1.859698739360092e-05, "loss": 0.4754, "step": 7152 }, { "epoch": 0.1956081820170641, "grad_norm": 1.4984300136566162, "learning_rate": 1.859653494095007e-05, "loss": 0.5266, "step": 7153 }, { "epoch": 0.19563552833078102, "grad_norm": 1.5567041635513306, "learning_rate": 1.859608242086183e-05, "loss": 0.9039, "step": 7154 }, { "epoch": 0.19566287464449791, "grad_norm": 1.4684274196624756, "learning_rate": 1.8595629833339736e-05, "loss": 0.5542, "step": 7155 }, { "epoch": 0.19569022095821484, "grad_norm": 1.432841181755066, "learning_rate": 1.8595177178387346e-05, "loss": 0.6067, "step": 7156 }, { "epoch": 0.19571756727193174, "grad_norm": 1.3566632270812988, "learning_rate": 1.859472445600821e-05, "loss": 0.5631, "step": 7157 }, { "epoch": 0.19574491358564866, "grad_norm": 1.1676816940307617, "learning_rate": 1.8594271666205884e-05, "loss": 0.5795, "step": 7158 }, { "epoch": 0.19577225989936556, "grad_norm": 1.3033236265182495, "learning_rate": 1.859381880898391e-05, "loss": 0.55, "step": 7159 }, { "epoch": 0.19579960621308248, "grad_norm": 2.299724578857422, "learning_rate": 1.8593365884345848e-05, "loss": 0.9029, "step": 7160 }, { "epoch": 0.19582695252679938, "grad_norm": 1.2030164003372192, "learning_rate": 1.8592912892295246e-05, "loss": 0.5687, "step": 7161 }, { "epoch": 0.1958542988405163, "grad_norm": 1.2469370365142822, "learning_rate": 1.8592459832835665e-05, "loss": 0.5863, "step": 7162 }, { "epoch": 0.1958816451542332, "grad_norm": 1.2594507932662964, "learning_rate": 1.8592006705970652e-05, "loss": 0.5786, "step": 7163 }, { "epoch": 0.19590899146795013, "grad_norm": 2.0078024864196777, "learning_rate": 1.859155351170377e-05, "loss": 0.5807, "step": 7164 }, { "epoch": 0.19593633778166702, "grad_norm": 1.2341961860656738, "learning_rate": 1.859110025003856e-05, "loss": 0.5686, "step": 7165 }, { "epoch": 0.19596368409538395, "grad_norm": 1.4218358993530273, "learning_rate": 1.8590646920978592e-05, "loss": 0.9259, "step": 7166 }, { "epoch": 0.19599103040910085, "grad_norm": 3.1436007022857666, "learning_rate": 1.8590193524527414e-05, "loss": 0.9327, "step": 7167 }, { "epoch": 0.19601837672281777, "grad_norm": 1.1287921667099, "learning_rate": 1.8589740060688585e-05, "loss": 0.5856, "step": 7168 }, { "epoch": 0.19604572303653467, "grad_norm": 2.242115020751953, "learning_rate": 1.8589286529465665e-05, "loss": 0.4038, "step": 7169 }, { "epoch": 0.1960730693502516, "grad_norm": 1.5400030612945557, "learning_rate": 1.8588832930862207e-05, "loss": 0.5794, "step": 7170 }, { "epoch": 0.1961004156639685, "grad_norm": 2.468613386154175, "learning_rate": 1.8588379264881774e-05, "loss": 0.5572, "step": 7171 }, { "epoch": 0.19612776197768542, "grad_norm": 1.2018533945083618, "learning_rate": 1.8587925531527923e-05, "loss": 0.5948, "step": 7172 }, { "epoch": 0.1961551082914023, "grad_norm": 1.4610421657562256, "learning_rate": 1.858747173080421e-05, "loss": 0.5576, "step": 7173 }, { "epoch": 0.19618245460511924, "grad_norm": 1.3563507795333862, "learning_rate": 1.85870178627142e-05, "loss": 0.5576, "step": 7174 }, { "epoch": 0.19620980091883614, "grad_norm": 1.777374029159546, "learning_rate": 1.858656392726145e-05, "loss": 0.8982, "step": 7175 }, { "epoch": 0.19623714723255306, "grad_norm": 1.0803675651550293, "learning_rate": 1.8586109924449522e-05, "loss": 0.5801, "step": 7176 }, { "epoch": 0.19626449354626996, "grad_norm": 1.7781471014022827, "learning_rate": 1.858565585428198e-05, "loss": 0.6315, "step": 7177 }, { "epoch": 0.19629183985998688, "grad_norm": 1.731793761253357, "learning_rate": 1.8585201716762386e-05, "loss": 0.5849, "step": 7178 }, { "epoch": 0.19631918617370378, "grad_norm": 1.6409717798233032, "learning_rate": 1.8584747511894296e-05, "loss": 0.6219, "step": 7179 }, { "epoch": 0.1963465324874207, "grad_norm": 1.3124005794525146, "learning_rate": 1.8584293239681283e-05, "loss": 0.5549, "step": 7180 }, { "epoch": 0.1963738788011376, "grad_norm": 1.2333273887634277, "learning_rate": 1.85838389001269e-05, "loss": 0.4993, "step": 7181 }, { "epoch": 0.19640122511485453, "grad_norm": 1.505755066871643, "learning_rate": 1.8583384493234717e-05, "loss": 0.5859, "step": 7182 }, { "epoch": 0.19642857142857142, "grad_norm": 1.5915082693099976, "learning_rate": 1.8582930019008304e-05, "loss": 0.5804, "step": 7183 }, { "epoch": 0.19645591774228835, "grad_norm": 1.348982810974121, "learning_rate": 1.8582475477451214e-05, "loss": 0.5697, "step": 7184 }, { "epoch": 0.19648326405600525, "grad_norm": 1.242295742034912, "learning_rate": 1.8582020868567023e-05, "loss": 0.5355, "step": 7185 }, { "epoch": 0.19651061036972217, "grad_norm": 1.5168094635009766, "learning_rate": 1.858156619235929e-05, "loss": 0.5555, "step": 7186 }, { "epoch": 0.19653795668343907, "grad_norm": 1.493380069732666, "learning_rate": 1.858111144883159e-05, "loss": 0.553, "step": 7187 }, { "epoch": 0.196565302997156, "grad_norm": 1.5581092834472656, "learning_rate": 1.858065663798748e-05, "loss": 0.5321, "step": 7188 }, { "epoch": 0.1965926493108729, "grad_norm": 1.6149972677230835, "learning_rate": 1.8580201759830538e-05, "loss": 0.5753, "step": 7189 }, { "epoch": 0.19661999562458982, "grad_norm": 1.637911081314087, "learning_rate": 1.8579746814364327e-05, "loss": 0.5175, "step": 7190 }, { "epoch": 0.1966473419383067, "grad_norm": 1.322545051574707, "learning_rate": 1.8579291801592414e-05, "loss": 0.5503, "step": 7191 }, { "epoch": 0.19667468825202364, "grad_norm": 1.2276524305343628, "learning_rate": 1.8578836721518376e-05, "loss": 0.5568, "step": 7192 }, { "epoch": 0.19670203456574054, "grad_norm": 1.534751296043396, "learning_rate": 1.8578381574145775e-05, "loss": 0.5378, "step": 7193 }, { "epoch": 0.19672938087945746, "grad_norm": 2.4266040325164795, "learning_rate": 1.8577926359478186e-05, "loss": 0.5853, "step": 7194 }, { "epoch": 0.19675672719317436, "grad_norm": 1.4513121843338013, "learning_rate": 1.8577471077519175e-05, "loss": 0.5421, "step": 7195 }, { "epoch": 0.19678407350689128, "grad_norm": 1.234777808189392, "learning_rate": 1.857701572827232e-05, "loss": 0.566, "step": 7196 }, { "epoch": 0.19681141982060818, "grad_norm": 1.349911093711853, "learning_rate": 1.8576560311741194e-05, "loss": 0.5764, "step": 7197 }, { "epoch": 0.1968387661343251, "grad_norm": 1.7438991069793701, "learning_rate": 1.8576104827929363e-05, "loss": 0.4242, "step": 7198 }, { "epoch": 0.196866112448042, "grad_norm": 1.4434679746627808, "learning_rate": 1.8575649276840402e-05, "loss": 0.5479, "step": 7199 }, { "epoch": 0.19689345876175893, "grad_norm": 1.353610634803772, "learning_rate": 1.8575193658477886e-05, "loss": 0.6073, "step": 7200 }, { "epoch": 0.19692080507547582, "grad_norm": 1.3734130859375, "learning_rate": 1.8574737972845392e-05, "loss": 0.5377, "step": 7201 }, { "epoch": 0.19694815138919275, "grad_norm": 1.8459738492965698, "learning_rate": 1.8574282219946487e-05, "loss": 0.532, "step": 7202 }, { "epoch": 0.19697549770290965, "grad_norm": 1.3476654291152954, "learning_rate": 1.8573826399784755e-05, "loss": 0.6078, "step": 7203 }, { "epoch": 0.19700284401662657, "grad_norm": 1.4066656827926636, "learning_rate": 1.8573370512363764e-05, "loss": 0.589, "step": 7204 }, { "epoch": 0.19703019033034347, "grad_norm": 1.4093081951141357, "learning_rate": 1.8572914557687097e-05, "loss": 0.567, "step": 7205 }, { "epoch": 0.1970575366440604, "grad_norm": 1.8505064249038696, "learning_rate": 1.8572458535758328e-05, "loss": 0.9266, "step": 7206 }, { "epoch": 0.1970848829577773, "grad_norm": 1.5322672128677368, "learning_rate": 1.8572002446581035e-05, "loss": 0.5609, "step": 7207 }, { "epoch": 0.19711222927149422, "grad_norm": 1.5000826120376587, "learning_rate": 1.857154629015879e-05, "loss": 0.5825, "step": 7208 }, { "epoch": 0.1971395755852111, "grad_norm": 1.2386724948883057, "learning_rate": 1.857109006649518e-05, "loss": 0.5328, "step": 7209 }, { "epoch": 0.19716692189892804, "grad_norm": 3.597926378250122, "learning_rate": 1.8570633775593784e-05, "loss": 0.8951, "step": 7210 }, { "epoch": 0.19719426821264494, "grad_norm": 1.3512111902236938, "learning_rate": 1.8570177417458173e-05, "loss": 0.5416, "step": 7211 }, { "epoch": 0.19722161452636186, "grad_norm": 1.2517273426055908, "learning_rate": 1.8569720992091935e-05, "loss": 0.4172, "step": 7212 }, { "epoch": 0.19724896084007876, "grad_norm": 1.3771929740905762, "learning_rate": 1.8569264499498646e-05, "loss": 0.6084, "step": 7213 }, { "epoch": 0.19727630715379565, "grad_norm": 1.4028151035308838, "learning_rate": 1.856880793968189e-05, "loss": 0.5377, "step": 7214 }, { "epoch": 0.19730365346751258, "grad_norm": 3.4671549797058105, "learning_rate": 1.8568351312645244e-05, "loss": 0.5707, "step": 7215 }, { "epoch": 0.19733099978122948, "grad_norm": 1.5413254499435425, "learning_rate": 1.8567894618392296e-05, "loss": 0.5601, "step": 7216 }, { "epoch": 0.1973583460949464, "grad_norm": 1.7694058418273926, "learning_rate": 1.8567437856926627e-05, "loss": 0.5857, "step": 7217 }, { "epoch": 0.1973856924086633, "grad_norm": 1.355976939201355, "learning_rate": 1.8566981028251817e-05, "loss": 0.5606, "step": 7218 }, { "epoch": 0.19741303872238022, "grad_norm": 1.7002794742584229, "learning_rate": 1.8566524132371454e-05, "loss": 0.5251, "step": 7219 }, { "epoch": 0.19744038503609712, "grad_norm": 2.2042529582977295, "learning_rate": 1.8566067169289118e-05, "loss": 0.6515, "step": 7220 }, { "epoch": 0.19746773134981405, "grad_norm": 1.8036065101623535, "learning_rate": 1.8565610139008394e-05, "loss": 0.5619, "step": 7221 }, { "epoch": 0.19749507766353094, "grad_norm": 1.4523707628250122, "learning_rate": 1.8565153041532873e-05, "loss": 0.5788, "step": 7222 }, { "epoch": 0.19752242397724787, "grad_norm": 1.6368244886398315, "learning_rate": 1.8564695876866134e-05, "loss": 0.5878, "step": 7223 }, { "epoch": 0.19754977029096477, "grad_norm": 1.3972827196121216, "learning_rate": 1.8564238645011766e-05, "loss": 0.5559, "step": 7224 }, { "epoch": 0.1975771166046817, "grad_norm": 1.6328359842300415, "learning_rate": 1.856378134597336e-05, "loss": 0.5393, "step": 7225 }, { "epoch": 0.1976044629183986, "grad_norm": 1.353759765625, "learning_rate": 1.8563323979754494e-05, "loss": 0.6027, "step": 7226 }, { "epoch": 0.1976318092321155, "grad_norm": 1.64047110080719, "learning_rate": 1.8562866546358767e-05, "loss": 0.5373, "step": 7227 }, { "epoch": 0.1976591555458324, "grad_norm": 3.411599636077881, "learning_rate": 1.8562409045789756e-05, "loss": 0.9251, "step": 7228 }, { "epoch": 0.19768650185954934, "grad_norm": 1.599387764930725, "learning_rate": 1.856195147805106e-05, "loss": 0.5738, "step": 7229 }, { "epoch": 0.19771384817326623, "grad_norm": 1.4623064994812012, "learning_rate": 1.856149384314626e-05, "loss": 0.4385, "step": 7230 }, { "epoch": 0.19774119448698316, "grad_norm": 1.6530479192733765, "learning_rate": 1.8561036141078954e-05, "loss": 0.5533, "step": 7231 }, { "epoch": 0.19776854080070005, "grad_norm": 1.477645993232727, "learning_rate": 1.8560578371852725e-05, "loss": 0.5636, "step": 7232 }, { "epoch": 0.19779588711441698, "grad_norm": 2.0933449268341064, "learning_rate": 1.8560120535471173e-05, "loss": 0.8877, "step": 7233 }, { "epoch": 0.19782323342813388, "grad_norm": 1.528394341468811, "learning_rate": 1.8559662631937882e-05, "loss": 0.5587, "step": 7234 }, { "epoch": 0.1978505797418508, "grad_norm": 1.4896321296691895, "learning_rate": 1.8559204661256448e-05, "loss": 0.8791, "step": 7235 }, { "epoch": 0.1978779260555677, "grad_norm": 1.4602445363998413, "learning_rate": 1.855874662343046e-05, "loss": 0.5597, "step": 7236 }, { "epoch": 0.19790527236928462, "grad_norm": 1.4885870218276978, "learning_rate": 1.8558288518463512e-05, "loss": 0.5858, "step": 7237 }, { "epoch": 0.19793261868300152, "grad_norm": 1.5290812253952026, "learning_rate": 1.85578303463592e-05, "loss": 0.5643, "step": 7238 }, { "epoch": 0.19795996499671845, "grad_norm": 1.650600790977478, "learning_rate": 1.855737210712112e-05, "loss": 0.5491, "step": 7239 }, { "epoch": 0.19798731131043534, "grad_norm": 1.3178218603134155, "learning_rate": 1.8556913800752863e-05, "loss": 0.5726, "step": 7240 }, { "epoch": 0.19801465762415227, "grad_norm": 1.4798945188522339, "learning_rate": 1.8556455427258023e-05, "loss": 0.6024, "step": 7241 }, { "epoch": 0.19804200393786917, "grad_norm": 1.654549241065979, "learning_rate": 1.8555996986640198e-05, "loss": 0.5586, "step": 7242 }, { "epoch": 0.1980693502515861, "grad_norm": 1.8679767847061157, "learning_rate": 1.855553847890299e-05, "loss": 0.528, "step": 7243 }, { "epoch": 0.198096696565303, "grad_norm": 1.2894668579101562, "learning_rate": 1.8555079904049985e-05, "loss": 0.5846, "step": 7244 }, { "epoch": 0.1981240428790199, "grad_norm": 1.5268203020095825, "learning_rate": 1.8554621262084785e-05, "loss": 0.5747, "step": 7245 }, { "epoch": 0.1981513891927368, "grad_norm": 1.4703376293182373, "learning_rate": 1.8554162553010995e-05, "loss": 0.5209, "step": 7246 }, { "epoch": 0.19817873550645373, "grad_norm": 1.4169683456420898, "learning_rate": 1.85537037768322e-05, "loss": 0.5743, "step": 7247 }, { "epoch": 0.19820608182017063, "grad_norm": 1.931580901145935, "learning_rate": 1.8553244933552014e-05, "loss": 0.9502, "step": 7248 }, { "epoch": 0.19823342813388756, "grad_norm": 1.3198262453079224, "learning_rate": 1.855278602317402e-05, "loss": 0.5115, "step": 7249 }, { "epoch": 0.19826077444760445, "grad_norm": 1.4954723119735718, "learning_rate": 1.855232704570183e-05, "loss": 0.5529, "step": 7250 }, { "epoch": 0.19828812076132138, "grad_norm": 1.5357701778411865, "learning_rate": 1.8551868001139043e-05, "loss": 0.5505, "step": 7251 }, { "epoch": 0.19831546707503828, "grad_norm": 1.455759048461914, "learning_rate": 1.8551408889489256e-05, "loss": 0.5675, "step": 7252 }, { "epoch": 0.1983428133887552, "grad_norm": 1.8217072486877441, "learning_rate": 1.8550949710756076e-05, "loss": 0.5594, "step": 7253 }, { "epoch": 0.1983701597024721, "grad_norm": 1.3726251125335693, "learning_rate": 1.85504904649431e-05, "loss": 0.5785, "step": 7254 }, { "epoch": 0.19839750601618902, "grad_norm": 1.6377345323562622, "learning_rate": 1.8550031152053932e-05, "loss": 0.5607, "step": 7255 }, { "epoch": 0.19842485232990592, "grad_norm": 1.8962996006011963, "learning_rate": 1.8549571772092175e-05, "loss": 0.603, "step": 7256 }, { "epoch": 0.19845219864362285, "grad_norm": 1.464226245880127, "learning_rate": 1.8549112325061434e-05, "loss": 0.4484, "step": 7257 }, { "epoch": 0.19847954495733974, "grad_norm": 1.5775336027145386, "learning_rate": 1.8548652810965315e-05, "loss": 0.5686, "step": 7258 }, { "epoch": 0.19850689127105667, "grad_norm": 1.6317845582962036, "learning_rate": 1.854819322980742e-05, "loss": 0.5612, "step": 7259 }, { "epoch": 0.19853423758477357, "grad_norm": 1.643580675125122, "learning_rate": 1.854773358159135e-05, "loss": 0.5972, "step": 7260 }, { "epoch": 0.1985615838984905, "grad_norm": 1.370698094367981, "learning_rate": 1.854727386632072e-05, "loss": 0.5394, "step": 7261 }, { "epoch": 0.1985889302122074, "grad_norm": 1.8137673139572144, "learning_rate": 1.8546814083999134e-05, "loss": 0.6442, "step": 7262 }, { "epoch": 0.1986162765259243, "grad_norm": 1.5658379793167114, "learning_rate": 1.8546354234630197e-05, "loss": 0.5615, "step": 7263 }, { "epoch": 0.1986436228396412, "grad_norm": 1.6414836645126343, "learning_rate": 1.854589431821751e-05, "loss": 0.5962, "step": 7264 }, { "epoch": 0.19867096915335813, "grad_norm": 3.744452476501465, "learning_rate": 1.854543433476469e-05, "loss": 0.9395, "step": 7265 }, { "epoch": 0.19869831546707503, "grad_norm": 1.567789912223816, "learning_rate": 1.8544974284275345e-05, "loss": 0.4639, "step": 7266 }, { "epoch": 0.19872566178079196, "grad_norm": 1.93080472946167, "learning_rate": 1.854451416675308e-05, "loss": 0.5513, "step": 7267 }, { "epoch": 0.19875300809450885, "grad_norm": 1.6960209608078003, "learning_rate": 1.8544053982201502e-05, "loss": 0.5826, "step": 7268 }, { "epoch": 0.19878035440822578, "grad_norm": 1.5873260498046875, "learning_rate": 1.854359373062423e-05, "loss": 0.5731, "step": 7269 }, { "epoch": 0.19880770072194268, "grad_norm": 1.4880805015563965, "learning_rate": 1.8543133412024868e-05, "loss": 0.5584, "step": 7270 }, { "epoch": 0.1988350470356596, "grad_norm": 1.6706032752990723, "learning_rate": 1.854267302640703e-05, "loss": 0.4195, "step": 7271 }, { "epoch": 0.1988623933493765, "grad_norm": 1.4738234281539917, "learning_rate": 1.8542212573774324e-05, "loss": 0.5686, "step": 7272 }, { "epoch": 0.19888973966309342, "grad_norm": 1.7680696249008179, "learning_rate": 1.8541752054130363e-05, "loss": 0.6087, "step": 7273 }, { "epoch": 0.19891708597681032, "grad_norm": 1.5337671041488647, "learning_rate": 1.8541291467478765e-05, "loss": 0.586, "step": 7274 }, { "epoch": 0.19894443229052725, "grad_norm": 1.4428892135620117, "learning_rate": 1.8540830813823136e-05, "loss": 0.5708, "step": 7275 }, { "epoch": 0.19897177860424414, "grad_norm": 1.7726047039031982, "learning_rate": 1.8540370093167093e-05, "loss": 0.4208, "step": 7276 }, { "epoch": 0.19899912491796107, "grad_norm": 1.4108902215957642, "learning_rate": 1.853990930551425e-05, "loss": 0.9196, "step": 7277 }, { "epoch": 0.19902647123167796, "grad_norm": 3.5414938926696777, "learning_rate": 1.8539448450868223e-05, "loss": 0.9336, "step": 7278 }, { "epoch": 0.1990538175453949, "grad_norm": 2.5706188678741455, "learning_rate": 1.8538987529232627e-05, "loss": 0.5622, "step": 7279 }, { "epoch": 0.1990811638591118, "grad_norm": 1.5314970016479492, "learning_rate": 1.8538526540611077e-05, "loss": 0.5794, "step": 7280 }, { "epoch": 0.1991085101728287, "grad_norm": 1.96547269821167, "learning_rate": 1.853806548500719e-05, "loss": 0.5787, "step": 7281 }, { "epoch": 0.1991358564865456, "grad_norm": 1.3828288316726685, "learning_rate": 1.853760436242458e-05, "loss": 0.8827, "step": 7282 }, { "epoch": 0.19916320280026253, "grad_norm": 2.243389368057251, "learning_rate": 1.8537143172866865e-05, "loss": 0.591, "step": 7283 }, { "epoch": 0.19919054911397943, "grad_norm": 1.506281852722168, "learning_rate": 1.8536681916337667e-05, "loss": 0.5814, "step": 7284 }, { "epoch": 0.19921789542769636, "grad_norm": 1.4955108165740967, "learning_rate": 1.85362205928406e-05, "loss": 0.5641, "step": 7285 }, { "epoch": 0.19924524174141325, "grad_norm": 2.5382702350616455, "learning_rate": 1.8535759202379286e-05, "loss": 0.5542, "step": 7286 }, { "epoch": 0.19927258805513018, "grad_norm": 1.3929554224014282, "learning_rate": 1.8535297744957343e-05, "loss": 0.4936, "step": 7287 }, { "epoch": 0.19929993436884708, "grad_norm": 1.4410213232040405, "learning_rate": 1.853483622057839e-05, "loss": 0.5692, "step": 7288 }, { "epoch": 0.199327280682564, "grad_norm": 1.567009449005127, "learning_rate": 1.8534374629246054e-05, "loss": 0.572, "step": 7289 }, { "epoch": 0.1993546269962809, "grad_norm": 2.2534372806549072, "learning_rate": 1.8533912970963945e-05, "loss": 0.4599, "step": 7290 }, { "epoch": 0.19938197330999782, "grad_norm": 1.3797792196273804, "learning_rate": 1.853345124573569e-05, "loss": 0.5525, "step": 7291 }, { "epoch": 0.19940931962371472, "grad_norm": 1.5800857543945312, "learning_rate": 1.8532989453564917e-05, "loss": 0.5655, "step": 7292 }, { "epoch": 0.19943666593743165, "grad_norm": 1.6000934839248657, "learning_rate": 1.8532527594455237e-05, "loss": 0.508, "step": 7293 }, { "epoch": 0.19946401225114854, "grad_norm": 2.7416622638702393, "learning_rate": 1.8532065668410283e-05, "loss": 0.4212, "step": 7294 }, { "epoch": 0.19949135856486547, "grad_norm": 1.6882323026657104, "learning_rate": 1.8531603675433673e-05, "loss": 0.5865, "step": 7295 }, { "epoch": 0.19951870487858236, "grad_norm": 1.6773780584335327, "learning_rate": 1.8531141615529036e-05, "loss": 0.9476, "step": 7296 }, { "epoch": 0.1995460511922993, "grad_norm": 4.403456211090088, "learning_rate": 1.8530679488699994e-05, "loss": 0.9392, "step": 7297 }, { "epoch": 0.1995733975060162, "grad_norm": 1.8822208642959595, "learning_rate": 1.853021729495017e-05, "loss": 0.5949, "step": 7298 }, { "epoch": 0.1996007438197331, "grad_norm": 1.7027497291564941, "learning_rate": 1.852975503428319e-05, "loss": 0.5422, "step": 7299 }, { "epoch": 0.19962809013345, "grad_norm": 2.372209310531616, "learning_rate": 1.8529292706702685e-05, "loss": 0.4357, "step": 7300 }, { "epoch": 0.19965543644716693, "grad_norm": 1.254969596862793, "learning_rate": 1.852883031221228e-05, "loss": 0.57, "step": 7301 }, { "epoch": 0.19968278276088383, "grad_norm": 1.9130362272262573, "learning_rate": 1.85283678508156e-05, "loss": 0.5159, "step": 7302 }, { "epoch": 0.19971012907460076, "grad_norm": 1.358408808708191, "learning_rate": 1.852790532251627e-05, "loss": 0.5389, "step": 7303 }, { "epoch": 0.19973747538831765, "grad_norm": 1.404523253440857, "learning_rate": 1.852744272731793e-05, "loss": 0.5417, "step": 7304 }, { "epoch": 0.19976482170203458, "grad_norm": 1.3520945310592651, "learning_rate": 1.85269800652242e-05, "loss": 0.5667, "step": 7305 }, { "epoch": 0.19979216801575148, "grad_norm": 2.1868784427642822, "learning_rate": 1.852651733623871e-05, "loss": 0.5856, "step": 7306 }, { "epoch": 0.1998195143294684, "grad_norm": 1.4430640935897827, "learning_rate": 1.8526054540365092e-05, "loss": 0.5965, "step": 7307 }, { "epoch": 0.1998468606431853, "grad_norm": 1.4585016965866089, "learning_rate": 1.852559167760697e-05, "loss": 0.5694, "step": 7308 }, { "epoch": 0.19987420695690222, "grad_norm": 1.3422025442123413, "learning_rate": 1.8525128747967985e-05, "loss": 0.5741, "step": 7309 }, { "epoch": 0.19990155327061912, "grad_norm": 1.2677724361419678, "learning_rate": 1.8524665751451767e-05, "loss": 0.561, "step": 7310 }, { "epoch": 0.19992889958433605, "grad_norm": 1.6752352714538574, "learning_rate": 1.852420268806194e-05, "loss": 0.5183, "step": 7311 }, { "epoch": 0.19995624589805294, "grad_norm": 1.4120320081710815, "learning_rate": 1.8523739557802147e-05, "loss": 0.5566, "step": 7312 }, { "epoch": 0.19998359221176987, "grad_norm": 1.6467669010162354, "learning_rate": 1.852327636067601e-05, "loss": 0.567, "step": 7313 }, { "epoch": 0.20001093852548676, "grad_norm": 1.6303858757019043, "learning_rate": 1.852281309668717e-05, "loss": 0.5732, "step": 7314 }, { "epoch": 0.20003828483920366, "grad_norm": 1.2844643592834473, "learning_rate": 1.8522349765839264e-05, "loss": 0.5352, "step": 7315 }, { "epoch": 0.2000656311529206, "grad_norm": 1.5291850566864014, "learning_rate": 1.8521886368135923e-05, "loss": 0.5562, "step": 7316 }, { "epoch": 0.20009297746663748, "grad_norm": 1.7446849346160889, "learning_rate": 1.8521422903580774e-05, "loss": 0.4609, "step": 7317 }, { "epoch": 0.2001203237803544, "grad_norm": 1.1845567226409912, "learning_rate": 1.8520959372177467e-05, "loss": 0.5305, "step": 7318 }, { "epoch": 0.2001476700940713, "grad_norm": 1.9920766353607178, "learning_rate": 1.852049577392963e-05, "loss": 0.5405, "step": 7319 }, { "epoch": 0.20017501640778823, "grad_norm": 1.513101577758789, "learning_rate": 1.85200321088409e-05, "loss": 0.578, "step": 7320 }, { "epoch": 0.20020236272150513, "grad_norm": 1.3659698963165283, "learning_rate": 1.8519568376914917e-05, "loss": 0.5587, "step": 7321 }, { "epoch": 0.20022970903522205, "grad_norm": 1.5730643272399902, "learning_rate": 1.851910457815532e-05, "loss": 0.5051, "step": 7322 }, { "epoch": 0.20025705534893895, "grad_norm": 1.2707172632217407, "learning_rate": 1.8518640712565744e-05, "loss": 0.5894, "step": 7323 }, { "epoch": 0.20028440166265588, "grad_norm": 1.6707526445388794, "learning_rate": 1.8518176780149826e-05, "loss": 0.6236, "step": 7324 }, { "epoch": 0.20031174797637277, "grad_norm": 1.6201590299606323, "learning_rate": 1.851771278091121e-05, "loss": 0.6191, "step": 7325 }, { "epoch": 0.2003390942900897, "grad_norm": 1.7621551752090454, "learning_rate": 1.8517248714853535e-05, "loss": 0.5707, "step": 7326 }, { "epoch": 0.2003664406038066, "grad_norm": 1.6150397062301636, "learning_rate": 1.851678458198044e-05, "loss": 0.573, "step": 7327 }, { "epoch": 0.20039378691752352, "grad_norm": 1.8855596780776978, "learning_rate": 1.851632038229557e-05, "loss": 0.55, "step": 7328 }, { "epoch": 0.20042113323124042, "grad_norm": 1.4220668077468872, "learning_rate": 1.851585611580256e-05, "loss": 0.6118, "step": 7329 }, { "epoch": 0.20044847954495734, "grad_norm": 1.7282356023788452, "learning_rate": 1.8515391782505053e-05, "loss": 0.5848, "step": 7330 }, { "epoch": 0.20047582585867424, "grad_norm": 1.61378812789917, "learning_rate": 1.8514927382406697e-05, "loss": 0.4467, "step": 7331 }, { "epoch": 0.20050317217239116, "grad_norm": 1.3205205202102661, "learning_rate": 1.8514462915511132e-05, "loss": 0.5517, "step": 7332 }, { "epoch": 0.20053051848610806, "grad_norm": 1.5497108697891235, "learning_rate": 1.8513998381822002e-05, "loss": 0.5987, "step": 7333 }, { "epoch": 0.200557864799825, "grad_norm": 1.5976389646530151, "learning_rate": 1.8513533781342952e-05, "loss": 0.5773, "step": 7334 }, { "epoch": 0.20058521111354188, "grad_norm": 1.985288381576538, "learning_rate": 1.851306911407762e-05, "loss": 0.5599, "step": 7335 }, { "epoch": 0.2006125574272588, "grad_norm": 1.6022711992263794, "learning_rate": 1.8512604380029664e-05, "loss": 0.589, "step": 7336 }, { "epoch": 0.2006399037409757, "grad_norm": 1.576560139656067, "learning_rate": 1.8512139579202717e-05, "loss": 0.513, "step": 7337 }, { "epoch": 0.20066725005469263, "grad_norm": 1.9814553260803223, "learning_rate": 1.8511674711600432e-05, "loss": 0.9406, "step": 7338 }, { "epoch": 0.20069459636840953, "grad_norm": 1.6753603219985962, "learning_rate": 1.8511209777226456e-05, "loss": 0.6071, "step": 7339 }, { "epoch": 0.20072194268212645, "grad_norm": 1.497586965560913, "learning_rate": 1.851074477608443e-05, "loss": 0.5377, "step": 7340 }, { "epoch": 0.20074928899584335, "grad_norm": 1.7557653188705444, "learning_rate": 1.851027970817801e-05, "loss": 0.6176, "step": 7341 }, { "epoch": 0.20077663530956028, "grad_norm": 1.408123254776001, "learning_rate": 1.8509814573510842e-05, "loss": 0.5543, "step": 7342 }, { "epoch": 0.20080398162327717, "grad_norm": 1.6308927536010742, "learning_rate": 1.8509349372086568e-05, "loss": 0.5842, "step": 7343 }, { "epoch": 0.2008313279369941, "grad_norm": 2.0938355922698975, "learning_rate": 1.850888410390885e-05, "loss": 0.5777, "step": 7344 }, { "epoch": 0.200858674250711, "grad_norm": 1.4449963569641113, "learning_rate": 1.8508418768981324e-05, "loss": 0.5495, "step": 7345 }, { "epoch": 0.20088602056442792, "grad_norm": 1.409531593322754, "learning_rate": 1.850795336730765e-05, "loss": 0.5362, "step": 7346 }, { "epoch": 0.20091336687814482, "grad_norm": 1.6195210218429565, "learning_rate": 1.850748789889148e-05, "loss": 0.5216, "step": 7347 }, { "epoch": 0.20094071319186174, "grad_norm": 1.4217678308486938, "learning_rate": 1.8507022363736457e-05, "loss": 0.5697, "step": 7348 }, { "epoch": 0.20096805950557864, "grad_norm": 1.1100729703903198, "learning_rate": 1.8506556761846237e-05, "loss": 0.5763, "step": 7349 }, { "epoch": 0.20099540581929556, "grad_norm": 1.3080905675888062, "learning_rate": 1.8506091093224478e-05, "loss": 0.4985, "step": 7350 }, { "epoch": 0.20102275213301246, "grad_norm": 1.6915117502212524, "learning_rate": 1.850562535787482e-05, "loss": 0.544, "step": 7351 }, { "epoch": 0.2010500984467294, "grad_norm": 1.8640655279159546, "learning_rate": 1.850515955580093e-05, "loss": 0.8915, "step": 7352 }, { "epoch": 0.20107744476044628, "grad_norm": 1.395470142364502, "learning_rate": 1.8504693687006462e-05, "loss": 0.5882, "step": 7353 }, { "epoch": 0.2011047910741632, "grad_norm": 1.519419550895691, "learning_rate": 1.8504227751495056e-05, "loss": 0.5973, "step": 7354 }, { "epoch": 0.2011321373878801, "grad_norm": 2.093812942504883, "learning_rate": 1.850376174927038e-05, "loss": 0.5746, "step": 7355 }, { "epoch": 0.20115948370159703, "grad_norm": 1.4079031944274902, "learning_rate": 1.850329568033609e-05, "loss": 0.5598, "step": 7356 }, { "epoch": 0.20118683001531393, "grad_norm": 1.3399450778961182, "learning_rate": 1.8502829544695834e-05, "loss": 0.5789, "step": 7357 }, { "epoch": 0.20121417632903085, "grad_norm": 1.3148177862167358, "learning_rate": 1.8502363342353275e-05, "loss": 0.5449, "step": 7358 }, { "epoch": 0.20124152264274775, "grad_norm": 1.485187292098999, "learning_rate": 1.850189707331207e-05, "loss": 0.5956, "step": 7359 }, { "epoch": 0.20126886895646467, "grad_norm": 1.5658010244369507, "learning_rate": 1.850143073757587e-05, "loss": 0.5657, "step": 7360 }, { "epoch": 0.20129621527018157, "grad_norm": 1.3769172430038452, "learning_rate": 1.850096433514834e-05, "loss": 0.5614, "step": 7361 }, { "epoch": 0.2013235615838985, "grad_norm": 2.09712290763855, "learning_rate": 1.850049786603314e-05, "loss": 0.409, "step": 7362 }, { "epoch": 0.2013509078976154, "grad_norm": 1.557281732559204, "learning_rate": 1.8500031330233925e-05, "loss": 0.6189, "step": 7363 }, { "epoch": 0.20137825421133232, "grad_norm": 1.2962480783462524, "learning_rate": 1.8499564727754357e-05, "loss": 0.5624, "step": 7364 }, { "epoch": 0.20140560052504922, "grad_norm": 1.6488083600997925, "learning_rate": 1.8499098058598097e-05, "loss": 0.4257, "step": 7365 }, { "epoch": 0.20143294683876614, "grad_norm": 1.7781972885131836, "learning_rate": 1.84986313227688e-05, "loss": 0.5541, "step": 7366 }, { "epoch": 0.20146029315248304, "grad_norm": 1.2942057847976685, "learning_rate": 1.8498164520270137e-05, "loss": 0.5472, "step": 7367 }, { "epoch": 0.20148763946619996, "grad_norm": 1.3647485971450806, "learning_rate": 1.8497697651105762e-05, "loss": 0.6025, "step": 7368 }, { "epoch": 0.20151498577991686, "grad_norm": 1.2468844652175903, "learning_rate": 1.849723071527934e-05, "loss": 0.5688, "step": 7369 }, { "epoch": 0.20154233209363379, "grad_norm": 1.5789821147918701, "learning_rate": 1.8496763712794537e-05, "loss": 0.5642, "step": 7370 }, { "epoch": 0.20156967840735068, "grad_norm": 1.5448527336120605, "learning_rate": 1.849629664365501e-05, "loss": 0.5643, "step": 7371 }, { "epoch": 0.2015970247210676, "grad_norm": 1.7405030727386475, "learning_rate": 1.849582950786443e-05, "loss": 0.9377, "step": 7372 }, { "epoch": 0.2016243710347845, "grad_norm": 1.1936886310577393, "learning_rate": 1.8495362305426456e-05, "loss": 0.5646, "step": 7373 }, { "epoch": 0.20165171734850143, "grad_norm": 1.8124967813491821, "learning_rate": 1.849489503634476e-05, "loss": 0.5461, "step": 7374 }, { "epoch": 0.20167906366221833, "grad_norm": 1.3045685291290283, "learning_rate": 1.8494427700622998e-05, "loss": 0.5604, "step": 7375 }, { "epoch": 0.20170640997593525, "grad_norm": 1.376761555671692, "learning_rate": 1.8493960298264844e-05, "loss": 0.5779, "step": 7376 }, { "epoch": 0.20173375628965215, "grad_norm": 1.3314659595489502, "learning_rate": 1.8493492829273963e-05, "loss": 0.5511, "step": 7377 }, { "epoch": 0.20176110260336907, "grad_norm": 1.6847692728042603, "learning_rate": 1.849302529365402e-05, "loss": 0.5415, "step": 7378 }, { "epoch": 0.20178844891708597, "grad_norm": 2.177203416824341, "learning_rate": 1.849255769140868e-05, "loss": 0.5848, "step": 7379 }, { "epoch": 0.2018157952308029, "grad_norm": 1.4015942811965942, "learning_rate": 1.849209002254162e-05, "loss": 0.5727, "step": 7380 }, { "epoch": 0.2018431415445198, "grad_norm": 1.395818829536438, "learning_rate": 1.8491622287056502e-05, "loss": 0.5909, "step": 7381 }, { "epoch": 0.20187048785823672, "grad_norm": 1.7852414846420288, "learning_rate": 1.8491154484956995e-05, "loss": 0.5476, "step": 7382 }, { "epoch": 0.20189783417195362, "grad_norm": 1.23698890209198, "learning_rate": 1.8490686616246773e-05, "loss": 0.5707, "step": 7383 }, { "epoch": 0.20192518048567054, "grad_norm": 1.6935733556747437, "learning_rate": 1.8490218680929502e-05, "loss": 0.5615, "step": 7384 }, { "epoch": 0.20195252679938744, "grad_norm": 1.6766413450241089, "learning_rate": 1.8489750679008858e-05, "loss": 0.9025, "step": 7385 }, { "epoch": 0.20197987311310436, "grad_norm": 1.3316214084625244, "learning_rate": 1.8489282610488507e-05, "loss": 0.5477, "step": 7386 }, { "epoch": 0.20200721942682126, "grad_norm": 2.1869113445281982, "learning_rate": 1.8488814475372122e-05, "loss": 0.6154, "step": 7387 }, { "epoch": 0.20203456574053819, "grad_norm": 1.4157023429870605, "learning_rate": 1.848834627366338e-05, "loss": 0.5798, "step": 7388 }, { "epoch": 0.20206191205425508, "grad_norm": 1.9304978847503662, "learning_rate": 1.8487878005365946e-05, "loss": 0.541, "step": 7389 }, { "epoch": 0.202089258367972, "grad_norm": 1.6437147855758667, "learning_rate": 1.8487409670483498e-05, "loss": 0.6035, "step": 7390 }, { "epoch": 0.2021166046816889, "grad_norm": 1.711826205253601, "learning_rate": 1.8486941269019713e-05, "loss": 0.6504, "step": 7391 }, { "epoch": 0.20214395099540583, "grad_norm": 1.426816701889038, "learning_rate": 1.8486472800978258e-05, "loss": 0.5645, "step": 7392 }, { "epoch": 0.20217129730912273, "grad_norm": 1.5692499876022339, "learning_rate": 1.8486004266362815e-05, "loss": 0.5611, "step": 7393 }, { "epoch": 0.20219864362283965, "grad_norm": 1.5457559823989868, "learning_rate": 1.8485535665177057e-05, "loss": 0.5692, "step": 7394 }, { "epoch": 0.20222598993655655, "grad_norm": 2.0420773029327393, "learning_rate": 1.8485066997424657e-05, "loss": 0.9046, "step": 7395 }, { "epoch": 0.20225333625027347, "grad_norm": 1.4778990745544434, "learning_rate": 1.8484598263109298e-05, "loss": 0.5689, "step": 7396 }, { "epoch": 0.20228068256399037, "grad_norm": 1.951045036315918, "learning_rate": 1.8484129462234652e-05, "loss": 0.4416, "step": 7397 }, { "epoch": 0.2023080288777073, "grad_norm": 1.6355476379394531, "learning_rate": 1.84836605948044e-05, "loss": 0.5625, "step": 7398 }, { "epoch": 0.2023353751914242, "grad_norm": 1.396986484527588, "learning_rate": 1.8483191660822215e-05, "loss": 0.5645, "step": 7399 }, { "epoch": 0.20236272150514112, "grad_norm": 1.3782620429992676, "learning_rate": 1.848272266029178e-05, "loss": 0.5957, "step": 7400 }, { "epoch": 0.20239006781885802, "grad_norm": 2.096133232116699, "learning_rate": 1.8482253593216772e-05, "loss": 0.5889, "step": 7401 }, { "epoch": 0.20241741413257494, "grad_norm": 1.3850857019424438, "learning_rate": 1.8481784459600873e-05, "loss": 0.5502, "step": 7402 }, { "epoch": 0.20244476044629184, "grad_norm": 1.6579108238220215, "learning_rate": 1.848131525944776e-05, "loss": 0.5808, "step": 7403 }, { "epoch": 0.20247210676000876, "grad_norm": 1.355080485343933, "learning_rate": 1.8480845992761118e-05, "loss": 0.5321, "step": 7404 }, { "epoch": 0.20249945307372566, "grad_norm": 1.7558411359786987, "learning_rate": 1.8480376659544625e-05, "loss": 0.5645, "step": 7405 }, { "epoch": 0.20252679938744259, "grad_norm": 1.499149203300476, "learning_rate": 1.8479907259801964e-05, "loss": 0.9021, "step": 7406 }, { "epoch": 0.20255414570115948, "grad_norm": 1.534777283668518, "learning_rate": 1.8479437793536815e-05, "loss": 0.5666, "step": 7407 }, { "epoch": 0.2025814920148764, "grad_norm": 1.6184171438217163, "learning_rate": 1.8478968260752865e-05, "loss": 0.435, "step": 7408 }, { "epoch": 0.2026088383285933, "grad_norm": 1.7592389583587646, "learning_rate": 1.8478498661453796e-05, "loss": 0.4925, "step": 7409 }, { "epoch": 0.20263618464231023, "grad_norm": 1.8821263313293457, "learning_rate": 1.8478028995643287e-05, "loss": 0.8962, "step": 7410 }, { "epoch": 0.20266353095602713, "grad_norm": 1.607363224029541, "learning_rate": 1.8477559263325032e-05, "loss": 0.3686, "step": 7411 }, { "epoch": 0.20269087726974405, "grad_norm": 1.4724977016448975, "learning_rate": 1.8477089464502707e-05, "loss": 0.5991, "step": 7412 }, { "epoch": 0.20271822358346095, "grad_norm": 1.4930486679077148, "learning_rate": 1.8476619599179998e-05, "loss": 0.5434, "step": 7413 }, { "epoch": 0.20274556989717787, "grad_norm": 1.549432396888733, "learning_rate": 1.8476149667360598e-05, "loss": 0.3921, "step": 7414 }, { "epoch": 0.20277291621089477, "grad_norm": 1.4798088073730469, "learning_rate": 1.8475679669048187e-05, "loss": 0.5581, "step": 7415 }, { "epoch": 0.20280026252461167, "grad_norm": 1.2875053882598877, "learning_rate": 1.8475209604246457e-05, "loss": 0.5807, "step": 7416 }, { "epoch": 0.2028276088383286, "grad_norm": 1.5238630771636963, "learning_rate": 1.8474739472959092e-05, "loss": 0.5715, "step": 7417 }, { "epoch": 0.2028549551520455, "grad_norm": 1.7201921939849854, "learning_rate": 1.847426927518978e-05, "loss": 0.5263, "step": 7418 }, { "epoch": 0.20288230146576242, "grad_norm": 1.7232725620269775, "learning_rate": 1.8473799010942212e-05, "loss": 0.9315, "step": 7419 }, { "epoch": 0.2029096477794793, "grad_norm": 1.4704461097717285, "learning_rate": 1.8473328680220072e-05, "loss": 0.5852, "step": 7420 }, { "epoch": 0.20293699409319624, "grad_norm": 1.4847313165664673, "learning_rate": 1.8472858283027056e-05, "loss": 0.5499, "step": 7421 }, { "epoch": 0.20296434040691314, "grad_norm": 1.4801702499389648, "learning_rate": 1.847238781936685e-05, "loss": 0.8982, "step": 7422 }, { "epoch": 0.20299168672063006, "grad_norm": 1.9153095483779907, "learning_rate": 1.8471917289243146e-05, "loss": 0.5572, "step": 7423 }, { "epoch": 0.20301903303434696, "grad_norm": 2.2984402179718018, "learning_rate": 1.8471446692659637e-05, "loss": 0.3975, "step": 7424 }, { "epoch": 0.20304637934806388, "grad_norm": 1.654788613319397, "learning_rate": 1.847097602962001e-05, "loss": 0.5076, "step": 7425 }, { "epoch": 0.20307372566178078, "grad_norm": 1.5709342956542969, "learning_rate": 1.847050530012796e-05, "loss": 0.558, "step": 7426 }, { "epoch": 0.2031010719754977, "grad_norm": 1.5139015913009644, "learning_rate": 1.847003450418718e-05, "loss": 0.5908, "step": 7427 }, { "epoch": 0.2031284182892146, "grad_norm": 2.303786516189575, "learning_rate": 1.8469563641801366e-05, "loss": 0.5586, "step": 7428 }, { "epoch": 0.20315576460293153, "grad_norm": 1.4245836734771729, "learning_rate": 1.846909271297421e-05, "loss": 0.5399, "step": 7429 }, { "epoch": 0.20318311091664842, "grad_norm": 1.7381125688552856, "learning_rate": 1.8468621717709396e-05, "loss": 0.5743, "step": 7430 }, { "epoch": 0.20321045723036535, "grad_norm": 1.4716901779174805, "learning_rate": 1.8468150656010636e-05, "loss": 0.5354, "step": 7431 }, { "epoch": 0.20323780354408225, "grad_norm": 1.3726614713668823, "learning_rate": 1.8467679527881614e-05, "loss": 0.5233, "step": 7432 }, { "epoch": 0.20326514985779917, "grad_norm": 1.6238185167312622, "learning_rate": 1.846720833332603e-05, "loss": 0.5932, "step": 7433 }, { "epoch": 0.20329249617151607, "grad_norm": 1.926706314086914, "learning_rate": 1.846673707234758e-05, "loss": 0.5238, "step": 7434 }, { "epoch": 0.203319842485233, "grad_norm": 1.5649982690811157, "learning_rate": 1.8466265744949957e-05, "loss": 0.5882, "step": 7435 }, { "epoch": 0.2033471887989499, "grad_norm": 1.3756648302078247, "learning_rate": 1.8465794351136867e-05, "loss": 0.443, "step": 7436 }, { "epoch": 0.20337453511266682, "grad_norm": 1.6048357486724854, "learning_rate": 1.8465322890912e-05, "loss": 0.5485, "step": 7437 }, { "epoch": 0.2034018814263837, "grad_norm": 1.1971709728240967, "learning_rate": 1.846485136427906e-05, "loss": 0.5298, "step": 7438 }, { "epoch": 0.20342922774010064, "grad_norm": 1.33074951171875, "learning_rate": 1.8464379771241742e-05, "loss": 0.5642, "step": 7439 }, { "epoch": 0.20345657405381753, "grad_norm": 1.3038698434829712, "learning_rate": 1.8463908111803744e-05, "loss": 0.5452, "step": 7440 }, { "epoch": 0.20348392036753446, "grad_norm": 5.494812965393066, "learning_rate": 1.846343638596877e-05, "loss": 0.5579, "step": 7441 }, { "epoch": 0.20351126668125136, "grad_norm": 5.266465663909912, "learning_rate": 1.846296459374052e-05, "loss": 0.3993, "step": 7442 }, { "epoch": 0.20353861299496828, "grad_norm": 1.5865371227264404, "learning_rate": 1.8462492735122693e-05, "loss": 0.5684, "step": 7443 }, { "epoch": 0.20356595930868518, "grad_norm": 1.3113057613372803, "learning_rate": 1.8462020810118996e-05, "loss": 0.5703, "step": 7444 }, { "epoch": 0.2035933056224021, "grad_norm": 1.6314030885696411, "learning_rate": 1.8461548818733123e-05, "loss": 0.5345, "step": 7445 }, { "epoch": 0.203620651936119, "grad_norm": 1.6467328071594238, "learning_rate": 1.8461076760968782e-05, "loss": 0.5232, "step": 7446 }, { "epoch": 0.20364799824983593, "grad_norm": 1.7146164178848267, "learning_rate": 1.8460604636829675e-05, "loss": 0.5664, "step": 7447 }, { "epoch": 0.20367534456355282, "grad_norm": 2.0380375385284424, "learning_rate": 1.846013244631951e-05, "loss": 0.519, "step": 7448 }, { "epoch": 0.20370269087726975, "grad_norm": 1.5948282480239868, "learning_rate": 1.845966018944198e-05, "loss": 0.5887, "step": 7449 }, { "epoch": 0.20373003719098665, "grad_norm": 1.8371689319610596, "learning_rate": 1.8459187866200797e-05, "loss": 0.4103, "step": 7450 }, { "epoch": 0.20375738350470357, "grad_norm": 1.547243595123291, "learning_rate": 1.845871547659967e-05, "loss": 0.9247, "step": 7451 }, { "epoch": 0.20378472981842047, "grad_norm": 1.6659049987792969, "learning_rate": 1.8458243020642298e-05, "loss": 0.5922, "step": 7452 }, { "epoch": 0.2038120761321374, "grad_norm": 1.2264893054962158, "learning_rate": 1.8457770498332393e-05, "loss": 0.5647, "step": 7453 }, { "epoch": 0.2038394224458543, "grad_norm": 1.3642553091049194, "learning_rate": 1.8457297909673655e-05, "loss": 0.5615, "step": 7454 }, { "epoch": 0.20386676875957122, "grad_norm": 2.5753068923950195, "learning_rate": 1.8456825254669798e-05, "loss": 0.4703, "step": 7455 }, { "epoch": 0.2038941150732881, "grad_norm": 1.9912325143814087, "learning_rate": 1.8456352533324524e-05, "loss": 0.5304, "step": 7456 }, { "epoch": 0.20392146138700504, "grad_norm": 1.5177221298217773, "learning_rate": 1.8455879745641545e-05, "loss": 0.5358, "step": 7457 }, { "epoch": 0.20394880770072193, "grad_norm": 1.578391432762146, "learning_rate": 1.845540689162457e-05, "loss": 0.5589, "step": 7458 }, { "epoch": 0.20397615401443886, "grad_norm": 2.4818036556243896, "learning_rate": 1.8454933971277307e-05, "loss": 0.9035, "step": 7459 }, { "epoch": 0.20400350032815576, "grad_norm": 1.4897778034210205, "learning_rate": 1.8454460984603466e-05, "loss": 0.5705, "step": 7460 }, { "epoch": 0.20403084664187268, "grad_norm": 1.658043622970581, "learning_rate": 1.845398793160676e-05, "loss": 0.5575, "step": 7461 }, { "epoch": 0.20405819295558958, "grad_norm": 2.112771511077881, "learning_rate": 1.8453514812290895e-05, "loss": 0.4833, "step": 7462 }, { "epoch": 0.2040855392693065, "grad_norm": 1.5874698162078857, "learning_rate": 1.8453041626659586e-05, "loss": 0.5577, "step": 7463 }, { "epoch": 0.2041128855830234, "grad_norm": 2.1015424728393555, "learning_rate": 1.8452568374716543e-05, "loss": 0.5875, "step": 7464 }, { "epoch": 0.20414023189674033, "grad_norm": 1.6719391345977783, "learning_rate": 1.845209505646548e-05, "loss": 0.5788, "step": 7465 }, { "epoch": 0.20416757821045722, "grad_norm": 1.7960585355758667, "learning_rate": 1.8451621671910114e-05, "loss": 0.5703, "step": 7466 }, { "epoch": 0.20419492452417415, "grad_norm": 1.5747054815292358, "learning_rate": 1.845114822105415e-05, "loss": 0.5319, "step": 7467 }, { "epoch": 0.20422227083789105, "grad_norm": 1.6535286903381348, "learning_rate": 1.845067470390131e-05, "loss": 0.5514, "step": 7468 }, { "epoch": 0.20424961715160797, "grad_norm": 1.7153774499893188, "learning_rate": 1.8450201120455303e-05, "loss": 0.5701, "step": 7469 }, { "epoch": 0.20427696346532487, "grad_norm": 1.8327614068984985, "learning_rate": 1.8449727470719846e-05, "loss": 0.5506, "step": 7470 }, { "epoch": 0.2043043097790418, "grad_norm": 1.425297498703003, "learning_rate": 1.844925375469866e-05, "loss": 0.5312, "step": 7471 }, { "epoch": 0.2043316560927587, "grad_norm": 1.4338165521621704, "learning_rate": 1.844877997239545e-05, "loss": 0.5696, "step": 7472 }, { "epoch": 0.20435900240647561, "grad_norm": 1.2985330820083618, "learning_rate": 1.844830612381394e-05, "loss": 0.5875, "step": 7473 }, { "epoch": 0.2043863487201925, "grad_norm": 1.274022102355957, "learning_rate": 1.844783220895785e-05, "loss": 0.5818, "step": 7474 }, { "epoch": 0.20441369503390944, "grad_norm": 1.3369938135147095, "learning_rate": 1.844735822783089e-05, "loss": 0.5417, "step": 7475 }, { "epoch": 0.20444104134762633, "grad_norm": 1.9782088994979858, "learning_rate": 1.8446884180436782e-05, "loss": 0.6291, "step": 7476 }, { "epoch": 0.20446838766134326, "grad_norm": 1.308192253112793, "learning_rate": 1.8446410066779245e-05, "loss": 0.5418, "step": 7477 }, { "epoch": 0.20449573397506016, "grad_norm": 1.5265341997146606, "learning_rate": 1.8445935886862e-05, "loss": 0.9022, "step": 7478 }, { "epoch": 0.20452308028877708, "grad_norm": 1.43227219581604, "learning_rate": 1.8445461640688764e-05, "loss": 0.496, "step": 7479 }, { "epoch": 0.20455042660249398, "grad_norm": 1.5869077444076538, "learning_rate": 1.8444987328263256e-05, "loss": 0.5652, "step": 7480 }, { "epoch": 0.2045777729162109, "grad_norm": 1.645729660987854, "learning_rate": 1.8444512949589198e-05, "loss": 0.6241, "step": 7481 }, { "epoch": 0.2046051192299278, "grad_norm": 1.9510160684585571, "learning_rate": 1.844403850467032e-05, "loss": 0.5743, "step": 7482 }, { "epoch": 0.20463246554364473, "grad_norm": 1.6867350339889526, "learning_rate": 1.8443563993510327e-05, "loss": 0.5166, "step": 7483 }, { "epoch": 0.20465981185736162, "grad_norm": 1.3830935955047607, "learning_rate": 1.8443089416112955e-05, "loss": 0.8557, "step": 7484 }, { "epoch": 0.20468715817107855, "grad_norm": 1.5253713130950928, "learning_rate": 1.8442614772481925e-05, "loss": 0.6234, "step": 7485 }, { "epoch": 0.20471450448479545, "grad_norm": 1.5707764625549316, "learning_rate": 1.8442140062620953e-05, "loss": 0.543, "step": 7486 }, { "epoch": 0.20474185079851237, "grad_norm": 1.5095707178115845, "learning_rate": 1.844166528653377e-05, "loss": 0.5953, "step": 7487 }, { "epoch": 0.20476919711222927, "grad_norm": 1.5212373733520508, "learning_rate": 1.84411904442241e-05, "loss": 0.5738, "step": 7488 }, { "epoch": 0.2047965434259462, "grad_norm": 1.5856084823608398, "learning_rate": 1.8440715535695666e-05, "loss": 0.5833, "step": 7489 }, { "epoch": 0.2048238897396631, "grad_norm": 1.6908010244369507, "learning_rate": 1.8440240560952194e-05, "loss": 0.5235, "step": 7490 }, { "epoch": 0.20485123605338001, "grad_norm": 1.534716248512268, "learning_rate": 1.843976551999741e-05, "loss": 0.5681, "step": 7491 }, { "epoch": 0.2048785823670969, "grad_norm": 1.6779085397720337, "learning_rate": 1.843929041283504e-05, "loss": 0.5619, "step": 7492 }, { "epoch": 0.20490592868081384, "grad_norm": 1.4065412282943726, "learning_rate": 1.8438815239468813e-05, "loss": 0.5855, "step": 7493 }, { "epoch": 0.20493327499453073, "grad_norm": 1.1746972799301147, "learning_rate": 1.8438339999902454e-05, "loss": 0.5673, "step": 7494 }, { "epoch": 0.20496062130824766, "grad_norm": 1.295904278755188, "learning_rate": 1.8437864694139692e-05, "loss": 0.5711, "step": 7495 }, { "epoch": 0.20498796762196456, "grad_norm": 1.6449495553970337, "learning_rate": 1.843738932218426e-05, "loss": 0.5339, "step": 7496 }, { "epoch": 0.20501531393568148, "grad_norm": 1.5850473642349243, "learning_rate": 1.843691388403988e-05, "loss": 0.5464, "step": 7497 }, { "epoch": 0.20504266024939838, "grad_norm": 1.3789244890213013, "learning_rate": 1.8436438379710283e-05, "loss": 0.5213, "step": 7498 }, { "epoch": 0.2050700065631153, "grad_norm": 4.452301979064941, "learning_rate": 1.8435962809199205e-05, "loss": 0.9366, "step": 7499 }, { "epoch": 0.2050973528768322, "grad_norm": 1.4558957815170288, "learning_rate": 1.843548717251037e-05, "loss": 0.5677, "step": 7500 }, { "epoch": 0.20512469919054913, "grad_norm": 1.5387663841247559, "learning_rate": 1.8435011469647513e-05, "loss": 0.527, "step": 7501 }, { "epoch": 0.20515204550426602, "grad_norm": 1.4532763957977295, "learning_rate": 1.8434535700614368e-05, "loss": 0.5397, "step": 7502 }, { "epoch": 0.20517939181798295, "grad_norm": 1.4778424501419067, "learning_rate": 1.8434059865414658e-05, "loss": 0.5282, "step": 7503 }, { "epoch": 0.20520673813169985, "grad_norm": 1.9619567394256592, "learning_rate": 1.8433583964052127e-05, "loss": 0.5601, "step": 7504 }, { "epoch": 0.20523408444541677, "grad_norm": 1.696165919303894, "learning_rate": 1.84331079965305e-05, "loss": 0.5603, "step": 7505 }, { "epoch": 0.20526143075913367, "grad_norm": 1.847296118736267, "learning_rate": 1.8432631962853517e-05, "loss": 0.5724, "step": 7506 }, { "epoch": 0.2052887770728506, "grad_norm": 1.9207593202590942, "learning_rate": 1.843215586302491e-05, "loss": 0.5867, "step": 7507 }, { "epoch": 0.2053161233865675, "grad_norm": 1.4380804300308228, "learning_rate": 1.8431679697048408e-05, "loss": 0.571, "step": 7508 }, { "epoch": 0.20534346970028441, "grad_norm": 2.04904842376709, "learning_rate": 1.8431203464927754e-05, "loss": 0.5947, "step": 7509 }, { "epoch": 0.2053708160140013, "grad_norm": 1.4730560779571533, "learning_rate": 1.8430727166666685e-05, "loss": 0.8901, "step": 7510 }, { "epoch": 0.20539816232771824, "grad_norm": 1.7373521327972412, "learning_rate": 1.8430250802268932e-05, "loss": 0.5688, "step": 7511 }, { "epoch": 0.20542550864143513, "grad_norm": 1.7014535665512085, "learning_rate": 1.842977437173823e-05, "loss": 0.5826, "step": 7512 }, { "epoch": 0.20545285495515206, "grad_norm": 1.6266932487487793, "learning_rate": 1.8429297875078325e-05, "loss": 0.5742, "step": 7513 }, { "epoch": 0.20548020126886896, "grad_norm": 1.296005368232727, "learning_rate": 1.842882131229295e-05, "loss": 0.5731, "step": 7514 }, { "epoch": 0.20550754758258588, "grad_norm": 1.4272968769073486, "learning_rate": 1.842834468338584e-05, "loss": 0.5818, "step": 7515 }, { "epoch": 0.20553489389630278, "grad_norm": 1.5720350742340088, "learning_rate": 1.842786798836074e-05, "loss": 0.5529, "step": 7516 }, { "epoch": 0.20556224021001968, "grad_norm": 2.26735258102417, "learning_rate": 1.842739122722139e-05, "loss": 0.5188, "step": 7517 }, { "epoch": 0.2055895865237366, "grad_norm": 2.2920420169830322, "learning_rate": 1.8426914399971526e-05, "loss": 0.5626, "step": 7518 }, { "epoch": 0.2056169328374535, "grad_norm": 1.7048046588897705, "learning_rate": 1.842643750661489e-05, "loss": 0.5296, "step": 7519 }, { "epoch": 0.20564427915117042, "grad_norm": 1.960253119468689, "learning_rate": 1.8425960547155223e-05, "loss": 0.5073, "step": 7520 }, { "epoch": 0.20567162546488732, "grad_norm": 1.5674233436584473, "learning_rate": 1.8425483521596267e-05, "loss": 0.6078, "step": 7521 }, { "epoch": 0.20569897177860424, "grad_norm": 1.5569871664047241, "learning_rate": 1.8425006429941764e-05, "loss": 0.55, "step": 7522 }, { "epoch": 0.20572631809232114, "grad_norm": 1.830008864402771, "learning_rate": 1.8424529272195456e-05, "loss": 0.5119, "step": 7523 }, { "epoch": 0.20575366440603807, "grad_norm": 1.5362077951431274, "learning_rate": 1.8424052048361086e-05, "loss": 0.512, "step": 7524 }, { "epoch": 0.20578101071975496, "grad_norm": 1.5917826890945435, "learning_rate": 1.84235747584424e-05, "loss": 0.5731, "step": 7525 }, { "epoch": 0.2058083570334719, "grad_norm": 1.6890532970428467, "learning_rate": 1.8423097402443143e-05, "loss": 0.5307, "step": 7526 }, { "epoch": 0.2058357033471888, "grad_norm": 1.881222128868103, "learning_rate": 1.842261998036705e-05, "loss": 0.5369, "step": 7527 }, { "epoch": 0.2058630496609057, "grad_norm": 1.7237797975540161, "learning_rate": 1.8422142492217882e-05, "loss": 0.5718, "step": 7528 }, { "epoch": 0.2058903959746226, "grad_norm": 2.370023250579834, "learning_rate": 1.842166493799937e-05, "loss": 0.9022, "step": 7529 }, { "epoch": 0.20591774228833953, "grad_norm": 1.6667523384094238, "learning_rate": 1.842118731771527e-05, "loss": 0.565, "step": 7530 }, { "epoch": 0.20594508860205643, "grad_norm": 1.8567843437194824, "learning_rate": 1.8420709631369322e-05, "loss": 0.5626, "step": 7531 }, { "epoch": 0.20597243491577336, "grad_norm": 1.7281817197799683, "learning_rate": 1.842023187896528e-05, "loss": 0.531, "step": 7532 }, { "epoch": 0.20599978122949025, "grad_norm": 1.5774602890014648, "learning_rate": 1.841975406050689e-05, "loss": 0.5287, "step": 7533 }, { "epoch": 0.20602712754320718, "grad_norm": 1.2515020370483398, "learning_rate": 1.8419276175997894e-05, "loss": 0.539, "step": 7534 }, { "epoch": 0.20605447385692408, "grad_norm": 1.326069712638855, "learning_rate": 1.841879822544205e-05, "loss": 0.5464, "step": 7535 }, { "epoch": 0.206081820170641, "grad_norm": 1.5432560443878174, "learning_rate": 1.8418320208843103e-05, "loss": 0.5537, "step": 7536 }, { "epoch": 0.2061091664843579, "grad_norm": 1.946319818496704, "learning_rate": 1.84178421262048e-05, "loss": 0.8913, "step": 7537 }, { "epoch": 0.20613651279807482, "grad_norm": 1.357567310333252, "learning_rate": 1.8417363977530895e-05, "loss": 0.5685, "step": 7538 }, { "epoch": 0.20616385911179172, "grad_norm": 1.4120876789093018, "learning_rate": 1.841688576282514e-05, "loss": 0.6186, "step": 7539 }, { "epoch": 0.20619120542550864, "grad_norm": 1.5394846200942993, "learning_rate": 1.8416407482091287e-05, "loss": 0.5553, "step": 7540 }, { "epoch": 0.20621855173922554, "grad_norm": 1.230323076248169, "learning_rate": 1.841592913533308e-05, "loss": 0.578, "step": 7541 }, { "epoch": 0.20624589805294247, "grad_norm": 1.3335272073745728, "learning_rate": 1.841545072255428e-05, "loss": 0.5635, "step": 7542 }, { "epoch": 0.20627324436665936, "grad_norm": 1.2942472696304321, "learning_rate": 1.841497224375864e-05, "loss": 0.5419, "step": 7543 }, { "epoch": 0.2063005906803763, "grad_norm": 1.786359429359436, "learning_rate": 1.841449369894991e-05, "loss": 0.3978, "step": 7544 }, { "epoch": 0.2063279369940932, "grad_norm": 1.5731457471847534, "learning_rate": 1.841401508813184e-05, "loss": 0.5766, "step": 7545 }, { "epoch": 0.2063552833078101, "grad_norm": 1.2023152112960815, "learning_rate": 1.8413536411308195e-05, "loss": 0.5638, "step": 7546 }, { "epoch": 0.206382629621527, "grad_norm": 1.6742706298828125, "learning_rate": 1.8413057668482722e-05, "loss": 0.5324, "step": 7547 }, { "epoch": 0.20640997593524393, "grad_norm": 1.356953740119934, "learning_rate": 1.8412578859659183e-05, "loss": 0.571, "step": 7548 }, { "epoch": 0.20643732224896083, "grad_norm": 1.254374623298645, "learning_rate": 1.841209998484133e-05, "loss": 0.4769, "step": 7549 }, { "epoch": 0.20646466856267776, "grad_norm": 1.4174846410751343, "learning_rate": 1.8411621044032915e-05, "loss": 0.5224, "step": 7550 }, { "epoch": 0.20649201487639465, "grad_norm": 1.3007292747497559, "learning_rate": 1.8411142037237702e-05, "loss": 0.5511, "step": 7551 }, { "epoch": 0.20651936119011158, "grad_norm": 1.5570905208587646, "learning_rate": 1.8410662964459448e-05, "loss": 0.5698, "step": 7552 }, { "epoch": 0.20654670750382847, "grad_norm": 1.4944360256195068, "learning_rate": 1.8410183825701912e-05, "loss": 0.5299, "step": 7553 }, { "epoch": 0.2065740538175454, "grad_norm": 1.720657467842102, "learning_rate": 1.8409704620968848e-05, "loss": 0.5673, "step": 7554 }, { "epoch": 0.2066014001312623, "grad_norm": 1.5850120782852173, "learning_rate": 1.8409225350264016e-05, "loss": 0.5842, "step": 7555 }, { "epoch": 0.20662874644497922, "grad_norm": 1.5162928104400635, "learning_rate": 1.840874601359118e-05, "loss": 0.5465, "step": 7556 }, { "epoch": 0.20665609275869612, "grad_norm": 1.7824903726577759, "learning_rate": 1.84082666109541e-05, "loss": 0.6233, "step": 7557 }, { "epoch": 0.20668343907241304, "grad_norm": 1.4983292818069458, "learning_rate": 1.8407787142356533e-05, "loss": 0.6049, "step": 7558 }, { "epoch": 0.20671078538612994, "grad_norm": 1.3857976198196411, "learning_rate": 1.8407307607802242e-05, "loss": 0.5663, "step": 7559 }, { "epoch": 0.20673813169984687, "grad_norm": 2.0668978691101074, "learning_rate": 1.8406828007294984e-05, "loss": 0.5298, "step": 7560 }, { "epoch": 0.20676547801356376, "grad_norm": 1.850494146347046, "learning_rate": 1.8406348340838535e-05, "loss": 0.4208, "step": 7561 }, { "epoch": 0.2067928243272807, "grad_norm": 1.3445968627929688, "learning_rate": 1.8405868608436646e-05, "loss": 0.5424, "step": 7562 }, { "epoch": 0.20682017064099759, "grad_norm": 1.6594754457473755, "learning_rate": 1.840538881009308e-05, "loss": 0.6175, "step": 7563 }, { "epoch": 0.2068475169547145, "grad_norm": 1.353251576423645, "learning_rate": 1.8404908945811608e-05, "loss": 0.5536, "step": 7564 }, { "epoch": 0.2068748632684314, "grad_norm": 1.5565141439437866, "learning_rate": 1.840442901559599e-05, "loss": 0.9241, "step": 7565 }, { "epoch": 0.20690220958214833, "grad_norm": 1.717877984046936, "learning_rate": 1.8403949019449986e-05, "loss": 0.5145, "step": 7566 }, { "epoch": 0.20692955589586523, "grad_norm": 1.6068254709243774, "learning_rate": 1.8403468957377373e-05, "loss": 0.5668, "step": 7567 }, { "epoch": 0.20695690220958216, "grad_norm": 1.9064775705337524, "learning_rate": 1.840298882938191e-05, "loss": 0.5405, "step": 7568 }, { "epoch": 0.20698424852329905, "grad_norm": 1.4244714975357056, "learning_rate": 1.8402508635467367e-05, "loss": 0.5613, "step": 7569 }, { "epoch": 0.20701159483701598, "grad_norm": 1.5902557373046875, "learning_rate": 1.8402028375637504e-05, "loss": 0.6032, "step": 7570 }, { "epoch": 0.20703894115073287, "grad_norm": 1.9969441890716553, "learning_rate": 1.8401548049896093e-05, "loss": 0.5482, "step": 7571 }, { "epoch": 0.2070662874644498, "grad_norm": 1.9607421159744263, "learning_rate": 1.8401067658246903e-05, "loss": 0.4874, "step": 7572 }, { "epoch": 0.2070936337781667, "grad_norm": 1.5065958499908447, "learning_rate": 1.8400587200693702e-05, "loss": 0.5828, "step": 7573 }, { "epoch": 0.20712098009188362, "grad_norm": 1.4795295000076294, "learning_rate": 1.840010667724026e-05, "loss": 0.5529, "step": 7574 }, { "epoch": 0.20714832640560052, "grad_norm": 1.5458904504776, "learning_rate": 1.8399626087890345e-05, "loss": 0.5989, "step": 7575 }, { "epoch": 0.20717567271931744, "grad_norm": 1.9678388833999634, "learning_rate": 1.8399145432647724e-05, "loss": 0.5181, "step": 7576 }, { "epoch": 0.20720301903303434, "grad_norm": 1.5550909042358398, "learning_rate": 1.8398664711516172e-05, "loss": 0.5742, "step": 7577 }, { "epoch": 0.20723036534675127, "grad_norm": 1.2367156744003296, "learning_rate": 1.839818392449946e-05, "loss": 0.4278, "step": 7578 }, { "epoch": 0.20725771166046816, "grad_norm": 1.519568920135498, "learning_rate": 1.839770307160136e-05, "loss": 0.5492, "step": 7579 }, { "epoch": 0.2072850579741851, "grad_norm": 1.880623459815979, "learning_rate": 1.839722215282564e-05, "loss": 0.9044, "step": 7580 }, { "epoch": 0.20731240428790199, "grad_norm": 1.589353084564209, "learning_rate": 1.8396741168176076e-05, "loss": 0.5953, "step": 7581 }, { "epoch": 0.2073397506016189, "grad_norm": 1.9732580184936523, "learning_rate": 1.839626011765644e-05, "loss": 0.5711, "step": 7582 }, { "epoch": 0.2073670969153358, "grad_norm": 1.3239645957946777, "learning_rate": 1.8395779001270506e-05, "loss": 0.5752, "step": 7583 }, { "epoch": 0.20739444322905273, "grad_norm": 1.7358571290969849, "learning_rate": 1.839529781902205e-05, "loss": 0.5539, "step": 7584 }, { "epoch": 0.20742178954276963, "grad_norm": 1.505698323249817, "learning_rate": 1.8394816570914846e-05, "loss": 0.6182, "step": 7585 }, { "epoch": 0.20744913585648656, "grad_norm": 1.6710866689682007, "learning_rate": 1.8394335256952666e-05, "loss": 0.5581, "step": 7586 }, { "epoch": 0.20747648217020345, "grad_norm": 1.2928088903427124, "learning_rate": 1.839385387713929e-05, "loss": 0.5452, "step": 7587 }, { "epoch": 0.20750382848392038, "grad_norm": 5.656646251678467, "learning_rate": 1.839337243147849e-05, "loss": 0.5863, "step": 7588 }, { "epoch": 0.20753117479763727, "grad_norm": 1.8710885047912598, "learning_rate": 1.839289091997405e-05, "loss": 0.5487, "step": 7589 }, { "epoch": 0.2075585211113542, "grad_norm": 1.9936226606369019, "learning_rate": 1.839240934262974e-05, "loss": 0.551, "step": 7590 }, { "epoch": 0.2075858674250711, "grad_norm": 1.6152559518814087, "learning_rate": 1.839192769944934e-05, "loss": 0.5737, "step": 7591 }, { "epoch": 0.20761321373878802, "grad_norm": 1.277636170387268, "learning_rate": 1.839144599043663e-05, "loss": 0.5489, "step": 7592 }, { "epoch": 0.20764056005250492, "grad_norm": 1.4956086874008179, "learning_rate": 1.839096421559539e-05, "loss": 0.4123, "step": 7593 }, { "epoch": 0.20766790636622184, "grad_norm": 5.190946102142334, "learning_rate": 1.8390482374929394e-05, "loss": 0.5449, "step": 7594 }, { "epoch": 0.20769525267993874, "grad_norm": 2.66320538520813, "learning_rate": 1.8390000468442425e-05, "loss": 0.6129, "step": 7595 }, { "epoch": 0.20772259899365567, "grad_norm": 7.052215576171875, "learning_rate": 1.8389518496138263e-05, "loss": 0.4371, "step": 7596 }, { "epoch": 0.20774994530737256, "grad_norm": 1.3743778467178345, "learning_rate": 1.838903645802069e-05, "loss": 0.5508, "step": 7597 }, { "epoch": 0.2077772916210895, "grad_norm": 1.852225422859192, "learning_rate": 1.838855435409349e-05, "loss": 0.5619, "step": 7598 }, { "epoch": 0.20780463793480639, "grad_norm": 1.9257943630218506, "learning_rate": 1.8388072184360435e-05, "loss": 0.9086, "step": 7599 }, { "epoch": 0.2078319842485233, "grad_norm": 1.6803624629974365, "learning_rate": 1.8387589948825318e-05, "loss": 0.5415, "step": 7600 }, { "epoch": 0.2078593305622402, "grad_norm": 1.2149295806884766, "learning_rate": 1.8387107647491922e-05, "loss": 0.5796, "step": 7601 }, { "epoch": 0.20788667687595713, "grad_norm": 1.6570225954055786, "learning_rate": 1.8386625280364022e-05, "loss": 0.5364, "step": 7602 }, { "epoch": 0.20791402318967403, "grad_norm": 1.420615792274475, "learning_rate": 1.8386142847445413e-05, "loss": 0.5095, "step": 7603 }, { "epoch": 0.20794136950339095, "grad_norm": 1.7926887273788452, "learning_rate": 1.8385660348739866e-05, "loss": 0.4614, "step": 7604 }, { "epoch": 0.20796871581710785, "grad_norm": 1.5556178092956543, "learning_rate": 1.838517778425118e-05, "loss": 0.5925, "step": 7605 }, { "epoch": 0.20799606213082478, "grad_norm": 1.4741817712783813, "learning_rate": 1.838469515398313e-05, "loss": 0.5984, "step": 7606 }, { "epoch": 0.20802340844454167, "grad_norm": 1.6905317306518555, "learning_rate": 1.8384212457939505e-05, "loss": 0.5361, "step": 7607 }, { "epoch": 0.2080507547582586, "grad_norm": 1.5170055627822876, "learning_rate": 1.8383729696124095e-05, "loss": 0.5581, "step": 7608 }, { "epoch": 0.2080781010719755, "grad_norm": 1.5264440774917603, "learning_rate": 1.8383246868540682e-05, "loss": 0.56, "step": 7609 }, { "epoch": 0.20810544738569242, "grad_norm": 2.6241469383239746, "learning_rate": 1.8382763975193062e-05, "loss": 0.5618, "step": 7610 }, { "epoch": 0.20813279369940932, "grad_norm": 1.5001591444015503, "learning_rate": 1.8382281016085013e-05, "loss": 0.5921, "step": 7611 }, { "epoch": 0.20816014001312624, "grad_norm": 1.5695263147354126, "learning_rate": 1.838179799122033e-05, "loss": 0.546, "step": 7612 }, { "epoch": 0.20818748632684314, "grad_norm": 1.7349286079406738, "learning_rate": 1.83813149006028e-05, "loss": 0.5659, "step": 7613 }, { "epoch": 0.20821483264056007, "grad_norm": 1.942307710647583, "learning_rate": 1.8380831744236216e-05, "loss": 0.5722, "step": 7614 }, { "epoch": 0.20824217895427696, "grad_norm": 1.859729290008545, "learning_rate": 1.8380348522124362e-05, "loss": 0.5971, "step": 7615 }, { "epoch": 0.2082695252679939, "grad_norm": 1.6308530569076538, "learning_rate": 1.8379865234271035e-05, "loss": 0.601, "step": 7616 }, { "epoch": 0.20829687158171079, "grad_norm": 2.1071126461029053, "learning_rate": 1.837938188068002e-05, "loss": 0.5969, "step": 7617 }, { "epoch": 0.2083242178954277, "grad_norm": 1.7047196626663208, "learning_rate": 1.8378898461355118e-05, "loss": 0.5844, "step": 7618 }, { "epoch": 0.2083515642091446, "grad_norm": 1.5774956941604614, "learning_rate": 1.837841497630011e-05, "loss": 0.5072, "step": 7619 }, { "epoch": 0.2083789105228615, "grad_norm": 1.4778887033462524, "learning_rate": 1.83779314255188e-05, "loss": 0.5502, "step": 7620 }, { "epoch": 0.20840625683657843, "grad_norm": 1.4366308450698853, "learning_rate": 1.837744780901497e-05, "loss": 0.5624, "step": 7621 }, { "epoch": 0.20843360315029533, "grad_norm": 2.080282211303711, "learning_rate": 1.837696412679242e-05, "loss": 0.4607, "step": 7622 }, { "epoch": 0.20846094946401225, "grad_norm": 1.6818921566009521, "learning_rate": 1.837648037885495e-05, "loss": 0.8507, "step": 7623 }, { "epoch": 0.20848829577772915, "grad_norm": 2.1020917892456055, "learning_rate": 1.8375996565206343e-05, "loss": 0.5516, "step": 7624 }, { "epoch": 0.20851564209144607, "grad_norm": 1.5719083547592163, "learning_rate": 1.83755126858504e-05, "loss": 0.5374, "step": 7625 }, { "epoch": 0.20854298840516297, "grad_norm": 1.7854363918304443, "learning_rate": 1.8375028740790922e-05, "loss": 0.5876, "step": 7626 }, { "epoch": 0.2085703347188799, "grad_norm": 1.6072031259536743, "learning_rate": 1.8374544730031695e-05, "loss": 0.5569, "step": 7627 }, { "epoch": 0.2085976810325968, "grad_norm": 1.6012943983078003, "learning_rate": 1.8374060653576526e-05, "loss": 0.4404, "step": 7628 }, { "epoch": 0.20862502734631372, "grad_norm": 1.3810997009277344, "learning_rate": 1.8373576511429204e-05, "loss": 0.5651, "step": 7629 }, { "epoch": 0.20865237366003062, "grad_norm": 1.7107696533203125, "learning_rate": 1.8373092303593532e-05, "loss": 0.5342, "step": 7630 }, { "epoch": 0.20867971997374754, "grad_norm": 1.7182111740112305, "learning_rate": 1.8372608030073308e-05, "loss": 0.5783, "step": 7631 }, { "epoch": 0.20870706628746444, "grad_norm": 1.6806951761245728, "learning_rate": 1.837212369087233e-05, "loss": 0.5745, "step": 7632 }, { "epoch": 0.20873441260118136, "grad_norm": 1.3869532346725464, "learning_rate": 1.83716392859944e-05, "loss": 0.5621, "step": 7633 }, { "epoch": 0.20876175891489826, "grad_norm": 1.6377424001693726, "learning_rate": 1.837115481544331e-05, "loss": 0.5902, "step": 7634 }, { "epoch": 0.20878910522861518, "grad_norm": 1.7430278062820435, "learning_rate": 1.837067027922287e-05, "loss": 0.5978, "step": 7635 }, { "epoch": 0.20881645154233208, "grad_norm": 1.872167706489563, "learning_rate": 1.8370185677336877e-05, "loss": 0.5742, "step": 7636 }, { "epoch": 0.208843797856049, "grad_norm": 1.5413380861282349, "learning_rate": 1.8369701009789133e-05, "loss": 0.5558, "step": 7637 }, { "epoch": 0.2088711441697659, "grad_norm": 1.3314235210418701, "learning_rate": 1.836921627658344e-05, "loss": 0.5231, "step": 7638 }, { "epoch": 0.20889849048348283, "grad_norm": 1.3900774717330933, "learning_rate": 1.83687314777236e-05, "loss": 0.5513, "step": 7639 }, { "epoch": 0.20892583679719973, "grad_norm": 1.7844418287277222, "learning_rate": 1.836824661321342e-05, "loss": 0.9151, "step": 7640 }, { "epoch": 0.20895318311091665, "grad_norm": 1.4662880897521973, "learning_rate": 1.8367761683056697e-05, "loss": 0.5616, "step": 7641 }, { "epoch": 0.20898052942463355, "grad_norm": 1.6572011709213257, "learning_rate": 1.836727668725724e-05, "loss": 0.5926, "step": 7642 }, { "epoch": 0.20900787573835047, "grad_norm": 1.6648080348968506, "learning_rate": 1.8366791625818852e-05, "loss": 0.5721, "step": 7643 }, { "epoch": 0.20903522205206737, "grad_norm": 1.4129254817962646, "learning_rate": 1.8366306498745336e-05, "loss": 0.4194, "step": 7644 }, { "epoch": 0.2090625683657843, "grad_norm": 1.6700012683868408, "learning_rate": 1.8365821306040502e-05, "loss": 0.5366, "step": 7645 }, { "epoch": 0.2090899146795012, "grad_norm": 1.292932391166687, "learning_rate": 1.8365336047708155e-05, "loss": 0.5788, "step": 7646 }, { "epoch": 0.20911726099321812, "grad_norm": 1.5909379720687866, "learning_rate": 1.83648507237521e-05, "loss": 0.5535, "step": 7647 }, { "epoch": 0.20914460730693502, "grad_norm": 5.222284317016602, "learning_rate": 1.8364365334176145e-05, "loss": 0.5625, "step": 7648 }, { "epoch": 0.20917195362065194, "grad_norm": 1.5838178396224976, "learning_rate": 1.8363879878984095e-05, "loss": 0.5673, "step": 7649 }, { "epoch": 0.20919929993436884, "grad_norm": 1.5294376611709595, "learning_rate": 1.8363394358179765e-05, "loss": 0.5807, "step": 7650 }, { "epoch": 0.20922664624808576, "grad_norm": 1.349071979522705, "learning_rate": 1.836290877176696e-05, "loss": 0.5366, "step": 7651 }, { "epoch": 0.20925399256180266, "grad_norm": 1.4187450408935547, "learning_rate": 1.8362423119749488e-05, "loss": 0.5784, "step": 7652 }, { "epoch": 0.20928133887551958, "grad_norm": 2.2289085388183594, "learning_rate": 1.836193740213116e-05, "loss": 0.5058, "step": 7653 }, { "epoch": 0.20930868518923648, "grad_norm": 1.595377802848816, "learning_rate": 1.8361451618915787e-05, "loss": 0.5502, "step": 7654 }, { "epoch": 0.2093360315029534, "grad_norm": 1.9955412149429321, "learning_rate": 1.836096577010718e-05, "loss": 0.4089, "step": 7655 }, { "epoch": 0.2093633778166703, "grad_norm": 1.5526087284088135, "learning_rate": 1.8360479855709147e-05, "loss": 0.5457, "step": 7656 }, { "epoch": 0.20939072413038723, "grad_norm": 1.804943323135376, "learning_rate": 1.8359993875725505e-05, "loss": 0.5546, "step": 7657 }, { "epoch": 0.20941807044410413, "grad_norm": 1.686305046081543, "learning_rate": 1.8359507830160063e-05, "loss": 0.5727, "step": 7658 }, { "epoch": 0.20944541675782105, "grad_norm": 4.53040885925293, "learning_rate": 1.8359021719016632e-05, "loss": 0.4077, "step": 7659 }, { "epoch": 0.20947276307153795, "grad_norm": 1.9010673761367798, "learning_rate": 1.835853554229903e-05, "loss": 0.595, "step": 7660 }, { "epoch": 0.20950010938525487, "grad_norm": 5.48769998550415, "learning_rate": 1.835804930001107e-05, "loss": 0.943, "step": 7661 }, { "epoch": 0.20952745569897177, "grad_norm": 1.4649053812026978, "learning_rate": 1.8357562992156564e-05, "loss": 0.5777, "step": 7662 }, { "epoch": 0.2095548020126887, "grad_norm": 1.626541256904602, "learning_rate": 1.8357076618739328e-05, "loss": 0.5631, "step": 7663 }, { "epoch": 0.2095821483264056, "grad_norm": 1.606208086013794, "learning_rate": 1.835659017976318e-05, "loss": 0.587, "step": 7664 }, { "epoch": 0.20960949464012252, "grad_norm": 1.3229924440383911, "learning_rate": 1.835610367523193e-05, "loss": 0.5723, "step": 7665 }, { "epoch": 0.20963684095383942, "grad_norm": 2.257256507873535, "learning_rate": 1.83556171051494e-05, "loss": 0.585, "step": 7666 }, { "epoch": 0.20966418726755634, "grad_norm": 1.4765695333480835, "learning_rate": 1.8355130469519408e-05, "loss": 0.5854, "step": 7667 }, { "epoch": 0.20969153358127324, "grad_norm": 1.531498908996582, "learning_rate": 1.8354643768345765e-05, "loss": 0.5504, "step": 7668 }, { "epoch": 0.20971887989499016, "grad_norm": 1.7780171632766724, "learning_rate": 1.8354157001632297e-05, "loss": 0.5527, "step": 7669 }, { "epoch": 0.20974622620870706, "grad_norm": 1.6355810165405273, "learning_rate": 1.8353670169382813e-05, "loss": 0.905, "step": 7670 }, { "epoch": 0.20977357252242398, "grad_norm": 1.663046956062317, "learning_rate": 1.8353183271601144e-05, "loss": 0.9176, "step": 7671 }, { "epoch": 0.20980091883614088, "grad_norm": 1.4541794061660767, "learning_rate": 1.83526963082911e-05, "loss": 0.554, "step": 7672 }, { "epoch": 0.2098282651498578, "grad_norm": 24.743982315063477, "learning_rate": 1.83522092794565e-05, "loss": 0.5232, "step": 7673 }, { "epoch": 0.2098556114635747, "grad_norm": 1.5453866720199585, "learning_rate": 1.8351722185101174e-05, "loss": 0.8965, "step": 7674 }, { "epoch": 0.20988295777729163, "grad_norm": 1.4869526624679565, "learning_rate": 1.8351235025228934e-05, "loss": 0.5292, "step": 7675 }, { "epoch": 0.20991030409100853, "grad_norm": 1.7609903812408447, "learning_rate": 1.835074779984361e-05, "loss": 0.5749, "step": 7676 }, { "epoch": 0.20993765040472545, "grad_norm": 1.6563279628753662, "learning_rate": 1.8350260508949016e-05, "loss": 0.5852, "step": 7677 }, { "epoch": 0.20996499671844235, "grad_norm": 1.344986081123352, "learning_rate": 1.8349773152548978e-05, "loss": 0.5467, "step": 7678 }, { "epoch": 0.20999234303215927, "grad_norm": 1.325162649154663, "learning_rate": 1.8349285730647323e-05, "loss": 0.5574, "step": 7679 }, { "epoch": 0.21001968934587617, "grad_norm": 1.716505765914917, "learning_rate": 1.8348798243247866e-05, "loss": 0.5339, "step": 7680 }, { "epoch": 0.2100470356595931, "grad_norm": 1.7936652898788452, "learning_rate": 1.834831069035444e-05, "loss": 0.5803, "step": 7681 }, { "epoch": 0.21007438197331, "grad_norm": 1.5954813957214355, "learning_rate": 1.834782307197087e-05, "loss": 0.5701, "step": 7682 }, { "epoch": 0.21010172828702692, "grad_norm": 1.5046732425689697, "learning_rate": 1.834733538810097e-05, "loss": 0.5567, "step": 7683 }, { "epoch": 0.21012907460074381, "grad_norm": 3.7806053161621094, "learning_rate": 1.8346847638748576e-05, "loss": 0.5136, "step": 7684 }, { "epoch": 0.21015642091446074, "grad_norm": 4.730481147766113, "learning_rate": 1.834635982391751e-05, "loss": 0.5592, "step": 7685 }, { "epoch": 0.21018376722817764, "grad_norm": 1.5798335075378418, "learning_rate": 1.83458719436116e-05, "loss": 0.5738, "step": 7686 }, { "epoch": 0.21021111354189456, "grad_norm": 2.551382303237915, "learning_rate": 1.8345383997834677e-05, "loss": 0.5765, "step": 7687 }, { "epoch": 0.21023845985561146, "grad_norm": 1.4629029035568237, "learning_rate": 1.834489598659056e-05, "loss": 0.5716, "step": 7688 }, { "epoch": 0.21026580616932838, "grad_norm": 1.7377196550369263, "learning_rate": 1.8344407909883086e-05, "loss": 0.6179, "step": 7689 }, { "epoch": 0.21029315248304528, "grad_norm": 1.3385250568389893, "learning_rate": 1.834391976771608e-05, "loss": 0.5797, "step": 7690 }, { "epoch": 0.2103204987967622, "grad_norm": 1.3573652505874634, "learning_rate": 1.834343156009337e-05, "loss": 0.5843, "step": 7691 }, { "epoch": 0.2103478451104791, "grad_norm": 2.717782497406006, "learning_rate": 1.834294328701879e-05, "loss": 0.9506, "step": 7692 }, { "epoch": 0.21037519142419603, "grad_norm": 1.4476104974746704, "learning_rate": 1.8342454948496165e-05, "loss": 0.5935, "step": 7693 }, { "epoch": 0.21040253773791293, "grad_norm": 2.224545478820801, "learning_rate": 1.834196654452933e-05, "loss": 0.5729, "step": 7694 }, { "epoch": 0.21042988405162985, "grad_norm": 1.7415651082992554, "learning_rate": 1.8341478075122116e-05, "loss": 0.4955, "step": 7695 }, { "epoch": 0.21045723036534675, "grad_norm": 1.889999508857727, "learning_rate": 1.8340989540278352e-05, "loss": 0.6154, "step": 7696 }, { "epoch": 0.21048457667906367, "grad_norm": 1.5751506090164185, "learning_rate": 1.8340500940001872e-05, "loss": 0.4326, "step": 7697 }, { "epoch": 0.21051192299278057, "grad_norm": 1.4656758308410645, "learning_rate": 1.834001227429651e-05, "loss": 0.5726, "step": 7698 }, { "epoch": 0.2105392693064975, "grad_norm": 1.3284860849380493, "learning_rate": 1.83395235431661e-05, "loss": 0.4625, "step": 7699 }, { "epoch": 0.2105666156202144, "grad_norm": 1.6317260265350342, "learning_rate": 1.8339034746614476e-05, "loss": 0.5706, "step": 7700 }, { "epoch": 0.21059396193393132, "grad_norm": 1.8104299306869507, "learning_rate": 1.8338545884645466e-05, "loss": 0.6053, "step": 7701 }, { "epoch": 0.21062130824764821, "grad_norm": 3.466071605682373, "learning_rate": 1.8338056957262914e-05, "loss": 0.9192, "step": 7702 }, { "epoch": 0.21064865456136514, "grad_norm": 1.6627134084701538, "learning_rate": 1.8337567964470652e-05, "loss": 0.5539, "step": 7703 }, { "epoch": 0.21067600087508204, "grad_norm": 1.3779197931289673, "learning_rate": 1.8337078906272515e-05, "loss": 0.5627, "step": 7704 }, { "epoch": 0.21070334718879896, "grad_norm": 2.102480173110962, "learning_rate": 1.833658978267234e-05, "loss": 0.5665, "step": 7705 }, { "epoch": 0.21073069350251586, "grad_norm": 1.744313359260559, "learning_rate": 1.8336100593673967e-05, "loss": 0.5639, "step": 7706 }, { "epoch": 0.21075803981623278, "grad_norm": 1.7830586433410645, "learning_rate": 1.8335611339281227e-05, "loss": 0.5956, "step": 7707 }, { "epoch": 0.21078538612994968, "grad_norm": 2.7035410404205322, "learning_rate": 1.8335122019497963e-05, "loss": 0.4528, "step": 7708 }, { "epoch": 0.2108127324436666, "grad_norm": 1.5840644836425781, "learning_rate": 1.8334632634328012e-05, "loss": 0.5654, "step": 7709 }, { "epoch": 0.2108400787573835, "grad_norm": 1.5099931955337524, "learning_rate": 1.8334143183775215e-05, "loss": 0.4572, "step": 7710 }, { "epoch": 0.21086742507110043, "grad_norm": 1.7340610027313232, "learning_rate": 1.833365366784341e-05, "loss": 0.6194, "step": 7711 }, { "epoch": 0.21089477138481733, "grad_norm": 1.4622652530670166, "learning_rate": 1.8333164086536434e-05, "loss": 0.5996, "step": 7712 }, { "epoch": 0.21092211769853425, "grad_norm": 1.480705976486206, "learning_rate": 1.8332674439858135e-05, "loss": 0.5727, "step": 7713 }, { "epoch": 0.21094946401225115, "grad_norm": 1.2825032472610474, "learning_rate": 1.833218472781235e-05, "loss": 0.5578, "step": 7714 }, { "epoch": 0.21097681032596807, "grad_norm": 1.3532955646514893, "learning_rate": 1.833169495040292e-05, "loss": 0.5868, "step": 7715 }, { "epoch": 0.21100415663968497, "grad_norm": 1.519748568534851, "learning_rate": 1.8331205107633685e-05, "loss": 0.5906, "step": 7716 }, { "epoch": 0.2110315029534019, "grad_norm": 1.155914306640625, "learning_rate": 1.8330715199508492e-05, "loss": 0.5466, "step": 7717 }, { "epoch": 0.2110588492671188, "grad_norm": 1.2986048460006714, "learning_rate": 1.8330225226031184e-05, "loss": 0.5599, "step": 7718 }, { "epoch": 0.21108619558083572, "grad_norm": 1.4570411443710327, "learning_rate": 1.8329735187205605e-05, "loss": 0.5553, "step": 7719 }, { "epoch": 0.21111354189455261, "grad_norm": 1.6249287128448486, "learning_rate": 1.8329245083035592e-05, "loss": 0.9048, "step": 7720 }, { "epoch": 0.2111408882082695, "grad_norm": 1.2899192571640015, "learning_rate": 1.8328754913525004e-05, "loss": 0.5551, "step": 7721 }, { "epoch": 0.21116823452198644, "grad_norm": 3.646144390106201, "learning_rate": 1.8328264678677668e-05, "loss": 0.4168, "step": 7722 }, { "epoch": 0.21119558083570333, "grad_norm": 1.7973003387451172, "learning_rate": 1.8327774378497445e-05, "loss": 0.5503, "step": 7723 }, { "epoch": 0.21122292714942026, "grad_norm": 1.6857284307479858, "learning_rate": 1.8327284012988172e-05, "loss": 0.5718, "step": 7724 }, { "epoch": 0.21125027346313716, "grad_norm": 2.200040340423584, "learning_rate": 1.8326793582153706e-05, "loss": 0.6012, "step": 7725 }, { "epoch": 0.21127761977685408, "grad_norm": 2.857302188873291, "learning_rate": 1.8326303085997882e-05, "loss": 0.9203, "step": 7726 }, { "epoch": 0.21130496609057098, "grad_norm": 2.012179136276245, "learning_rate": 1.8325812524524556e-05, "loss": 0.5569, "step": 7727 }, { "epoch": 0.2113323124042879, "grad_norm": 3.20878267288208, "learning_rate": 1.8325321897737572e-05, "loss": 0.6325, "step": 7728 }, { "epoch": 0.2113596587180048, "grad_norm": 1.5593453645706177, "learning_rate": 1.8324831205640785e-05, "loss": 0.6057, "step": 7729 }, { "epoch": 0.21138700503172173, "grad_norm": 1.4121124744415283, "learning_rate": 1.8324340448238034e-05, "loss": 0.5714, "step": 7730 }, { "epoch": 0.21141435134543862, "grad_norm": 1.635678768157959, "learning_rate": 1.8323849625533177e-05, "loss": 0.5895, "step": 7731 }, { "epoch": 0.21144169765915555, "grad_norm": 1.5610681772232056, "learning_rate": 1.8323358737530063e-05, "loss": 0.6024, "step": 7732 }, { "epoch": 0.21146904397287244, "grad_norm": 8.271890640258789, "learning_rate": 1.8322867784232542e-05, "loss": 0.5285, "step": 7733 }, { "epoch": 0.21149639028658937, "grad_norm": 1.8535584211349487, "learning_rate": 1.8322376765644465e-05, "loss": 0.5807, "step": 7734 }, { "epoch": 0.21152373660030627, "grad_norm": 1.781293272972107, "learning_rate": 1.832188568176968e-05, "loss": 0.4629, "step": 7735 }, { "epoch": 0.2115510829140232, "grad_norm": 1.6933715343475342, "learning_rate": 1.8321394532612055e-05, "loss": 0.9479, "step": 7736 }, { "epoch": 0.2115784292277401, "grad_norm": 3.6829049587249756, "learning_rate": 1.8320903318175425e-05, "loss": 0.4189, "step": 7737 }, { "epoch": 0.21160577554145701, "grad_norm": 1.6720515489578247, "learning_rate": 1.832041203846365e-05, "loss": 0.5776, "step": 7738 }, { "epoch": 0.2116331218551739, "grad_norm": 1.7597241401672363, "learning_rate": 1.831992069348058e-05, "loss": 0.616, "step": 7739 }, { "epoch": 0.21166046816889084, "grad_norm": 1.555696964263916, "learning_rate": 1.8319429283230083e-05, "loss": 0.5527, "step": 7740 }, { "epoch": 0.21168781448260773, "grad_norm": 1.4329339265823364, "learning_rate": 1.8318937807716e-05, "loss": 0.5602, "step": 7741 }, { "epoch": 0.21171516079632466, "grad_norm": 2.016580581665039, "learning_rate": 1.8318446266942186e-05, "loss": 0.5838, "step": 7742 }, { "epoch": 0.21174250711004156, "grad_norm": 2.5455353260040283, "learning_rate": 1.8317954660912508e-05, "loss": 0.59, "step": 7743 }, { "epoch": 0.21176985342375848, "grad_norm": 3.360264778137207, "learning_rate": 1.8317462989630817e-05, "loss": 0.552, "step": 7744 }, { "epoch": 0.21179719973747538, "grad_norm": 1.7207026481628418, "learning_rate": 1.8316971253100968e-05, "loss": 0.5877, "step": 7745 }, { "epoch": 0.2118245460511923, "grad_norm": 1.7126646041870117, "learning_rate": 1.8316479451326817e-05, "loss": 0.9159, "step": 7746 }, { "epoch": 0.2118518923649092, "grad_norm": 2.031205415725708, "learning_rate": 1.8315987584312228e-05, "loss": 0.5916, "step": 7747 }, { "epoch": 0.21187923867862613, "grad_norm": 2.2245988845825195, "learning_rate": 1.831549565206106e-05, "loss": 0.5933, "step": 7748 }, { "epoch": 0.21190658499234302, "grad_norm": 1.7508900165557861, "learning_rate": 1.8315003654577166e-05, "loss": 0.5613, "step": 7749 }, { "epoch": 0.21193393130605995, "grad_norm": 1.7006205320358276, "learning_rate": 1.8314511591864406e-05, "loss": 0.522, "step": 7750 }, { "epoch": 0.21196127761977684, "grad_norm": 1.579541802406311, "learning_rate": 1.8314019463926643e-05, "loss": 0.6022, "step": 7751 }, { "epoch": 0.21198862393349377, "grad_norm": 1.7047699689865112, "learning_rate": 1.831352727076774e-05, "loss": 0.5499, "step": 7752 }, { "epoch": 0.21201597024721067, "grad_norm": 1.8484086990356445, "learning_rate": 1.8313035012391555e-05, "loss": 0.5894, "step": 7753 }, { "epoch": 0.2120433165609276, "grad_norm": 1.5886479616165161, "learning_rate": 1.8312542688801947e-05, "loss": 0.8907, "step": 7754 }, { "epoch": 0.2120706628746445, "grad_norm": 2.7209079265594482, "learning_rate": 1.8312050300002787e-05, "loss": 0.6017, "step": 7755 }, { "epoch": 0.2120980091883614, "grad_norm": 1.3066903352737427, "learning_rate": 1.8311557845997926e-05, "loss": 0.5627, "step": 7756 }, { "epoch": 0.2121253555020783, "grad_norm": 2.0965733528137207, "learning_rate": 1.8311065326791233e-05, "loss": 0.5471, "step": 7757 }, { "epoch": 0.21215270181579524, "grad_norm": 1.3664515018463135, "learning_rate": 1.831057274238657e-05, "loss": 0.6154, "step": 7758 }, { "epoch": 0.21218004812951213, "grad_norm": 1.9672929048538208, "learning_rate": 1.831008009278781e-05, "loss": 0.6139, "step": 7759 }, { "epoch": 0.21220739444322906, "grad_norm": 1.5603647232055664, "learning_rate": 1.8309587377998803e-05, "loss": 0.9168, "step": 7760 }, { "epoch": 0.21223474075694596, "grad_norm": 1.5621000528335571, "learning_rate": 1.8309094598023422e-05, "loss": 0.5368, "step": 7761 }, { "epoch": 0.21226208707066288, "grad_norm": 1.4946658611297607, "learning_rate": 1.8308601752865535e-05, "loss": 0.5942, "step": 7762 }, { "epoch": 0.21228943338437978, "grad_norm": 1.7787061929702759, "learning_rate": 1.8308108842529006e-05, "loss": 0.5468, "step": 7763 }, { "epoch": 0.2123167796980967, "grad_norm": 1.6397353410720825, "learning_rate": 1.83076158670177e-05, "loss": 0.5597, "step": 7764 }, { "epoch": 0.2123441260118136, "grad_norm": 2.4575071334838867, "learning_rate": 1.8307122826335484e-05, "loss": 0.888, "step": 7765 }, { "epoch": 0.21237147232553052, "grad_norm": 1.5511008501052856, "learning_rate": 1.830662972048623e-05, "loss": 0.6212, "step": 7766 }, { "epoch": 0.21239881863924742, "grad_norm": 1.9200907945632935, "learning_rate": 1.83061365494738e-05, "loss": 0.5484, "step": 7767 }, { "epoch": 0.21242616495296435, "grad_norm": 1.4007712602615356, "learning_rate": 1.8305643313302072e-05, "loss": 0.8882, "step": 7768 }, { "epoch": 0.21245351126668124, "grad_norm": 2.044525623321533, "learning_rate": 1.8305150011974903e-05, "loss": 0.5611, "step": 7769 }, { "epoch": 0.21248085758039817, "grad_norm": 2.9472553730010986, "learning_rate": 1.8304656645496176e-05, "loss": 0.4651, "step": 7770 }, { "epoch": 0.21250820389411507, "grad_norm": 2.203831911087036, "learning_rate": 1.830416321386975e-05, "loss": 0.5819, "step": 7771 }, { "epoch": 0.212535550207832, "grad_norm": 1.7148460149765015, "learning_rate": 1.83036697170995e-05, "loss": 0.5405, "step": 7772 }, { "epoch": 0.2125628965215489, "grad_norm": 1.6750825643539429, "learning_rate": 1.8303176155189303e-05, "loss": 0.5536, "step": 7773 }, { "epoch": 0.2125902428352658, "grad_norm": 1.5444146394729614, "learning_rate": 1.8302682528143018e-05, "loss": 0.5558, "step": 7774 }, { "epoch": 0.2126175891489827, "grad_norm": 1.7350116968154907, "learning_rate": 1.830218883596453e-05, "loss": 0.4228, "step": 7775 }, { "epoch": 0.21264493546269964, "grad_norm": 1.8014053106307983, "learning_rate": 1.830169507865771e-05, "loss": 0.4978, "step": 7776 }, { "epoch": 0.21267228177641653, "grad_norm": 1.8050767183303833, "learning_rate": 1.8301201256226423e-05, "loss": 0.5677, "step": 7777 }, { "epoch": 0.21269962809013346, "grad_norm": 1.873618721961975, "learning_rate": 1.830070736867455e-05, "loss": 0.5757, "step": 7778 }, { "epoch": 0.21272697440385036, "grad_norm": 1.3644773960113525, "learning_rate": 1.8300213416005965e-05, "loss": 0.5131, "step": 7779 }, { "epoch": 0.21275432071756728, "grad_norm": 1.554831624031067, "learning_rate": 1.8299719398224543e-05, "loss": 0.5568, "step": 7780 }, { "epoch": 0.21278166703128418, "grad_norm": 1.8125869035720825, "learning_rate": 1.8299225315334155e-05, "loss": 0.5744, "step": 7781 }, { "epoch": 0.2128090133450011, "grad_norm": 1.6713792085647583, "learning_rate": 1.829873116733868e-05, "loss": 0.5576, "step": 7782 }, { "epoch": 0.212836359658718, "grad_norm": 2.6563007831573486, "learning_rate": 1.8298236954241994e-05, "loss": 0.5364, "step": 7783 }, { "epoch": 0.21286370597243492, "grad_norm": 1.869011402130127, "learning_rate": 1.8297742676047977e-05, "loss": 0.5903, "step": 7784 }, { "epoch": 0.21289105228615182, "grad_norm": 2.873173236846924, "learning_rate": 1.82972483327605e-05, "loss": 0.4097, "step": 7785 }, { "epoch": 0.21291839859986875, "grad_norm": 3.226799726486206, "learning_rate": 1.829675392438345e-05, "loss": 0.5421, "step": 7786 }, { "epoch": 0.21294574491358564, "grad_norm": 1.7551593780517578, "learning_rate": 1.8296259450920697e-05, "loss": 0.4256, "step": 7787 }, { "epoch": 0.21297309122730257, "grad_norm": 2.2698919773101807, "learning_rate": 1.8295764912376125e-05, "loss": 0.5531, "step": 7788 }, { "epoch": 0.21300043754101947, "grad_norm": 2.379828691482544, "learning_rate": 1.829527030875361e-05, "loss": 0.6077, "step": 7789 }, { "epoch": 0.2130277838547364, "grad_norm": 2.1122372150421143, "learning_rate": 1.829477564005704e-05, "loss": 0.6015, "step": 7790 }, { "epoch": 0.2130551301684533, "grad_norm": 1.5618298053741455, "learning_rate": 1.8294280906290282e-05, "loss": 0.6145, "step": 7791 }, { "epoch": 0.2130824764821702, "grad_norm": 2.0858278274536133, "learning_rate": 1.8293786107457228e-05, "loss": 0.5597, "step": 7792 }, { "epoch": 0.2131098227958871, "grad_norm": 1.7444703578948975, "learning_rate": 1.8293291243561754e-05, "loss": 0.5891, "step": 7793 }, { "epoch": 0.21313716910960404, "grad_norm": 1.6831210851669312, "learning_rate": 1.8292796314607747e-05, "loss": 0.5621, "step": 7794 }, { "epoch": 0.21316451542332093, "grad_norm": 1.4875564575195312, "learning_rate": 1.8292301320599084e-05, "loss": 0.5695, "step": 7795 }, { "epoch": 0.21319186173703786, "grad_norm": 1.6645445823669434, "learning_rate": 1.829180626153965e-05, "loss": 0.5266, "step": 7796 }, { "epoch": 0.21321920805075475, "grad_norm": 2.1885454654693604, "learning_rate": 1.829131113743333e-05, "loss": 0.45, "step": 7797 }, { "epoch": 0.21324655436447168, "grad_norm": 1.757097601890564, "learning_rate": 1.829081594828401e-05, "loss": 0.5268, "step": 7798 }, { "epoch": 0.21327390067818858, "grad_norm": 2.3235256671905518, "learning_rate": 1.829032069409557e-05, "loss": 0.5633, "step": 7799 }, { "epoch": 0.2133012469919055, "grad_norm": 1.6173030138015747, "learning_rate": 1.82898253748719e-05, "loss": 0.5873, "step": 7800 }, { "epoch": 0.2133285933056224, "grad_norm": 2.5454766750335693, "learning_rate": 1.828932999061688e-05, "loss": 0.5569, "step": 7801 }, { "epoch": 0.21335593961933932, "grad_norm": 2.342425584793091, "learning_rate": 1.8288834541334403e-05, "loss": 0.5474, "step": 7802 }, { "epoch": 0.21338328593305622, "grad_norm": 1.9643878936767578, "learning_rate": 1.8288339027028346e-05, "loss": 0.5733, "step": 7803 }, { "epoch": 0.21341063224677315, "grad_norm": 1.6297364234924316, "learning_rate": 1.8287843447702604e-05, "loss": 0.51, "step": 7804 }, { "epoch": 0.21343797856049004, "grad_norm": 8.79919719696045, "learning_rate": 1.8287347803361066e-05, "loss": 0.4341, "step": 7805 }, { "epoch": 0.21346532487420697, "grad_norm": 1.41944420337677, "learning_rate": 1.828685209400761e-05, "loss": 0.5177, "step": 7806 }, { "epoch": 0.21349267118792387, "grad_norm": 1.6481109857559204, "learning_rate": 1.8286356319646136e-05, "loss": 0.6119, "step": 7807 }, { "epoch": 0.2135200175016408, "grad_norm": 1.3733121156692505, "learning_rate": 1.828586048028053e-05, "loss": 0.5716, "step": 7808 }, { "epoch": 0.2135473638153577, "grad_norm": 1.5444552898406982, "learning_rate": 1.8285364575914676e-05, "loss": 0.5547, "step": 7809 }, { "epoch": 0.2135747101290746, "grad_norm": 2.023942470550537, "learning_rate": 1.828486860655247e-05, "loss": 0.5313, "step": 7810 }, { "epoch": 0.2136020564427915, "grad_norm": 1.9564056396484375, "learning_rate": 1.8284372572197804e-05, "loss": 0.9231, "step": 7811 }, { "epoch": 0.21362940275650844, "grad_norm": 1.5281217098236084, "learning_rate": 1.828387647285456e-05, "loss": 0.5761, "step": 7812 }, { "epoch": 0.21365674907022533, "grad_norm": 3.0980801582336426, "learning_rate": 1.8283380308526646e-05, "loss": 0.4497, "step": 7813 }, { "epoch": 0.21368409538394226, "grad_norm": 1.6355408430099487, "learning_rate": 1.828288407921794e-05, "loss": 0.6018, "step": 7814 }, { "epoch": 0.21371144169765915, "grad_norm": 2.134713888168335, "learning_rate": 1.8282387784932338e-05, "loss": 0.5708, "step": 7815 }, { "epoch": 0.21373878801137608, "grad_norm": 1.9925495386123657, "learning_rate": 1.828189142567374e-05, "loss": 0.5491, "step": 7816 }, { "epoch": 0.21376613432509298, "grad_norm": 2.6574954986572266, "learning_rate": 1.8281395001446028e-05, "loss": 0.8858, "step": 7817 }, { "epoch": 0.2137934806388099, "grad_norm": 1.4250954389572144, "learning_rate": 1.8280898512253103e-05, "loss": 0.9442, "step": 7818 }, { "epoch": 0.2138208269525268, "grad_norm": 1.3916130065917969, "learning_rate": 1.8280401958098863e-05, "loss": 0.5703, "step": 7819 }, { "epoch": 0.21384817326624372, "grad_norm": 1.5014989376068115, "learning_rate": 1.8279905338987198e-05, "loss": 0.512, "step": 7820 }, { "epoch": 0.21387551957996062, "grad_norm": 1.8021235466003418, "learning_rate": 1.8279408654922007e-05, "loss": 0.5566, "step": 7821 }, { "epoch": 0.21390286589367752, "grad_norm": 1.3002396821975708, "learning_rate": 1.8278911905907187e-05, "loss": 0.5785, "step": 7822 }, { "epoch": 0.21393021220739444, "grad_norm": 1.5850719213485718, "learning_rate": 1.8278415091946628e-05, "loss": 0.5765, "step": 7823 }, { "epoch": 0.21395755852111134, "grad_norm": 1.5218772888183594, "learning_rate": 1.8277918213044235e-05, "loss": 0.5465, "step": 7824 }, { "epoch": 0.21398490483482827, "grad_norm": 1.3589773178100586, "learning_rate": 1.8277421269203906e-05, "loss": 0.5802, "step": 7825 }, { "epoch": 0.21401225114854516, "grad_norm": 1.9339852333068848, "learning_rate": 1.8276924260429533e-05, "loss": 0.5811, "step": 7826 }, { "epoch": 0.2140395974622621, "grad_norm": 1.8325015306472778, "learning_rate": 1.827642718672502e-05, "loss": 0.575, "step": 7827 }, { "epoch": 0.21406694377597899, "grad_norm": 1.8152040243148804, "learning_rate": 1.827593004809426e-05, "loss": 0.5366, "step": 7828 }, { "epoch": 0.2140942900896959, "grad_norm": 1.7850263118743896, "learning_rate": 1.8275432844541165e-05, "loss": 0.6128, "step": 7829 }, { "epoch": 0.2141216364034128, "grad_norm": 1.5946534872055054, "learning_rate": 1.8274935576069624e-05, "loss": 0.5976, "step": 7830 }, { "epoch": 0.21414898271712973, "grad_norm": 1.5187268257141113, "learning_rate": 1.827443824268354e-05, "loss": 0.5866, "step": 7831 }, { "epoch": 0.21417632903084663, "grad_norm": 1.7168623208999634, "learning_rate": 1.8273940844386824e-05, "loss": 0.5954, "step": 7832 }, { "epoch": 0.21420367534456355, "grad_norm": 1.6391098499298096, "learning_rate": 1.8273443381183367e-05, "loss": 0.5575, "step": 7833 }, { "epoch": 0.21423102165828045, "grad_norm": 1.736559510231018, "learning_rate": 1.8272945853077075e-05, "loss": 0.5547, "step": 7834 }, { "epoch": 0.21425836797199738, "grad_norm": 1.3469266891479492, "learning_rate": 1.827244826007185e-05, "loss": 0.5393, "step": 7835 }, { "epoch": 0.21428571428571427, "grad_norm": 1.4276549816131592, "learning_rate": 1.8271950602171596e-05, "loss": 0.5894, "step": 7836 }, { "epoch": 0.2143130605994312, "grad_norm": 1.7807319164276123, "learning_rate": 1.827145287938022e-05, "loss": 0.5725, "step": 7837 }, { "epoch": 0.2143404069131481, "grad_norm": 1.5132728815078735, "learning_rate": 1.8270955091701622e-05, "loss": 0.5724, "step": 7838 }, { "epoch": 0.21436775322686502, "grad_norm": 2.3191001415252686, "learning_rate": 1.827045723913971e-05, "loss": 0.5933, "step": 7839 }, { "epoch": 0.21439509954058192, "grad_norm": 1.2624205350875854, "learning_rate": 1.826995932169839e-05, "loss": 0.5409, "step": 7840 }, { "epoch": 0.21442244585429884, "grad_norm": 1.5180559158325195, "learning_rate": 1.8269461339381564e-05, "loss": 0.5876, "step": 7841 }, { "epoch": 0.21444979216801574, "grad_norm": 1.2387547492980957, "learning_rate": 1.8268963292193142e-05, "loss": 0.5541, "step": 7842 }, { "epoch": 0.21447713848173267, "grad_norm": 1.3893158435821533, "learning_rate": 1.826846518013703e-05, "loss": 0.5775, "step": 7843 }, { "epoch": 0.21450448479544956, "grad_norm": 1.6880043745040894, "learning_rate": 1.8267967003217134e-05, "loss": 0.4917, "step": 7844 }, { "epoch": 0.2145318311091665, "grad_norm": 1.4375674724578857, "learning_rate": 1.8267468761437364e-05, "loss": 0.5884, "step": 7845 }, { "epoch": 0.21455917742288338, "grad_norm": 1.6184686422348022, "learning_rate": 1.826697045480163e-05, "loss": 0.5143, "step": 7846 }, { "epoch": 0.2145865237366003, "grad_norm": 1.4019359350204468, "learning_rate": 1.8266472083313838e-05, "loss": 0.5663, "step": 7847 }, { "epoch": 0.2146138700503172, "grad_norm": 1.5197713375091553, "learning_rate": 1.82659736469779e-05, "loss": 0.5918, "step": 7848 }, { "epoch": 0.21464121636403413, "grad_norm": 1.8507275581359863, "learning_rate": 1.8265475145797723e-05, "loss": 0.618, "step": 7849 }, { "epoch": 0.21466856267775103, "grad_norm": 2.7242300510406494, "learning_rate": 1.826497657977722e-05, "loss": 0.4616, "step": 7850 }, { "epoch": 0.21469590899146795, "grad_norm": 1.3849056959152222, "learning_rate": 1.8264477948920306e-05, "loss": 0.5572, "step": 7851 }, { "epoch": 0.21472325530518485, "grad_norm": 1.251396894454956, "learning_rate": 1.826397925323088e-05, "loss": 0.5566, "step": 7852 }, { "epoch": 0.21475060161890178, "grad_norm": 1.6040128469467163, "learning_rate": 1.826348049271287e-05, "loss": 0.4945, "step": 7853 }, { "epoch": 0.21477794793261867, "grad_norm": 1.2965524196624756, "learning_rate": 1.8262981667370177e-05, "loss": 0.5461, "step": 7854 }, { "epoch": 0.2148052942463356, "grad_norm": 1.751396656036377, "learning_rate": 1.826248277720672e-05, "loss": 0.571, "step": 7855 }, { "epoch": 0.2148326405600525, "grad_norm": 2.6256442070007324, "learning_rate": 1.826198382222641e-05, "loss": 0.9439, "step": 7856 }, { "epoch": 0.21485998687376942, "grad_norm": 1.5809472799301147, "learning_rate": 1.8261484802433163e-05, "loss": 0.5272, "step": 7857 }, { "epoch": 0.21488733318748632, "grad_norm": 1.466049313545227, "learning_rate": 1.8260985717830894e-05, "loss": 0.5876, "step": 7858 }, { "epoch": 0.21491467950120324, "grad_norm": 2.521745204925537, "learning_rate": 1.8260486568423513e-05, "loss": 0.9026, "step": 7859 }, { "epoch": 0.21494202581492014, "grad_norm": 2.5985662937164307, "learning_rate": 1.8259987354214944e-05, "loss": 0.5653, "step": 7860 }, { "epoch": 0.21496937212863707, "grad_norm": 1.7452911138534546, "learning_rate": 1.8259488075209096e-05, "loss": 0.5324, "step": 7861 }, { "epoch": 0.21499671844235396, "grad_norm": 2.0878875255584717, "learning_rate": 1.8258988731409887e-05, "loss": 0.5694, "step": 7862 }, { "epoch": 0.2150240647560709, "grad_norm": 1.5901693105697632, "learning_rate": 1.825848932282124e-05, "loss": 0.5875, "step": 7863 }, { "epoch": 0.21505141106978778, "grad_norm": 1.4636114835739136, "learning_rate": 1.8257989849447066e-05, "loss": 0.5797, "step": 7864 }, { "epoch": 0.2150787573835047, "grad_norm": 1.4966567754745483, "learning_rate": 1.8257490311291285e-05, "loss": 0.5549, "step": 7865 }, { "epoch": 0.2151061036972216, "grad_norm": 1.506625771522522, "learning_rate": 1.8256990708357815e-05, "loss": 0.5513, "step": 7866 }, { "epoch": 0.21513345001093853, "grad_norm": 2.4592461585998535, "learning_rate": 1.825649104065058e-05, "loss": 0.5126, "step": 7867 }, { "epoch": 0.21516079632465543, "grad_norm": 1.7289904356002808, "learning_rate": 1.8255991308173495e-05, "loss": 0.5516, "step": 7868 }, { "epoch": 0.21518814263837235, "grad_norm": 2.0446536540985107, "learning_rate": 1.825549151093048e-05, "loss": 0.9249, "step": 7869 }, { "epoch": 0.21521548895208925, "grad_norm": 1.3336372375488281, "learning_rate": 1.8254991648925462e-05, "loss": 0.5986, "step": 7870 }, { "epoch": 0.21524283526580618, "grad_norm": 3.1807515621185303, "learning_rate": 1.8254491722162356e-05, "loss": 0.3847, "step": 7871 }, { "epoch": 0.21527018157952307, "grad_norm": 1.5925137996673584, "learning_rate": 1.825399173064508e-05, "loss": 0.5695, "step": 7872 }, { "epoch": 0.21529752789324, "grad_norm": 1.5605741739273071, "learning_rate": 1.825349167437757e-05, "loss": 0.5955, "step": 7873 }, { "epoch": 0.2153248742069569, "grad_norm": 1.5584137439727783, "learning_rate": 1.8252991553363736e-05, "loss": 0.5519, "step": 7874 }, { "epoch": 0.21535222052067382, "grad_norm": 1.4360904693603516, "learning_rate": 1.8252491367607502e-05, "loss": 0.5658, "step": 7875 }, { "epoch": 0.21537956683439072, "grad_norm": 1.4600695371627808, "learning_rate": 1.82519911171128e-05, "loss": 0.5915, "step": 7876 }, { "epoch": 0.21540691314810764, "grad_norm": 2.495115280151367, "learning_rate": 1.8251490801883552e-05, "loss": 0.5693, "step": 7877 }, { "epoch": 0.21543425946182454, "grad_norm": 1.4912985563278198, "learning_rate": 1.8250990421923677e-05, "loss": 0.5527, "step": 7878 }, { "epoch": 0.21546160577554146, "grad_norm": 1.4182591438293457, "learning_rate": 1.8250489977237108e-05, "loss": 0.5625, "step": 7879 }, { "epoch": 0.21548895208925836, "grad_norm": 1.4698817729949951, "learning_rate": 1.8249989467827765e-05, "loss": 0.553, "step": 7880 }, { "epoch": 0.2155162984029753, "grad_norm": 1.7367594242095947, "learning_rate": 1.8249488893699576e-05, "loss": 0.6008, "step": 7881 }, { "epoch": 0.21554364471669218, "grad_norm": 1.702025294303894, "learning_rate": 1.8248988254856467e-05, "loss": 0.6265, "step": 7882 }, { "epoch": 0.2155709910304091, "grad_norm": 1.482493281364441, "learning_rate": 1.824848755130237e-05, "loss": 0.57, "step": 7883 }, { "epoch": 0.215598337344126, "grad_norm": 1.3803162574768066, "learning_rate": 1.8247986783041208e-05, "loss": 0.5299, "step": 7884 }, { "epoch": 0.21562568365784293, "grad_norm": 2.104372978210449, "learning_rate": 1.824748595007691e-05, "loss": 0.9309, "step": 7885 }, { "epoch": 0.21565302997155983, "grad_norm": 1.5753848552703857, "learning_rate": 1.8246985052413405e-05, "loss": 0.5957, "step": 7886 }, { "epoch": 0.21568037628527675, "grad_norm": 1.822465181350708, "learning_rate": 1.8246484090054627e-05, "loss": 0.9045, "step": 7887 }, { "epoch": 0.21570772259899365, "grad_norm": 1.4175989627838135, "learning_rate": 1.82459830630045e-05, "loss": 0.5164, "step": 7888 }, { "epoch": 0.21573506891271058, "grad_norm": 1.6234835386276245, "learning_rate": 1.8245481971266953e-05, "loss": 0.5763, "step": 7889 }, { "epoch": 0.21576241522642747, "grad_norm": 1.183196783065796, "learning_rate": 1.8244980814845927e-05, "loss": 0.5536, "step": 7890 }, { "epoch": 0.2157897615401444, "grad_norm": 1.2965209484100342, "learning_rate": 1.8244479593745343e-05, "loss": 0.5605, "step": 7891 }, { "epoch": 0.2158171078538613, "grad_norm": 1.5186035633087158, "learning_rate": 1.8243978307969135e-05, "loss": 0.5568, "step": 7892 }, { "epoch": 0.21584445416757822, "grad_norm": 3.0938239097595215, "learning_rate": 1.824347695752124e-05, "loss": 0.5992, "step": 7893 }, { "epoch": 0.21587180048129512, "grad_norm": 1.9517042636871338, "learning_rate": 1.8242975542405584e-05, "loss": 0.5697, "step": 7894 }, { "epoch": 0.21589914679501204, "grad_norm": 1.6290956735610962, "learning_rate": 1.8242474062626112e-05, "loss": 0.5475, "step": 7895 }, { "epoch": 0.21592649310872894, "grad_norm": 5.449148178100586, "learning_rate": 1.8241972518186747e-05, "loss": 0.9227, "step": 7896 }, { "epoch": 0.21595383942244586, "grad_norm": 1.5269521474838257, "learning_rate": 1.8241470909091425e-05, "loss": 0.5455, "step": 7897 }, { "epoch": 0.21598118573616276, "grad_norm": 1.8565051555633545, "learning_rate": 1.8240969235344085e-05, "loss": 0.9094, "step": 7898 }, { "epoch": 0.2160085320498797, "grad_norm": 1.575221061706543, "learning_rate": 1.8240467496948662e-05, "loss": 0.5424, "step": 7899 }, { "epoch": 0.21603587836359658, "grad_norm": 1.5313512086868286, "learning_rate": 1.8239965693909088e-05, "loss": 0.5556, "step": 7900 }, { "epoch": 0.2160632246773135, "grad_norm": 1.3621972799301147, "learning_rate": 1.8239463826229302e-05, "loss": 0.547, "step": 7901 }, { "epoch": 0.2160905709910304, "grad_norm": 1.8497653007507324, "learning_rate": 1.8238961893913245e-05, "loss": 0.4848, "step": 7902 }, { "epoch": 0.21611791730474733, "grad_norm": 2.423863649368286, "learning_rate": 1.8238459896964848e-05, "loss": 0.5362, "step": 7903 }, { "epoch": 0.21614526361846423, "grad_norm": 1.9367409944534302, "learning_rate": 1.823795783538805e-05, "loss": 0.4607, "step": 7904 }, { "epoch": 0.21617260993218115, "grad_norm": 1.3458006381988525, "learning_rate": 1.823745570918679e-05, "loss": 0.5656, "step": 7905 }, { "epoch": 0.21619995624589805, "grad_norm": 1.1910001039505005, "learning_rate": 1.8236953518365014e-05, "loss": 0.5418, "step": 7906 }, { "epoch": 0.21622730255961498, "grad_norm": 3.6377573013305664, "learning_rate": 1.8236451262926653e-05, "loss": 0.9519, "step": 7907 }, { "epoch": 0.21625464887333187, "grad_norm": 6.463252544403076, "learning_rate": 1.8235948942875647e-05, "loss": 0.4391, "step": 7908 }, { "epoch": 0.2162819951870488, "grad_norm": 1.9064282178878784, "learning_rate": 1.823544655821594e-05, "loss": 0.4521, "step": 7909 }, { "epoch": 0.2163093415007657, "grad_norm": 1.8749476671218872, "learning_rate": 1.8234944108951477e-05, "loss": 0.5427, "step": 7910 }, { "epoch": 0.21633668781448262, "grad_norm": 1.6633576154708862, "learning_rate": 1.823444159508619e-05, "loss": 0.4199, "step": 7911 }, { "epoch": 0.21636403412819952, "grad_norm": 1.5933767557144165, "learning_rate": 1.823393901662403e-05, "loss": 0.4992, "step": 7912 }, { "epoch": 0.21639138044191644, "grad_norm": 1.4962623119354248, "learning_rate": 1.8233436373568932e-05, "loss": 0.5624, "step": 7913 }, { "epoch": 0.21641872675563334, "grad_norm": 1.3819200992584229, "learning_rate": 1.8232933665924845e-05, "loss": 0.4774, "step": 7914 }, { "epoch": 0.21644607306935026, "grad_norm": 3.9957382678985596, "learning_rate": 1.8232430893695708e-05, "loss": 0.5647, "step": 7915 }, { "epoch": 0.21647341938306716, "grad_norm": 1.4669309854507446, "learning_rate": 1.823192805688547e-05, "loss": 0.5543, "step": 7916 }, { "epoch": 0.2165007656967841, "grad_norm": 2.4013566970825195, "learning_rate": 1.8231425155498073e-05, "loss": 0.6218, "step": 7917 }, { "epoch": 0.21652811201050098, "grad_norm": 1.916397213935852, "learning_rate": 1.823092218953746e-05, "loss": 0.549, "step": 7918 }, { "epoch": 0.2165554583242179, "grad_norm": 1.9116380214691162, "learning_rate": 1.8230419159007582e-05, "loss": 0.5534, "step": 7919 }, { "epoch": 0.2165828046379348, "grad_norm": 1.5818824768066406, "learning_rate": 1.822991606391238e-05, "loss": 0.5914, "step": 7920 }, { "epoch": 0.21661015095165173, "grad_norm": 3.9585862159729004, "learning_rate": 1.8229412904255804e-05, "loss": 0.4609, "step": 7921 }, { "epoch": 0.21663749726536863, "grad_norm": 1.46315598487854, "learning_rate": 1.8228909680041796e-05, "loss": 0.557, "step": 7922 }, { "epoch": 0.21666484357908555, "grad_norm": 1.7178189754486084, "learning_rate": 1.822840639127431e-05, "loss": 0.6118, "step": 7923 }, { "epoch": 0.21669218989280245, "grad_norm": 1.9466737508773804, "learning_rate": 1.822790303795729e-05, "loss": 0.4349, "step": 7924 }, { "epoch": 0.21671953620651935, "grad_norm": 1.8880817890167236, "learning_rate": 1.822739962009469e-05, "loss": 0.5598, "step": 7925 }, { "epoch": 0.21674688252023627, "grad_norm": 2.4777729511260986, "learning_rate": 1.822689613769045e-05, "loss": 0.4309, "step": 7926 }, { "epoch": 0.21677422883395317, "grad_norm": 1.5534998178482056, "learning_rate": 1.822639259074853e-05, "loss": 0.5548, "step": 7927 }, { "epoch": 0.2168015751476701, "grad_norm": 1.9999254941940308, "learning_rate": 1.8225888979272874e-05, "loss": 0.5693, "step": 7928 }, { "epoch": 0.216828921461387, "grad_norm": 1.6704384088516235, "learning_rate": 1.8225385303267433e-05, "loss": 0.4453, "step": 7929 }, { "epoch": 0.21685626777510392, "grad_norm": 3.9027163982391357, "learning_rate": 1.8224881562736158e-05, "loss": 0.5376, "step": 7930 }, { "epoch": 0.21688361408882081, "grad_norm": 1.7493896484375, "learning_rate": 1.8224377757683004e-05, "loss": 0.5815, "step": 7931 }, { "epoch": 0.21691096040253774, "grad_norm": 1.8967949151992798, "learning_rate": 1.822387388811192e-05, "loss": 0.515, "step": 7932 }, { "epoch": 0.21693830671625464, "grad_norm": 2.0262153148651123, "learning_rate": 1.822336995402686e-05, "loss": 0.5866, "step": 7933 }, { "epoch": 0.21696565302997156, "grad_norm": 1.800927758216858, "learning_rate": 1.8222865955431777e-05, "loss": 0.5671, "step": 7934 }, { "epoch": 0.21699299934368846, "grad_norm": 1.4765567779541016, "learning_rate": 1.8222361892330623e-05, "loss": 0.5476, "step": 7935 }, { "epoch": 0.21702034565740538, "grad_norm": 1.4737558364868164, "learning_rate": 1.8221857764727357e-05, "loss": 0.5376, "step": 7936 }, { "epoch": 0.21704769197112228, "grad_norm": 1.3389222621917725, "learning_rate": 1.8221353572625927e-05, "loss": 0.5147, "step": 7937 }, { "epoch": 0.2170750382848392, "grad_norm": 1.6935365200042725, "learning_rate": 1.8220849316030297e-05, "loss": 0.5818, "step": 7938 }, { "epoch": 0.2171023845985561, "grad_norm": 2.8785698413848877, "learning_rate": 1.8220344994944415e-05, "loss": 0.515, "step": 7939 }, { "epoch": 0.21712973091227303, "grad_norm": 1.419198751449585, "learning_rate": 1.821984060937224e-05, "loss": 0.5528, "step": 7940 }, { "epoch": 0.21715707722598993, "grad_norm": 2.0189478397369385, "learning_rate": 1.8219336159317728e-05, "loss": 0.5802, "step": 7941 }, { "epoch": 0.21718442353970685, "grad_norm": 1.7260810136795044, "learning_rate": 1.8218831644784834e-05, "loss": 0.5736, "step": 7942 }, { "epoch": 0.21721176985342375, "grad_norm": 1.8245965242385864, "learning_rate": 1.821832706577752e-05, "loss": 0.6036, "step": 7943 }, { "epoch": 0.21723911616714067, "grad_norm": 2.0292482376098633, "learning_rate": 1.8217822422299745e-05, "loss": 0.5633, "step": 7944 }, { "epoch": 0.21726646248085757, "grad_norm": 1.3971261978149414, "learning_rate": 1.821731771435547e-05, "loss": 0.5591, "step": 7945 }, { "epoch": 0.2172938087945745, "grad_norm": 1.5377205610275269, "learning_rate": 1.8216812941948645e-05, "loss": 0.553, "step": 7946 }, { "epoch": 0.2173211551082914, "grad_norm": 2.129746437072754, "learning_rate": 1.8216308105083235e-05, "loss": 0.5812, "step": 7947 }, { "epoch": 0.21734850142200832, "grad_norm": 1.7805955410003662, "learning_rate": 1.8215803203763202e-05, "loss": 0.972, "step": 7948 }, { "epoch": 0.21737584773572521, "grad_norm": 1.6483244895935059, "learning_rate": 1.8215298237992503e-05, "loss": 0.5468, "step": 7949 }, { "epoch": 0.21740319404944214, "grad_norm": 2.6550498008728027, "learning_rate": 1.82147932077751e-05, "loss": 0.5726, "step": 7950 }, { "epoch": 0.21743054036315904, "grad_norm": 2.442103624343872, "learning_rate": 1.8214288113114958e-05, "loss": 0.5905, "step": 7951 }, { "epoch": 0.21745788667687596, "grad_norm": 1.8063193559646606, "learning_rate": 1.821378295401604e-05, "loss": 0.6037, "step": 7952 }, { "epoch": 0.21748523299059286, "grad_norm": 2.921315908432007, "learning_rate": 1.8213277730482302e-05, "loss": 0.5766, "step": 7953 }, { "epoch": 0.21751257930430978, "grad_norm": 1.2905641794204712, "learning_rate": 1.8212772442517716e-05, "loss": 0.948, "step": 7954 }, { "epoch": 0.21753992561802668, "grad_norm": 2.162236452102661, "learning_rate": 1.821226709012624e-05, "loss": 0.9132, "step": 7955 }, { "epoch": 0.2175672719317436, "grad_norm": 1.6103317737579346, "learning_rate": 1.8211761673311837e-05, "loss": 0.5819, "step": 7956 }, { "epoch": 0.2175946182454605, "grad_norm": 2.608095169067383, "learning_rate": 1.8211256192078476e-05, "loss": 0.5784, "step": 7957 }, { "epoch": 0.21762196455917743, "grad_norm": 3.681800127029419, "learning_rate": 1.8210750646430123e-05, "loss": 0.5453, "step": 7958 }, { "epoch": 0.21764931087289432, "grad_norm": 1.782204270362854, "learning_rate": 1.8210245036370738e-05, "loss": 0.5731, "step": 7959 }, { "epoch": 0.21767665718661125, "grad_norm": 1.572635293006897, "learning_rate": 1.8209739361904296e-05, "loss": 0.5585, "step": 7960 }, { "epoch": 0.21770400350032815, "grad_norm": 2.391735553741455, "learning_rate": 1.8209233623034758e-05, "loss": 0.5504, "step": 7961 }, { "epoch": 0.21773134981404507, "grad_norm": 1.8234343528747559, "learning_rate": 1.820872781976609e-05, "loss": 0.6036, "step": 7962 }, { "epoch": 0.21775869612776197, "grad_norm": 1.8036950826644897, "learning_rate": 1.8208221952102265e-05, "loss": 0.5972, "step": 7963 }, { "epoch": 0.2177860424414789, "grad_norm": 2.049304723739624, "learning_rate": 1.8207716020047246e-05, "loss": 0.5509, "step": 7964 }, { "epoch": 0.2178133887551958, "grad_norm": 1.7249163389205933, "learning_rate": 1.8207210023605007e-05, "loss": 0.565, "step": 7965 }, { "epoch": 0.21784073506891272, "grad_norm": 2.4584004878997803, "learning_rate": 1.8206703962779516e-05, "loss": 0.5636, "step": 7966 }, { "epoch": 0.2178680813826296, "grad_norm": 1.5674320459365845, "learning_rate": 1.820619783757474e-05, "loss": 0.5385, "step": 7967 }, { "epoch": 0.21789542769634654, "grad_norm": 1.7007701396942139, "learning_rate": 1.820569164799465e-05, "loss": 0.5527, "step": 7968 }, { "epoch": 0.21792277401006344, "grad_norm": 1.9248186349868774, "learning_rate": 1.8205185394043222e-05, "loss": 0.5724, "step": 7969 }, { "epoch": 0.21795012032378036, "grad_norm": 1.8116084337234497, "learning_rate": 1.820467907572442e-05, "loss": 0.9469, "step": 7970 }, { "epoch": 0.21797746663749726, "grad_norm": 2.3404541015625, "learning_rate": 1.8204172693042223e-05, "loss": 0.5503, "step": 7971 }, { "epoch": 0.21800481295121418, "grad_norm": 1.5850986242294312, "learning_rate": 1.8203666246000598e-05, "loss": 0.5954, "step": 7972 }, { "epoch": 0.21803215926493108, "grad_norm": 1.6126974821090698, "learning_rate": 1.820315973460352e-05, "loss": 0.6022, "step": 7973 }, { "epoch": 0.218059505578648, "grad_norm": 3.0121195316314697, "learning_rate": 1.820265315885496e-05, "loss": 0.5195, "step": 7974 }, { "epoch": 0.2180868518923649, "grad_norm": 2.501929759979248, "learning_rate": 1.82021465187589e-05, "loss": 0.5565, "step": 7975 }, { "epoch": 0.21811419820608183, "grad_norm": 1.4094979763031006, "learning_rate": 1.8201639814319304e-05, "loss": 0.5631, "step": 7976 }, { "epoch": 0.21814154451979872, "grad_norm": 1.7837002277374268, "learning_rate": 1.8201133045540155e-05, "loss": 0.5696, "step": 7977 }, { "epoch": 0.21816889083351565, "grad_norm": 1.8873097896575928, "learning_rate": 1.8200626212425425e-05, "loss": 0.5741, "step": 7978 }, { "epoch": 0.21819623714723255, "grad_norm": 2.5583078861236572, "learning_rate": 1.820011931497909e-05, "loss": 0.5517, "step": 7979 }, { "epoch": 0.21822358346094947, "grad_norm": 1.733655571937561, "learning_rate": 1.8199612353205126e-05, "loss": 0.5733, "step": 7980 }, { "epoch": 0.21825092977466637, "grad_norm": 1.7149831056594849, "learning_rate": 1.819910532710751e-05, "loss": 0.5627, "step": 7981 }, { "epoch": 0.2182782760883833, "grad_norm": 1.8152912855148315, "learning_rate": 1.819859823669022e-05, "loss": 0.9047, "step": 7982 }, { "epoch": 0.2183056224021002, "grad_norm": 1.6699552536010742, "learning_rate": 1.8198091081957237e-05, "loss": 0.5614, "step": 7983 }, { "epoch": 0.21833296871581712, "grad_norm": 2.065953493118286, "learning_rate": 1.8197583862912535e-05, "loss": 0.5724, "step": 7984 }, { "epoch": 0.218360315029534, "grad_norm": 1.9062491655349731, "learning_rate": 1.8197076579560095e-05, "loss": 0.5799, "step": 7985 }, { "epoch": 0.21838766134325094, "grad_norm": 1.886125087738037, "learning_rate": 1.8196569231903892e-05, "loss": 0.4931, "step": 7986 }, { "epoch": 0.21841500765696784, "grad_norm": 1.7260018587112427, "learning_rate": 1.8196061819947914e-05, "loss": 0.594, "step": 7987 }, { "epoch": 0.21844235397068476, "grad_norm": 1.6825063228607178, "learning_rate": 1.8195554343696137e-05, "loss": 0.5307, "step": 7988 }, { "epoch": 0.21846970028440166, "grad_norm": 1.545715093612671, "learning_rate": 1.819504680315254e-05, "loss": 0.5656, "step": 7989 }, { "epoch": 0.21849704659811858, "grad_norm": 1.752174973487854, "learning_rate": 1.8194539198321113e-05, "loss": 0.5793, "step": 7990 }, { "epoch": 0.21852439291183548, "grad_norm": 1.942779541015625, "learning_rate": 1.819403152920583e-05, "loss": 0.5691, "step": 7991 }, { "epoch": 0.2185517392255524, "grad_norm": 2.167424201965332, "learning_rate": 1.8193523795810674e-05, "loss": 0.5646, "step": 7992 }, { "epoch": 0.2185790855392693, "grad_norm": 2.012075185775757, "learning_rate": 1.8193015998139628e-05, "loss": 0.9046, "step": 7993 }, { "epoch": 0.21860643185298623, "grad_norm": 5.075984477996826, "learning_rate": 1.819250813619668e-05, "loss": 0.5669, "step": 7994 }, { "epoch": 0.21863377816670312, "grad_norm": 1.4892687797546387, "learning_rate": 1.819200020998581e-05, "loss": 0.5845, "step": 7995 }, { "epoch": 0.21866112448042005, "grad_norm": 1.5746411085128784, "learning_rate": 1.8191492219511006e-05, "loss": 0.5481, "step": 7996 }, { "epoch": 0.21868847079413695, "grad_norm": 1.9739770889282227, "learning_rate": 1.819098416477625e-05, "loss": 0.5633, "step": 7997 }, { "epoch": 0.21871581710785387, "grad_norm": 1.7774674892425537, "learning_rate": 1.8190476045785525e-05, "loss": 0.5357, "step": 7998 }, { "epoch": 0.21874316342157077, "grad_norm": 1.560253620147705, "learning_rate": 1.8189967862542825e-05, "loss": 0.5933, "step": 7999 }, { "epoch": 0.2187705097352877, "grad_norm": 1.891387939453125, "learning_rate": 1.818945961505213e-05, "loss": 0.565, "step": 8000 }, { "epoch": 0.2187978560490046, "grad_norm": 1.6045457124710083, "learning_rate": 1.8188951303317427e-05, "loss": 0.5435, "step": 8001 }, { "epoch": 0.21882520236272152, "grad_norm": 2.679988145828247, "learning_rate": 1.818844292734271e-05, "loss": 0.9125, "step": 8002 }, { "epoch": 0.2188525486764384, "grad_norm": 1.8777071237564087, "learning_rate": 1.818793448713196e-05, "loss": 0.9078, "step": 8003 }, { "epoch": 0.21887989499015534, "grad_norm": 1.8615297079086304, "learning_rate": 1.818742598268917e-05, "loss": 0.5799, "step": 8004 }, { "epoch": 0.21890724130387224, "grad_norm": 1.7352077960968018, "learning_rate": 1.8186917414018324e-05, "loss": 0.5654, "step": 8005 }, { "epoch": 0.21893458761758916, "grad_norm": 1.6723568439483643, "learning_rate": 1.8186408781123417e-05, "loss": 0.5032, "step": 8006 }, { "epoch": 0.21896193393130606, "grad_norm": 1.6517682075500488, "learning_rate": 1.8185900084008438e-05, "loss": 0.5844, "step": 8007 }, { "epoch": 0.21898928024502298, "grad_norm": 1.7605119943618774, "learning_rate": 1.8185391322677373e-05, "loss": 0.5659, "step": 8008 }, { "epoch": 0.21901662655873988, "grad_norm": 1.6868672370910645, "learning_rate": 1.818488249713422e-05, "loss": 0.5456, "step": 8009 }, { "epoch": 0.2190439728724568, "grad_norm": 1.3583605289459229, "learning_rate": 1.8184373607382967e-05, "loss": 0.5585, "step": 8010 }, { "epoch": 0.2190713191861737, "grad_norm": 1.7686511278152466, "learning_rate": 1.8183864653427604e-05, "loss": 0.5709, "step": 8011 }, { "epoch": 0.21909866549989063, "grad_norm": 1.4777523279190063, "learning_rate": 1.818335563527213e-05, "loss": 0.5688, "step": 8012 }, { "epoch": 0.21912601181360752, "grad_norm": 2.867053985595703, "learning_rate": 1.818284655292053e-05, "loss": 0.4356, "step": 8013 }, { "epoch": 0.21915335812732445, "grad_norm": 3.4730679988861084, "learning_rate": 1.81823374063768e-05, "loss": 0.5535, "step": 8014 }, { "epoch": 0.21918070444104135, "grad_norm": 1.6299848556518555, "learning_rate": 1.818182819564494e-05, "loss": 0.5859, "step": 8015 }, { "epoch": 0.21920805075475827, "grad_norm": 1.6266021728515625, "learning_rate": 1.818131892072894e-05, "loss": 0.9063, "step": 8016 }, { "epoch": 0.21923539706847517, "grad_norm": 2.1845340728759766, "learning_rate": 1.8180809581632793e-05, "loss": 0.5617, "step": 8017 }, { "epoch": 0.2192627433821921, "grad_norm": 2.228152275085449, "learning_rate": 1.81803001783605e-05, "loss": 0.5486, "step": 8018 }, { "epoch": 0.219290089695909, "grad_norm": 1.8378381729125977, "learning_rate": 1.8179790710916055e-05, "loss": 0.5137, "step": 8019 }, { "epoch": 0.21931743600962592, "grad_norm": 1.6570696830749512, "learning_rate": 1.817928117930345e-05, "loss": 0.5619, "step": 8020 }, { "epoch": 0.2193447823233428, "grad_norm": 1.7241575717926025, "learning_rate": 1.8178771583526686e-05, "loss": 0.6004, "step": 8021 }, { "epoch": 0.21937212863705974, "grad_norm": 1.8034800291061401, "learning_rate": 1.8178261923589765e-05, "loss": 0.579, "step": 8022 }, { "epoch": 0.21939947495077664, "grad_norm": 2.7483062744140625, "learning_rate": 1.8177752199496675e-05, "loss": 0.5136, "step": 8023 }, { "epoch": 0.21942682126449356, "grad_norm": 1.4647902250289917, "learning_rate": 1.8177242411251424e-05, "loss": 0.5927, "step": 8024 }, { "epoch": 0.21945416757821046, "grad_norm": 2.1577634811401367, "learning_rate": 1.817673255885801e-05, "loss": 0.5393, "step": 8025 }, { "epoch": 0.21948151389192735, "grad_norm": 1.6385496854782104, "learning_rate": 1.8176222642320426e-05, "loss": 0.5693, "step": 8026 }, { "epoch": 0.21950886020564428, "grad_norm": 1.7470262050628662, "learning_rate": 1.817571266164268e-05, "loss": 0.5616, "step": 8027 }, { "epoch": 0.21953620651936118, "grad_norm": 1.492309808731079, "learning_rate": 1.8175202616828763e-05, "loss": 0.6128, "step": 8028 }, { "epoch": 0.2195635528330781, "grad_norm": 2.121310234069824, "learning_rate": 1.817469250788269e-05, "loss": 0.6275, "step": 8029 }, { "epoch": 0.219590899146795, "grad_norm": 1.4615861177444458, "learning_rate": 1.817418233480845e-05, "loss": 0.5363, "step": 8030 }, { "epoch": 0.21961824546051192, "grad_norm": 1.9510842561721802, "learning_rate": 1.8173672097610053e-05, "loss": 0.5712, "step": 8031 }, { "epoch": 0.21964559177422882, "grad_norm": 1.485002040863037, "learning_rate": 1.8173161796291494e-05, "loss": 0.56, "step": 8032 }, { "epoch": 0.21967293808794575, "grad_norm": 1.6561315059661865, "learning_rate": 1.8172651430856782e-05, "loss": 0.6041, "step": 8033 }, { "epoch": 0.21970028440166264, "grad_norm": 4.160409450531006, "learning_rate": 1.8172141001309923e-05, "loss": 0.4024, "step": 8034 }, { "epoch": 0.21972763071537957, "grad_norm": 1.784995675086975, "learning_rate": 1.8171630507654915e-05, "loss": 0.6043, "step": 8035 }, { "epoch": 0.21975497702909647, "grad_norm": 1.6632057428359985, "learning_rate": 1.8171119949895766e-05, "loss": 0.5447, "step": 8036 }, { "epoch": 0.2197823233428134, "grad_norm": 1.33614182472229, "learning_rate": 1.817060932803648e-05, "loss": 0.5617, "step": 8037 }, { "epoch": 0.2198096696565303, "grad_norm": 1.5714356899261475, "learning_rate": 1.8170098642081063e-05, "loss": 0.5614, "step": 8038 }, { "epoch": 0.2198370159702472, "grad_norm": 1.5901700258255005, "learning_rate": 1.816958789203352e-05, "loss": 0.6256, "step": 8039 }, { "epoch": 0.2198643622839641, "grad_norm": 1.5123662948608398, "learning_rate": 1.816907707789786e-05, "loss": 0.5654, "step": 8040 }, { "epoch": 0.21989170859768103, "grad_norm": 2.319258451461792, "learning_rate": 1.816856619967809e-05, "loss": 0.5459, "step": 8041 }, { "epoch": 0.21991905491139793, "grad_norm": 1.5711674690246582, "learning_rate": 1.8168055257378222e-05, "loss": 0.5972, "step": 8042 }, { "epoch": 0.21994640122511486, "grad_norm": 2.0083391666412354, "learning_rate": 1.8167544251002255e-05, "loss": 0.5621, "step": 8043 }, { "epoch": 0.21997374753883175, "grad_norm": 2.5958187580108643, "learning_rate": 1.81670331805542e-05, "loss": 0.5864, "step": 8044 }, { "epoch": 0.22000109385254868, "grad_norm": 4.442019462585449, "learning_rate": 1.816652204603807e-05, "loss": 0.5822, "step": 8045 }, { "epoch": 0.22002844016626558, "grad_norm": 1.7531278133392334, "learning_rate": 1.8166010847457872e-05, "loss": 0.5593, "step": 8046 }, { "epoch": 0.2200557864799825, "grad_norm": 2.001227855682373, "learning_rate": 1.816549958481762e-05, "loss": 0.5529, "step": 8047 }, { "epoch": 0.2200831327936994, "grad_norm": 1.788459300994873, "learning_rate": 1.8164988258121317e-05, "loss": 0.5923, "step": 8048 }, { "epoch": 0.22011047910741632, "grad_norm": 1.9830973148345947, "learning_rate": 1.8164476867372983e-05, "loss": 0.5944, "step": 8049 }, { "epoch": 0.22013782542113322, "grad_norm": 10.969456672668457, "learning_rate": 1.816396541257662e-05, "loss": 0.8864, "step": 8050 }, { "epoch": 0.22016517173485015, "grad_norm": 2.5655181407928467, "learning_rate": 1.816345389373625e-05, "loss": 0.5236, "step": 8051 }, { "epoch": 0.22019251804856704, "grad_norm": 1.6933025121688843, "learning_rate": 1.816294231085588e-05, "loss": 0.611, "step": 8052 }, { "epoch": 0.22021986436228397, "grad_norm": 2.8422961235046387, "learning_rate": 1.816243066393953e-05, "loss": 0.5827, "step": 8053 }, { "epoch": 0.22024721067600087, "grad_norm": 1.3505221605300903, "learning_rate": 1.8161918952991208e-05, "loss": 0.5871, "step": 8054 }, { "epoch": 0.2202745569897178, "grad_norm": 1.737122654914856, "learning_rate": 1.8161407178014922e-05, "loss": 0.5667, "step": 8055 }, { "epoch": 0.2203019033034347, "grad_norm": 1.3503814935684204, "learning_rate": 1.8160895339014695e-05, "loss": 0.5215, "step": 8056 }, { "epoch": 0.2203292496171516, "grad_norm": 2.3812224864959717, "learning_rate": 1.8160383435994546e-05, "loss": 0.6142, "step": 8057 }, { "epoch": 0.2203565959308685, "grad_norm": 2.2790372371673584, "learning_rate": 1.815987146895848e-05, "loss": 0.5733, "step": 8058 }, { "epoch": 0.22038394224458543, "grad_norm": 1.805145263671875, "learning_rate": 1.8159359437910523e-05, "loss": 0.54, "step": 8059 }, { "epoch": 0.22041128855830233, "grad_norm": 2.122952461242676, "learning_rate": 1.8158847342854683e-05, "loss": 0.5985, "step": 8060 }, { "epoch": 0.22043863487201926, "grad_norm": 11.966798782348633, "learning_rate": 1.8158335183794987e-05, "loss": 0.9191, "step": 8061 }, { "epoch": 0.22046598118573615, "grad_norm": 2.7697432041168213, "learning_rate": 1.8157822960735443e-05, "loss": 0.5678, "step": 8062 }, { "epoch": 0.22049332749945308, "grad_norm": 1.5047581195831299, "learning_rate": 1.8157310673680076e-05, "loss": 0.5612, "step": 8063 }, { "epoch": 0.22052067381316998, "grad_norm": 1.6852755546569824, "learning_rate": 1.81567983226329e-05, "loss": 0.5672, "step": 8064 }, { "epoch": 0.2205480201268869, "grad_norm": 1.8669122457504272, "learning_rate": 1.815628590759794e-05, "loss": 0.5855, "step": 8065 }, { "epoch": 0.2205753664406038, "grad_norm": 1.5607112646102905, "learning_rate": 1.815577342857921e-05, "loss": 0.5775, "step": 8066 }, { "epoch": 0.22060271275432072, "grad_norm": 1.8529598712921143, "learning_rate": 1.815526088558073e-05, "loss": 0.5395, "step": 8067 }, { "epoch": 0.22063005906803762, "grad_norm": 2.3926475048065186, "learning_rate": 1.815474827860653e-05, "loss": 0.5164, "step": 8068 }, { "epoch": 0.22065740538175455, "grad_norm": 2.045710325241089, "learning_rate": 1.8154235607660622e-05, "loss": 0.6168, "step": 8069 }, { "epoch": 0.22068475169547144, "grad_norm": 2.6508495807647705, "learning_rate": 1.8153722872747027e-05, "loss": 0.9281, "step": 8070 }, { "epoch": 0.22071209800918837, "grad_norm": 3.5989749431610107, "learning_rate": 1.8153210073869775e-05, "loss": 0.5865, "step": 8071 }, { "epoch": 0.22073944432290526, "grad_norm": 1.6493122577667236, "learning_rate": 1.8152697211032887e-05, "loss": 0.5808, "step": 8072 }, { "epoch": 0.2207667906366222, "grad_norm": 1.5716241598129272, "learning_rate": 1.8152184284240376e-05, "loss": 0.5654, "step": 8073 }, { "epoch": 0.2207941369503391, "grad_norm": 1.4768822193145752, "learning_rate": 1.8151671293496278e-05, "loss": 0.5547, "step": 8074 }, { "epoch": 0.220821483264056, "grad_norm": 1.5199097394943237, "learning_rate": 1.8151158238804614e-05, "loss": 0.5845, "step": 8075 }, { "epoch": 0.2208488295777729, "grad_norm": 2.1480367183685303, "learning_rate": 1.8150645120169406e-05, "loss": 0.931, "step": 8076 }, { "epoch": 0.22087617589148983, "grad_norm": 1.5926462411880493, "learning_rate": 1.815013193759468e-05, "loss": 0.5581, "step": 8077 }, { "epoch": 0.22090352220520673, "grad_norm": 1.956194281578064, "learning_rate": 1.814961869108446e-05, "loss": 0.5774, "step": 8078 }, { "epoch": 0.22093086851892366, "grad_norm": 1.5697346925735474, "learning_rate": 1.814910538064278e-05, "loss": 0.5331, "step": 8079 }, { "epoch": 0.22095821483264055, "grad_norm": 2.100740671157837, "learning_rate": 1.814859200627366e-05, "loss": 0.577, "step": 8080 }, { "epoch": 0.22098556114635748, "grad_norm": 1.5136408805847168, "learning_rate": 1.8148078567981127e-05, "loss": 0.5364, "step": 8081 }, { "epoch": 0.22101290746007438, "grad_norm": 1.5464812517166138, "learning_rate": 1.8147565065769215e-05, "loss": 0.3854, "step": 8082 }, { "epoch": 0.2210402537737913, "grad_norm": 1.808988332748413, "learning_rate": 1.8147051499641944e-05, "loss": 0.6051, "step": 8083 }, { "epoch": 0.2210676000875082, "grad_norm": 1.571283221244812, "learning_rate": 1.8146537869603347e-05, "loss": 0.6234, "step": 8084 }, { "epoch": 0.22109494640122512, "grad_norm": 1.647178292274475, "learning_rate": 1.8146024175657457e-05, "loss": 0.5873, "step": 8085 }, { "epoch": 0.22112229271494202, "grad_norm": 1.4856315851211548, "learning_rate": 1.8145510417808293e-05, "loss": 0.6072, "step": 8086 }, { "epoch": 0.22114963902865895, "grad_norm": 1.5558161735534668, "learning_rate": 1.81449965960599e-05, "loss": 0.5589, "step": 8087 }, { "epoch": 0.22117698534237584, "grad_norm": 1.87714421749115, "learning_rate": 1.8144482710416297e-05, "loss": 0.5623, "step": 8088 }, { "epoch": 0.22120433165609277, "grad_norm": 1.6329888105392456, "learning_rate": 1.8143968760881518e-05, "loss": 0.5526, "step": 8089 }, { "epoch": 0.22123167796980966, "grad_norm": 2.1025407314300537, "learning_rate": 1.8143454747459597e-05, "loss": 0.5917, "step": 8090 }, { "epoch": 0.2212590242835266, "grad_norm": 1.9159332513809204, "learning_rate": 1.8142940670154566e-05, "loss": 0.4891, "step": 8091 }, { "epoch": 0.2212863705972435, "grad_norm": 3.0148632526397705, "learning_rate": 1.8142426528970455e-05, "loss": 0.4566, "step": 8092 }, { "epoch": 0.2213137169109604, "grad_norm": 1.7748417854309082, "learning_rate": 1.81419123239113e-05, "loss": 0.5624, "step": 8093 }, { "epoch": 0.2213410632246773, "grad_norm": 1.6993417739868164, "learning_rate": 1.8141398054981136e-05, "loss": 0.6078, "step": 8094 }, { "epoch": 0.22136840953839423, "grad_norm": 1.4674783945083618, "learning_rate": 1.8140883722183995e-05, "loss": 0.9426, "step": 8095 }, { "epoch": 0.22139575585211113, "grad_norm": 1.508238673210144, "learning_rate": 1.814036932552391e-05, "loss": 0.5464, "step": 8096 }, { "epoch": 0.22142310216582806, "grad_norm": 1.3940850496292114, "learning_rate": 1.8139854865004923e-05, "loss": 0.577, "step": 8097 }, { "epoch": 0.22145044847954495, "grad_norm": 1.7440751791000366, "learning_rate": 1.813934034063106e-05, "loss": 0.5457, "step": 8098 }, { "epoch": 0.22147779479326188, "grad_norm": 4.22845983505249, "learning_rate": 1.8138825752406365e-05, "loss": 0.4525, "step": 8099 }, { "epoch": 0.22150514110697878, "grad_norm": 1.853915810585022, "learning_rate": 1.8138311100334873e-05, "loss": 0.5734, "step": 8100 }, { "epoch": 0.2215324874206957, "grad_norm": 1.4374407529830933, "learning_rate": 1.8137796384420625e-05, "loss": 0.5901, "step": 8101 }, { "epoch": 0.2215598337344126, "grad_norm": 1.2663486003875732, "learning_rate": 1.8137281604667647e-05, "loss": 0.5618, "step": 8102 }, { "epoch": 0.22158718004812952, "grad_norm": 1.5072381496429443, "learning_rate": 1.8136766761079987e-05, "loss": 0.5464, "step": 8103 }, { "epoch": 0.22161452636184642, "grad_norm": 1.5597162246704102, "learning_rate": 1.8136251853661685e-05, "loss": 0.5654, "step": 8104 }, { "epoch": 0.22164187267556335, "grad_norm": 1.6651976108551025, "learning_rate": 1.813573688241677e-05, "loss": 0.5601, "step": 8105 }, { "epoch": 0.22166921898928024, "grad_norm": 1.4074288606643677, "learning_rate": 1.8135221847349297e-05, "loss": 0.5488, "step": 8106 }, { "epoch": 0.22169656530299717, "grad_norm": 2.199964761734009, "learning_rate": 1.8134706748463294e-05, "loss": 0.5242, "step": 8107 }, { "epoch": 0.22172391161671406, "grad_norm": 1.8667137622833252, "learning_rate": 1.8134191585762804e-05, "loss": 0.4376, "step": 8108 }, { "epoch": 0.221751257930431, "grad_norm": 1.4736133813858032, "learning_rate": 1.8133676359251874e-05, "loss": 0.5481, "step": 8109 }, { "epoch": 0.2217786042441479, "grad_norm": 1.5592851638793945, "learning_rate": 1.8133161068934537e-05, "loss": 0.5802, "step": 8110 }, { "epoch": 0.2218059505578648, "grad_norm": 1.4985477924346924, "learning_rate": 1.8132645714814844e-05, "loss": 0.5715, "step": 8111 }, { "epoch": 0.2218332968715817, "grad_norm": 1.7482357025146484, "learning_rate": 1.8132130296896836e-05, "loss": 0.6324, "step": 8112 }, { "epoch": 0.22186064318529863, "grad_norm": 1.7012913227081299, "learning_rate": 1.8131614815184552e-05, "loss": 0.5538, "step": 8113 }, { "epoch": 0.22188798949901553, "grad_norm": 1.516526699066162, "learning_rate": 1.8131099269682037e-05, "loss": 0.5753, "step": 8114 }, { "epoch": 0.22191533581273246, "grad_norm": 1.6526027917861938, "learning_rate": 1.813058366039334e-05, "loss": 0.5511, "step": 8115 }, { "epoch": 0.22194268212644935, "grad_norm": 1.943555474281311, "learning_rate": 1.81300679873225e-05, "loss": 0.4461, "step": 8116 }, { "epoch": 0.22197002844016628, "grad_norm": 1.3454781770706177, "learning_rate": 1.812955225047356e-05, "loss": 0.5547, "step": 8117 }, { "epoch": 0.22199737475388318, "grad_norm": 1.3718348741531372, "learning_rate": 1.8129036449850576e-05, "loss": 0.5325, "step": 8118 }, { "epoch": 0.2220247210676001, "grad_norm": 1.8459235429763794, "learning_rate": 1.8128520585457588e-05, "loss": 0.5292, "step": 8119 }, { "epoch": 0.222052067381317, "grad_norm": 1.7713642120361328, "learning_rate": 1.8128004657298642e-05, "loss": 0.5129, "step": 8120 }, { "epoch": 0.22207941369503392, "grad_norm": 1.448664665222168, "learning_rate": 1.812748866537779e-05, "loss": 0.5815, "step": 8121 }, { "epoch": 0.22210676000875082, "grad_norm": 1.3660598993301392, "learning_rate": 1.8126972609699077e-05, "loss": 0.5711, "step": 8122 }, { "epoch": 0.22213410632246774, "grad_norm": 1.769174337387085, "learning_rate": 1.8126456490266546e-05, "loss": 0.6072, "step": 8123 }, { "epoch": 0.22216145263618464, "grad_norm": 1.8478666543960571, "learning_rate": 1.8125940307084255e-05, "loss": 0.5266, "step": 8124 }, { "epoch": 0.22218879894990157, "grad_norm": 1.5457723140716553, "learning_rate": 1.812542406015625e-05, "loss": 0.9339, "step": 8125 }, { "epoch": 0.22221614526361846, "grad_norm": 1.8266067504882812, "learning_rate": 1.812490774948658e-05, "loss": 0.5379, "step": 8126 }, { "epoch": 0.22224349157733536, "grad_norm": 1.8087482452392578, "learning_rate": 1.8124391375079292e-05, "loss": 0.9366, "step": 8127 }, { "epoch": 0.2222708378910523, "grad_norm": 1.2758413553237915, "learning_rate": 1.812387493693844e-05, "loss": 0.4342, "step": 8128 }, { "epoch": 0.22229818420476918, "grad_norm": 1.9103999137878418, "learning_rate": 1.8123358435068074e-05, "loss": 0.451, "step": 8129 }, { "epoch": 0.2223255305184861, "grad_norm": 1.582167387008667, "learning_rate": 1.812284186947225e-05, "loss": 0.5885, "step": 8130 }, { "epoch": 0.222352876832203, "grad_norm": 1.516294002532959, "learning_rate": 1.8122325240155016e-05, "loss": 0.6567, "step": 8131 }, { "epoch": 0.22238022314591993, "grad_norm": 1.8980780839920044, "learning_rate": 1.812180854712043e-05, "loss": 0.5757, "step": 8132 }, { "epoch": 0.22240756945963683, "grad_norm": 1.5055922269821167, "learning_rate": 1.812129179037254e-05, "loss": 0.5561, "step": 8133 }, { "epoch": 0.22243491577335375, "grad_norm": 2.339817762374878, "learning_rate": 1.81207749699154e-05, "loss": 0.5717, "step": 8134 }, { "epoch": 0.22246226208707065, "grad_norm": 1.6049593687057495, "learning_rate": 1.8120258085753065e-05, "loss": 0.5428, "step": 8135 }, { "epoch": 0.22248960840078758, "grad_norm": 1.5408425331115723, "learning_rate": 1.8119741137889592e-05, "loss": 0.5411, "step": 8136 }, { "epoch": 0.22251695471450447, "grad_norm": 1.70879328250885, "learning_rate": 1.8119224126329035e-05, "loss": 0.5875, "step": 8137 }, { "epoch": 0.2225443010282214, "grad_norm": 1.4467289447784424, "learning_rate": 1.811870705107545e-05, "loss": 0.5305, "step": 8138 }, { "epoch": 0.2225716473419383, "grad_norm": 1.5762032270431519, "learning_rate": 1.8118189912132893e-05, "loss": 0.5491, "step": 8139 }, { "epoch": 0.22259899365565522, "grad_norm": 1.6257703304290771, "learning_rate": 1.811767270950542e-05, "loss": 0.5721, "step": 8140 }, { "epoch": 0.22262633996937212, "grad_norm": 2.139091968536377, "learning_rate": 1.811715544319709e-05, "loss": 0.9602, "step": 8141 }, { "epoch": 0.22265368628308904, "grad_norm": 1.5045827627182007, "learning_rate": 1.811663811321196e-05, "loss": 0.5768, "step": 8142 }, { "epoch": 0.22268103259680594, "grad_norm": 1.6670942306518555, "learning_rate": 1.8116120719554088e-05, "loss": 0.5518, "step": 8143 }, { "epoch": 0.22270837891052286, "grad_norm": 1.4784212112426758, "learning_rate": 1.811560326222753e-05, "loss": 0.396, "step": 8144 }, { "epoch": 0.22273572522423976, "grad_norm": 1.6992169618606567, "learning_rate": 1.811508574123635e-05, "loss": 0.5385, "step": 8145 }, { "epoch": 0.2227630715379567, "grad_norm": 1.4502851963043213, "learning_rate": 1.8114568156584608e-05, "loss": 0.5717, "step": 8146 }, { "epoch": 0.22279041785167358, "grad_norm": 6.545419692993164, "learning_rate": 1.811405050827636e-05, "loss": 0.6361, "step": 8147 }, { "epoch": 0.2228177641653905, "grad_norm": 1.227750301361084, "learning_rate": 1.811353279631567e-05, "loss": 0.5771, "step": 8148 }, { "epoch": 0.2228451104791074, "grad_norm": 1.8623319864273071, "learning_rate": 1.81130150207066e-05, "loss": 0.547, "step": 8149 }, { "epoch": 0.22287245679282433, "grad_norm": 4.971482753753662, "learning_rate": 1.8112497181453212e-05, "loss": 0.5728, "step": 8150 }, { "epoch": 0.22289980310654123, "grad_norm": 1.9338812828063965, "learning_rate": 1.8111979278559563e-05, "loss": 0.5959, "step": 8151 }, { "epoch": 0.22292714942025815, "grad_norm": 1.5302307605743408, "learning_rate": 1.811146131202972e-05, "loss": 0.5784, "step": 8152 }, { "epoch": 0.22295449573397505, "grad_norm": 2.7480645179748535, "learning_rate": 1.8110943281867747e-05, "loss": 0.5449, "step": 8153 }, { "epoch": 0.22298184204769197, "grad_norm": 1.740990161895752, "learning_rate": 1.8110425188077707e-05, "loss": 0.5338, "step": 8154 }, { "epoch": 0.22300918836140887, "grad_norm": 1.7169787883758545, "learning_rate": 1.8109907030663663e-05, "loss": 0.5333, "step": 8155 }, { "epoch": 0.2230365346751258, "grad_norm": 1.4668647050857544, "learning_rate": 1.810938880962968e-05, "loss": 0.5581, "step": 8156 }, { "epoch": 0.2230638809888427, "grad_norm": 1.4341580867767334, "learning_rate": 1.8108870524979826e-05, "loss": 0.5046, "step": 8157 }, { "epoch": 0.22309122730255962, "grad_norm": 1.7003978490829468, "learning_rate": 1.8108352176718164e-05, "loss": 0.557, "step": 8158 }, { "epoch": 0.22311857361627652, "grad_norm": 1.3592252731323242, "learning_rate": 1.810783376484876e-05, "loss": 0.567, "step": 8159 }, { "epoch": 0.22314591992999344, "grad_norm": 1.3084831237792969, "learning_rate": 1.810731528937568e-05, "loss": 0.5717, "step": 8160 }, { "epoch": 0.22317326624371034, "grad_norm": 1.2671891450881958, "learning_rate": 1.8106796750302998e-05, "loss": 0.4099, "step": 8161 }, { "epoch": 0.22320061255742726, "grad_norm": 12.498122215270996, "learning_rate": 1.8106278147634776e-05, "loss": 0.5217, "step": 8162 }, { "epoch": 0.22322795887114416, "grad_norm": 1.5029120445251465, "learning_rate": 1.8105759481375078e-05, "loss": 0.5593, "step": 8163 }, { "epoch": 0.22325530518486109, "grad_norm": 2.1347672939300537, "learning_rate": 1.8105240751527984e-05, "loss": 0.5355, "step": 8164 }, { "epoch": 0.22328265149857798, "grad_norm": 1.27971613407135, "learning_rate": 1.8104721958097554e-05, "loss": 0.5733, "step": 8165 }, { "epoch": 0.2233099978122949, "grad_norm": 1.5041989088058472, "learning_rate": 1.810420310108786e-05, "loss": 0.5664, "step": 8166 }, { "epoch": 0.2233373441260118, "grad_norm": 1.827509880065918, "learning_rate": 1.8103684180502976e-05, "loss": 0.5546, "step": 8167 }, { "epoch": 0.22336469043972873, "grad_norm": 1.7707830667495728, "learning_rate": 1.810316519634697e-05, "loss": 0.5146, "step": 8168 }, { "epoch": 0.22339203675344563, "grad_norm": 2.631443977355957, "learning_rate": 1.8102646148623915e-05, "loss": 0.5299, "step": 8169 }, { "epoch": 0.22341938306716255, "grad_norm": 2.0300302505493164, "learning_rate": 1.8102127037337878e-05, "loss": 0.5212, "step": 8170 }, { "epoch": 0.22344672938087945, "grad_norm": 1.6357866525650024, "learning_rate": 1.8101607862492937e-05, "loss": 0.5679, "step": 8171 }, { "epoch": 0.22347407569459637, "grad_norm": 1.4704781770706177, "learning_rate": 1.8101088624093163e-05, "loss": 0.5637, "step": 8172 }, { "epoch": 0.22350142200831327, "grad_norm": 1.734835147857666, "learning_rate": 1.810056932214262e-05, "loss": 0.5655, "step": 8173 }, { "epoch": 0.2235287683220302, "grad_norm": 1.7758347988128662, "learning_rate": 1.81000499566454e-05, "loss": 0.5314, "step": 8174 }, { "epoch": 0.2235561146357471, "grad_norm": 2.0260796546936035, "learning_rate": 1.8099530527605568e-05, "loss": 0.5632, "step": 8175 }, { "epoch": 0.22358346094946402, "grad_norm": 1.3190006017684937, "learning_rate": 1.8099011035027195e-05, "loss": 0.5759, "step": 8176 }, { "epoch": 0.22361080726318092, "grad_norm": 1.2749654054641724, "learning_rate": 1.8098491478914358e-05, "loss": 0.5351, "step": 8177 }, { "epoch": 0.22363815357689784, "grad_norm": 1.6723514795303345, "learning_rate": 1.8097971859271136e-05, "loss": 0.396, "step": 8178 }, { "epoch": 0.22366549989061474, "grad_norm": 4.091683387756348, "learning_rate": 1.8097452176101605e-05, "loss": 0.5928, "step": 8179 }, { "epoch": 0.22369284620433166, "grad_norm": 1.7581672668457031, "learning_rate": 1.8096932429409838e-05, "loss": 0.4277, "step": 8180 }, { "epoch": 0.22372019251804856, "grad_norm": 1.4917548894882202, "learning_rate": 1.8096412619199917e-05, "loss": 0.5602, "step": 8181 }, { "epoch": 0.22374753883176549, "grad_norm": 3.0840725898742676, "learning_rate": 1.8095892745475918e-05, "loss": 0.5947, "step": 8182 }, { "epoch": 0.22377488514548238, "grad_norm": 1.5103994607925415, "learning_rate": 1.8095372808241918e-05, "loss": 0.571, "step": 8183 }, { "epoch": 0.2238022314591993, "grad_norm": 1.8039544820785522, "learning_rate": 1.8094852807501995e-05, "loss": 0.6249, "step": 8184 }, { "epoch": 0.2238295777729162, "grad_norm": 1.4305988550186157, "learning_rate": 1.8094332743260232e-05, "loss": 0.5412, "step": 8185 }, { "epoch": 0.22385692408663313, "grad_norm": 1.6115895509719849, "learning_rate": 1.8093812615520705e-05, "loss": 0.5687, "step": 8186 }, { "epoch": 0.22388427040035003, "grad_norm": 1.8958637714385986, "learning_rate": 1.8093292424287497e-05, "loss": 0.5412, "step": 8187 }, { "epoch": 0.22391161671406695, "grad_norm": 10.001611709594727, "learning_rate": 1.8092772169564687e-05, "loss": 0.9333, "step": 8188 }, { "epoch": 0.22393896302778385, "grad_norm": 1.8582459688186646, "learning_rate": 1.8092251851356355e-05, "loss": 0.5332, "step": 8189 }, { "epoch": 0.22396630934150077, "grad_norm": 1.4907456636428833, "learning_rate": 1.8091731469666586e-05, "loss": 0.5555, "step": 8190 }, { "epoch": 0.22399365565521767, "grad_norm": 1.242994785308838, "learning_rate": 1.8091211024499463e-05, "loss": 0.542, "step": 8191 }, { "epoch": 0.2240210019689346, "grad_norm": 1.5554914474487305, "learning_rate": 1.8090690515859062e-05, "loss": 0.5584, "step": 8192 }, { "epoch": 0.2240483482826515, "grad_norm": 2.4616363048553467, "learning_rate": 1.8090169943749477e-05, "loss": 0.5872, "step": 8193 }, { "epoch": 0.22407569459636842, "grad_norm": 1.7437032461166382, "learning_rate": 1.8089649308174782e-05, "loss": 0.8993, "step": 8194 }, { "epoch": 0.22410304091008532, "grad_norm": 1.5496701002120972, "learning_rate": 1.8089128609139064e-05, "loss": 0.5831, "step": 8195 }, { "epoch": 0.22413038722380224, "grad_norm": 1.4084234237670898, "learning_rate": 1.808860784664641e-05, "loss": 0.5792, "step": 8196 }, { "epoch": 0.22415773353751914, "grad_norm": 1.9647085666656494, "learning_rate": 1.80880870207009e-05, "loss": 0.5598, "step": 8197 }, { "epoch": 0.22418507985123606, "grad_norm": 1.5117994546890259, "learning_rate": 1.808756613130663e-05, "loss": 0.5164, "step": 8198 }, { "epoch": 0.22421242616495296, "grad_norm": 1.5414224863052368, "learning_rate": 1.8087045178467675e-05, "loss": 0.5559, "step": 8199 }, { "epoch": 0.22423977247866989, "grad_norm": 1.4914015531539917, "learning_rate": 1.8086524162188127e-05, "loss": 0.5276, "step": 8200 }, { "epoch": 0.22426711879238678, "grad_norm": 2.6014814376831055, "learning_rate": 1.8086003082472075e-05, "loss": 0.4839, "step": 8201 }, { "epoch": 0.2242944651061037, "grad_norm": 1.4957008361816406, "learning_rate": 1.8085481939323604e-05, "loss": 0.5998, "step": 8202 }, { "epoch": 0.2243218114198206, "grad_norm": 1.5945568084716797, "learning_rate": 1.80849607327468e-05, "loss": 0.5396, "step": 8203 }, { "epoch": 0.22434915773353753, "grad_norm": 1.7744941711425781, "learning_rate": 1.8084439462745757e-05, "loss": 0.4211, "step": 8204 }, { "epoch": 0.22437650404725443, "grad_norm": 1.6515041589736938, "learning_rate": 1.808391812932456e-05, "loss": 0.5663, "step": 8205 }, { "epoch": 0.22440385036097135, "grad_norm": 1.6825491189956665, "learning_rate": 1.8083396732487302e-05, "loss": 0.5459, "step": 8206 }, { "epoch": 0.22443119667468825, "grad_norm": 1.5541675090789795, "learning_rate": 1.808287527223807e-05, "loss": 0.5663, "step": 8207 }, { "epoch": 0.22445854298840517, "grad_norm": 1.495126485824585, "learning_rate": 1.808235374858096e-05, "loss": 0.595, "step": 8208 }, { "epoch": 0.22448588930212207, "grad_norm": 1.3132092952728271, "learning_rate": 1.8081832161520053e-05, "loss": 0.5677, "step": 8209 }, { "epoch": 0.224513235615839, "grad_norm": 1.648897409439087, "learning_rate": 1.808131051105945e-05, "loss": 0.5227, "step": 8210 }, { "epoch": 0.2245405819295559, "grad_norm": 4.389049530029297, "learning_rate": 1.808078879720324e-05, "loss": 0.5511, "step": 8211 }, { "epoch": 0.22456792824327282, "grad_norm": 1.711868166923523, "learning_rate": 1.808026701995552e-05, "loss": 0.5747, "step": 8212 }, { "epoch": 0.22459527455698972, "grad_norm": 1.477725625038147, "learning_rate": 1.8079745179320377e-05, "loss": 0.9445, "step": 8213 }, { "epoch": 0.22462262087070664, "grad_norm": 1.7124212980270386, "learning_rate": 1.8079223275301904e-05, "loss": 0.6217, "step": 8214 }, { "epoch": 0.22464996718442354, "grad_norm": 1.3566774129867554, "learning_rate": 1.8078701307904197e-05, "loss": 0.5876, "step": 8215 }, { "epoch": 0.22467731349814046, "grad_norm": 3.30546236038208, "learning_rate": 1.8078179277131357e-05, "loss": 0.4366, "step": 8216 }, { "epoch": 0.22470465981185736, "grad_norm": 2.654315948486328, "learning_rate": 1.8077657182987472e-05, "loss": 0.5713, "step": 8217 }, { "epoch": 0.22473200612557429, "grad_norm": 1.5168126821517944, "learning_rate": 1.8077135025476636e-05, "loss": 0.527, "step": 8218 }, { "epoch": 0.22475935243929118, "grad_norm": 2.5679919719696045, "learning_rate": 1.8076612804602955e-05, "loss": 0.586, "step": 8219 }, { "epoch": 0.2247866987530081, "grad_norm": 1.4183108806610107, "learning_rate": 1.8076090520370514e-05, "loss": 0.5729, "step": 8220 }, { "epoch": 0.224814045066725, "grad_norm": 1.5295205116271973, "learning_rate": 1.807556817278342e-05, "loss": 0.6075, "step": 8221 }, { "epoch": 0.22484139138044193, "grad_norm": 2.0625927448272705, "learning_rate": 1.8075045761845765e-05, "loss": 0.5713, "step": 8222 }, { "epoch": 0.22486873769415883, "grad_norm": 1.485632300376892, "learning_rate": 1.807452328756165e-05, "loss": 0.8983, "step": 8223 }, { "epoch": 0.22489608400787575, "grad_norm": 1.99617338180542, "learning_rate": 1.807400074993517e-05, "loss": 0.5513, "step": 8224 }, { "epoch": 0.22492343032159265, "grad_norm": 1.4257441759109497, "learning_rate": 1.8073478148970424e-05, "loss": 0.4482, "step": 8225 }, { "epoch": 0.22495077663530957, "grad_norm": 1.7929043769836426, "learning_rate": 1.807295548467152e-05, "loss": 0.4516, "step": 8226 }, { "epoch": 0.22497812294902647, "grad_norm": 1.6712969541549683, "learning_rate": 1.8072432757042545e-05, "loss": 0.5771, "step": 8227 }, { "epoch": 0.22500546926274337, "grad_norm": 1.4515254497528076, "learning_rate": 1.807190996608761e-05, "loss": 0.5829, "step": 8228 }, { "epoch": 0.2250328155764603, "grad_norm": 1.5478488206863403, "learning_rate": 1.8071387111810814e-05, "loss": 0.5734, "step": 8229 }, { "epoch": 0.2250601618901772, "grad_norm": 1.4882584810256958, "learning_rate": 1.8070864194216257e-05, "loss": 0.9392, "step": 8230 }, { "epoch": 0.22508750820389412, "grad_norm": 1.3547626733779907, "learning_rate": 1.8070341213308044e-05, "loss": 0.5197, "step": 8231 }, { "epoch": 0.225114854517611, "grad_norm": 1.3035013675689697, "learning_rate": 1.806981816909027e-05, "loss": 0.5715, "step": 8232 }, { "epoch": 0.22514220083132794, "grad_norm": 1.579398274421692, "learning_rate": 1.806929506156705e-05, "loss": 0.5476, "step": 8233 }, { "epoch": 0.22516954714504483, "grad_norm": 1.8469737768173218, "learning_rate": 1.8068771890742476e-05, "loss": 0.5732, "step": 8234 }, { "epoch": 0.22519689345876176, "grad_norm": 1.5077606439590454, "learning_rate": 1.8068248656620658e-05, "loss": 0.5322, "step": 8235 }, { "epoch": 0.22522423977247866, "grad_norm": 1.778806209564209, "learning_rate": 1.8067725359205702e-05, "loss": 0.5561, "step": 8236 }, { "epoch": 0.22525158608619558, "grad_norm": 1.6064625978469849, "learning_rate": 1.806720199850171e-05, "loss": 0.543, "step": 8237 }, { "epoch": 0.22527893239991248, "grad_norm": 1.8671753406524658, "learning_rate": 1.806667857451279e-05, "loss": 0.9211, "step": 8238 }, { "epoch": 0.2253062787136294, "grad_norm": 1.5666694641113281, "learning_rate": 1.8066155087243045e-05, "loss": 0.5833, "step": 8239 }, { "epoch": 0.2253336250273463, "grad_norm": 1.2207258939743042, "learning_rate": 1.8065631536696582e-05, "loss": 0.5515, "step": 8240 }, { "epoch": 0.22536097134106323, "grad_norm": 1.4673038721084595, "learning_rate": 1.806510792287751e-05, "loss": 0.5687, "step": 8241 }, { "epoch": 0.22538831765478012, "grad_norm": 1.4773731231689453, "learning_rate": 1.806458424578994e-05, "loss": 0.925, "step": 8242 }, { "epoch": 0.22541566396849705, "grad_norm": 1.5392521619796753, "learning_rate": 1.8064060505437973e-05, "loss": 0.5826, "step": 8243 }, { "epoch": 0.22544301028221395, "grad_norm": 1.651676893234253, "learning_rate": 1.806353670182572e-05, "loss": 0.5666, "step": 8244 }, { "epoch": 0.22547035659593087, "grad_norm": 1.5068297386169434, "learning_rate": 1.8063012834957292e-05, "loss": 0.5701, "step": 8245 }, { "epoch": 0.22549770290964777, "grad_norm": 1.4668878316879272, "learning_rate": 1.80624889048368e-05, "loss": 0.554, "step": 8246 }, { "epoch": 0.2255250492233647, "grad_norm": 1.4891624450683594, "learning_rate": 1.806196491146835e-05, "loss": 0.5752, "step": 8247 }, { "epoch": 0.2255523955370816, "grad_norm": 3.015920400619507, "learning_rate": 1.806144085485605e-05, "loss": 0.9374, "step": 8248 }, { "epoch": 0.22557974185079852, "grad_norm": 1.1867324113845825, "learning_rate": 1.8060916735004016e-05, "loss": 0.5655, "step": 8249 }, { "epoch": 0.2256070881645154, "grad_norm": 1.5761349201202393, "learning_rate": 1.8060392551916364e-05, "loss": 0.538, "step": 8250 }, { "epoch": 0.22563443447823234, "grad_norm": 1.4610209465026855, "learning_rate": 1.8059868305597197e-05, "loss": 0.5651, "step": 8251 }, { "epoch": 0.22566178079194923, "grad_norm": 1.386548638343811, "learning_rate": 1.805934399605063e-05, "loss": 0.5683, "step": 8252 }, { "epoch": 0.22568912710566616, "grad_norm": 1.2935259342193604, "learning_rate": 1.805881962328078e-05, "loss": 0.5638, "step": 8253 }, { "epoch": 0.22571647341938306, "grad_norm": 1.4191937446594238, "learning_rate": 1.805829518729176e-05, "loss": 0.5624, "step": 8254 }, { "epoch": 0.22574381973309998, "grad_norm": 1.567392110824585, "learning_rate": 1.805777068808768e-05, "loss": 0.5502, "step": 8255 }, { "epoch": 0.22577116604681688, "grad_norm": 2.3249051570892334, "learning_rate": 1.8057246125672655e-05, "loss": 0.5298, "step": 8256 }, { "epoch": 0.2257985123605338, "grad_norm": 1.8574424982070923, "learning_rate": 1.8056721500050803e-05, "loss": 0.6201, "step": 8257 }, { "epoch": 0.2258258586742507, "grad_norm": 1.2381632328033447, "learning_rate": 1.805619681122624e-05, "loss": 0.5749, "step": 8258 }, { "epoch": 0.22585320498796763, "grad_norm": 1.3539270162582397, "learning_rate": 1.8055672059203074e-05, "loss": 0.5455, "step": 8259 }, { "epoch": 0.22588055130168452, "grad_norm": 2.898543357849121, "learning_rate": 1.8055147243985436e-05, "loss": 0.4928, "step": 8260 }, { "epoch": 0.22590789761540145, "grad_norm": 1.1094311475753784, "learning_rate": 1.805462236557743e-05, "loss": 0.5587, "step": 8261 }, { "epoch": 0.22593524392911835, "grad_norm": 1.7526966333389282, "learning_rate": 1.8054097423983178e-05, "loss": 0.5767, "step": 8262 }, { "epoch": 0.22596259024283527, "grad_norm": 1.3198472261428833, "learning_rate": 1.80535724192068e-05, "loss": 0.5768, "step": 8263 }, { "epoch": 0.22598993655655217, "grad_norm": 1.6747117042541504, "learning_rate": 1.8053047351252413e-05, "loss": 0.5695, "step": 8264 }, { "epoch": 0.2260172828702691, "grad_norm": 1.3233697414398193, "learning_rate": 1.8052522220124134e-05, "loss": 0.5339, "step": 8265 }, { "epoch": 0.226044629183986, "grad_norm": 1.2409770488739014, "learning_rate": 1.8051997025826086e-05, "loss": 0.5613, "step": 8266 }, { "epoch": 0.22607197549770292, "grad_norm": 1.4328978061676025, "learning_rate": 1.8051471768362386e-05, "loss": 0.5819, "step": 8267 }, { "epoch": 0.2260993218114198, "grad_norm": 1.724828839302063, "learning_rate": 1.805094644773716e-05, "loss": 0.9196, "step": 8268 }, { "epoch": 0.22612666812513674, "grad_norm": 1.7032935619354248, "learning_rate": 1.805042106395452e-05, "loss": 0.4181, "step": 8269 }, { "epoch": 0.22615401443885363, "grad_norm": 1.4272605180740356, "learning_rate": 1.8049895617018593e-05, "loss": 0.5638, "step": 8270 }, { "epoch": 0.22618136075257056, "grad_norm": 1.7232285737991333, "learning_rate": 1.80493701069335e-05, "loss": 0.5401, "step": 8271 }, { "epoch": 0.22620870706628746, "grad_norm": 1.8162015676498413, "learning_rate": 1.804884453370337e-05, "loss": 0.37, "step": 8272 }, { "epoch": 0.22623605338000438, "grad_norm": 1.325782299041748, "learning_rate": 1.8048318897332312e-05, "loss": 0.5669, "step": 8273 }, { "epoch": 0.22626339969372128, "grad_norm": 1.452662706375122, "learning_rate": 1.804779319782446e-05, "loss": 0.5865, "step": 8274 }, { "epoch": 0.2262907460074382, "grad_norm": 1.388016700744629, "learning_rate": 1.8047267435183938e-05, "loss": 0.5515, "step": 8275 }, { "epoch": 0.2263180923211551, "grad_norm": 1.3421289920806885, "learning_rate": 1.8046741609414862e-05, "loss": 0.5455, "step": 8276 }, { "epoch": 0.22634543863487203, "grad_norm": 1.5337474346160889, "learning_rate": 1.8046215720521368e-05, "loss": 0.5521, "step": 8277 }, { "epoch": 0.22637278494858892, "grad_norm": 1.6731568574905396, "learning_rate": 1.8045689768507574e-05, "loss": 0.5114, "step": 8278 }, { "epoch": 0.22640013126230585, "grad_norm": 1.71560537815094, "learning_rate": 1.8045163753377605e-05, "loss": 0.6108, "step": 8279 }, { "epoch": 0.22642747757602275, "grad_norm": 1.4641846418380737, "learning_rate": 1.8044637675135594e-05, "loss": 0.5638, "step": 8280 }, { "epoch": 0.22645482388973967, "grad_norm": 1.443816065788269, "learning_rate": 1.8044111533785665e-05, "loss": 0.9254, "step": 8281 }, { "epoch": 0.22648217020345657, "grad_norm": 3.9729857444763184, "learning_rate": 1.8043585329331943e-05, "loss": 0.5796, "step": 8282 }, { "epoch": 0.2265095165171735, "grad_norm": 1.3634940385818481, "learning_rate": 1.804305906177856e-05, "loss": 0.5549, "step": 8283 }, { "epoch": 0.2265368628308904, "grad_norm": 1.2903168201446533, "learning_rate": 1.8042532731129637e-05, "loss": 0.5305, "step": 8284 }, { "epoch": 0.22656420914460731, "grad_norm": 1.4144387245178223, "learning_rate": 1.8042006337389314e-05, "loss": 0.5421, "step": 8285 }, { "epoch": 0.2265915554583242, "grad_norm": 2.0166521072387695, "learning_rate": 1.804147988056171e-05, "loss": 0.5402, "step": 8286 }, { "epoch": 0.22661890177204114, "grad_norm": 1.3096354007720947, "learning_rate": 1.8040953360650963e-05, "loss": 0.5596, "step": 8287 }, { "epoch": 0.22664624808575803, "grad_norm": 1.6842989921569824, "learning_rate": 1.8040426777661198e-05, "loss": 0.5178, "step": 8288 }, { "epoch": 0.22667359439947496, "grad_norm": 1.6566940546035767, "learning_rate": 1.8039900131596546e-05, "loss": 0.5635, "step": 8289 }, { "epoch": 0.22670094071319186, "grad_norm": 1.7378135919570923, "learning_rate": 1.8039373422461143e-05, "loss": 0.569, "step": 8290 }, { "epoch": 0.22672828702690878, "grad_norm": 1.3014004230499268, "learning_rate": 1.8038846650259116e-05, "loss": 0.5315, "step": 8291 }, { "epoch": 0.22675563334062568, "grad_norm": 2.762686014175415, "learning_rate": 1.8038319814994602e-05, "loss": 0.6072, "step": 8292 }, { "epoch": 0.2267829796543426, "grad_norm": 1.3505595922470093, "learning_rate": 1.8037792916671727e-05, "loss": 0.5371, "step": 8293 }, { "epoch": 0.2268103259680595, "grad_norm": 1.5409139394760132, "learning_rate": 1.8037265955294632e-05, "loss": 0.5615, "step": 8294 }, { "epoch": 0.22683767228177643, "grad_norm": 1.4960674047470093, "learning_rate": 1.8036738930867447e-05, "loss": 0.5692, "step": 8295 }, { "epoch": 0.22686501859549332, "grad_norm": 2.0985848903656006, "learning_rate": 1.8036211843394306e-05, "loss": 0.5817, "step": 8296 }, { "epoch": 0.22689236490921025, "grad_norm": 1.3134886026382446, "learning_rate": 1.8035684692879348e-05, "loss": 0.5374, "step": 8297 }, { "epoch": 0.22691971122292715, "grad_norm": 1.8614580631256104, "learning_rate": 1.80351574793267e-05, "loss": 0.4441, "step": 8298 }, { "epoch": 0.22694705753664407, "grad_norm": 1.6735841035842896, "learning_rate": 1.8034630202740504e-05, "loss": 0.5683, "step": 8299 }, { "epoch": 0.22697440385036097, "grad_norm": 1.561907410621643, "learning_rate": 1.80341028631249e-05, "loss": 0.5114, "step": 8300 }, { "epoch": 0.2270017501640779, "grad_norm": 1.749639868736267, "learning_rate": 1.8033575460484015e-05, "loss": 0.5344, "step": 8301 }, { "epoch": 0.2270290964777948, "grad_norm": 1.6675516366958618, "learning_rate": 1.803304799482199e-05, "loss": 0.5685, "step": 8302 }, { "epoch": 0.22705644279151171, "grad_norm": 1.427248239517212, "learning_rate": 1.8032520466142965e-05, "loss": 0.5742, "step": 8303 }, { "epoch": 0.2270837891052286, "grad_norm": 1.5400848388671875, "learning_rate": 1.803199287445108e-05, "loss": 0.5483, "step": 8304 }, { "epoch": 0.22711113541894554, "grad_norm": 2.107037305831909, "learning_rate": 1.8031465219750473e-05, "loss": 0.5746, "step": 8305 }, { "epoch": 0.22713848173266243, "grad_norm": 1.6228965520858765, "learning_rate": 1.8030937502045276e-05, "loss": 0.5063, "step": 8306 }, { "epoch": 0.22716582804637936, "grad_norm": 1.6077611446380615, "learning_rate": 1.803040972133964e-05, "loss": 0.5765, "step": 8307 }, { "epoch": 0.22719317436009626, "grad_norm": 1.4644417762756348, "learning_rate": 1.8029881877637697e-05, "loss": 0.5573, "step": 8308 }, { "epoch": 0.22722052067381318, "grad_norm": 1.8342058658599854, "learning_rate": 1.802935397094359e-05, "loss": 0.5563, "step": 8309 }, { "epoch": 0.22724786698753008, "grad_norm": 1.211134433746338, "learning_rate": 1.8028826001261463e-05, "loss": 0.5596, "step": 8310 }, { "epoch": 0.227275213301247, "grad_norm": 1.5332536697387695, "learning_rate": 1.8028297968595453e-05, "loss": 0.9209, "step": 8311 }, { "epoch": 0.2273025596149639, "grad_norm": 1.5248790979385376, "learning_rate": 1.8027769872949705e-05, "loss": 0.5318, "step": 8312 }, { "epoch": 0.22732990592868083, "grad_norm": 1.3565795421600342, "learning_rate": 1.802724171432836e-05, "loss": 0.5726, "step": 8313 }, { "epoch": 0.22735725224239772, "grad_norm": 1.7129344940185547, "learning_rate": 1.802671349273557e-05, "loss": 0.4809, "step": 8314 }, { "epoch": 0.22738459855611465, "grad_norm": 1.3395670652389526, "learning_rate": 1.8026185208175463e-05, "loss": 0.5811, "step": 8315 }, { "epoch": 0.22741194486983154, "grad_norm": 1.3873265981674194, "learning_rate": 1.8025656860652196e-05, "loss": 0.5884, "step": 8316 }, { "epoch": 0.22743929118354847, "grad_norm": 2.0593767166137695, "learning_rate": 1.8025128450169908e-05, "loss": 0.5011, "step": 8317 }, { "epoch": 0.22746663749726537, "grad_norm": 2.140899658203125, "learning_rate": 1.8024599976732747e-05, "loss": 0.5564, "step": 8318 }, { "epoch": 0.2274939838109823, "grad_norm": 3.4230740070343018, "learning_rate": 1.802407144034486e-05, "loss": 0.5507, "step": 8319 }, { "epoch": 0.2275213301246992, "grad_norm": 6.933818340301514, "learning_rate": 1.8023542841010387e-05, "loss": 0.5773, "step": 8320 }, { "epoch": 0.22754867643841611, "grad_norm": 1.32815682888031, "learning_rate": 1.802301417873348e-05, "loss": 0.4851, "step": 8321 }, { "epoch": 0.227576022752133, "grad_norm": 1.1766464710235596, "learning_rate": 1.8022485453518284e-05, "loss": 0.5985, "step": 8322 }, { "epoch": 0.22760336906584994, "grad_norm": 1.3190586566925049, "learning_rate": 1.802195666536895e-05, "loss": 0.6009, "step": 8323 }, { "epoch": 0.22763071537956683, "grad_norm": 1.588657021522522, "learning_rate": 1.802142781428962e-05, "loss": 0.5865, "step": 8324 }, { "epoch": 0.22765806169328376, "grad_norm": 1.9229069948196411, "learning_rate": 1.8020898900284445e-05, "loss": 0.5702, "step": 8325 }, { "epoch": 0.22768540800700066, "grad_norm": 1.558884620666504, "learning_rate": 1.8020369923357578e-05, "loss": 0.5585, "step": 8326 }, { "epoch": 0.22771275432071758, "grad_norm": 1.6255574226379395, "learning_rate": 1.8019840883513168e-05, "loss": 0.5345, "step": 8327 }, { "epoch": 0.22774010063443448, "grad_norm": 1.5856776237487793, "learning_rate": 1.801931178075536e-05, "loss": 0.4773, "step": 8328 }, { "epoch": 0.2277674469481514, "grad_norm": 1.6156601905822754, "learning_rate": 1.8018782615088308e-05, "loss": 0.4107, "step": 8329 }, { "epoch": 0.2277947932618683, "grad_norm": 3.0748050212860107, "learning_rate": 1.801825338651616e-05, "loss": 0.5728, "step": 8330 }, { "epoch": 0.2278221395755852, "grad_norm": 1.5242094993591309, "learning_rate": 1.8017724095043078e-05, "loss": 0.5718, "step": 8331 }, { "epoch": 0.22784948588930212, "grad_norm": 2.6501338481903076, "learning_rate": 1.8017194740673203e-05, "loss": 0.9538, "step": 8332 }, { "epoch": 0.22787683220301902, "grad_norm": 1.6495413780212402, "learning_rate": 1.8016665323410692e-05, "loss": 0.591, "step": 8333 }, { "epoch": 0.22790417851673594, "grad_norm": 1.5606772899627686, "learning_rate": 1.8016135843259698e-05, "loss": 0.577, "step": 8334 }, { "epoch": 0.22793152483045284, "grad_norm": 1.6156903505325317, "learning_rate": 1.8015606300224374e-05, "loss": 0.9073, "step": 8335 }, { "epoch": 0.22795887114416977, "grad_norm": 1.1972404718399048, "learning_rate": 1.8015076694308872e-05, "loss": 0.5561, "step": 8336 }, { "epoch": 0.22798621745788666, "grad_norm": 1.476880669593811, "learning_rate": 1.8014547025517352e-05, "loss": 0.5906, "step": 8337 }, { "epoch": 0.2280135637716036, "grad_norm": 2.5607612133026123, "learning_rate": 1.8014017293853965e-05, "loss": 0.5873, "step": 8338 }, { "epoch": 0.2280409100853205, "grad_norm": 1.7723695039749146, "learning_rate": 1.801348749932287e-05, "loss": 0.5719, "step": 8339 }, { "epoch": 0.2280682563990374, "grad_norm": 1.9213671684265137, "learning_rate": 1.8012957641928216e-05, "loss": 0.6026, "step": 8340 }, { "epoch": 0.2280956027127543, "grad_norm": 1.248243808746338, "learning_rate": 1.801242772167417e-05, "loss": 0.569, "step": 8341 }, { "epoch": 0.22812294902647123, "grad_norm": 1.2294429540634155, "learning_rate": 1.8011897738564877e-05, "loss": 0.5425, "step": 8342 }, { "epoch": 0.22815029534018813, "grad_norm": 1.2372037172317505, "learning_rate": 1.8011367692604507e-05, "loss": 0.55, "step": 8343 }, { "epoch": 0.22817764165390506, "grad_norm": 1.6031328439712524, "learning_rate": 1.8010837583797206e-05, "loss": 0.5303, "step": 8344 }, { "epoch": 0.22820498796762195, "grad_norm": 2.6548235416412354, "learning_rate": 1.8010307412147145e-05, "loss": 0.9263, "step": 8345 }, { "epoch": 0.22823233428133888, "grad_norm": 1.816235065460205, "learning_rate": 1.8009777177658475e-05, "loss": 0.5935, "step": 8346 }, { "epoch": 0.22825968059505578, "grad_norm": 1.29020094871521, "learning_rate": 1.8009246880335355e-05, "loss": 0.5572, "step": 8347 }, { "epoch": 0.2282870269087727, "grad_norm": 1.4140188694000244, "learning_rate": 1.8008716520181947e-05, "loss": 0.576, "step": 8348 }, { "epoch": 0.2283143732224896, "grad_norm": 1.069159746170044, "learning_rate": 1.800818609720241e-05, "loss": 0.5502, "step": 8349 }, { "epoch": 0.22834171953620652, "grad_norm": 1.2277113199234009, "learning_rate": 1.800765561140091e-05, "loss": 0.5693, "step": 8350 }, { "epoch": 0.22836906584992342, "grad_norm": 1.251132607460022, "learning_rate": 1.800712506278161e-05, "loss": 0.5508, "step": 8351 }, { "epoch": 0.22839641216364034, "grad_norm": 1.4502925872802734, "learning_rate": 1.800659445134866e-05, "loss": 0.9158, "step": 8352 }, { "epoch": 0.22842375847735724, "grad_norm": 1.8623608350753784, "learning_rate": 1.800606377710623e-05, "loss": 0.6035, "step": 8353 }, { "epoch": 0.22845110479107417, "grad_norm": 1.6180766820907593, "learning_rate": 1.8005533040058488e-05, "loss": 0.5239, "step": 8354 }, { "epoch": 0.22847845110479106, "grad_norm": 2.045267343521118, "learning_rate": 1.800500224020959e-05, "loss": 0.4989, "step": 8355 }, { "epoch": 0.228505797418508, "grad_norm": 1.2353094816207886, "learning_rate": 1.8004471377563697e-05, "loss": 0.5271, "step": 8356 }, { "epoch": 0.22853314373222489, "grad_norm": 1.4066118001937866, "learning_rate": 1.8003940452124983e-05, "loss": 0.5416, "step": 8357 }, { "epoch": 0.2285604900459418, "grad_norm": 1.6505869626998901, "learning_rate": 1.800340946389761e-05, "loss": 0.5036, "step": 8358 }, { "epoch": 0.2285878363596587, "grad_norm": 1.571224331855774, "learning_rate": 1.800287841288574e-05, "loss": 0.8962, "step": 8359 }, { "epoch": 0.22861518267337563, "grad_norm": 1.5489251613616943, "learning_rate": 1.800234729909354e-05, "loss": 0.5437, "step": 8360 }, { "epoch": 0.22864252898709253, "grad_norm": 1.3718867301940918, "learning_rate": 1.800181612252518e-05, "loss": 0.5655, "step": 8361 }, { "epoch": 0.22866987530080946, "grad_norm": 1.5178980827331543, "learning_rate": 1.8001284883184828e-05, "loss": 0.9569, "step": 8362 }, { "epoch": 0.22869722161452635, "grad_norm": 1.92088782787323, "learning_rate": 1.8000753581076644e-05, "loss": 0.5456, "step": 8363 }, { "epoch": 0.22872456792824328, "grad_norm": 1.5394859313964844, "learning_rate": 1.80002222162048e-05, "loss": 0.5435, "step": 8364 }, { "epoch": 0.22875191424196017, "grad_norm": 1.4036860466003418, "learning_rate": 1.799969078857346e-05, "loss": 0.8915, "step": 8365 }, { "epoch": 0.2287792605556771, "grad_norm": 1.2834253311157227, "learning_rate": 1.79991592981868e-05, "loss": 0.5654, "step": 8366 }, { "epoch": 0.228806606869394, "grad_norm": 1.6617701053619385, "learning_rate": 1.7998627745048986e-05, "loss": 0.5605, "step": 8367 }, { "epoch": 0.22883395318311092, "grad_norm": 1.3148139715194702, "learning_rate": 1.799809612916419e-05, "loss": 0.5627, "step": 8368 }, { "epoch": 0.22886129949682782, "grad_norm": 3.285499095916748, "learning_rate": 1.799756445053658e-05, "loss": 0.572, "step": 8369 }, { "epoch": 0.22888864581054474, "grad_norm": 1.7517385482788086, "learning_rate": 1.7997032709170327e-05, "loss": 0.4996, "step": 8370 }, { "epoch": 0.22891599212426164, "grad_norm": 1.2990918159484863, "learning_rate": 1.7996500905069605e-05, "loss": 0.5141, "step": 8371 }, { "epoch": 0.22894333843797857, "grad_norm": 1.5593563318252563, "learning_rate": 1.7995969038238584e-05, "loss": 0.5519, "step": 8372 }, { "epoch": 0.22897068475169546, "grad_norm": 1.2706232070922852, "learning_rate": 1.7995437108681434e-05, "loss": 0.5576, "step": 8373 }, { "epoch": 0.2289980310654124, "grad_norm": 1.3076483011245728, "learning_rate": 1.799490511640233e-05, "loss": 0.6001, "step": 8374 }, { "epoch": 0.22902537737912929, "grad_norm": 1.393858551979065, "learning_rate": 1.7994373061405445e-05, "loss": 0.5686, "step": 8375 }, { "epoch": 0.2290527236928462, "grad_norm": 1.2635151147842407, "learning_rate": 1.7993840943694954e-05, "loss": 0.5486, "step": 8376 }, { "epoch": 0.2290800700065631, "grad_norm": 1.4765681028366089, "learning_rate": 1.799330876327503e-05, "loss": 0.5627, "step": 8377 }, { "epoch": 0.22910741632028003, "grad_norm": 1.5416215658187866, "learning_rate": 1.7992776520149847e-05, "loss": 0.5995, "step": 8378 }, { "epoch": 0.22913476263399693, "grad_norm": 1.2693595886230469, "learning_rate": 1.7992244214323585e-05, "loss": 0.5559, "step": 8379 }, { "epoch": 0.22916210894771386, "grad_norm": 1.3922703266143799, "learning_rate": 1.7991711845800415e-05, "loss": 0.5536, "step": 8380 }, { "epoch": 0.22918945526143075, "grad_norm": 1.3673512935638428, "learning_rate": 1.7991179414584515e-05, "loss": 0.5568, "step": 8381 }, { "epoch": 0.22921680157514768, "grad_norm": 1.427992820739746, "learning_rate": 1.7990646920680063e-05, "loss": 0.5594, "step": 8382 }, { "epoch": 0.22924414788886457, "grad_norm": 1.6240748167037964, "learning_rate": 1.7990114364091228e-05, "loss": 0.8968, "step": 8383 }, { "epoch": 0.2292714942025815, "grad_norm": 1.6622916460037231, "learning_rate": 1.79895817448222e-05, "loss": 0.535, "step": 8384 }, { "epoch": 0.2292988405162984, "grad_norm": 1.359217882156372, "learning_rate": 1.798904906287715e-05, "loss": 0.5169, "step": 8385 }, { "epoch": 0.22932618683001532, "grad_norm": 1.8693597316741943, "learning_rate": 1.798851631826026e-05, "loss": 0.5746, "step": 8386 }, { "epoch": 0.22935353314373222, "grad_norm": 1.440511703491211, "learning_rate": 1.7987983510975703e-05, "loss": 0.5307, "step": 8387 }, { "epoch": 0.22938087945744914, "grad_norm": 1.3670181035995483, "learning_rate": 1.798745064102767e-05, "loss": 0.5639, "step": 8388 }, { "epoch": 0.22940822577116604, "grad_norm": 1.123405933380127, "learning_rate": 1.7986917708420334e-05, "loss": 0.5594, "step": 8389 }, { "epoch": 0.22943557208488297, "grad_norm": 1.7197766304016113, "learning_rate": 1.798638471315787e-05, "loss": 0.5224, "step": 8390 }, { "epoch": 0.22946291839859986, "grad_norm": 1.3430794477462769, "learning_rate": 1.7985851655244467e-05, "loss": 0.5879, "step": 8391 }, { "epoch": 0.2294902647123168, "grad_norm": 1.2521191835403442, "learning_rate": 1.798531853468431e-05, "loss": 0.5258, "step": 8392 }, { "epoch": 0.22951761102603369, "grad_norm": 3.916656494140625, "learning_rate": 1.7984785351481573e-05, "loss": 0.5663, "step": 8393 }, { "epoch": 0.2295449573397506, "grad_norm": 1.273544192314148, "learning_rate": 1.7984252105640443e-05, "loss": 0.5532, "step": 8394 }, { "epoch": 0.2295723036534675, "grad_norm": 1.54414701461792, "learning_rate": 1.7983718797165103e-05, "loss": 0.5763, "step": 8395 }, { "epoch": 0.22959964996718443, "grad_norm": 1.7499581575393677, "learning_rate": 1.7983185426059734e-05, "loss": 0.5815, "step": 8396 }, { "epoch": 0.22962699628090133, "grad_norm": 1.5520583391189575, "learning_rate": 1.7982651992328523e-05, "loss": 0.5467, "step": 8397 }, { "epoch": 0.22965434259461825, "grad_norm": 1.9392542839050293, "learning_rate": 1.7982118495975654e-05, "loss": 0.6003, "step": 8398 }, { "epoch": 0.22968168890833515, "grad_norm": 1.2920186519622803, "learning_rate": 1.7981584937005312e-05, "loss": 0.5806, "step": 8399 }, { "epoch": 0.22970903522205208, "grad_norm": 1.7242485284805298, "learning_rate": 1.7981051315421682e-05, "loss": 0.5644, "step": 8400 }, { "epoch": 0.22973638153576897, "grad_norm": 1.6030349731445312, "learning_rate": 1.7980517631228947e-05, "loss": 0.5518, "step": 8401 }, { "epoch": 0.2297637278494859, "grad_norm": 2.163079023361206, "learning_rate": 1.79799838844313e-05, "loss": 0.4077, "step": 8402 }, { "epoch": 0.2297910741632028, "grad_norm": 2.4863522052764893, "learning_rate": 1.7979450075032926e-05, "loss": 0.578, "step": 8403 }, { "epoch": 0.22981842047691972, "grad_norm": 1.3550001382827759, "learning_rate": 1.797891620303801e-05, "loss": 0.5692, "step": 8404 }, { "epoch": 0.22984576679063662, "grad_norm": 1.3901647329330444, "learning_rate": 1.7978382268450746e-05, "loss": 0.5657, "step": 8405 }, { "epoch": 0.22987311310435354, "grad_norm": 1.3359298706054688, "learning_rate": 1.7977848271275316e-05, "loss": 0.5468, "step": 8406 }, { "epoch": 0.22990045941807044, "grad_norm": 1.6280611753463745, "learning_rate": 1.797731421151591e-05, "loss": 0.5883, "step": 8407 }, { "epoch": 0.22992780573178737, "grad_norm": 1.2558808326721191, "learning_rate": 1.797678008917672e-05, "loss": 0.5697, "step": 8408 }, { "epoch": 0.22995515204550426, "grad_norm": 1.4683820009231567, "learning_rate": 1.7976245904261934e-05, "loss": 0.5652, "step": 8409 }, { "epoch": 0.2299824983592212, "grad_norm": 1.556091547012329, "learning_rate": 1.7975711656775745e-05, "loss": 0.5993, "step": 8410 }, { "epoch": 0.23000984467293809, "grad_norm": 1.7425130605697632, "learning_rate": 1.7975177346722343e-05, "loss": 0.4205, "step": 8411 }, { "epoch": 0.230037190986655, "grad_norm": 1.2655168771743774, "learning_rate": 1.7974642974105918e-05, "loss": 0.5709, "step": 8412 }, { "epoch": 0.2300645373003719, "grad_norm": 1.8089780807495117, "learning_rate": 1.7974108538930662e-05, "loss": 0.5347, "step": 8413 }, { "epoch": 0.23009188361408883, "grad_norm": 1.766371250152588, "learning_rate": 1.7973574041200773e-05, "loss": 0.5929, "step": 8414 }, { "epoch": 0.23011922992780573, "grad_norm": 1.5997567176818848, "learning_rate": 1.7973039480920434e-05, "loss": 0.5538, "step": 8415 }, { "epoch": 0.23014657624152265, "grad_norm": 1.6623865365982056, "learning_rate": 1.7972504858093848e-05, "loss": 0.5103, "step": 8416 }, { "epoch": 0.23017392255523955, "grad_norm": 2.0071544647216797, "learning_rate": 1.7971970172725205e-05, "loss": 0.5823, "step": 8417 }, { "epoch": 0.23020126886895648, "grad_norm": 1.5932350158691406, "learning_rate": 1.7971435424818696e-05, "loss": 0.5986, "step": 8418 }, { "epoch": 0.23022861518267337, "grad_norm": 1.766526222229004, "learning_rate": 1.797090061437852e-05, "loss": 0.4009, "step": 8419 }, { "epoch": 0.2302559614963903, "grad_norm": 8.300156593322754, "learning_rate": 1.7970365741408878e-05, "loss": 0.5548, "step": 8420 }, { "epoch": 0.2302833078101072, "grad_norm": 1.5662015676498413, "learning_rate": 1.7969830805913954e-05, "loss": 0.5384, "step": 8421 }, { "epoch": 0.23031065412382412, "grad_norm": 1.6747188568115234, "learning_rate": 1.7969295807897952e-05, "loss": 0.5896, "step": 8422 }, { "epoch": 0.23033800043754102, "grad_norm": 1.3879116773605347, "learning_rate": 1.7968760747365068e-05, "loss": 0.5599, "step": 8423 }, { "epoch": 0.23036534675125794, "grad_norm": 1.5020004510879517, "learning_rate": 1.7968225624319498e-05, "loss": 0.6003, "step": 8424 }, { "epoch": 0.23039269306497484, "grad_norm": 1.6657909154891968, "learning_rate": 1.796769043876544e-05, "loss": 0.5674, "step": 8425 }, { "epoch": 0.23042003937869177, "grad_norm": 3.2944421768188477, "learning_rate": 1.796715519070709e-05, "loss": 0.4248, "step": 8426 }, { "epoch": 0.23044738569240866, "grad_norm": 1.44521164894104, "learning_rate": 1.7966619880148653e-05, "loss": 0.5642, "step": 8427 }, { "epoch": 0.2304747320061256, "grad_norm": 1.3142945766448975, "learning_rate": 1.7966084507094323e-05, "loss": 0.5642, "step": 8428 }, { "epoch": 0.23050207831984248, "grad_norm": 1.5732700824737549, "learning_rate": 1.7965549071548304e-05, "loss": 0.5592, "step": 8429 }, { "epoch": 0.2305294246335594, "grad_norm": 2.0830068588256836, "learning_rate": 1.7965013573514794e-05, "loss": 0.6182, "step": 8430 }, { "epoch": 0.2305567709472763, "grad_norm": 1.436961054801941, "learning_rate": 1.7964478012997992e-05, "loss": 0.5218, "step": 8431 }, { "epoch": 0.2305841172609932, "grad_norm": 1.8298940658569336, "learning_rate": 1.7963942390002105e-05, "loss": 0.5201, "step": 8432 }, { "epoch": 0.23061146357471013, "grad_norm": 2.079960584640503, "learning_rate": 1.7963406704531327e-05, "loss": 0.5393, "step": 8433 }, { "epoch": 0.23063880988842703, "grad_norm": 1.3825726509094238, "learning_rate": 1.7962870956589867e-05, "loss": 0.9233, "step": 8434 }, { "epoch": 0.23066615620214395, "grad_norm": 1.3568497896194458, "learning_rate": 1.7962335146181927e-05, "loss": 0.6108, "step": 8435 }, { "epoch": 0.23069350251586085, "grad_norm": 1.3397793769836426, "learning_rate": 1.7961799273311706e-05, "loss": 0.5619, "step": 8436 }, { "epoch": 0.23072084882957777, "grad_norm": 1.5857833623886108, "learning_rate": 1.7961263337983408e-05, "loss": 0.5586, "step": 8437 }, { "epoch": 0.23074819514329467, "grad_norm": 1.2018167972564697, "learning_rate": 1.7960727340201245e-05, "loss": 0.5621, "step": 8438 }, { "epoch": 0.2307755414570116, "grad_norm": 1.9316314458847046, "learning_rate": 1.7960191279969414e-05, "loss": 0.5467, "step": 8439 }, { "epoch": 0.2308028877707285, "grad_norm": 1.4376671314239502, "learning_rate": 1.795965515729212e-05, "loss": 0.5662, "step": 8440 }, { "epoch": 0.23083023408444542, "grad_norm": 1.44086492061615, "learning_rate": 1.7959118972173576e-05, "loss": 0.8983, "step": 8441 }, { "epoch": 0.23085758039816232, "grad_norm": 1.7557282447814941, "learning_rate": 1.7958582724617982e-05, "loss": 0.4145, "step": 8442 }, { "epoch": 0.23088492671187924, "grad_norm": 2.048224687576294, "learning_rate": 1.7958046414629545e-05, "loss": 0.445, "step": 8443 }, { "epoch": 0.23091227302559614, "grad_norm": 1.5756919384002686, "learning_rate": 1.7957510042212473e-05, "loss": 0.5918, "step": 8444 }, { "epoch": 0.23093961933931306, "grad_norm": 1.7914212942123413, "learning_rate": 1.7956973607370972e-05, "loss": 0.5587, "step": 8445 }, { "epoch": 0.23096696565302996, "grad_norm": 1.7634247541427612, "learning_rate": 1.795643711010926e-05, "loss": 0.6136, "step": 8446 }, { "epoch": 0.23099431196674688, "grad_norm": 1.4907686710357666, "learning_rate": 1.7955900550431532e-05, "loss": 0.5542, "step": 8447 }, { "epoch": 0.23102165828046378, "grad_norm": 1.1560804843902588, "learning_rate": 1.7955363928342004e-05, "loss": 0.5586, "step": 8448 }, { "epoch": 0.2310490045941807, "grad_norm": 1.8878545761108398, "learning_rate": 1.7954827243844885e-05, "loss": 0.5115, "step": 8449 }, { "epoch": 0.2310763509078976, "grad_norm": 1.5800273418426514, "learning_rate": 1.7954290496944385e-05, "loss": 0.5791, "step": 8450 }, { "epoch": 0.23110369722161453, "grad_norm": 1.5857603549957275, "learning_rate": 1.7953753687644713e-05, "loss": 0.564, "step": 8451 }, { "epoch": 0.23113104353533143, "grad_norm": 1.5929228067398071, "learning_rate": 1.7953216815950084e-05, "loss": 0.8899, "step": 8452 }, { "epoch": 0.23115838984904835, "grad_norm": 1.519905924797058, "learning_rate": 1.7952679881864708e-05, "loss": 0.5502, "step": 8453 }, { "epoch": 0.23118573616276525, "grad_norm": 1.3512159585952759, "learning_rate": 1.7952142885392798e-05, "loss": 0.5617, "step": 8454 }, { "epoch": 0.23121308247648217, "grad_norm": 1.4530835151672363, "learning_rate": 1.795160582653856e-05, "loss": 0.5826, "step": 8455 }, { "epoch": 0.23124042879019907, "grad_norm": 1.3883976936340332, "learning_rate": 1.7951068705306216e-05, "loss": 0.91, "step": 8456 }, { "epoch": 0.231267775103916, "grad_norm": 1.4882735013961792, "learning_rate": 1.7950531521699974e-05, "loss": 0.571, "step": 8457 }, { "epoch": 0.2312951214176329, "grad_norm": 1.5147194862365723, "learning_rate": 1.794999427572405e-05, "loss": 0.4732, "step": 8458 }, { "epoch": 0.23132246773134982, "grad_norm": 1.9592314958572388, "learning_rate": 1.7949456967382656e-05, "loss": 0.5776, "step": 8459 }, { "epoch": 0.23134981404506672, "grad_norm": 1.6042613983154297, "learning_rate": 1.7948919596680016e-05, "loss": 0.5514, "step": 8460 }, { "epoch": 0.23137716035878364, "grad_norm": 1.9188995361328125, "learning_rate": 1.794838216362033e-05, "loss": 0.5462, "step": 8461 }, { "epoch": 0.23140450667250054, "grad_norm": 1.6725353002548218, "learning_rate": 1.7947844668207827e-05, "loss": 0.5531, "step": 8462 }, { "epoch": 0.23143185298621746, "grad_norm": 1.5931880474090576, "learning_rate": 1.7947307110446722e-05, "loss": 0.9339, "step": 8463 }, { "epoch": 0.23145919929993436, "grad_norm": 1.9622135162353516, "learning_rate": 1.7946769490341225e-05, "loss": 0.6185, "step": 8464 }, { "epoch": 0.23148654561365128, "grad_norm": 1.7360116243362427, "learning_rate": 1.794623180789556e-05, "loss": 0.5582, "step": 8465 }, { "epoch": 0.23151389192736818, "grad_norm": 1.8707174062728882, "learning_rate": 1.794569406311394e-05, "loss": 0.4848, "step": 8466 }, { "epoch": 0.2315412382410851, "grad_norm": 1.6726980209350586, "learning_rate": 1.794515625600059e-05, "loss": 0.5357, "step": 8467 }, { "epoch": 0.231568584554802, "grad_norm": 1.613131046295166, "learning_rate": 1.794461838655972e-05, "loss": 0.5312, "step": 8468 }, { "epoch": 0.23159593086851893, "grad_norm": 1.8399617671966553, "learning_rate": 1.794408045479556e-05, "loss": 0.5865, "step": 8469 }, { "epoch": 0.23162327718223583, "grad_norm": 1.6231591701507568, "learning_rate": 1.7943542460712323e-05, "loss": 0.543, "step": 8470 }, { "epoch": 0.23165062349595275, "grad_norm": 1.477839469909668, "learning_rate": 1.794300440431423e-05, "loss": 0.5703, "step": 8471 }, { "epoch": 0.23167796980966965, "grad_norm": 1.8456547260284424, "learning_rate": 1.7942466285605503e-05, "loss": 0.5886, "step": 8472 }, { "epoch": 0.23170531612338657, "grad_norm": 1.753067135810852, "learning_rate": 1.7941928104590363e-05, "loss": 0.5334, "step": 8473 }, { "epoch": 0.23173266243710347, "grad_norm": 1.6185481548309326, "learning_rate": 1.7941389861273032e-05, "loss": 0.5048, "step": 8474 }, { "epoch": 0.2317600087508204, "grad_norm": 1.4250227212905884, "learning_rate": 1.7940851555657732e-05, "loss": 0.5395, "step": 8475 }, { "epoch": 0.2317873550645373, "grad_norm": 1.4897170066833496, "learning_rate": 1.794031318774869e-05, "loss": 0.5201, "step": 8476 }, { "epoch": 0.23181470137825422, "grad_norm": 1.5730667114257812, "learning_rate": 1.793977475755012e-05, "loss": 0.5584, "step": 8477 }, { "epoch": 0.23184204769197111, "grad_norm": 1.4784950017929077, "learning_rate": 1.7939236265066253e-05, "loss": 0.5803, "step": 8478 }, { "epoch": 0.23186939400568804, "grad_norm": 1.4399162530899048, "learning_rate": 1.7938697710301314e-05, "loss": 0.9224, "step": 8479 }, { "epoch": 0.23189674031940494, "grad_norm": 1.521348237991333, "learning_rate": 1.7938159093259525e-05, "loss": 0.5648, "step": 8480 }, { "epoch": 0.23192408663312186, "grad_norm": 1.9310904741287231, "learning_rate": 1.793762041394511e-05, "loss": 0.5909, "step": 8481 }, { "epoch": 0.23195143294683876, "grad_norm": 1.6519778966903687, "learning_rate": 1.7937081672362298e-05, "loss": 0.5786, "step": 8482 }, { "epoch": 0.23197877926055568, "grad_norm": 1.997976541519165, "learning_rate": 1.7936542868515317e-05, "loss": 0.4981, "step": 8483 }, { "epoch": 0.23200612557427258, "grad_norm": 1.6636977195739746, "learning_rate": 1.7936004002408386e-05, "loss": 0.5781, "step": 8484 }, { "epoch": 0.2320334718879895, "grad_norm": 1.9349457025527954, "learning_rate": 1.793546507404574e-05, "loss": 0.4297, "step": 8485 }, { "epoch": 0.2320608182017064, "grad_norm": 1.6813373565673828, "learning_rate": 1.79349260834316e-05, "loss": 0.5351, "step": 8486 }, { "epoch": 0.23208816451542333, "grad_norm": 1.709116816520691, "learning_rate": 1.7934387030570202e-05, "loss": 0.5339, "step": 8487 }, { "epoch": 0.23211551082914023, "grad_norm": 1.7268798351287842, "learning_rate": 1.793384791546577e-05, "loss": 0.574, "step": 8488 }, { "epoch": 0.23214285714285715, "grad_norm": 1.662081003189087, "learning_rate": 1.7933308738122534e-05, "loss": 0.523, "step": 8489 }, { "epoch": 0.23217020345657405, "grad_norm": 1.5106698274612427, "learning_rate": 1.793276949854472e-05, "loss": 0.5529, "step": 8490 }, { "epoch": 0.23219754977029097, "grad_norm": 1.8117444515228271, "learning_rate": 1.7932230196736564e-05, "loss": 0.5378, "step": 8491 }, { "epoch": 0.23222489608400787, "grad_norm": 1.6416690349578857, "learning_rate": 1.7931690832702294e-05, "loss": 0.535, "step": 8492 }, { "epoch": 0.2322522423977248, "grad_norm": 1.4356005191802979, "learning_rate": 1.7931151406446145e-05, "loss": 0.5355, "step": 8493 }, { "epoch": 0.2322795887114417, "grad_norm": 1.313454031944275, "learning_rate": 1.793061191797234e-05, "loss": 0.5746, "step": 8494 }, { "epoch": 0.23230693502515862, "grad_norm": 1.3165290355682373, "learning_rate": 1.793007236728512e-05, "loss": 0.8806, "step": 8495 }, { "epoch": 0.23233428133887551, "grad_norm": 1.492013931274414, "learning_rate": 1.7929532754388714e-05, "loss": 0.8851, "step": 8496 }, { "epoch": 0.23236162765259244, "grad_norm": 1.3354321718215942, "learning_rate": 1.7928993079287354e-05, "loss": 0.5824, "step": 8497 }, { "epoch": 0.23238897396630934, "grad_norm": 1.3094602823257446, "learning_rate": 1.7928453341985274e-05, "loss": 0.544, "step": 8498 }, { "epoch": 0.23241632028002626, "grad_norm": 2.2033538818359375, "learning_rate": 1.792791354248671e-05, "loss": 0.4085, "step": 8499 }, { "epoch": 0.23244366659374316, "grad_norm": 1.6878533363342285, "learning_rate": 1.7927373680795894e-05, "loss": 0.5678, "step": 8500 }, { "epoch": 0.23247101290746008, "grad_norm": 1.3178759813308716, "learning_rate": 1.7926833756917067e-05, "loss": 0.5593, "step": 8501 }, { "epoch": 0.23249835922117698, "grad_norm": 1.6350008249282837, "learning_rate": 1.7926293770854455e-05, "loss": 0.5892, "step": 8502 }, { "epoch": 0.2325257055348939, "grad_norm": 1.4899965524673462, "learning_rate": 1.79257537226123e-05, "loss": 0.533, "step": 8503 }, { "epoch": 0.2325530518486108, "grad_norm": 1.5756447315216064, "learning_rate": 1.792521361219484e-05, "loss": 0.5408, "step": 8504 }, { "epoch": 0.23258039816232773, "grad_norm": 1.436669945716858, "learning_rate": 1.7924673439606306e-05, "loss": 0.5281, "step": 8505 }, { "epoch": 0.23260774447604463, "grad_norm": 1.6171083450317383, "learning_rate": 1.792413320485094e-05, "loss": 0.5062, "step": 8506 }, { "epoch": 0.23263509078976155, "grad_norm": 1.402481198310852, "learning_rate": 1.792359290793298e-05, "loss": 0.5623, "step": 8507 }, { "epoch": 0.23266243710347845, "grad_norm": 1.330686330795288, "learning_rate": 1.7923052548856662e-05, "loss": 0.5518, "step": 8508 }, { "epoch": 0.23268978341719537, "grad_norm": 1.2454217672348022, "learning_rate": 1.7922512127626228e-05, "loss": 0.4356, "step": 8509 }, { "epoch": 0.23271712973091227, "grad_norm": 2.057962656021118, "learning_rate": 1.7921971644245914e-05, "loss": 0.4893, "step": 8510 }, { "epoch": 0.2327444760446292, "grad_norm": 1.534170150756836, "learning_rate": 1.7921431098719964e-05, "loss": 0.8984, "step": 8511 }, { "epoch": 0.2327718223583461, "grad_norm": 1.5348536968231201, "learning_rate": 1.7920890491052615e-05, "loss": 0.4599, "step": 8512 }, { "epoch": 0.23279916867206302, "grad_norm": 1.7952970266342163, "learning_rate": 1.792034982124811e-05, "loss": 0.5711, "step": 8513 }, { "epoch": 0.23282651498577991, "grad_norm": 3.1393656730651855, "learning_rate": 1.7919809089310692e-05, "loss": 0.576, "step": 8514 }, { "epoch": 0.23285386129949684, "grad_norm": 2.0244297981262207, "learning_rate": 1.7919268295244595e-05, "loss": 0.6088, "step": 8515 }, { "epoch": 0.23288120761321374, "grad_norm": 1.8263847827911377, "learning_rate": 1.7918727439054072e-05, "loss": 0.5548, "step": 8516 }, { "epoch": 0.23290855392693066, "grad_norm": 1.929470419883728, "learning_rate": 1.7918186520743363e-05, "loss": 0.5292, "step": 8517 }, { "epoch": 0.23293590024064756, "grad_norm": 1.2738087177276611, "learning_rate": 1.7917645540316702e-05, "loss": 0.9203, "step": 8518 }, { "epoch": 0.23296324655436448, "grad_norm": 1.4350956678390503, "learning_rate": 1.7917104497778345e-05, "loss": 0.5854, "step": 8519 }, { "epoch": 0.23299059286808138, "grad_norm": 1.4029947519302368, "learning_rate": 1.7916563393132527e-05, "loss": 0.5538, "step": 8520 }, { "epoch": 0.2330179391817983, "grad_norm": 1.346179485321045, "learning_rate": 1.7916022226383497e-05, "loss": 0.8691, "step": 8521 }, { "epoch": 0.2330452854955152, "grad_norm": 1.5527433156967163, "learning_rate": 1.7915480997535506e-05, "loss": 0.5575, "step": 8522 }, { "epoch": 0.23307263180923213, "grad_norm": 1.6031310558319092, "learning_rate": 1.7914939706592788e-05, "loss": 0.4199, "step": 8523 }, { "epoch": 0.23309997812294903, "grad_norm": 1.4988304376602173, "learning_rate": 1.7914398353559603e-05, "loss": 0.5594, "step": 8524 }, { "epoch": 0.23312732443666595, "grad_norm": 1.6787843704223633, "learning_rate": 1.791385693844018e-05, "loss": 0.5355, "step": 8525 }, { "epoch": 0.23315467075038285, "grad_norm": 1.6905349493026733, "learning_rate": 1.7913315461238782e-05, "loss": 0.5793, "step": 8526 }, { "epoch": 0.23318201706409977, "grad_norm": 2.126526117324829, "learning_rate": 1.791277392195965e-05, "loss": 0.4913, "step": 8527 }, { "epoch": 0.23320936337781667, "grad_norm": 2.1866157054901123, "learning_rate": 1.7912232320607038e-05, "loss": 0.5055, "step": 8528 }, { "epoch": 0.2332367096915336, "grad_norm": 1.387359380722046, "learning_rate": 1.791169065718519e-05, "loss": 0.5809, "step": 8529 }, { "epoch": 0.2332640560052505, "grad_norm": 1.5876346826553345, "learning_rate": 1.791114893169835e-05, "loss": 0.9034, "step": 8530 }, { "epoch": 0.23329140231896742, "grad_norm": 1.472961187362671, "learning_rate": 1.7910607144150774e-05, "loss": 0.5595, "step": 8531 }, { "epoch": 0.23331874863268431, "grad_norm": 5.526522636413574, "learning_rate": 1.7910065294546713e-05, "loss": 0.555, "step": 8532 }, { "epoch": 0.2333460949464012, "grad_norm": 1.2213853597640991, "learning_rate": 1.7909523382890413e-05, "loss": 0.5481, "step": 8533 }, { "epoch": 0.23337344126011814, "grad_norm": 1.3846783638000488, "learning_rate": 1.7908981409186132e-05, "loss": 0.9056, "step": 8534 }, { "epoch": 0.23340078757383503, "grad_norm": 2.0085253715515137, "learning_rate": 1.7908439373438117e-05, "loss": 0.5038, "step": 8535 }, { "epoch": 0.23342813388755196, "grad_norm": 2.7390003204345703, "learning_rate": 1.790789727565062e-05, "loss": 0.568, "step": 8536 }, { "epoch": 0.23345548020126886, "grad_norm": 2.641587018966675, "learning_rate": 1.7907355115827894e-05, "loss": 0.5599, "step": 8537 }, { "epoch": 0.23348282651498578, "grad_norm": 1.4699777364730835, "learning_rate": 1.790681289397419e-05, "loss": 0.5563, "step": 8538 }, { "epoch": 0.23351017282870268, "grad_norm": 1.5581649541854858, "learning_rate": 1.7906270610093766e-05, "loss": 0.583, "step": 8539 }, { "epoch": 0.2335375191424196, "grad_norm": 1.8855167627334595, "learning_rate": 1.7905728264190874e-05, "loss": 0.578, "step": 8540 }, { "epoch": 0.2335648654561365, "grad_norm": 1.402301549911499, "learning_rate": 1.790518585626977e-05, "loss": 0.5653, "step": 8541 }, { "epoch": 0.23359221176985343, "grad_norm": 1.3589332103729248, "learning_rate": 1.7904643386334708e-05, "loss": 0.5518, "step": 8542 }, { "epoch": 0.23361955808357032, "grad_norm": 1.4642623662948608, "learning_rate": 1.790410085438994e-05, "loss": 0.5594, "step": 8543 }, { "epoch": 0.23364690439728725, "grad_norm": 1.7293543815612793, "learning_rate": 1.790355826043973e-05, "loss": 0.9205, "step": 8544 }, { "epoch": 0.23367425071100414, "grad_norm": 1.4615932703018188, "learning_rate": 1.7903015604488324e-05, "loss": 0.5652, "step": 8545 }, { "epoch": 0.23370159702472107, "grad_norm": 1.3621656894683838, "learning_rate": 1.790247288653999e-05, "loss": 0.9457, "step": 8546 }, { "epoch": 0.23372894333843797, "grad_norm": 1.4535677433013916, "learning_rate": 1.7901930106598975e-05, "loss": 0.5462, "step": 8547 }, { "epoch": 0.2337562896521549, "grad_norm": 1.5646871328353882, "learning_rate": 1.7901387264669545e-05, "loss": 0.5771, "step": 8548 }, { "epoch": 0.2337836359658718, "grad_norm": 1.5641388893127441, "learning_rate": 1.7900844360755957e-05, "loss": 0.5625, "step": 8549 }, { "epoch": 0.2338109822795887, "grad_norm": 1.2325568199157715, "learning_rate": 1.7900301394862465e-05, "loss": 0.5437, "step": 8550 }, { "epoch": 0.2338383285933056, "grad_norm": 1.2513824701309204, "learning_rate": 1.7899758366993338e-05, "loss": 0.5647, "step": 8551 }, { "epoch": 0.23386567490702254, "grad_norm": 1.3405277729034424, "learning_rate": 1.7899215277152823e-05, "loss": 0.8835, "step": 8552 }, { "epoch": 0.23389302122073943, "grad_norm": 2.095508575439453, "learning_rate": 1.7898672125345188e-05, "loss": 0.3867, "step": 8553 }, { "epoch": 0.23392036753445636, "grad_norm": 1.4006372690200806, "learning_rate": 1.7898128911574693e-05, "loss": 0.928, "step": 8554 }, { "epoch": 0.23394771384817326, "grad_norm": 1.4923133850097656, "learning_rate": 1.7897585635845606e-05, "loss": 0.5773, "step": 8555 }, { "epoch": 0.23397506016189018, "grad_norm": 1.4360688924789429, "learning_rate": 1.7897042298162174e-05, "loss": 0.8975, "step": 8556 }, { "epoch": 0.23400240647560708, "grad_norm": 1.796188235282898, "learning_rate": 1.7896498898528673e-05, "loss": 0.55, "step": 8557 }, { "epoch": 0.234029752789324, "grad_norm": 1.4722415208816528, "learning_rate": 1.789595543694936e-05, "loss": 0.8902, "step": 8558 }, { "epoch": 0.2340570991030409, "grad_norm": 1.070250391960144, "learning_rate": 1.7895411913428494e-05, "loss": 0.5531, "step": 8559 }, { "epoch": 0.23408444541675782, "grad_norm": 1.4383960962295532, "learning_rate": 1.7894868327970347e-05, "loss": 0.9041, "step": 8560 }, { "epoch": 0.23411179173047472, "grad_norm": 17.6072940826416, "learning_rate": 1.789432468057918e-05, "loss": 0.5744, "step": 8561 }, { "epoch": 0.23413913804419165, "grad_norm": 1.5909301042556763, "learning_rate": 1.7893780971259257e-05, "loss": 0.5809, "step": 8562 }, { "epoch": 0.23416648435790854, "grad_norm": 1.0766513347625732, "learning_rate": 1.7893237200014845e-05, "loss": 0.5218, "step": 8563 }, { "epoch": 0.23419383067162547, "grad_norm": 1.4055689573287964, "learning_rate": 1.7892693366850203e-05, "loss": 0.5273, "step": 8564 }, { "epoch": 0.23422117698534237, "grad_norm": 1.3766264915466309, "learning_rate": 1.7892149471769608e-05, "loss": 0.6157, "step": 8565 }, { "epoch": 0.2342485232990593, "grad_norm": 1.3857804536819458, "learning_rate": 1.789160551477732e-05, "loss": 0.5653, "step": 8566 }, { "epoch": 0.2342758696127762, "grad_norm": 1.356823205947876, "learning_rate": 1.789106149587761e-05, "loss": 0.5316, "step": 8567 }, { "epoch": 0.2343032159264931, "grad_norm": 9.346429824829102, "learning_rate": 1.789051741507474e-05, "loss": 0.5216, "step": 8568 }, { "epoch": 0.23433056224021, "grad_norm": 1.3248634338378906, "learning_rate": 1.7889973272372984e-05, "loss": 0.5519, "step": 8569 }, { "epoch": 0.23435790855392694, "grad_norm": 1.6016149520874023, "learning_rate": 1.7889429067776606e-05, "loss": 0.5197, "step": 8570 }, { "epoch": 0.23438525486764383, "grad_norm": 1.7809451818466187, "learning_rate": 1.788888480128988e-05, "loss": 0.4346, "step": 8571 }, { "epoch": 0.23441260118136076, "grad_norm": 1.7558974027633667, "learning_rate": 1.788834047291707e-05, "loss": 0.6079, "step": 8572 }, { "epoch": 0.23443994749507766, "grad_norm": 1.3479251861572266, "learning_rate": 1.788779608266245e-05, "loss": 0.54, "step": 8573 }, { "epoch": 0.23446729380879458, "grad_norm": 1.8045661449432373, "learning_rate": 1.788725163053029e-05, "loss": 0.4053, "step": 8574 }, { "epoch": 0.23449464012251148, "grad_norm": 1.5019571781158447, "learning_rate": 1.788670711652486e-05, "loss": 0.4123, "step": 8575 }, { "epoch": 0.2345219864362284, "grad_norm": 1.3733770847320557, "learning_rate": 1.788616254065043e-05, "loss": 0.6029, "step": 8576 }, { "epoch": 0.2345493327499453, "grad_norm": 1.4015045166015625, "learning_rate": 1.7885617902911277e-05, "loss": 0.549, "step": 8577 }, { "epoch": 0.23457667906366222, "grad_norm": 1.6905218362808228, "learning_rate": 1.788507320331167e-05, "loss": 0.5607, "step": 8578 }, { "epoch": 0.23460402537737912, "grad_norm": 1.4381040334701538, "learning_rate": 1.7884528441855887e-05, "loss": 0.5986, "step": 8579 }, { "epoch": 0.23463137169109605, "grad_norm": 1.770385980606079, "learning_rate": 1.7883983618548192e-05, "loss": 0.9378, "step": 8580 }, { "epoch": 0.23465871800481294, "grad_norm": 2.4110937118530273, "learning_rate": 1.7883438733392865e-05, "loss": 0.5521, "step": 8581 }, { "epoch": 0.23468606431852987, "grad_norm": 1.4083471298217773, "learning_rate": 1.788289378639418e-05, "loss": 0.5654, "step": 8582 }, { "epoch": 0.23471341063224677, "grad_norm": 1.7584271430969238, "learning_rate": 1.7882348777556413e-05, "loss": 0.5821, "step": 8583 }, { "epoch": 0.2347407569459637, "grad_norm": 1.6477253437042236, "learning_rate": 1.7881803706883835e-05, "loss": 0.5981, "step": 8584 }, { "epoch": 0.2347681032596806, "grad_norm": 1.249214768409729, "learning_rate": 1.7881258574380725e-05, "loss": 0.5526, "step": 8585 }, { "epoch": 0.2347954495733975, "grad_norm": 1.4788267612457275, "learning_rate": 1.7880713380051363e-05, "loss": 0.5848, "step": 8586 }, { "epoch": 0.2348227958871144, "grad_norm": 1.7424904108047485, "learning_rate": 1.7880168123900023e-05, "loss": 0.5239, "step": 8587 }, { "epoch": 0.23485014220083134, "grad_norm": 1.7133764028549194, "learning_rate": 1.787962280593098e-05, "loss": 0.5339, "step": 8588 }, { "epoch": 0.23487748851454823, "grad_norm": 1.398362636566162, "learning_rate": 1.7879077426148513e-05, "loss": 0.5589, "step": 8589 }, { "epoch": 0.23490483482826516, "grad_norm": 1.363507866859436, "learning_rate": 1.78785319845569e-05, "loss": 0.5616, "step": 8590 }, { "epoch": 0.23493218114198205, "grad_norm": 1.2725614309310913, "learning_rate": 1.7877986481160422e-05, "loss": 0.5456, "step": 8591 }, { "epoch": 0.23495952745569898, "grad_norm": 1.4869319200515747, "learning_rate": 1.787744091596336e-05, "loss": 0.5811, "step": 8592 }, { "epoch": 0.23498687376941588, "grad_norm": 1.5284355878829956, "learning_rate": 1.7876895288969987e-05, "loss": 0.9173, "step": 8593 }, { "epoch": 0.2350142200831328, "grad_norm": 1.1869333982467651, "learning_rate": 1.787634960018459e-05, "loss": 0.5616, "step": 8594 }, { "epoch": 0.2350415663968497, "grad_norm": 1.3291926383972168, "learning_rate": 1.787580384961144e-05, "loss": 0.5488, "step": 8595 }, { "epoch": 0.23506891271056662, "grad_norm": 2.2114391326904297, "learning_rate": 1.7875258037254833e-05, "loss": 0.542, "step": 8596 }, { "epoch": 0.23509625902428352, "grad_norm": 1.2702388763427734, "learning_rate": 1.787471216311904e-05, "loss": 0.5548, "step": 8597 }, { "epoch": 0.23512360533800045, "grad_norm": 1.2488621473312378, "learning_rate": 1.787416622720835e-05, "loss": 0.5291, "step": 8598 }, { "epoch": 0.23515095165171734, "grad_norm": 1.4616999626159668, "learning_rate": 1.787362022952704e-05, "loss": 0.8929, "step": 8599 }, { "epoch": 0.23517829796543427, "grad_norm": 1.2697546482086182, "learning_rate": 1.7873074170079393e-05, "loss": 0.5553, "step": 8600 }, { "epoch": 0.23520564427915117, "grad_norm": 1.8049559593200684, "learning_rate": 1.7872528048869693e-05, "loss": 0.532, "step": 8601 }, { "epoch": 0.2352329905928681, "grad_norm": 1.3015060424804688, "learning_rate": 1.7871981865902228e-05, "loss": 0.864, "step": 8602 }, { "epoch": 0.235260336906585, "grad_norm": 1.2504860162734985, "learning_rate": 1.787143562118128e-05, "loss": 0.5551, "step": 8603 }, { "epoch": 0.2352876832203019, "grad_norm": 1.1785866022109985, "learning_rate": 1.787088931471114e-05, "loss": 0.5527, "step": 8604 }, { "epoch": 0.2353150295340188, "grad_norm": 1.8469774723052979, "learning_rate": 1.787034294649608e-05, "loss": 0.8856, "step": 8605 }, { "epoch": 0.23534237584773574, "grad_norm": 1.560290813446045, "learning_rate": 1.7869796516540402e-05, "loss": 0.5974, "step": 8606 }, { "epoch": 0.23536972216145263, "grad_norm": 1.4577828645706177, "learning_rate": 1.786925002484838e-05, "loss": 0.5382, "step": 8607 }, { "epoch": 0.23539706847516956, "grad_norm": 1.345589280128479, "learning_rate": 1.786870347142431e-05, "loss": 0.5692, "step": 8608 }, { "epoch": 0.23542441478888645, "grad_norm": 2.104337453842163, "learning_rate": 1.7868156856272473e-05, "loss": 0.4624, "step": 8609 }, { "epoch": 0.23545176110260338, "grad_norm": 1.4533880949020386, "learning_rate": 1.786761017939716e-05, "loss": 0.5645, "step": 8610 }, { "epoch": 0.23547910741632028, "grad_norm": 1.4573860168457031, "learning_rate": 1.786706344080266e-05, "loss": 0.581, "step": 8611 }, { "epoch": 0.2355064537300372, "grad_norm": 1.359351634979248, "learning_rate": 1.7866516640493263e-05, "loss": 0.575, "step": 8612 }, { "epoch": 0.2355338000437541, "grad_norm": 1.7448267936706543, "learning_rate": 1.7865969778473253e-05, "loss": 0.548, "step": 8613 }, { "epoch": 0.23556114635747102, "grad_norm": 1.1664576530456543, "learning_rate": 1.7865422854746924e-05, "loss": 0.5365, "step": 8614 }, { "epoch": 0.23558849267118792, "grad_norm": 1.342465877532959, "learning_rate": 1.786487586931857e-05, "loss": 0.554, "step": 8615 }, { "epoch": 0.23561583898490485, "grad_norm": 1.2979692220687866, "learning_rate": 1.7864328822192477e-05, "loss": 0.5785, "step": 8616 }, { "epoch": 0.23564318529862174, "grad_norm": 1.448897123336792, "learning_rate": 1.7863781713372935e-05, "loss": 0.5104, "step": 8617 }, { "epoch": 0.23567053161233867, "grad_norm": 1.3967968225479126, "learning_rate": 1.786323454286424e-05, "loss": 0.5886, "step": 8618 }, { "epoch": 0.23569787792605557, "grad_norm": 1.8475242853164673, "learning_rate": 1.7862687310670684e-05, "loss": 0.5595, "step": 8619 }, { "epoch": 0.2357252242397725, "grad_norm": 2.4899230003356934, "learning_rate": 1.7862140016796557e-05, "loss": 0.9393, "step": 8620 }, { "epoch": 0.2357525705534894, "grad_norm": 1.294697880744934, "learning_rate": 1.7861592661246157e-05, "loss": 0.5851, "step": 8621 }, { "epoch": 0.2357799168672063, "grad_norm": 1.31215500831604, "learning_rate": 1.7861045244023772e-05, "loss": 0.5619, "step": 8622 }, { "epoch": 0.2358072631809232, "grad_norm": 1.4069119691848755, "learning_rate": 1.78604977651337e-05, "loss": 0.5296, "step": 8623 }, { "epoch": 0.23583460949464014, "grad_norm": 1.290582299232483, "learning_rate": 1.7859950224580237e-05, "loss": 0.5709, "step": 8624 }, { "epoch": 0.23586195580835703, "grad_norm": 1.0552759170532227, "learning_rate": 1.7859402622367676e-05, "loss": 0.5927, "step": 8625 }, { "epoch": 0.23588930212207396, "grad_norm": 1.212498426437378, "learning_rate": 1.785885495850031e-05, "loss": 0.5225, "step": 8626 }, { "epoch": 0.23591664843579085, "grad_norm": 1.5681606531143188, "learning_rate": 1.7858307232982444e-05, "loss": 0.5821, "step": 8627 }, { "epoch": 0.23594399474950778, "grad_norm": 1.3118737936019897, "learning_rate": 1.7857759445818365e-05, "loss": 0.5844, "step": 8628 }, { "epoch": 0.23597134106322468, "grad_norm": 1.316938042640686, "learning_rate": 1.7857211597012375e-05, "loss": 0.5416, "step": 8629 }, { "epoch": 0.2359986873769416, "grad_norm": 1.7094974517822266, "learning_rate": 1.7856663686568773e-05, "loss": 0.4314, "step": 8630 }, { "epoch": 0.2360260336906585, "grad_norm": 1.5455702543258667, "learning_rate": 1.7856115714491852e-05, "loss": 0.5554, "step": 8631 }, { "epoch": 0.23605338000437542, "grad_norm": 1.5618668794631958, "learning_rate": 1.7855567680785918e-05, "loss": 0.6196, "step": 8632 }, { "epoch": 0.23608072631809232, "grad_norm": 2.2653682231903076, "learning_rate": 1.7855019585455264e-05, "loss": 0.5639, "step": 8633 }, { "epoch": 0.23610807263180925, "grad_norm": 1.4136903285980225, "learning_rate": 1.785447142850419e-05, "loss": 0.542, "step": 8634 }, { "epoch": 0.23613541894552614, "grad_norm": 1.582560420036316, "learning_rate": 1.7853923209937006e-05, "loss": 0.5593, "step": 8635 }, { "epoch": 0.23616276525924304, "grad_norm": 1.6146317720413208, "learning_rate": 1.7853374929757996e-05, "loss": 0.9402, "step": 8636 }, { "epoch": 0.23619011157295997, "grad_norm": 1.5293585062026978, "learning_rate": 1.7852826587971475e-05, "loss": 0.5599, "step": 8637 }, { "epoch": 0.23621745788667686, "grad_norm": 1.4388859272003174, "learning_rate": 1.785227818458174e-05, "loss": 0.578, "step": 8638 }, { "epoch": 0.2362448042003938, "grad_norm": 1.3911044597625732, "learning_rate": 1.7851729719593086e-05, "loss": 0.5829, "step": 8639 }, { "epoch": 0.23627215051411068, "grad_norm": 1.2803465127944946, "learning_rate": 1.7851181193009827e-05, "loss": 0.5373, "step": 8640 }, { "epoch": 0.2362994968278276, "grad_norm": 1.960867166519165, "learning_rate": 1.785063260483626e-05, "loss": 0.5656, "step": 8641 }, { "epoch": 0.2363268431415445, "grad_norm": 1.6178042888641357, "learning_rate": 1.785008395507669e-05, "loss": 0.5767, "step": 8642 }, { "epoch": 0.23635418945526143, "grad_norm": 1.3332173824310303, "learning_rate": 1.784953524373542e-05, "loss": 0.565, "step": 8643 }, { "epoch": 0.23638153576897833, "grad_norm": 2.1786458492279053, "learning_rate": 1.7848986470816755e-05, "loss": 0.5558, "step": 8644 }, { "epoch": 0.23640888208269525, "grad_norm": 1.4104979038238525, "learning_rate": 1.7848437636325e-05, "loss": 0.5251, "step": 8645 }, { "epoch": 0.23643622839641215, "grad_norm": 1.3297901153564453, "learning_rate": 1.7847888740264464e-05, "loss": 0.561, "step": 8646 }, { "epoch": 0.23646357471012908, "grad_norm": 1.658036708831787, "learning_rate": 1.7847339782639447e-05, "loss": 0.5134, "step": 8647 }, { "epoch": 0.23649092102384597, "grad_norm": 1.21195650100708, "learning_rate": 1.7846790763454257e-05, "loss": 0.5442, "step": 8648 }, { "epoch": 0.2365182673375629, "grad_norm": 1.4368367195129395, "learning_rate": 1.78462416827132e-05, "loss": 0.5714, "step": 8649 }, { "epoch": 0.2365456136512798, "grad_norm": 1.4119840860366821, "learning_rate": 1.784569254042059e-05, "loss": 0.5673, "step": 8650 }, { "epoch": 0.23657295996499672, "grad_norm": 1.3841800689697266, "learning_rate": 1.7845143336580723e-05, "loss": 0.4956, "step": 8651 }, { "epoch": 0.23660030627871362, "grad_norm": 1.5201351642608643, "learning_rate": 1.784459407119792e-05, "loss": 0.4064, "step": 8652 }, { "epoch": 0.23662765259243054, "grad_norm": 1.4358388185501099, "learning_rate": 1.7844044744276485e-05, "loss": 0.5708, "step": 8653 }, { "epoch": 0.23665499890614744, "grad_norm": 1.6079131364822388, "learning_rate": 1.7843495355820722e-05, "loss": 0.5403, "step": 8654 }, { "epoch": 0.23668234521986437, "grad_norm": 3.3392605781555176, "learning_rate": 1.7842945905834948e-05, "loss": 0.5188, "step": 8655 }, { "epoch": 0.23670969153358126, "grad_norm": 1.9406347274780273, "learning_rate": 1.7842396394323466e-05, "loss": 0.399, "step": 8656 }, { "epoch": 0.2367370378472982, "grad_norm": 1.6737024784088135, "learning_rate": 1.7841846821290598e-05, "loss": 0.5342, "step": 8657 }, { "epoch": 0.23676438416101508, "grad_norm": 1.4335894584655762, "learning_rate": 1.7841297186740644e-05, "loss": 0.5967, "step": 8658 }, { "epoch": 0.236791730474732, "grad_norm": 1.6694837808609009, "learning_rate": 1.7840747490677925e-05, "loss": 0.4616, "step": 8659 }, { "epoch": 0.2368190767884489, "grad_norm": 1.403368353843689, "learning_rate": 1.7840197733106746e-05, "loss": 0.5602, "step": 8660 }, { "epoch": 0.23684642310216583, "grad_norm": 1.2068235874176025, "learning_rate": 1.783964791403142e-05, "loss": 0.552, "step": 8661 }, { "epoch": 0.23687376941588273, "grad_norm": 1.4791866540908813, "learning_rate": 1.7839098033456264e-05, "loss": 0.5628, "step": 8662 }, { "epoch": 0.23690111572959965, "grad_norm": 1.526018738746643, "learning_rate": 1.7838548091385592e-05, "loss": 0.5551, "step": 8663 }, { "epoch": 0.23692846204331655, "grad_norm": 1.2672582864761353, "learning_rate": 1.7837998087823712e-05, "loss": 0.5555, "step": 8664 }, { "epoch": 0.23695580835703348, "grad_norm": 1.2530291080474854, "learning_rate": 1.7837448022774945e-05, "loss": 0.5336, "step": 8665 }, { "epoch": 0.23698315467075037, "grad_norm": 1.0397506952285767, "learning_rate": 1.7836897896243607e-05, "loss": 0.5512, "step": 8666 }, { "epoch": 0.2370105009844673, "grad_norm": 1.1825395822525024, "learning_rate": 1.7836347708234004e-05, "loss": 0.5591, "step": 8667 }, { "epoch": 0.2370378472981842, "grad_norm": 1.512722373008728, "learning_rate": 1.7835797458750464e-05, "loss": 0.619, "step": 8668 }, { "epoch": 0.23706519361190112, "grad_norm": 1.2865469455718994, "learning_rate": 1.7835247147797295e-05, "loss": 0.4793, "step": 8669 }, { "epoch": 0.23709253992561802, "grad_norm": 1.1389048099517822, "learning_rate": 1.783469677537882e-05, "loss": 0.5642, "step": 8670 }, { "epoch": 0.23711988623933494, "grad_norm": 1.3104963302612305, "learning_rate": 1.7834146341499356e-05, "loss": 0.6049, "step": 8671 }, { "epoch": 0.23714723255305184, "grad_norm": 1.323700189590454, "learning_rate": 1.783359584616321e-05, "loss": 0.5711, "step": 8672 }, { "epoch": 0.23717457886676876, "grad_norm": 2.7739810943603516, "learning_rate": 1.7833045289374715e-05, "loss": 0.5414, "step": 8673 }, { "epoch": 0.23720192518048566, "grad_norm": 1.4170037508010864, "learning_rate": 1.7832494671138183e-05, "loss": 0.9025, "step": 8674 }, { "epoch": 0.2372292714942026, "grad_norm": 1.4731597900390625, "learning_rate": 1.7831943991457935e-05, "loss": 0.5084, "step": 8675 }, { "epoch": 0.23725661780791948, "grad_norm": 1.53327214717865, "learning_rate": 1.7831393250338288e-05, "loss": 0.4463, "step": 8676 }, { "epoch": 0.2372839641216364, "grad_norm": 1.2818964719772339, "learning_rate": 1.7830842447783568e-05, "loss": 0.5669, "step": 8677 }, { "epoch": 0.2373113104353533, "grad_norm": 1.3522491455078125, "learning_rate": 1.7830291583798095e-05, "loss": 0.5406, "step": 8678 }, { "epoch": 0.23733865674907023, "grad_norm": 1.6069798469543457, "learning_rate": 1.7829740658386183e-05, "loss": 0.5902, "step": 8679 }, { "epoch": 0.23736600306278713, "grad_norm": 1.578325867652893, "learning_rate": 1.782918967155216e-05, "loss": 0.5839, "step": 8680 }, { "epoch": 0.23739334937650405, "grad_norm": 2.2833092212677, "learning_rate": 1.782863862330035e-05, "loss": 0.565, "step": 8681 }, { "epoch": 0.23742069569022095, "grad_norm": 6.15608024597168, "learning_rate": 1.782808751363507e-05, "loss": 0.8925, "step": 8682 }, { "epoch": 0.23744804200393788, "grad_norm": 1.7108861207962036, "learning_rate": 1.7827536342560646e-05, "loss": 0.5272, "step": 8683 }, { "epoch": 0.23747538831765477, "grad_norm": 1.1712161302566528, "learning_rate": 1.7826985110081408e-05, "loss": 0.5448, "step": 8684 }, { "epoch": 0.2375027346313717, "grad_norm": 1.522463321685791, "learning_rate": 1.782643381620167e-05, "loss": 0.5628, "step": 8685 }, { "epoch": 0.2375300809450886, "grad_norm": 1.4695030450820923, "learning_rate": 1.7825882460925764e-05, "loss": 0.5663, "step": 8686 }, { "epoch": 0.23755742725880552, "grad_norm": 1.4015449285507202, "learning_rate": 1.782533104425801e-05, "loss": 0.5622, "step": 8687 }, { "epoch": 0.23758477357252242, "grad_norm": 1.320909023284912, "learning_rate": 1.7824779566202733e-05, "loss": 0.5328, "step": 8688 }, { "epoch": 0.23761211988623934, "grad_norm": 1.1643933057785034, "learning_rate": 1.7824228026764267e-05, "loss": 0.5513, "step": 8689 }, { "epoch": 0.23763946619995624, "grad_norm": 1.3860342502593994, "learning_rate": 1.7823676425946934e-05, "loss": 0.5617, "step": 8690 }, { "epoch": 0.23766681251367316, "grad_norm": 1.3317592144012451, "learning_rate": 1.782312476375506e-05, "loss": 0.5925, "step": 8691 }, { "epoch": 0.23769415882739006, "grad_norm": 1.1614100933074951, "learning_rate": 1.7822573040192974e-05, "loss": 0.5838, "step": 8692 }, { "epoch": 0.237721505141107, "grad_norm": 1.843859076499939, "learning_rate": 1.7822021255265004e-05, "loss": 0.5654, "step": 8693 }, { "epoch": 0.23774885145482388, "grad_norm": 1.2499771118164062, "learning_rate": 1.782146940897548e-05, "loss": 0.5819, "step": 8694 }, { "epoch": 0.2377761977685408, "grad_norm": 1.2690699100494385, "learning_rate": 1.7820917501328726e-05, "loss": 0.5294, "step": 8695 }, { "epoch": 0.2378035440822577, "grad_norm": 1.1207787990570068, "learning_rate": 1.782036553232908e-05, "loss": 0.5715, "step": 8696 }, { "epoch": 0.23783089039597463, "grad_norm": 3.5679945945739746, "learning_rate": 1.7819813501980866e-05, "loss": 0.8947, "step": 8697 }, { "epoch": 0.23785823670969153, "grad_norm": 1.5932390689849854, "learning_rate": 1.781926141028841e-05, "loss": 0.5434, "step": 8698 }, { "epoch": 0.23788558302340845, "grad_norm": 1.4788950681686401, "learning_rate": 1.7818709257256057e-05, "loss": 0.5464, "step": 8699 }, { "epoch": 0.23791292933712535, "grad_norm": 1.232704520225525, "learning_rate": 1.7818157042888128e-05, "loss": 0.9393, "step": 8700 }, { "epoch": 0.23794027565084228, "grad_norm": 1.2236100435256958, "learning_rate": 1.7817604767188953e-05, "loss": 0.5778, "step": 8701 }, { "epoch": 0.23796762196455917, "grad_norm": 1.683504581451416, "learning_rate": 1.7817052430162875e-05, "loss": 0.5355, "step": 8702 }, { "epoch": 0.2379949682782761, "grad_norm": 1.2663586139678955, "learning_rate": 1.7816500031814213e-05, "loss": 0.5591, "step": 8703 }, { "epoch": 0.238022314591993, "grad_norm": 1.4408607482910156, "learning_rate": 1.7815947572147312e-05, "loss": 0.5683, "step": 8704 }, { "epoch": 0.23804966090570992, "grad_norm": 1.2954477071762085, "learning_rate": 1.7815395051166504e-05, "loss": 0.5602, "step": 8705 }, { "epoch": 0.23807700721942682, "grad_norm": 1.520150065422058, "learning_rate": 1.7814842468876117e-05, "loss": 0.5286, "step": 8706 }, { "epoch": 0.23810435353314374, "grad_norm": 1.5385239124298096, "learning_rate": 1.7814289825280494e-05, "loss": 0.5242, "step": 8707 }, { "epoch": 0.23813169984686064, "grad_norm": 1.3587373495101929, "learning_rate": 1.7813737120383964e-05, "loss": 0.5682, "step": 8708 }, { "epoch": 0.23815904616057756, "grad_norm": 1.8003653287887573, "learning_rate": 1.7813184354190866e-05, "loss": 0.5275, "step": 8709 }, { "epoch": 0.23818639247429446, "grad_norm": 1.624325156211853, "learning_rate": 1.781263152670554e-05, "loss": 0.6015, "step": 8710 }, { "epoch": 0.2382137387880114, "grad_norm": 1.7348963022232056, "learning_rate": 1.7812078637932312e-05, "loss": 0.8898, "step": 8711 }, { "epoch": 0.23824108510172828, "grad_norm": 1.1932272911071777, "learning_rate": 1.7811525687875526e-05, "loss": 0.5747, "step": 8712 }, { "epoch": 0.2382684314154452, "grad_norm": 1.6898201704025269, "learning_rate": 1.781097267653952e-05, "loss": 0.5964, "step": 8713 }, { "epoch": 0.2382957777291621, "grad_norm": 1.169160008430481, "learning_rate": 1.7810419603928633e-05, "loss": 0.557, "step": 8714 }, { "epoch": 0.23832312404287903, "grad_norm": 1.311042070388794, "learning_rate": 1.7809866470047203e-05, "loss": 0.5356, "step": 8715 }, { "epoch": 0.23835047035659593, "grad_norm": 1.102768898010254, "learning_rate": 1.7809313274899564e-05, "loss": 0.391, "step": 8716 }, { "epoch": 0.23837781667031285, "grad_norm": 1.3845009803771973, "learning_rate": 1.7808760018490067e-05, "loss": 0.5817, "step": 8717 }, { "epoch": 0.23840516298402975, "grad_norm": 1.4053922891616821, "learning_rate": 1.780820670082304e-05, "loss": 0.562, "step": 8718 }, { "epoch": 0.23843250929774668, "grad_norm": 1.4552448987960815, "learning_rate": 1.780765332190283e-05, "loss": 0.5716, "step": 8719 }, { "epoch": 0.23845985561146357, "grad_norm": 1.390328288078308, "learning_rate": 1.7807099881733778e-05, "loss": 0.5762, "step": 8720 }, { "epoch": 0.2384872019251805, "grad_norm": 1.1704494953155518, "learning_rate": 1.780654638032022e-05, "loss": 0.5754, "step": 8721 }, { "epoch": 0.2385145482388974, "grad_norm": 1.23390531539917, "learning_rate": 1.780599281766651e-05, "loss": 0.5467, "step": 8722 }, { "epoch": 0.23854189455261432, "grad_norm": 1.2066584825515747, "learning_rate": 1.7805439193776978e-05, "loss": 0.4181, "step": 8723 }, { "epoch": 0.23856924086633122, "grad_norm": 1.347747564315796, "learning_rate": 1.7804885508655972e-05, "loss": 0.5853, "step": 8724 }, { "epoch": 0.23859658718004814, "grad_norm": 1.5510562658309937, "learning_rate": 1.780433176230784e-05, "loss": 0.6291, "step": 8725 }, { "epoch": 0.23862393349376504, "grad_norm": 1.3846802711486816, "learning_rate": 1.7803777954736918e-05, "loss": 0.538, "step": 8726 }, { "epoch": 0.23865127980748196, "grad_norm": 1.465798258781433, "learning_rate": 1.7803224085947554e-05, "loss": 0.5768, "step": 8727 }, { "epoch": 0.23867862612119886, "grad_norm": 1.1947191953659058, "learning_rate": 1.7802670155944098e-05, "loss": 0.5456, "step": 8728 }, { "epoch": 0.2387059724349158, "grad_norm": 1.393363356590271, "learning_rate": 1.7802116164730884e-05, "loss": 0.5757, "step": 8729 }, { "epoch": 0.23873331874863268, "grad_norm": 1.1820127964019775, "learning_rate": 1.7801562112312268e-05, "loss": 0.5602, "step": 8730 }, { "epoch": 0.2387606650623496, "grad_norm": 1.2796965837478638, "learning_rate": 1.780100799869259e-05, "loss": 0.5655, "step": 8731 }, { "epoch": 0.2387880113760665, "grad_norm": 1.7905930280685425, "learning_rate": 1.7800453823876203e-05, "loss": 0.5198, "step": 8732 }, { "epoch": 0.23881535768978343, "grad_norm": 1.474380612373352, "learning_rate": 1.7799899587867448e-05, "loss": 0.3974, "step": 8733 }, { "epoch": 0.23884270400350033, "grad_norm": 1.6478089094161987, "learning_rate": 1.779934529067068e-05, "loss": 0.5792, "step": 8734 }, { "epoch": 0.23887005031721725, "grad_norm": 1.4586232900619507, "learning_rate": 1.779879093229024e-05, "loss": 0.577, "step": 8735 }, { "epoch": 0.23889739663093415, "grad_norm": 1.3163444995880127, "learning_rate": 1.779823651273048e-05, "loss": 0.5451, "step": 8736 }, { "epoch": 0.23892474294465105, "grad_norm": 1.25039803981781, "learning_rate": 1.779768203199575e-05, "loss": 0.562, "step": 8737 }, { "epoch": 0.23895208925836797, "grad_norm": 1.7690179347991943, "learning_rate": 1.7797127490090397e-05, "loss": 0.5085, "step": 8738 }, { "epoch": 0.23897943557208487, "grad_norm": 3.2162680625915527, "learning_rate": 1.7796572887018776e-05, "loss": 0.557, "step": 8739 }, { "epoch": 0.2390067818858018, "grad_norm": 1.5342992544174194, "learning_rate": 1.7796018222785233e-05, "loss": 0.5846, "step": 8740 }, { "epoch": 0.2390341281995187, "grad_norm": 1.3887622356414795, "learning_rate": 1.779546349739412e-05, "loss": 0.5811, "step": 8741 }, { "epoch": 0.23906147451323562, "grad_norm": 1.6834814548492432, "learning_rate": 1.7794908710849792e-05, "loss": 0.5884, "step": 8742 }, { "epoch": 0.23908882082695251, "grad_norm": 1.2816107273101807, "learning_rate": 1.77943538631566e-05, "loss": 0.5438, "step": 8743 }, { "epoch": 0.23911616714066944, "grad_norm": 1.3475100994110107, "learning_rate": 1.7793798954318893e-05, "loss": 0.5558, "step": 8744 }, { "epoch": 0.23914351345438634, "grad_norm": 1.498869776725769, "learning_rate": 1.7793243984341025e-05, "loss": 0.562, "step": 8745 }, { "epoch": 0.23917085976810326, "grad_norm": 1.3943344354629517, "learning_rate": 1.7792688953227352e-05, "loss": 0.5538, "step": 8746 }, { "epoch": 0.23919820608182016, "grad_norm": 1.5099408626556396, "learning_rate": 1.779213386098223e-05, "loss": 0.5584, "step": 8747 }, { "epoch": 0.23922555239553708, "grad_norm": 1.5873967409133911, "learning_rate": 1.779157870761001e-05, "loss": 0.5824, "step": 8748 }, { "epoch": 0.23925289870925398, "grad_norm": 1.993685007095337, "learning_rate": 1.7791023493115046e-05, "loss": 0.9202, "step": 8749 }, { "epoch": 0.2392802450229709, "grad_norm": 1.7066478729248047, "learning_rate": 1.7790468217501694e-05, "loss": 0.5215, "step": 8750 }, { "epoch": 0.2393075913366878, "grad_norm": 1.789578914642334, "learning_rate": 1.7789912880774315e-05, "loss": 0.5037, "step": 8751 }, { "epoch": 0.23933493765040473, "grad_norm": 2.9409215450286865, "learning_rate": 1.7789357482937263e-05, "loss": 0.5625, "step": 8752 }, { "epoch": 0.23936228396412162, "grad_norm": 1.5055419206619263, "learning_rate": 1.778880202399489e-05, "loss": 0.5736, "step": 8753 }, { "epoch": 0.23938963027783855, "grad_norm": 1.299507737159729, "learning_rate": 1.778824650395156e-05, "loss": 0.5849, "step": 8754 }, { "epoch": 0.23941697659155545, "grad_norm": 1.451014757156372, "learning_rate": 1.7787690922811623e-05, "loss": 0.5077, "step": 8755 }, { "epoch": 0.23944432290527237, "grad_norm": 1.4862323999404907, "learning_rate": 1.7787135280579444e-05, "loss": 0.5531, "step": 8756 }, { "epoch": 0.23947166921898927, "grad_norm": 1.7103102207183838, "learning_rate": 1.7786579577259384e-05, "loss": 0.56, "step": 8757 }, { "epoch": 0.2394990155327062, "grad_norm": 1.3696537017822266, "learning_rate": 1.7786023812855796e-05, "loss": 0.5552, "step": 8758 }, { "epoch": 0.2395263618464231, "grad_norm": 1.281295895576477, "learning_rate": 1.7785467987373043e-05, "loss": 0.5617, "step": 8759 }, { "epoch": 0.23955370816014002, "grad_norm": 1.470381259918213, "learning_rate": 1.778491210081548e-05, "loss": 0.5393, "step": 8760 }, { "epoch": 0.2395810544738569, "grad_norm": 2.0858311653137207, "learning_rate": 1.778435615318748e-05, "loss": 0.5638, "step": 8761 }, { "epoch": 0.23960840078757384, "grad_norm": 2.0565757751464844, "learning_rate": 1.778380014449339e-05, "loss": 0.5207, "step": 8762 }, { "epoch": 0.23963574710129074, "grad_norm": 1.326586365699768, "learning_rate": 1.7783244074737586e-05, "loss": 0.5662, "step": 8763 }, { "epoch": 0.23966309341500766, "grad_norm": 1.705427885055542, "learning_rate": 1.7782687943924415e-05, "loss": 0.5424, "step": 8764 }, { "epoch": 0.23969043972872456, "grad_norm": 1.2284623384475708, "learning_rate": 1.778213175205825e-05, "loss": 0.5586, "step": 8765 }, { "epoch": 0.23971778604244148, "grad_norm": 1.2891018390655518, "learning_rate": 1.7781575499143452e-05, "loss": 0.585, "step": 8766 }, { "epoch": 0.23974513235615838, "grad_norm": 2.5092523097991943, "learning_rate": 1.7781019185184384e-05, "loss": 0.5569, "step": 8767 }, { "epoch": 0.2397724786698753, "grad_norm": 1.4729413986206055, "learning_rate": 1.778046281018541e-05, "loss": 0.5796, "step": 8768 }, { "epoch": 0.2397998249835922, "grad_norm": 1.213801383972168, "learning_rate": 1.7779906374150897e-05, "loss": 0.5241, "step": 8769 }, { "epoch": 0.23982717129730913, "grad_norm": 1.1380563974380493, "learning_rate": 1.7779349877085207e-05, "loss": 0.6041, "step": 8770 }, { "epoch": 0.23985451761102602, "grad_norm": 1.310422658920288, "learning_rate": 1.7778793318992704e-05, "loss": 0.5425, "step": 8771 }, { "epoch": 0.23988186392474295, "grad_norm": 1.6476417779922485, "learning_rate": 1.7778236699877758e-05, "loss": 0.5304, "step": 8772 }, { "epoch": 0.23990921023845985, "grad_norm": 1.2810724973678589, "learning_rate": 1.7777680019744737e-05, "loss": 0.615, "step": 8773 }, { "epoch": 0.23993655655217677, "grad_norm": 1.2978817224502563, "learning_rate": 1.7777123278598002e-05, "loss": 0.5706, "step": 8774 }, { "epoch": 0.23996390286589367, "grad_norm": 1.3099284172058105, "learning_rate": 1.7776566476441923e-05, "loss": 0.5709, "step": 8775 }, { "epoch": 0.2399912491796106, "grad_norm": 1.5474382638931274, "learning_rate": 1.7776009613280868e-05, "loss": 0.5974, "step": 8776 }, { "epoch": 0.2400185954933275, "grad_norm": 2.3180482387542725, "learning_rate": 1.7775452689119207e-05, "loss": 0.8786, "step": 8777 }, { "epoch": 0.24004594180704442, "grad_norm": 1.590155005455017, "learning_rate": 1.777489570396131e-05, "loss": 0.5866, "step": 8778 }, { "epoch": 0.2400732881207613, "grad_norm": 1.1270231008529663, "learning_rate": 1.7774338657811544e-05, "loss": 0.5456, "step": 8779 }, { "epoch": 0.24010063443447824, "grad_norm": 1.1181751489639282, "learning_rate": 1.7773781550674277e-05, "loss": 0.5571, "step": 8780 }, { "epoch": 0.24012798074819514, "grad_norm": 2.7302794456481934, "learning_rate": 1.777322438255388e-05, "loss": 0.5605, "step": 8781 }, { "epoch": 0.24015532706191206, "grad_norm": 1.3380697965621948, "learning_rate": 1.777266715345473e-05, "loss": 0.5439, "step": 8782 }, { "epoch": 0.24018267337562896, "grad_norm": 1.203262209892273, "learning_rate": 1.777210986338119e-05, "loss": 0.5862, "step": 8783 }, { "epoch": 0.24021001968934588, "grad_norm": 1.6598434448242188, "learning_rate": 1.777155251233764e-05, "loss": 0.9036, "step": 8784 }, { "epoch": 0.24023736600306278, "grad_norm": 1.391021490097046, "learning_rate": 1.777099510032844e-05, "loss": 0.4771, "step": 8785 }, { "epoch": 0.2402647123167797, "grad_norm": 1.4588557481765747, "learning_rate": 1.7770437627357974e-05, "loss": 0.5266, "step": 8786 }, { "epoch": 0.2402920586304966, "grad_norm": 1.285249948501587, "learning_rate": 1.7769880093430613e-05, "loss": 0.5735, "step": 8787 }, { "epoch": 0.24031940494421353, "grad_norm": 1.4713915586471558, "learning_rate": 1.7769322498550728e-05, "loss": 0.5284, "step": 8788 }, { "epoch": 0.24034675125793042, "grad_norm": 1.4419031143188477, "learning_rate": 1.7768764842722695e-05, "loss": 0.5555, "step": 8789 }, { "epoch": 0.24037409757164735, "grad_norm": 1.3707311153411865, "learning_rate": 1.776820712595089e-05, "loss": 0.5642, "step": 8790 }, { "epoch": 0.24040144388536425, "grad_norm": 1.3837921619415283, "learning_rate": 1.7767649348239683e-05, "loss": 0.5728, "step": 8791 }, { "epoch": 0.24042879019908117, "grad_norm": 1.2552911043167114, "learning_rate": 1.7767091509593453e-05, "loss": 0.5477, "step": 8792 }, { "epoch": 0.24045613651279807, "grad_norm": 1.9135026931762695, "learning_rate": 1.7766533610016578e-05, "loss": 0.5737, "step": 8793 }, { "epoch": 0.240483482826515, "grad_norm": 1.370400071144104, "learning_rate": 1.776597564951343e-05, "loss": 0.5504, "step": 8794 }, { "epoch": 0.2405108291402319, "grad_norm": 1.3860749006271362, "learning_rate": 1.776541762808839e-05, "loss": 0.4397, "step": 8795 }, { "epoch": 0.24053817545394882, "grad_norm": 1.387292504310608, "learning_rate": 1.7764859545745836e-05, "loss": 0.4902, "step": 8796 }, { "epoch": 0.2405655217676657, "grad_norm": 1.706091046333313, "learning_rate": 1.776430140249014e-05, "loss": 0.5521, "step": 8797 }, { "epoch": 0.24059286808138264, "grad_norm": 1.3138978481292725, "learning_rate": 1.776374319832569e-05, "loss": 0.5486, "step": 8798 }, { "epoch": 0.24062021439509954, "grad_norm": 1.4163097143173218, "learning_rate": 1.7763184933256855e-05, "loss": 0.59, "step": 8799 }, { "epoch": 0.24064756070881646, "grad_norm": 1.35196852684021, "learning_rate": 1.776262660728802e-05, "loss": 0.6328, "step": 8800 }, { "epoch": 0.24067490702253336, "grad_norm": 1.413033366203308, "learning_rate": 1.7762068220423563e-05, "loss": 0.404, "step": 8801 }, { "epoch": 0.24070225333625028, "grad_norm": 1.1867209672927856, "learning_rate": 1.7761509772667866e-05, "loss": 0.5746, "step": 8802 }, { "epoch": 0.24072959964996718, "grad_norm": 1.5986828804016113, "learning_rate": 1.776095126402531e-05, "loss": 0.615, "step": 8803 }, { "epoch": 0.2407569459636841, "grad_norm": 1.1583775281906128, "learning_rate": 1.7760392694500276e-05, "loss": 0.5308, "step": 8804 }, { "epoch": 0.240784292277401, "grad_norm": 1.545644998550415, "learning_rate": 1.7759834064097143e-05, "loss": 0.6242, "step": 8805 }, { "epoch": 0.24081163859111793, "grad_norm": 1.3641464710235596, "learning_rate": 1.77592753728203e-05, "loss": 0.5529, "step": 8806 }, { "epoch": 0.24083898490483482, "grad_norm": 1.4871597290039062, "learning_rate": 1.775871662067412e-05, "loss": 0.5967, "step": 8807 }, { "epoch": 0.24086633121855175, "grad_norm": 1.971970796585083, "learning_rate": 1.7758157807662996e-05, "loss": 0.4401, "step": 8808 }, { "epoch": 0.24089367753226865, "grad_norm": 1.233978509902954, "learning_rate": 1.7757598933791302e-05, "loss": 0.5643, "step": 8809 }, { "epoch": 0.24092102384598557, "grad_norm": 1.1018637418746948, "learning_rate": 1.775703999906343e-05, "loss": 0.5412, "step": 8810 }, { "epoch": 0.24094837015970247, "grad_norm": 1.6684530973434448, "learning_rate": 1.775648100348376e-05, "loss": 0.9109, "step": 8811 }, { "epoch": 0.2409757164734194, "grad_norm": 1.525464415550232, "learning_rate": 1.7755921947056687e-05, "loss": 0.9218, "step": 8812 }, { "epoch": 0.2410030627871363, "grad_norm": 2.7737536430358887, "learning_rate": 1.7755362829786583e-05, "loss": 0.5748, "step": 8813 }, { "epoch": 0.24103040910085322, "grad_norm": 1.855921983718872, "learning_rate": 1.7754803651677842e-05, "loss": 0.5693, "step": 8814 }, { "epoch": 0.2410577554145701, "grad_norm": 1.7442898750305176, "learning_rate": 1.7754244412734846e-05, "loss": 0.5835, "step": 8815 }, { "epoch": 0.24108510172828704, "grad_norm": 1.376901388168335, "learning_rate": 1.775368511296199e-05, "loss": 0.5446, "step": 8816 }, { "epoch": 0.24111244804200394, "grad_norm": 1.3098742961883545, "learning_rate": 1.7753125752363652e-05, "loss": 0.5663, "step": 8817 }, { "epoch": 0.24113979435572086, "grad_norm": 1.4849762916564941, "learning_rate": 1.7752566330944226e-05, "loss": 0.569, "step": 8818 }, { "epoch": 0.24116714066943776, "grad_norm": 1.2420772314071655, "learning_rate": 1.77520068487081e-05, "loss": 0.5391, "step": 8819 }, { "epoch": 0.24119448698315468, "grad_norm": 1.6698967218399048, "learning_rate": 1.7751447305659658e-05, "loss": 0.5687, "step": 8820 }, { "epoch": 0.24122183329687158, "grad_norm": 1.8258774280548096, "learning_rate": 1.7750887701803298e-05, "loss": 0.5993, "step": 8821 }, { "epoch": 0.2412491796105885, "grad_norm": 1.5105245113372803, "learning_rate": 1.77503280371434e-05, "loss": 0.5575, "step": 8822 }, { "epoch": 0.2412765259243054, "grad_norm": 1.1525763273239136, "learning_rate": 1.7749768311684367e-05, "loss": 0.5629, "step": 8823 }, { "epoch": 0.24130387223802233, "grad_norm": 1.1167552471160889, "learning_rate": 1.7749208525430578e-05, "loss": 0.506, "step": 8824 }, { "epoch": 0.24133121855173922, "grad_norm": 1.2221579551696777, "learning_rate": 1.774864867838643e-05, "loss": 0.5375, "step": 8825 }, { "epoch": 0.24135856486545615, "grad_norm": 1.279525876045227, "learning_rate": 1.7748088770556312e-05, "loss": 0.5568, "step": 8826 }, { "epoch": 0.24138591117917305, "grad_norm": 1.7080906629562378, "learning_rate": 1.7747528801944617e-05, "loss": 0.605, "step": 8827 }, { "epoch": 0.24141325749288997, "grad_norm": 1.4052600860595703, "learning_rate": 1.7746968772555743e-05, "loss": 0.5779, "step": 8828 }, { "epoch": 0.24144060380660687, "grad_norm": 1.334236741065979, "learning_rate": 1.7746408682394078e-05, "loss": 0.5531, "step": 8829 }, { "epoch": 0.2414679501203238, "grad_norm": 1.1922311782836914, "learning_rate": 1.7745848531464016e-05, "loss": 0.5469, "step": 8830 }, { "epoch": 0.2414952964340407, "grad_norm": 1.7866238355636597, "learning_rate": 1.7745288319769952e-05, "loss": 0.5539, "step": 8831 }, { "epoch": 0.24152264274775762, "grad_norm": 1.2564200162887573, "learning_rate": 1.774472804731628e-05, "loss": 0.5996, "step": 8832 }, { "epoch": 0.2415499890614745, "grad_norm": 3.3476619720458984, "learning_rate": 1.7744167714107398e-05, "loss": 0.9512, "step": 8833 }, { "epoch": 0.24157733537519144, "grad_norm": 1.2304660081863403, "learning_rate": 1.7743607320147696e-05, "loss": 0.5618, "step": 8834 }, { "epoch": 0.24160468168890833, "grad_norm": 1.1021095514297485, "learning_rate": 1.774304686544158e-05, "loss": 0.5626, "step": 8835 }, { "epoch": 0.24163202800262526, "grad_norm": 1.401839256286621, "learning_rate": 1.7742486349993434e-05, "loss": 0.562, "step": 8836 }, { "epoch": 0.24165937431634216, "grad_norm": 1.5273544788360596, "learning_rate": 1.7741925773807666e-05, "loss": 0.5621, "step": 8837 }, { "epoch": 0.24168672063005905, "grad_norm": 1.6416000127792358, "learning_rate": 1.7741365136888666e-05, "loss": 0.4424, "step": 8838 }, { "epoch": 0.24171406694377598, "grad_norm": 1.7218587398529053, "learning_rate": 1.774080443924084e-05, "loss": 0.5787, "step": 8839 }, { "epoch": 0.24174141325749288, "grad_norm": 1.4755818843841553, "learning_rate": 1.7740243680868577e-05, "loss": 0.5804, "step": 8840 }, { "epoch": 0.2417687595712098, "grad_norm": 1.299462914466858, "learning_rate": 1.7739682861776284e-05, "loss": 0.5539, "step": 8841 }, { "epoch": 0.2417961058849267, "grad_norm": 1.1889413595199585, "learning_rate": 1.7739121981968354e-05, "loss": 0.6042, "step": 8842 }, { "epoch": 0.24182345219864362, "grad_norm": 1.2434102296829224, "learning_rate": 1.7738561041449188e-05, "loss": 0.548, "step": 8843 }, { "epoch": 0.24185079851236052, "grad_norm": 1.1951887607574463, "learning_rate": 1.7738000040223194e-05, "loss": 0.5669, "step": 8844 }, { "epoch": 0.24187814482607745, "grad_norm": 1.3804882764816284, "learning_rate": 1.7737438978294764e-05, "loss": 0.5492, "step": 8845 }, { "epoch": 0.24190549113979434, "grad_norm": 2.3054535388946533, "learning_rate": 1.7736877855668302e-05, "loss": 0.4176, "step": 8846 }, { "epoch": 0.24193283745351127, "grad_norm": 1.347248911857605, "learning_rate": 1.7736316672348216e-05, "loss": 0.5304, "step": 8847 }, { "epoch": 0.24196018376722817, "grad_norm": 1.376505732536316, "learning_rate": 1.7735755428338897e-05, "loss": 0.5526, "step": 8848 }, { "epoch": 0.2419875300809451, "grad_norm": 1.4259189367294312, "learning_rate": 1.7735194123644756e-05, "loss": 0.4913, "step": 8849 }, { "epoch": 0.242014876394662, "grad_norm": 1.231537938117981, "learning_rate": 1.7734632758270195e-05, "loss": 0.54, "step": 8850 }, { "epoch": 0.2420422227083789, "grad_norm": 1.2355530261993408, "learning_rate": 1.7734071332219614e-05, "loss": 0.5673, "step": 8851 }, { "epoch": 0.2420695690220958, "grad_norm": 1.1577186584472656, "learning_rate": 1.7733509845497422e-05, "loss": 0.5436, "step": 8852 }, { "epoch": 0.24209691533581273, "grad_norm": 1.499863862991333, "learning_rate": 1.773294829810802e-05, "loss": 0.5938, "step": 8853 }, { "epoch": 0.24212426164952963, "grad_norm": 1.5744668245315552, "learning_rate": 1.773238669005582e-05, "loss": 0.5368, "step": 8854 }, { "epoch": 0.24215160796324656, "grad_norm": 1.1993364095687866, "learning_rate": 1.7731825021345214e-05, "loss": 0.5781, "step": 8855 }, { "epoch": 0.24217895427696345, "grad_norm": 1.2778379917144775, "learning_rate": 1.7731263291980622e-05, "loss": 0.5756, "step": 8856 }, { "epoch": 0.24220630059068038, "grad_norm": 4.010147571563721, "learning_rate": 1.7730701501966445e-05, "loss": 0.5499, "step": 8857 }, { "epoch": 0.24223364690439728, "grad_norm": 2.758981227874756, "learning_rate": 1.773013965130709e-05, "loss": 0.948, "step": 8858 }, { "epoch": 0.2422609932181142, "grad_norm": 1.4893912076950073, "learning_rate": 1.7729577740006964e-05, "loss": 0.5604, "step": 8859 }, { "epoch": 0.2422883395318311, "grad_norm": 1.4908335208892822, "learning_rate": 1.7729015768070476e-05, "loss": 0.5303, "step": 8860 }, { "epoch": 0.24231568584554802, "grad_norm": 1.1465190649032593, "learning_rate": 1.7728453735502035e-05, "loss": 0.5415, "step": 8861 }, { "epoch": 0.24234303215926492, "grad_norm": 1.2837796211242676, "learning_rate": 1.772789164230605e-05, "loss": 0.5786, "step": 8862 }, { "epoch": 0.24237037847298185, "grad_norm": 1.5460880994796753, "learning_rate": 1.772732948848693e-05, "loss": 0.5561, "step": 8863 }, { "epoch": 0.24239772478669874, "grad_norm": 1.272022008895874, "learning_rate": 1.772676727404908e-05, "loss": 0.5436, "step": 8864 }, { "epoch": 0.24242507110041567, "grad_norm": 1.5390580892562866, "learning_rate": 1.7726204998996923e-05, "loss": 0.5036, "step": 8865 }, { "epoch": 0.24245241741413256, "grad_norm": 1.343769907951355, "learning_rate": 1.7725642663334855e-05, "loss": 0.548, "step": 8866 }, { "epoch": 0.2424797637278495, "grad_norm": 19.021074295043945, "learning_rate": 1.77250802670673e-05, "loss": 0.5279, "step": 8867 }, { "epoch": 0.2425071100415664, "grad_norm": 1.8197983503341675, "learning_rate": 1.7724517810198662e-05, "loss": 0.6359, "step": 8868 }, { "epoch": 0.2425344563552833, "grad_norm": 1.1781729459762573, "learning_rate": 1.7723955292733355e-05, "loss": 0.5457, "step": 8869 }, { "epoch": 0.2425618026690002, "grad_norm": 1.351388692855835, "learning_rate": 1.7723392714675794e-05, "loss": 0.5608, "step": 8870 }, { "epoch": 0.24258914898271713, "grad_norm": 1.5357800722122192, "learning_rate": 1.772283007603039e-05, "loss": 0.5586, "step": 8871 }, { "epoch": 0.24261649529643403, "grad_norm": 2.2558114528656006, "learning_rate": 1.7722267376801557e-05, "loss": 0.4848, "step": 8872 }, { "epoch": 0.24264384161015096, "grad_norm": 2.1597530841827393, "learning_rate": 1.772170461699371e-05, "loss": 0.9875, "step": 8873 }, { "epoch": 0.24267118792386785, "grad_norm": 1.9785710573196411, "learning_rate": 1.7721141796611266e-05, "loss": 0.9346, "step": 8874 }, { "epoch": 0.24269853423758478, "grad_norm": 3.6630754470825195, "learning_rate": 1.7720578915658633e-05, "loss": 0.576, "step": 8875 }, { "epoch": 0.24272588055130168, "grad_norm": 2.4396626949310303, "learning_rate": 1.7720015974140233e-05, "loss": 0.562, "step": 8876 }, { "epoch": 0.2427532268650186, "grad_norm": 1.922651767730713, "learning_rate": 1.771945297206048e-05, "loss": 0.5263, "step": 8877 }, { "epoch": 0.2427805731787355, "grad_norm": 1.545457124710083, "learning_rate": 1.7718889909423792e-05, "loss": 0.5911, "step": 8878 }, { "epoch": 0.24280791949245242, "grad_norm": 1.3810172080993652, "learning_rate": 1.7718326786234583e-05, "loss": 0.5614, "step": 8879 }, { "epoch": 0.24283526580616932, "grad_norm": 1.701507806777954, "learning_rate": 1.7717763602497277e-05, "loss": 0.5851, "step": 8880 }, { "epoch": 0.24286261211988625, "grad_norm": 1.3305877447128296, "learning_rate": 1.771720035821628e-05, "loss": 0.4921, "step": 8881 }, { "epoch": 0.24288995843360314, "grad_norm": 1.3956724405288696, "learning_rate": 1.7716637053396026e-05, "loss": 0.5652, "step": 8882 }, { "epoch": 0.24291730474732007, "grad_norm": 1.9701616764068604, "learning_rate": 1.7716073688040923e-05, "loss": 0.5502, "step": 8883 }, { "epoch": 0.24294465106103696, "grad_norm": 1.3585577011108398, "learning_rate": 1.771551026215539e-05, "loss": 0.5699, "step": 8884 }, { "epoch": 0.2429719973747539, "grad_norm": 1.6097317934036255, "learning_rate": 1.7714946775743854e-05, "loss": 0.5749, "step": 8885 }, { "epoch": 0.2429993436884708, "grad_norm": 1.6484992504119873, "learning_rate": 1.771438322881073e-05, "loss": 0.5171, "step": 8886 }, { "epoch": 0.2430266900021877, "grad_norm": 1.6030547618865967, "learning_rate": 1.771381962136044e-05, "loss": 0.5092, "step": 8887 }, { "epoch": 0.2430540363159046, "grad_norm": 1.6991206407546997, "learning_rate": 1.7713255953397408e-05, "loss": 0.5966, "step": 8888 }, { "epoch": 0.24308138262962153, "grad_norm": 1.460339903831482, "learning_rate": 1.7712692224926054e-05, "loss": 0.568, "step": 8889 }, { "epoch": 0.24310872894333843, "grad_norm": 1.7250325679779053, "learning_rate": 1.7712128435950798e-05, "loss": 0.6543, "step": 8890 }, { "epoch": 0.24313607525705536, "grad_norm": 1.6439718008041382, "learning_rate": 1.7711564586476068e-05, "loss": 0.5954, "step": 8891 }, { "epoch": 0.24316342157077225, "grad_norm": 1.7451468706130981, "learning_rate": 1.771100067650628e-05, "loss": 0.6004, "step": 8892 }, { "epoch": 0.24319076788448918, "grad_norm": 1.5527082681655884, "learning_rate": 1.7710436706045862e-05, "loss": 0.5388, "step": 8893 }, { "epoch": 0.24321811419820608, "grad_norm": 3.5206542015075684, "learning_rate": 1.770987267509924e-05, "loss": 0.5404, "step": 8894 }, { "epoch": 0.243245460511923, "grad_norm": 5.593219757080078, "learning_rate": 1.7709308583670836e-05, "loss": 1.0527, "step": 8895 }, { "epoch": 0.2432728068256399, "grad_norm": 1.4586158990859985, "learning_rate": 1.7708744431765076e-05, "loss": 0.6282, "step": 8896 }, { "epoch": 0.24330015313935682, "grad_norm": 2.7748498916625977, "learning_rate": 1.7708180219386385e-05, "loss": 0.9352, "step": 8897 }, { "epoch": 0.24332749945307372, "grad_norm": 1.5511667728424072, "learning_rate": 1.770761594653919e-05, "loss": 0.9089, "step": 8898 }, { "epoch": 0.24335484576679065, "grad_norm": 1.9960758686065674, "learning_rate": 1.7707051613227918e-05, "loss": 0.5515, "step": 8899 }, { "epoch": 0.24338219208050754, "grad_norm": 1.4585816860198975, "learning_rate": 1.7706487219456992e-05, "loss": 0.5786, "step": 8900 }, { "epoch": 0.24340953839422447, "grad_norm": 1.788124918937683, "learning_rate": 1.7705922765230845e-05, "loss": 0.5285, "step": 8901 }, { "epoch": 0.24343688470794136, "grad_norm": 2.450544595718384, "learning_rate": 1.77053582505539e-05, "loss": 0.4803, "step": 8902 }, { "epoch": 0.2434642310216583, "grad_norm": 1.456782579421997, "learning_rate": 1.7704793675430592e-05, "loss": 0.56, "step": 8903 }, { "epoch": 0.2434915773353752, "grad_norm": 1.426788568496704, "learning_rate": 1.7704229039865343e-05, "loss": 0.5697, "step": 8904 }, { "epoch": 0.2435189236490921, "grad_norm": 1.40869140625, "learning_rate": 1.770366434386259e-05, "loss": 0.5574, "step": 8905 }, { "epoch": 0.243546269962809, "grad_norm": 2.429800271987915, "learning_rate": 1.770309958742675e-05, "loss": 0.526, "step": 8906 }, { "epoch": 0.24357361627652593, "grad_norm": 1.5229120254516602, "learning_rate": 1.7702534770562266e-05, "loss": 0.5813, "step": 8907 }, { "epoch": 0.24360096259024283, "grad_norm": 1.7588468790054321, "learning_rate": 1.7701969893273566e-05, "loss": 0.5132, "step": 8908 }, { "epoch": 0.24362830890395976, "grad_norm": 1.818427324295044, "learning_rate": 1.7701404955565075e-05, "loss": 0.5169, "step": 8909 }, { "epoch": 0.24365565521767665, "grad_norm": 1.9944626092910767, "learning_rate": 1.7700839957441234e-05, "loss": 0.5491, "step": 8910 }, { "epoch": 0.24368300153139358, "grad_norm": 1.3956358432769775, "learning_rate": 1.770027489890647e-05, "loss": 0.5691, "step": 8911 }, { "epoch": 0.24371034784511048, "grad_norm": 1.607738733291626, "learning_rate": 1.7699709779965215e-05, "loss": 0.563, "step": 8912 }, { "epoch": 0.2437376941588274, "grad_norm": 7.576751708984375, "learning_rate": 1.7699144600621903e-05, "loss": 1.2323, "step": 8913 }, { "epoch": 0.2437650404725443, "grad_norm": 1.5825468301773071, "learning_rate": 1.769857936088097e-05, "loss": 0.5428, "step": 8914 }, { "epoch": 0.24379238678626122, "grad_norm": 1.7118675708770752, "learning_rate": 1.769801406074685e-05, "loss": 0.5773, "step": 8915 }, { "epoch": 0.24381973309997812, "grad_norm": 8.738729476928711, "learning_rate": 1.7697448700223973e-05, "loss": 0.5709, "step": 8916 }, { "epoch": 0.24384707941369504, "grad_norm": 2.0134620666503906, "learning_rate": 1.769688327931678e-05, "loss": 0.5282, "step": 8917 }, { "epoch": 0.24387442572741194, "grad_norm": 1.5128979682922363, "learning_rate": 1.76963177980297e-05, "loss": 0.5777, "step": 8918 }, { "epoch": 0.24390177204112887, "grad_norm": 1.3351047039031982, "learning_rate": 1.7695752256367176e-05, "loss": 0.5624, "step": 8919 }, { "epoch": 0.24392911835484576, "grad_norm": 2.1213364601135254, "learning_rate": 1.769518665433364e-05, "loss": 0.9814, "step": 8920 }, { "epoch": 0.2439564646685627, "grad_norm": 1.538597822189331, "learning_rate": 1.769462099193353e-05, "loss": 0.5117, "step": 8921 }, { "epoch": 0.2439838109822796, "grad_norm": 1.983386754989624, "learning_rate": 1.7694055269171286e-05, "loss": 0.5507, "step": 8922 }, { "epoch": 0.2440111572959965, "grad_norm": 1.4109258651733398, "learning_rate": 1.769348948605134e-05, "loss": 0.5617, "step": 8923 }, { "epoch": 0.2440385036097134, "grad_norm": 1.7803040742874146, "learning_rate": 1.7692923642578138e-05, "loss": 0.5799, "step": 8924 }, { "epoch": 0.24406584992343033, "grad_norm": 1.5615371465682983, "learning_rate": 1.7692357738756115e-05, "loss": 0.9159, "step": 8925 }, { "epoch": 0.24409319623714723, "grad_norm": 1.6641120910644531, "learning_rate": 1.7691791774589707e-05, "loss": 0.4545, "step": 8926 }, { "epoch": 0.24412054255086416, "grad_norm": 1.4985829591751099, "learning_rate": 1.7691225750083358e-05, "loss": 0.5425, "step": 8927 }, { "epoch": 0.24414788886458105, "grad_norm": 1.7821495532989502, "learning_rate": 1.769065966524151e-05, "loss": 0.6019, "step": 8928 }, { "epoch": 0.24417523517829798, "grad_norm": 16.06151008605957, "learning_rate": 1.76900935200686e-05, "loss": 0.5729, "step": 8929 }, { "epoch": 0.24420258149201488, "grad_norm": 1.8838090896606445, "learning_rate": 1.7689527314569068e-05, "loss": 0.5389, "step": 8930 }, { "epoch": 0.2442299278057318, "grad_norm": 1.6607303619384766, "learning_rate": 1.768896104874736e-05, "loss": 0.9339, "step": 8931 }, { "epoch": 0.2442572741194487, "grad_norm": 1.272764801979065, "learning_rate": 1.768839472260792e-05, "loss": 0.5786, "step": 8932 }, { "epoch": 0.24428462043316562, "grad_norm": 1.4739021062850952, "learning_rate": 1.7687828336155183e-05, "loss": 0.5624, "step": 8933 }, { "epoch": 0.24431196674688252, "grad_norm": 1.5570063591003418, "learning_rate": 1.7687261889393593e-05, "loss": 0.9319, "step": 8934 }, { "epoch": 0.24433931306059944, "grad_norm": 1.4969727993011475, "learning_rate": 1.7686695382327604e-05, "loss": 0.5505, "step": 8935 }, { "epoch": 0.24436665937431634, "grad_norm": 1.315581202507019, "learning_rate": 1.768612881496165e-05, "loss": 0.8787, "step": 8936 }, { "epoch": 0.24439400568803327, "grad_norm": 1.5694315433502197, "learning_rate": 1.768556218730018e-05, "loss": 0.574, "step": 8937 }, { "epoch": 0.24442135200175016, "grad_norm": 2.4371466636657715, "learning_rate": 1.7684995499347634e-05, "loss": 0.4458, "step": 8938 }, { "epoch": 0.24444869831546706, "grad_norm": 1.5099142789840698, "learning_rate": 1.768442875110846e-05, "loss": 0.563, "step": 8939 }, { "epoch": 0.244476044629184, "grad_norm": 2.1961395740509033, "learning_rate": 1.7683861942587112e-05, "loss": 0.9269, "step": 8940 }, { "epoch": 0.24450339094290088, "grad_norm": 1.7356312274932861, "learning_rate": 1.7683295073788022e-05, "loss": 0.5712, "step": 8941 }, { "epoch": 0.2445307372566178, "grad_norm": 1.3130367994308472, "learning_rate": 1.768272814471565e-05, "loss": 0.5241, "step": 8942 }, { "epoch": 0.2445580835703347, "grad_norm": 1.389711618423462, "learning_rate": 1.7682161155374437e-05, "loss": 0.9142, "step": 8943 }, { "epoch": 0.24458542988405163, "grad_norm": 1.4782485961914062, "learning_rate": 1.768159410576883e-05, "loss": 0.5738, "step": 8944 }, { "epoch": 0.24461277619776853, "grad_norm": 1.5536346435546875, "learning_rate": 1.7681026995903277e-05, "loss": 0.5693, "step": 8945 }, { "epoch": 0.24464012251148545, "grad_norm": 1.3536227941513062, "learning_rate": 1.768045982578223e-05, "loss": 0.5802, "step": 8946 }, { "epoch": 0.24466746882520235, "grad_norm": 1.6247048377990723, "learning_rate": 1.7679892595410143e-05, "loss": 0.5828, "step": 8947 }, { "epoch": 0.24469481513891927, "grad_norm": 1.2528342008590698, "learning_rate": 1.7679325304791454e-05, "loss": 0.5764, "step": 8948 }, { "epoch": 0.24472216145263617, "grad_norm": 1.4971610307693481, "learning_rate": 1.767875795393062e-05, "loss": 0.6315, "step": 8949 }, { "epoch": 0.2447495077663531, "grad_norm": 1.2794113159179688, "learning_rate": 1.767819054283209e-05, "loss": 0.5817, "step": 8950 }, { "epoch": 0.24477685408007, "grad_norm": 1.8637919425964355, "learning_rate": 1.767762307150032e-05, "loss": 0.5058, "step": 8951 }, { "epoch": 0.24480420039378692, "grad_norm": 1.6650075912475586, "learning_rate": 1.7677055539939754e-05, "loss": 0.5967, "step": 8952 }, { "epoch": 0.24483154670750382, "grad_norm": 1.3364672660827637, "learning_rate": 1.7676487948154848e-05, "loss": 0.5654, "step": 8953 }, { "epoch": 0.24485889302122074, "grad_norm": 1.3284188508987427, "learning_rate": 1.7675920296150057e-05, "loss": 0.5481, "step": 8954 }, { "epoch": 0.24488623933493764, "grad_norm": 1.3761627674102783, "learning_rate": 1.767535258392983e-05, "loss": 0.6335, "step": 8955 }, { "epoch": 0.24491358564865456, "grad_norm": 1.3229742050170898, "learning_rate": 1.767478481149862e-05, "loss": 0.5731, "step": 8956 }, { "epoch": 0.24494093196237146, "grad_norm": 1.2349756956100464, "learning_rate": 1.7674216978860884e-05, "loss": 0.5616, "step": 8957 }, { "epoch": 0.24496827827608839, "grad_norm": 1.237176537513733, "learning_rate": 1.7673649086021075e-05, "loss": 0.5499, "step": 8958 }, { "epoch": 0.24499562458980528, "grad_norm": 1.3794866800308228, "learning_rate": 1.767308113298365e-05, "loss": 0.5545, "step": 8959 }, { "epoch": 0.2450229709035222, "grad_norm": 1.3448866605758667, "learning_rate": 1.7672513119753067e-05, "loss": 0.574, "step": 8960 }, { "epoch": 0.2450503172172391, "grad_norm": 1.5109913349151611, "learning_rate": 1.767194504633377e-05, "loss": 0.5709, "step": 8961 }, { "epoch": 0.24507766353095603, "grad_norm": 1.5571165084838867, "learning_rate": 1.7671376912730228e-05, "loss": 0.561, "step": 8962 }, { "epoch": 0.24510500984467293, "grad_norm": 1.3107273578643799, "learning_rate": 1.767080871894689e-05, "loss": 0.584, "step": 8963 }, { "epoch": 0.24513235615838985, "grad_norm": 1.4074276685714722, "learning_rate": 1.7670240464988218e-05, "loss": 0.5858, "step": 8964 }, { "epoch": 0.24515970247210675, "grad_norm": 1.3784046173095703, "learning_rate": 1.7669672150858666e-05, "loss": 0.9374, "step": 8965 }, { "epoch": 0.24518704878582367, "grad_norm": 1.5115231275558472, "learning_rate": 1.76691037765627e-05, "loss": 0.5669, "step": 8966 }, { "epoch": 0.24521439509954057, "grad_norm": 1.3885681629180908, "learning_rate": 1.766853534210477e-05, "loss": 0.9058, "step": 8967 }, { "epoch": 0.2452417414132575, "grad_norm": 1.1333221197128296, "learning_rate": 1.7667966847489337e-05, "loss": 0.5469, "step": 8968 }, { "epoch": 0.2452690877269744, "grad_norm": 1.7714406251907349, "learning_rate": 1.7667398292720862e-05, "loss": 0.6003, "step": 8969 }, { "epoch": 0.24529643404069132, "grad_norm": 1.548285961151123, "learning_rate": 1.766682967780381e-05, "loss": 0.5154, "step": 8970 }, { "epoch": 0.24532378035440822, "grad_norm": 1.4903898239135742, "learning_rate": 1.7666261002742632e-05, "loss": 0.5823, "step": 8971 }, { "epoch": 0.24535112666812514, "grad_norm": 1.4039194583892822, "learning_rate": 1.766569226754179e-05, "loss": 0.5882, "step": 8972 }, { "epoch": 0.24537847298184204, "grad_norm": 1.6774303913116455, "learning_rate": 1.7665123472205758e-05, "loss": 0.5491, "step": 8973 }, { "epoch": 0.24540581929555896, "grad_norm": 1.7970789670944214, "learning_rate": 1.7664554616738985e-05, "loss": 0.568, "step": 8974 }, { "epoch": 0.24543316560927586, "grad_norm": 1.897323727607727, "learning_rate": 1.766398570114594e-05, "loss": 0.6152, "step": 8975 }, { "epoch": 0.24546051192299279, "grad_norm": 1.2184230089187622, "learning_rate": 1.7663416725431083e-05, "loss": 0.5743, "step": 8976 }, { "epoch": 0.24548785823670968, "grad_norm": 1.4067440032958984, "learning_rate": 1.7662847689598875e-05, "loss": 0.5858, "step": 8977 }, { "epoch": 0.2455152045504266, "grad_norm": 1.4294335842132568, "learning_rate": 1.766227859365379e-05, "loss": 0.5944, "step": 8978 }, { "epoch": 0.2455425508641435, "grad_norm": 1.8700588941574097, "learning_rate": 1.7661709437600282e-05, "loss": 0.4401, "step": 8979 }, { "epoch": 0.24556989717786043, "grad_norm": 2.121330738067627, "learning_rate": 1.766114022144282e-05, "loss": 0.9083, "step": 8980 }, { "epoch": 0.24559724349157733, "grad_norm": 1.4138696193695068, "learning_rate": 1.7660570945185868e-05, "loss": 0.5831, "step": 8981 }, { "epoch": 0.24562458980529425, "grad_norm": 1.3187779188156128, "learning_rate": 1.7660001608833896e-05, "loss": 0.8843, "step": 8982 }, { "epoch": 0.24565193611901115, "grad_norm": 1.3316010236740112, "learning_rate": 1.7659432212391367e-05, "loss": 0.568, "step": 8983 }, { "epoch": 0.24567928243272807, "grad_norm": 1.421846866607666, "learning_rate": 1.7658862755862743e-05, "loss": 0.5615, "step": 8984 }, { "epoch": 0.24570662874644497, "grad_norm": 1.5410542488098145, "learning_rate": 1.76582932392525e-05, "loss": 0.5785, "step": 8985 }, { "epoch": 0.2457339750601619, "grad_norm": 1.7328858375549316, "learning_rate": 1.7657723662565104e-05, "loss": 0.5521, "step": 8986 }, { "epoch": 0.2457613213738788, "grad_norm": 1.4629045724868774, "learning_rate": 1.7657154025805013e-05, "loss": 0.5417, "step": 8987 }, { "epoch": 0.24578866768759572, "grad_norm": 1.6970809698104858, "learning_rate": 1.765658432897671e-05, "loss": 0.5985, "step": 8988 }, { "epoch": 0.24581601400131262, "grad_norm": 1.2229300737380981, "learning_rate": 1.7656014572084657e-05, "loss": 0.5511, "step": 8989 }, { "epoch": 0.24584336031502954, "grad_norm": 1.2028815746307373, "learning_rate": 1.7655444755133322e-05, "loss": 0.5704, "step": 8990 }, { "epoch": 0.24587070662874644, "grad_norm": 1.2428175210952759, "learning_rate": 1.765487487812718e-05, "loss": 0.5429, "step": 8991 }, { "epoch": 0.24589805294246336, "grad_norm": 2.129378318786621, "learning_rate": 1.7654304941070693e-05, "loss": 0.9411, "step": 8992 }, { "epoch": 0.24592539925618026, "grad_norm": 1.7474963665008545, "learning_rate": 1.7653734943968346e-05, "loss": 0.6008, "step": 8993 }, { "epoch": 0.24595274556989719, "grad_norm": 1.8306783437728882, "learning_rate": 1.7653164886824597e-05, "loss": 0.9189, "step": 8994 }, { "epoch": 0.24598009188361408, "grad_norm": 1.4156410694122314, "learning_rate": 1.7652594769643926e-05, "loss": 0.4949, "step": 8995 }, { "epoch": 0.246007438197331, "grad_norm": 1.4695560932159424, "learning_rate": 1.76520245924308e-05, "loss": 0.5692, "step": 8996 }, { "epoch": 0.2460347845110479, "grad_norm": 1.9579585790634155, "learning_rate": 1.76514543551897e-05, "loss": 0.5756, "step": 8997 }, { "epoch": 0.24606213082476483, "grad_norm": 3.996657133102417, "learning_rate": 1.7650884057925087e-05, "loss": 0.4005, "step": 8998 }, { "epoch": 0.24608947713848173, "grad_norm": 1.3459281921386719, "learning_rate": 1.7650313700641447e-05, "loss": 0.5896, "step": 8999 }, { "epoch": 0.24611682345219865, "grad_norm": 1.2470169067382812, "learning_rate": 1.7649743283343243e-05, "loss": 0.5684, "step": 9000 }, { "epoch": 0.24614416976591555, "grad_norm": 1.387251853942871, "learning_rate": 1.764917280603496e-05, "loss": 0.5515, "step": 9001 }, { "epoch": 0.24617151607963247, "grad_norm": 1.6115477085113525, "learning_rate": 1.764860226872107e-05, "loss": 0.5659, "step": 9002 }, { "epoch": 0.24619886239334937, "grad_norm": 1.525063395500183, "learning_rate": 1.764803167140605e-05, "loss": 0.5533, "step": 9003 }, { "epoch": 0.2462262087070663, "grad_norm": 1.98280668258667, "learning_rate": 1.7647461014094367e-05, "loss": 0.5575, "step": 9004 }, { "epoch": 0.2462535550207832, "grad_norm": 1.30042564868927, "learning_rate": 1.7646890296790507e-05, "loss": 0.5513, "step": 9005 }, { "epoch": 0.24628090133450012, "grad_norm": 1.3673423528671265, "learning_rate": 1.7646319519498947e-05, "loss": 0.5411, "step": 9006 }, { "epoch": 0.24630824764821702, "grad_norm": 1.6231166124343872, "learning_rate": 1.764574868222416e-05, "loss": 0.5526, "step": 9007 }, { "epoch": 0.24633559396193394, "grad_norm": 1.3420230150222778, "learning_rate": 1.7645177784970625e-05, "loss": 0.5269, "step": 9008 }, { "epoch": 0.24636294027565084, "grad_norm": 1.4124531745910645, "learning_rate": 1.7644606827742822e-05, "loss": 0.4657, "step": 9009 }, { "epoch": 0.24639028658936776, "grad_norm": 2.626385450363159, "learning_rate": 1.7644035810545233e-05, "loss": 0.9483, "step": 9010 }, { "epoch": 0.24641763290308466, "grad_norm": 1.5004609823226929, "learning_rate": 1.764346473338233e-05, "loss": 0.5799, "step": 9011 }, { "epoch": 0.24644497921680159, "grad_norm": 1.766958475112915, "learning_rate": 1.7642893596258602e-05, "loss": 0.5628, "step": 9012 }, { "epoch": 0.24647232553051848, "grad_norm": 1.5829671621322632, "learning_rate": 1.7642322399178522e-05, "loss": 0.5921, "step": 9013 }, { "epoch": 0.2464996718442354, "grad_norm": 1.3871451616287231, "learning_rate": 1.7641751142146576e-05, "loss": 0.5846, "step": 9014 }, { "epoch": 0.2465270181579523, "grad_norm": 1.4059463739395142, "learning_rate": 1.7641179825167238e-05, "loss": 0.8964, "step": 9015 }, { "epoch": 0.24655436447166923, "grad_norm": 1.2635172605514526, "learning_rate": 1.7640608448245e-05, "loss": 0.5713, "step": 9016 }, { "epoch": 0.24658171078538613, "grad_norm": 1.213003396987915, "learning_rate": 1.7640037011384335e-05, "loss": 0.5864, "step": 9017 }, { "epoch": 0.24660905709910305, "grad_norm": 1.742762565612793, "learning_rate": 1.763946551458973e-05, "loss": 0.5806, "step": 9018 }, { "epoch": 0.24663640341281995, "grad_norm": 1.8353426456451416, "learning_rate": 1.763889395786567e-05, "loss": 0.5641, "step": 9019 }, { "epoch": 0.24666374972653687, "grad_norm": 1.7653499841690063, "learning_rate": 1.763832234121664e-05, "loss": 0.5416, "step": 9020 }, { "epoch": 0.24669109604025377, "grad_norm": 1.4526692628860474, "learning_rate": 1.7637750664647115e-05, "loss": 0.4834, "step": 9021 }, { "epoch": 0.2467184423539707, "grad_norm": 1.55801260471344, "learning_rate": 1.7637178928161588e-05, "loss": 0.4985, "step": 9022 }, { "epoch": 0.2467457886676876, "grad_norm": 1.417345404624939, "learning_rate": 1.763660713176454e-05, "loss": 0.5677, "step": 9023 }, { "epoch": 0.24677313498140452, "grad_norm": 1.3167575597763062, "learning_rate": 1.763603527546046e-05, "loss": 0.5455, "step": 9024 }, { "epoch": 0.24680048129512142, "grad_norm": 2.707395076751709, "learning_rate": 1.7635463359253832e-05, "loss": 0.5547, "step": 9025 }, { "epoch": 0.24682782760883834, "grad_norm": 1.3849260807037354, "learning_rate": 1.7634891383149138e-05, "loss": 0.5477, "step": 9026 }, { "epoch": 0.24685517392255524, "grad_norm": 1.684958577156067, "learning_rate": 1.7634319347150873e-05, "loss": 0.5676, "step": 9027 }, { "epoch": 0.24688252023627216, "grad_norm": 1.7189033031463623, "learning_rate": 1.7633747251263524e-05, "loss": 0.5739, "step": 9028 }, { "epoch": 0.24690986654998906, "grad_norm": 2.7571868896484375, "learning_rate": 1.7633175095491575e-05, "loss": 0.9359, "step": 9029 }, { "epoch": 0.24693721286370598, "grad_norm": 1.5412925481796265, "learning_rate": 1.7632602879839512e-05, "loss": 0.5941, "step": 9030 }, { "epoch": 0.24696455917742288, "grad_norm": 1.520646095275879, "learning_rate": 1.7632030604311833e-05, "loss": 0.534, "step": 9031 }, { "epoch": 0.2469919054911398, "grad_norm": 1.7746011018753052, "learning_rate": 1.763145826891302e-05, "loss": 0.548, "step": 9032 }, { "epoch": 0.2470192518048567, "grad_norm": 2.31014347076416, "learning_rate": 1.763088587364756e-05, "loss": 0.6222, "step": 9033 }, { "epoch": 0.24704659811857363, "grad_norm": 1.8143521547317505, "learning_rate": 1.763031341851995e-05, "loss": 0.5544, "step": 9034 }, { "epoch": 0.24707394443229053, "grad_norm": 3.2528131008148193, "learning_rate": 1.762974090353468e-05, "loss": 0.4485, "step": 9035 }, { "epoch": 0.24710129074600745, "grad_norm": 1.4307178258895874, "learning_rate": 1.762916832869624e-05, "loss": 0.5455, "step": 9036 }, { "epoch": 0.24712863705972435, "grad_norm": 1.4319242238998413, "learning_rate": 1.762859569400912e-05, "loss": 0.5588, "step": 9037 }, { "epoch": 0.24715598337344127, "grad_norm": 2.754102945327759, "learning_rate": 1.7628022999477815e-05, "loss": 0.4352, "step": 9038 }, { "epoch": 0.24718332968715817, "grad_norm": 1.4529303312301636, "learning_rate": 1.762745024510682e-05, "loss": 0.5459, "step": 9039 }, { "epoch": 0.2472106760008751, "grad_norm": 1.6479301452636719, "learning_rate": 1.7626877430900618e-05, "loss": 0.8883, "step": 9040 }, { "epoch": 0.247238022314592, "grad_norm": 1.598027229309082, "learning_rate": 1.762630455686371e-05, "loss": 0.5402, "step": 9041 }, { "epoch": 0.2472653686283089, "grad_norm": 1.4657105207443237, "learning_rate": 1.762573162300059e-05, "loss": 0.5675, "step": 9042 }, { "epoch": 0.24729271494202582, "grad_norm": 1.4564613103866577, "learning_rate": 1.7625158629315753e-05, "loss": 0.5861, "step": 9043 }, { "epoch": 0.2473200612557427, "grad_norm": 2.25063157081604, "learning_rate": 1.762458557581369e-05, "loss": 0.5218, "step": 9044 }, { "epoch": 0.24734740756945964, "grad_norm": 1.545392394065857, "learning_rate": 1.76240124624989e-05, "loss": 0.5669, "step": 9045 }, { "epoch": 0.24737475388317653, "grad_norm": 1.3149667978286743, "learning_rate": 1.762343928937588e-05, "loss": 0.5397, "step": 9046 }, { "epoch": 0.24740210019689346, "grad_norm": 1.2793258428573608, "learning_rate": 1.7622866056449122e-05, "loss": 0.5509, "step": 9047 }, { "epoch": 0.24742944651061036, "grad_norm": 1.3274191617965698, "learning_rate": 1.7622292763723127e-05, "loss": 0.5484, "step": 9048 }, { "epoch": 0.24745679282432728, "grad_norm": 1.7533012628555298, "learning_rate": 1.7621719411202386e-05, "loss": 0.5488, "step": 9049 }, { "epoch": 0.24748413913804418, "grad_norm": 1.528191328048706, "learning_rate": 1.7621145998891406e-05, "loss": 0.5359, "step": 9050 }, { "epoch": 0.2475114854517611, "grad_norm": 2.7907156944274902, "learning_rate": 1.7620572526794678e-05, "loss": 0.6141, "step": 9051 }, { "epoch": 0.247538831765478, "grad_norm": 1.7446457147598267, "learning_rate": 1.76199989949167e-05, "loss": 0.5323, "step": 9052 }, { "epoch": 0.24756617807919493, "grad_norm": 1.5137859582901, "learning_rate": 1.761942540326198e-05, "loss": 0.5464, "step": 9053 }, { "epoch": 0.24759352439291182, "grad_norm": 1.3647329807281494, "learning_rate": 1.761885175183501e-05, "loss": 0.563, "step": 9054 }, { "epoch": 0.24762087070662875, "grad_norm": 2.014024257659912, "learning_rate": 1.7618278040640292e-05, "loss": 0.5682, "step": 9055 }, { "epoch": 0.24764821702034565, "grad_norm": 1.1603188514709473, "learning_rate": 1.7617704269682327e-05, "loss": 0.5348, "step": 9056 }, { "epoch": 0.24767556333406257, "grad_norm": 1.5379241704940796, "learning_rate": 1.7617130438965616e-05, "loss": 0.5624, "step": 9057 }, { "epoch": 0.24770290964777947, "grad_norm": 1.5970078706741333, "learning_rate": 1.7616556548494657e-05, "loss": 0.5836, "step": 9058 }, { "epoch": 0.2477302559614964, "grad_norm": 2.1297008991241455, "learning_rate": 1.761598259827396e-05, "loss": 0.5916, "step": 9059 }, { "epoch": 0.2477576022752133, "grad_norm": 1.5595382452011108, "learning_rate": 1.7615408588308017e-05, "loss": 0.5975, "step": 9060 }, { "epoch": 0.24778494858893022, "grad_norm": 5.364439487457275, "learning_rate": 1.7614834518601342e-05, "loss": 0.6148, "step": 9061 }, { "epoch": 0.2478122949026471, "grad_norm": 2.1505608558654785, "learning_rate": 1.7614260389158434e-05, "loss": 0.5853, "step": 9062 }, { "epoch": 0.24783964121636404, "grad_norm": 1.555417776107788, "learning_rate": 1.7613686199983793e-05, "loss": 0.6005, "step": 9063 }, { "epoch": 0.24786698753008093, "grad_norm": 2.1260440349578857, "learning_rate": 1.7613111951081928e-05, "loss": 0.5507, "step": 9064 }, { "epoch": 0.24789433384379786, "grad_norm": 1.6515294313430786, "learning_rate": 1.761253764245734e-05, "loss": 0.555, "step": 9065 }, { "epoch": 0.24792168015751476, "grad_norm": 1.8891867399215698, "learning_rate": 1.7611963274114538e-05, "loss": 0.5654, "step": 9066 }, { "epoch": 0.24794902647123168, "grad_norm": 1.5121886730194092, "learning_rate": 1.761138884605803e-05, "loss": 0.5092, "step": 9067 }, { "epoch": 0.24797637278494858, "grad_norm": 1.2912392616271973, "learning_rate": 1.7610814358292313e-05, "loss": 0.5534, "step": 9068 }, { "epoch": 0.2480037190986655, "grad_norm": 1.7640048265457153, "learning_rate": 1.7610239810821902e-05, "loss": 0.58, "step": 9069 }, { "epoch": 0.2480310654123824, "grad_norm": 1.6581286191940308, "learning_rate": 1.76096652036513e-05, "loss": 0.5647, "step": 9070 }, { "epoch": 0.24805841172609933, "grad_norm": 1.5208741426467896, "learning_rate": 1.7609090536785016e-05, "loss": 0.5123, "step": 9071 }, { "epoch": 0.24808575803981622, "grad_norm": 1.818545937538147, "learning_rate": 1.760851581022756e-05, "loss": 0.5886, "step": 9072 }, { "epoch": 0.24811310435353315, "grad_norm": 1.6347576379776, "learning_rate": 1.7607941023983433e-05, "loss": 0.5129, "step": 9073 }, { "epoch": 0.24814045066725005, "grad_norm": 1.966111660003662, "learning_rate": 1.7607366178057158e-05, "loss": 0.9349, "step": 9074 }, { "epoch": 0.24816779698096697, "grad_norm": 1.4458813667297363, "learning_rate": 1.760679127245323e-05, "loss": 0.5568, "step": 9075 }, { "epoch": 0.24819514329468387, "grad_norm": 1.4583808183670044, "learning_rate": 1.760621630717617e-05, "loss": 0.6024, "step": 9076 }, { "epoch": 0.2482224896084008, "grad_norm": 1.5943927764892578, "learning_rate": 1.760564128223048e-05, "loss": 0.5333, "step": 9077 }, { "epoch": 0.2482498359221177, "grad_norm": 1.4483062028884888, "learning_rate": 1.7605066197620676e-05, "loss": 0.5794, "step": 9078 }, { "epoch": 0.24827718223583461, "grad_norm": 1.6697787046432495, "learning_rate": 1.760449105335127e-05, "loss": 0.5226, "step": 9079 }, { "epoch": 0.2483045285495515, "grad_norm": 1.3172816038131714, "learning_rate": 1.7603915849426768e-05, "loss": 0.5449, "step": 9080 }, { "epoch": 0.24833187486326844, "grad_norm": 1.3714556694030762, "learning_rate": 1.7603340585851687e-05, "loss": 0.5397, "step": 9081 }, { "epoch": 0.24835922117698533, "grad_norm": 1.6407023668289185, "learning_rate": 1.760276526263054e-05, "loss": 0.5855, "step": 9082 }, { "epoch": 0.24838656749070226, "grad_norm": 1.1332048177719116, "learning_rate": 1.760218987976784e-05, "loss": 0.5537, "step": 9083 }, { "epoch": 0.24841391380441916, "grad_norm": 1.226042628288269, "learning_rate": 1.7601614437268097e-05, "loss": 0.5338, "step": 9084 }, { "epoch": 0.24844126011813608, "grad_norm": 1.679836630821228, "learning_rate": 1.760103893513583e-05, "loss": 0.5953, "step": 9085 }, { "epoch": 0.24846860643185298, "grad_norm": 1.3559186458587646, "learning_rate": 1.760046337337555e-05, "loss": 0.4833, "step": 9086 }, { "epoch": 0.2484959527455699, "grad_norm": 2.1743972301483154, "learning_rate": 1.7599887751991776e-05, "loss": 0.5655, "step": 9087 }, { "epoch": 0.2485232990592868, "grad_norm": 1.7590850591659546, "learning_rate": 1.759931207098902e-05, "loss": 0.6062, "step": 9088 }, { "epoch": 0.24855064537300373, "grad_norm": 1.7778209447860718, "learning_rate": 1.75987363303718e-05, "loss": 0.5792, "step": 9089 }, { "epoch": 0.24857799168672062, "grad_norm": 1.4751540422439575, "learning_rate": 1.759816053014463e-05, "loss": 0.561, "step": 9090 }, { "epoch": 0.24860533800043755, "grad_norm": 1.4677422046661377, "learning_rate": 1.759758467031203e-05, "loss": 0.5509, "step": 9091 }, { "epoch": 0.24863268431415445, "grad_norm": 1.6892502307891846, "learning_rate": 1.759700875087852e-05, "loss": 0.5972, "step": 9092 }, { "epoch": 0.24866003062787137, "grad_norm": 2.0443804264068604, "learning_rate": 1.759643277184861e-05, "loss": 0.4984, "step": 9093 }, { "epoch": 0.24868737694158827, "grad_norm": 1.7878530025482178, "learning_rate": 1.7595856733226825e-05, "loss": 0.5867, "step": 9094 }, { "epoch": 0.2487147232553052, "grad_norm": 1.1978471279144287, "learning_rate": 1.759528063501768e-05, "loss": 0.5397, "step": 9095 }, { "epoch": 0.2487420695690221, "grad_norm": 1.3577989339828491, "learning_rate": 1.7594704477225698e-05, "loss": 0.5095, "step": 9096 }, { "epoch": 0.24876941588273901, "grad_norm": 6.051300048828125, "learning_rate": 1.7594128259855395e-05, "loss": 0.4186, "step": 9097 }, { "epoch": 0.2487967621964559, "grad_norm": 1.275429368019104, "learning_rate": 1.7593551982911292e-05, "loss": 0.5728, "step": 9098 }, { "epoch": 0.24882410851017284, "grad_norm": 1.6392576694488525, "learning_rate": 1.759297564639791e-05, "loss": 0.5714, "step": 9099 }, { "epoch": 0.24885145482388973, "grad_norm": 1.9112447500228882, "learning_rate": 1.7592399250319773e-05, "loss": 0.5436, "step": 9100 }, { "epoch": 0.24887880113760666, "grad_norm": 1.53587007522583, "learning_rate": 1.7591822794681405e-05, "loss": 0.5592, "step": 9101 }, { "epoch": 0.24890614745132356, "grad_norm": 1.3724665641784668, "learning_rate": 1.7591246279487318e-05, "loss": 0.5477, "step": 9102 }, { "epoch": 0.24893349376504048, "grad_norm": 1.546637773513794, "learning_rate": 1.759066970474204e-05, "loss": 0.6017, "step": 9103 }, { "epoch": 0.24896084007875738, "grad_norm": 2.187485694885254, "learning_rate": 1.7590093070450096e-05, "loss": 0.4858, "step": 9104 }, { "epoch": 0.2489881863924743, "grad_norm": 1.380885362625122, "learning_rate": 1.7589516376616007e-05, "loss": 0.5379, "step": 9105 }, { "epoch": 0.2490155327061912, "grad_norm": 1.3434289693832397, "learning_rate": 1.7588939623244302e-05, "loss": 0.5442, "step": 9106 }, { "epoch": 0.24904287901990813, "grad_norm": 1.2703986167907715, "learning_rate": 1.7588362810339495e-05, "loss": 0.5281, "step": 9107 }, { "epoch": 0.24907022533362502, "grad_norm": 1.2064646482467651, "learning_rate": 1.7587785937906118e-05, "loss": 0.5289, "step": 9108 }, { "epoch": 0.24909757164734195, "grad_norm": 1.444522738456726, "learning_rate": 1.7587209005948703e-05, "loss": 0.5532, "step": 9109 }, { "epoch": 0.24912491796105884, "grad_norm": 1.4437333345413208, "learning_rate": 1.7586632014471764e-05, "loss": 0.5892, "step": 9110 }, { "epoch": 0.24915226427477577, "grad_norm": 1.400288701057434, "learning_rate": 1.7586054963479827e-05, "loss": 0.5675, "step": 9111 }, { "epoch": 0.24917961058849267, "grad_norm": 1.1928658485412598, "learning_rate": 1.758547785297743e-05, "loss": 0.5576, "step": 9112 }, { "epoch": 0.2492069569022096, "grad_norm": 1.2508610486984253, "learning_rate": 1.7584900682969094e-05, "loss": 0.5142, "step": 9113 }, { "epoch": 0.2492343032159265, "grad_norm": 1.4861171245574951, "learning_rate": 1.7584323453459342e-05, "loss": 0.5508, "step": 9114 }, { "epoch": 0.24926164952964341, "grad_norm": 2.400707244873047, "learning_rate": 1.7583746164452712e-05, "loss": 0.4119, "step": 9115 }, { "epoch": 0.2492889958433603, "grad_norm": 1.3519208431243896, "learning_rate": 1.7583168815953723e-05, "loss": 0.571, "step": 9116 }, { "epoch": 0.24931634215707724, "grad_norm": 1.4322702884674072, "learning_rate": 1.7582591407966913e-05, "loss": 0.5913, "step": 9117 }, { "epoch": 0.24934368847079413, "grad_norm": 1.1396708488464355, "learning_rate": 1.7582013940496807e-05, "loss": 0.5612, "step": 9118 }, { "epoch": 0.24937103478451106, "grad_norm": 1.4022074937820435, "learning_rate": 1.7581436413547932e-05, "loss": 0.5552, "step": 9119 }, { "epoch": 0.24939838109822796, "grad_norm": 1.3564581871032715, "learning_rate": 1.758085882712482e-05, "loss": 0.564, "step": 9120 }, { "epoch": 0.24942572741194488, "grad_norm": 1.383715271949768, "learning_rate": 1.758028118123201e-05, "loss": 0.5851, "step": 9121 }, { "epoch": 0.24945307372566178, "grad_norm": 1.3412824869155884, "learning_rate": 1.757970347587403e-05, "loss": 0.5526, "step": 9122 }, { "epoch": 0.2494804200393787, "grad_norm": 1.3860459327697754, "learning_rate": 1.75791257110554e-05, "loss": 0.8883, "step": 9123 }, { "epoch": 0.2495077663530956, "grad_norm": 1.210504174232483, "learning_rate": 1.7578547886780666e-05, "loss": 0.5431, "step": 9124 }, { "epoch": 0.24953511266681253, "grad_norm": 1.3480513095855713, "learning_rate": 1.7577970003054358e-05, "loss": 0.5533, "step": 9125 }, { "epoch": 0.24956245898052942, "grad_norm": 1.2427701950073242, "learning_rate": 1.7577392059881008e-05, "loss": 0.5724, "step": 9126 }, { "epoch": 0.24958980529424635, "grad_norm": 1.3230377435684204, "learning_rate": 1.757681405726515e-05, "loss": 0.9377, "step": 9127 }, { "epoch": 0.24961715160796324, "grad_norm": 1.5217061042785645, "learning_rate": 1.7576235995211314e-05, "loss": 0.5633, "step": 9128 }, { "epoch": 0.24964449792168017, "grad_norm": 1.387519121170044, "learning_rate": 1.7575657873724044e-05, "loss": 0.5521, "step": 9129 }, { "epoch": 0.24967184423539707, "grad_norm": 1.9887664318084717, "learning_rate": 1.7575079692807865e-05, "loss": 0.567, "step": 9130 }, { "epoch": 0.249699190549114, "grad_norm": 1.24322509765625, "learning_rate": 1.757450145246732e-05, "loss": 0.5666, "step": 9131 }, { "epoch": 0.2497265368628309, "grad_norm": 1.415833592414856, "learning_rate": 1.7573923152706943e-05, "loss": 0.5796, "step": 9132 }, { "epoch": 0.24975388317654781, "grad_norm": 1.3469595909118652, "learning_rate": 1.7573344793531267e-05, "loss": 0.5831, "step": 9133 }, { "epoch": 0.2497812294902647, "grad_norm": 1.9559125900268555, "learning_rate": 1.757276637494484e-05, "loss": 0.5771, "step": 9134 }, { "epoch": 0.24980857580398164, "grad_norm": 1.2617679834365845, "learning_rate": 1.7572187896952186e-05, "loss": 0.5369, "step": 9135 }, { "epoch": 0.24983592211769853, "grad_norm": 1.8574206829071045, "learning_rate": 1.757160935955785e-05, "loss": 0.516, "step": 9136 }, { "epoch": 0.24986326843141546, "grad_norm": 1.3066911697387695, "learning_rate": 1.757103076276637e-05, "loss": 0.5207, "step": 9137 }, { "epoch": 0.24989061474513236, "grad_norm": 1.34614896774292, "learning_rate": 1.7570452106582284e-05, "loss": 0.5655, "step": 9138 }, { "epoch": 0.24991796105884928, "grad_norm": 1.2014455795288086, "learning_rate": 1.7569873391010133e-05, "loss": 0.5419, "step": 9139 }, { "epoch": 0.24994530737256618, "grad_norm": 1.207930564880371, "learning_rate": 1.7569294616054454e-05, "loss": 0.5498, "step": 9140 }, { "epoch": 0.2499726536862831, "grad_norm": 1.675126314163208, "learning_rate": 1.7568715781719786e-05, "loss": 0.5601, "step": 9141 }, { "epoch": 0.25, "grad_norm": 1.3186933994293213, "learning_rate": 1.7568136888010677e-05, "loss": 0.5652, "step": 9142 }, { "epoch": 0.2500273463137169, "grad_norm": 1.664175271987915, "learning_rate": 1.7567557934931664e-05, "loss": 0.5048, "step": 9143 }, { "epoch": 0.2500546926274338, "grad_norm": 1.5990549325942993, "learning_rate": 1.756697892248729e-05, "loss": 0.8941, "step": 9144 }, { "epoch": 0.2500820389411507, "grad_norm": 1.3665704727172852, "learning_rate": 1.7566399850682093e-05, "loss": 0.5663, "step": 9145 }, { "epoch": 0.25010938525486764, "grad_norm": 1.4176543951034546, "learning_rate": 1.7565820719520616e-05, "loss": 0.5614, "step": 9146 }, { "epoch": 0.25013673156858457, "grad_norm": 1.2897717952728271, "learning_rate": 1.756524152900741e-05, "loss": 0.511, "step": 9147 }, { "epoch": 0.25016407788230144, "grad_norm": 1.6330009698867798, "learning_rate": 1.756466227914701e-05, "loss": 0.5694, "step": 9148 }, { "epoch": 0.25019142419601836, "grad_norm": 1.64024817943573, "learning_rate": 1.7564082969943964e-05, "loss": 0.5725, "step": 9149 }, { "epoch": 0.2502187705097353, "grad_norm": 2.8595707416534424, "learning_rate": 1.7563503601402815e-05, "loss": 0.5759, "step": 9150 }, { "epoch": 0.2502461168234522, "grad_norm": 1.4112614393234253, "learning_rate": 1.7562924173528113e-05, "loss": 0.5515, "step": 9151 }, { "epoch": 0.2502734631371691, "grad_norm": 1.171035647392273, "learning_rate": 1.7562344686324397e-05, "loss": 0.5544, "step": 9152 }, { "epoch": 0.250300809450886, "grad_norm": 1.4599534273147583, "learning_rate": 1.756176513979621e-05, "loss": 0.5556, "step": 9153 }, { "epoch": 0.25032815576460293, "grad_norm": 1.8771517276763916, "learning_rate": 1.7561185533948112e-05, "loss": 0.5825, "step": 9154 }, { "epoch": 0.25035550207831986, "grad_norm": 1.1964648962020874, "learning_rate": 1.7560605868784634e-05, "loss": 0.5552, "step": 9155 }, { "epoch": 0.2503828483920367, "grad_norm": 1.375892162322998, "learning_rate": 1.7560026144310337e-05, "loss": 0.5603, "step": 9156 }, { "epoch": 0.25041019470575365, "grad_norm": 1.2440195083618164, "learning_rate": 1.7559446360529758e-05, "loss": 0.5615, "step": 9157 }, { "epoch": 0.2504375410194706, "grad_norm": 1.293251395225525, "learning_rate": 1.7558866517447453e-05, "loss": 0.5385, "step": 9158 }, { "epoch": 0.2504648873331875, "grad_norm": 1.2589075565338135, "learning_rate": 1.7558286615067963e-05, "loss": 0.5709, "step": 9159 }, { "epoch": 0.25049223364690437, "grad_norm": 1.364640474319458, "learning_rate": 1.7557706653395845e-05, "loss": 0.5397, "step": 9160 }, { "epoch": 0.2505195799606213, "grad_norm": 1.4447568655014038, "learning_rate": 1.7557126632435645e-05, "loss": 0.5809, "step": 9161 }, { "epoch": 0.2505469262743382, "grad_norm": 2.3351478576660156, "learning_rate": 1.7556546552191915e-05, "loss": 0.4286, "step": 9162 }, { "epoch": 0.25057427258805515, "grad_norm": 1.5366705656051636, "learning_rate": 1.75559664126692e-05, "loss": 0.9099, "step": 9163 }, { "epoch": 0.250601618901772, "grad_norm": 1.2829258441925049, "learning_rate": 1.7555386213872058e-05, "loss": 0.5417, "step": 9164 }, { "epoch": 0.25062896521548894, "grad_norm": 1.4453612565994263, "learning_rate": 1.7554805955805036e-05, "loss": 0.5595, "step": 9165 }, { "epoch": 0.25065631152920587, "grad_norm": 1.7959471940994263, "learning_rate": 1.7554225638472687e-05, "loss": 0.8909, "step": 9166 }, { "epoch": 0.2506836578429228, "grad_norm": 1.6894664764404297, "learning_rate": 1.7553645261879562e-05, "loss": 0.5677, "step": 9167 }, { "epoch": 0.25071100415663966, "grad_norm": 1.2941977977752686, "learning_rate": 1.755306482603022e-05, "loss": 0.5611, "step": 9168 }, { "epoch": 0.2507383504703566, "grad_norm": 1.587454080581665, "learning_rate": 1.7552484330929208e-05, "loss": 0.5729, "step": 9169 }, { "epoch": 0.2507656967840735, "grad_norm": 1.4877886772155762, "learning_rate": 1.7551903776581082e-05, "loss": 0.5687, "step": 9170 }, { "epoch": 0.25079304309779044, "grad_norm": 1.4745895862579346, "learning_rate": 1.7551323162990395e-05, "loss": 0.5435, "step": 9171 }, { "epoch": 0.2508203894115073, "grad_norm": 1.753359079360962, "learning_rate": 1.7550742490161706e-05, "loss": 0.5555, "step": 9172 }, { "epoch": 0.25084773572522423, "grad_norm": 1.5738641023635864, "learning_rate": 1.7550161758099563e-05, "loss": 0.5443, "step": 9173 }, { "epoch": 0.25087508203894116, "grad_norm": 1.1652185916900635, "learning_rate": 1.7549580966808526e-05, "loss": 0.5623, "step": 9174 }, { "epoch": 0.2509024283526581, "grad_norm": 2.0805492401123047, "learning_rate": 1.7549000116293156e-05, "loss": 0.574, "step": 9175 }, { "epoch": 0.25092977466637495, "grad_norm": 1.5539931058883667, "learning_rate": 1.7548419206558e-05, "loss": 0.591, "step": 9176 }, { "epoch": 0.2509571209800919, "grad_norm": 1.3854337930679321, "learning_rate": 1.754783823760762e-05, "loss": 0.5612, "step": 9177 }, { "epoch": 0.2509844672938088, "grad_norm": 1.474726915359497, "learning_rate": 1.7547257209446575e-05, "loss": 0.5623, "step": 9178 }, { "epoch": 0.2510118136075257, "grad_norm": 10.239627838134766, "learning_rate": 1.754667612207942e-05, "loss": 0.4502, "step": 9179 }, { "epoch": 0.2510391599212426, "grad_norm": 1.5987365245819092, "learning_rate": 1.7546094975510713e-05, "loss": 0.5347, "step": 9180 }, { "epoch": 0.2510665062349595, "grad_norm": 2.13059401512146, "learning_rate": 1.7545513769745018e-05, "loss": 0.5817, "step": 9181 }, { "epoch": 0.25109385254867644, "grad_norm": 4.607237815856934, "learning_rate": 1.7544932504786892e-05, "loss": 0.4978, "step": 9182 }, { "epoch": 0.25112119886239337, "grad_norm": 1.199704647064209, "learning_rate": 1.754435118064089e-05, "loss": 0.4313, "step": 9183 }, { "epoch": 0.25114854517611024, "grad_norm": 1.6424322128295898, "learning_rate": 1.754376979731158e-05, "loss": 0.5922, "step": 9184 }, { "epoch": 0.25117589148982716, "grad_norm": 1.668513298034668, "learning_rate": 1.7543188354803513e-05, "loss": 0.603, "step": 9185 }, { "epoch": 0.2512032378035441, "grad_norm": 1.236834168434143, "learning_rate": 1.754260685312126e-05, "loss": 0.5588, "step": 9186 }, { "epoch": 0.251230584117261, "grad_norm": 1.4345299005508423, "learning_rate": 1.7542025292269377e-05, "loss": 0.511, "step": 9187 }, { "epoch": 0.2512579304309779, "grad_norm": 1.8713388442993164, "learning_rate": 1.754144367225243e-05, "loss": 0.5183, "step": 9188 }, { "epoch": 0.2512852767446948, "grad_norm": 1.466834306716919, "learning_rate": 1.7540861993074983e-05, "loss": 0.5681, "step": 9189 }, { "epoch": 0.25131262305841173, "grad_norm": 1.8601770401000977, "learning_rate": 1.7540280254741592e-05, "loss": 0.5178, "step": 9190 }, { "epoch": 0.25133996937212866, "grad_norm": 1.307655692100525, "learning_rate": 1.753969845725682e-05, "loss": 0.564, "step": 9191 }, { "epoch": 0.2513673156858455, "grad_norm": 1.5312342643737793, "learning_rate": 1.7539116600625242e-05, "loss": 0.5719, "step": 9192 }, { "epoch": 0.25139466199956245, "grad_norm": 1.721966028213501, "learning_rate": 1.7538534684851415e-05, "loss": 0.5443, "step": 9193 }, { "epoch": 0.2514220083132794, "grad_norm": 1.8219263553619385, "learning_rate": 1.75379527099399e-05, "loss": 0.5997, "step": 9194 }, { "epoch": 0.2514493546269963, "grad_norm": 1.6973856687545776, "learning_rate": 1.7537370675895277e-05, "loss": 0.5801, "step": 9195 }, { "epoch": 0.25147670094071317, "grad_norm": 1.5525845289230347, "learning_rate": 1.7536788582722095e-05, "loss": 0.5613, "step": 9196 }, { "epoch": 0.2515040472544301, "grad_norm": 1.8258205652236938, "learning_rate": 1.7536206430424928e-05, "loss": 0.9038, "step": 9197 }, { "epoch": 0.251531393568147, "grad_norm": 4.256941318511963, "learning_rate": 1.7535624219008345e-05, "loss": 0.8878, "step": 9198 }, { "epoch": 0.25155873988186395, "grad_norm": 2.2554056644439697, "learning_rate": 1.753504194847691e-05, "loss": 0.5931, "step": 9199 }, { "epoch": 0.2515860861955808, "grad_norm": 1.5753562450408936, "learning_rate": 1.7534459618835187e-05, "loss": 0.5615, "step": 9200 }, { "epoch": 0.25161343250929774, "grad_norm": 1.6960605382919312, "learning_rate": 1.7533877230087754e-05, "loss": 0.5718, "step": 9201 }, { "epoch": 0.25164077882301467, "grad_norm": 1.9302058219909668, "learning_rate": 1.7533294782239173e-05, "loss": 0.5894, "step": 9202 }, { "epoch": 0.2516681251367316, "grad_norm": 1.6488053798675537, "learning_rate": 1.7532712275294014e-05, "loss": 0.9395, "step": 9203 }, { "epoch": 0.25169547145044846, "grad_norm": 1.4647828340530396, "learning_rate": 1.753212970925685e-05, "loss": 0.5754, "step": 9204 }, { "epoch": 0.2517228177641654, "grad_norm": 1.4849803447723389, "learning_rate": 1.7531547084132247e-05, "loss": 0.8933, "step": 9205 }, { "epoch": 0.2517501640778823, "grad_norm": 1.538435697555542, "learning_rate": 1.7530964399924773e-05, "loss": 0.5565, "step": 9206 }, { "epoch": 0.25177751039159924, "grad_norm": 1.6809253692626953, "learning_rate": 1.7530381656639004e-05, "loss": 0.5358, "step": 9207 }, { "epoch": 0.2518048567053161, "grad_norm": 1.9132851362228394, "learning_rate": 1.752979885427951e-05, "loss": 0.5512, "step": 9208 }, { "epoch": 0.25183220301903303, "grad_norm": 1.5810872316360474, "learning_rate": 1.7529215992850864e-05, "loss": 0.5476, "step": 9209 }, { "epoch": 0.25185954933274995, "grad_norm": 1.4010493755340576, "learning_rate": 1.752863307235764e-05, "loss": 0.4105, "step": 9210 }, { "epoch": 0.2518868956464669, "grad_norm": 2.1589269638061523, "learning_rate": 1.7528050092804405e-05, "loss": 0.5776, "step": 9211 }, { "epoch": 0.25191424196018375, "grad_norm": 1.369510293006897, "learning_rate": 1.7527467054195737e-05, "loss": 0.4006, "step": 9212 }, { "epoch": 0.2519415882739007, "grad_norm": 2.1479151248931885, "learning_rate": 1.7526883956536208e-05, "loss": 0.5824, "step": 9213 }, { "epoch": 0.2519689345876176, "grad_norm": 1.406389832496643, "learning_rate": 1.752630079983039e-05, "loss": 0.5552, "step": 9214 }, { "epoch": 0.2519962809013345, "grad_norm": 1.383573293685913, "learning_rate": 1.7525717584082865e-05, "loss": 0.5795, "step": 9215 }, { "epoch": 0.2520236272150514, "grad_norm": 1.5717536211013794, "learning_rate": 1.75251343092982e-05, "loss": 0.5306, "step": 9216 }, { "epoch": 0.2520509735287683, "grad_norm": 1.5768468379974365, "learning_rate": 1.7524550975480975e-05, "loss": 0.5577, "step": 9217 }, { "epoch": 0.25207831984248524, "grad_norm": 1.7906919717788696, "learning_rate": 1.7523967582635767e-05, "loss": 0.593, "step": 9218 }, { "epoch": 0.25210566615620217, "grad_norm": 1.6077650785446167, "learning_rate": 1.7523384130767147e-05, "loss": 0.5175, "step": 9219 }, { "epoch": 0.25213301246991904, "grad_norm": 1.2662253379821777, "learning_rate": 1.7522800619879698e-05, "loss": 0.564, "step": 9220 }, { "epoch": 0.25216035878363596, "grad_norm": 1.7096428871154785, "learning_rate": 1.7522217049977996e-05, "loss": 0.5663, "step": 9221 }, { "epoch": 0.2521877050973529, "grad_norm": 1.3869191408157349, "learning_rate": 1.7521633421066616e-05, "loss": 0.5295, "step": 9222 }, { "epoch": 0.2522150514110698, "grad_norm": 1.5669862031936646, "learning_rate": 1.752104973315014e-05, "loss": 0.4996, "step": 9223 }, { "epoch": 0.2522423977247867, "grad_norm": 1.7792736291885376, "learning_rate": 1.7520465986233146e-05, "loss": 0.9211, "step": 9224 }, { "epoch": 0.2522697440385036, "grad_norm": 1.2167260646820068, "learning_rate": 1.7519882180320215e-05, "loss": 0.5445, "step": 9225 }, { "epoch": 0.25229709035222053, "grad_norm": 1.8310197591781616, "learning_rate": 1.751929831541592e-05, "loss": 0.5797, "step": 9226 }, { "epoch": 0.25232443666593746, "grad_norm": 1.414517879486084, "learning_rate": 1.7518714391524846e-05, "loss": 0.561, "step": 9227 }, { "epoch": 0.2523517829796543, "grad_norm": 2.0863747596740723, "learning_rate": 1.7518130408651577e-05, "loss": 0.5524, "step": 9228 }, { "epoch": 0.25237912929337125, "grad_norm": 1.322798728942871, "learning_rate": 1.7517546366800688e-05, "loss": 0.5462, "step": 9229 }, { "epoch": 0.2524064756070882, "grad_norm": 1.4116953611373901, "learning_rate": 1.7516962265976766e-05, "loss": 0.5877, "step": 9230 }, { "epoch": 0.2524338219208051, "grad_norm": 1.425805687904358, "learning_rate": 1.751637810618439e-05, "loss": 0.5439, "step": 9231 }, { "epoch": 0.25246116823452197, "grad_norm": 1.5236390829086304, "learning_rate": 1.7515793887428142e-05, "loss": 0.5812, "step": 9232 }, { "epoch": 0.2524885145482389, "grad_norm": 2.0364465713500977, "learning_rate": 1.7515209609712604e-05, "loss": 0.5577, "step": 9233 }, { "epoch": 0.2525158608619558, "grad_norm": 1.5922363996505737, "learning_rate": 1.7514625273042363e-05, "loss": 0.5846, "step": 9234 }, { "epoch": 0.25254320717567275, "grad_norm": 1.2909458875656128, "learning_rate": 1.7514040877422004e-05, "loss": 0.9117, "step": 9235 }, { "epoch": 0.2525705534893896, "grad_norm": 1.4895802736282349, "learning_rate": 1.7513456422856104e-05, "loss": 0.591, "step": 9236 }, { "epoch": 0.25259789980310654, "grad_norm": 1.4040324687957764, "learning_rate": 1.751287190934926e-05, "loss": 0.5501, "step": 9237 }, { "epoch": 0.25262524611682347, "grad_norm": 2.0501413345336914, "learning_rate": 1.7512287336906043e-05, "loss": 0.4424, "step": 9238 }, { "epoch": 0.2526525924305404, "grad_norm": 1.6714791059494019, "learning_rate": 1.7511702705531048e-05, "loss": 0.4998, "step": 9239 }, { "epoch": 0.25267993874425726, "grad_norm": 1.5886518955230713, "learning_rate": 1.751111801522886e-05, "loss": 0.5905, "step": 9240 }, { "epoch": 0.2527072850579742, "grad_norm": 1.546221375465393, "learning_rate": 1.7510533266004065e-05, "loss": 0.5379, "step": 9241 }, { "epoch": 0.2527346313716911, "grad_norm": 1.1512961387634277, "learning_rate": 1.7509948457861252e-05, "loss": 0.5463, "step": 9242 }, { "epoch": 0.25276197768540803, "grad_norm": 1.447919249534607, "learning_rate": 1.7509363590805004e-05, "loss": 0.5535, "step": 9243 }, { "epoch": 0.2527893239991249, "grad_norm": 1.7505238056182861, "learning_rate": 1.7508778664839915e-05, "loss": 0.5485, "step": 9244 }, { "epoch": 0.25281667031284183, "grad_norm": 1.3128690719604492, "learning_rate": 1.750819367997057e-05, "loss": 0.5055, "step": 9245 }, { "epoch": 0.25284401662655875, "grad_norm": 1.5920209884643555, "learning_rate": 1.7507608636201554e-05, "loss": 0.5678, "step": 9246 }, { "epoch": 0.2528713629402756, "grad_norm": 1.9449830055236816, "learning_rate": 1.7507023533537464e-05, "loss": 0.6204, "step": 9247 }, { "epoch": 0.25289870925399255, "grad_norm": 1.551581859588623, "learning_rate": 1.7506438371982887e-05, "loss": 0.5515, "step": 9248 }, { "epoch": 0.2529260555677095, "grad_norm": 1.2470455169677734, "learning_rate": 1.7505853151542417e-05, "loss": 0.5621, "step": 9249 }, { "epoch": 0.2529534018814264, "grad_norm": 1.527706265449524, "learning_rate": 1.7505267872220634e-05, "loss": 0.5482, "step": 9250 }, { "epoch": 0.25298074819514327, "grad_norm": 1.503728985786438, "learning_rate": 1.7504682534022143e-05, "loss": 0.5737, "step": 9251 }, { "epoch": 0.2530080945088602, "grad_norm": 1.683934211730957, "learning_rate": 1.750409713695153e-05, "loss": 0.4559, "step": 9252 }, { "epoch": 0.2530354408225771, "grad_norm": 2.2836859226226807, "learning_rate": 1.750351168101338e-05, "loss": 0.9047, "step": 9253 }, { "epoch": 0.25306278713629404, "grad_norm": 1.4377610683441162, "learning_rate": 1.75029261662123e-05, "loss": 0.5586, "step": 9254 }, { "epoch": 0.2530901334500109, "grad_norm": 2.339709758758545, "learning_rate": 1.750234059255287e-05, "loss": 0.8816, "step": 9255 }, { "epoch": 0.25311747976372784, "grad_norm": 1.3507909774780273, "learning_rate": 1.7501754960039692e-05, "loss": 0.5574, "step": 9256 }, { "epoch": 0.25314482607744476, "grad_norm": 1.5550072193145752, "learning_rate": 1.750116926867736e-05, "loss": 0.5433, "step": 9257 }, { "epoch": 0.2531721723911617, "grad_norm": 1.9164365530014038, "learning_rate": 1.750058351847046e-05, "loss": 0.6209, "step": 9258 }, { "epoch": 0.25319951870487856, "grad_norm": 1.486350178718567, "learning_rate": 1.74999977094236e-05, "loss": 0.585, "step": 9259 }, { "epoch": 0.2532268650185955, "grad_norm": 1.258152961730957, "learning_rate": 1.7499411841541362e-05, "loss": 0.5664, "step": 9260 }, { "epoch": 0.2532542113323124, "grad_norm": 1.6297094821929932, "learning_rate": 1.7498825914828354e-05, "loss": 0.5654, "step": 9261 }, { "epoch": 0.25328155764602933, "grad_norm": 1.4296374320983887, "learning_rate": 1.7498239929289162e-05, "loss": 0.5214, "step": 9262 }, { "epoch": 0.2533089039597462, "grad_norm": 1.5112117528915405, "learning_rate": 1.7497653884928393e-05, "loss": 0.5197, "step": 9263 }, { "epoch": 0.2533362502734631, "grad_norm": 1.5024889707565308, "learning_rate": 1.7497067781750636e-05, "loss": 0.5516, "step": 9264 }, { "epoch": 0.25336359658718005, "grad_norm": 1.1956924200057983, "learning_rate": 1.749648161976049e-05, "loss": 0.5322, "step": 9265 }, { "epoch": 0.253390942900897, "grad_norm": 1.2598627805709839, "learning_rate": 1.749589539896256e-05, "loss": 0.5778, "step": 9266 }, { "epoch": 0.25341828921461385, "grad_norm": 1.5608506202697754, "learning_rate": 1.749530911936144e-05, "loss": 0.5975, "step": 9267 }, { "epoch": 0.25344563552833077, "grad_norm": 1.6037676334381104, "learning_rate": 1.749472278096173e-05, "loss": 0.554, "step": 9268 }, { "epoch": 0.2534729818420477, "grad_norm": 1.6157572269439697, "learning_rate": 1.7494136383768027e-05, "loss": 0.5105, "step": 9269 }, { "epoch": 0.2535003281557646, "grad_norm": 1.220923900604248, "learning_rate": 1.7493549927784933e-05, "loss": 0.5534, "step": 9270 }, { "epoch": 0.2535276744694815, "grad_norm": 1.9383479356765747, "learning_rate": 1.749296341301705e-05, "loss": 0.6259, "step": 9271 }, { "epoch": 0.2535550207831984, "grad_norm": 1.4345695972442627, "learning_rate": 1.7492376839468973e-05, "loss": 0.5585, "step": 9272 }, { "epoch": 0.25358236709691534, "grad_norm": 1.4988975524902344, "learning_rate": 1.7491790207145312e-05, "loss": 0.5574, "step": 9273 }, { "epoch": 0.25360971341063226, "grad_norm": 1.2433159351348877, "learning_rate": 1.7491203516050667e-05, "loss": 0.5719, "step": 9274 }, { "epoch": 0.25363705972434913, "grad_norm": 1.9612449407577515, "learning_rate": 1.7490616766189635e-05, "loss": 0.5589, "step": 9275 }, { "epoch": 0.25366440603806606, "grad_norm": 1.4241931438446045, "learning_rate": 1.7490029957566826e-05, "loss": 0.5776, "step": 9276 }, { "epoch": 0.253691752351783, "grad_norm": 1.3203750848770142, "learning_rate": 1.7489443090186837e-05, "loss": 0.5531, "step": 9277 }, { "epoch": 0.2537190986654999, "grad_norm": 1.6425402164459229, "learning_rate": 1.7488856164054274e-05, "loss": 0.5707, "step": 9278 }, { "epoch": 0.2537464449792168, "grad_norm": 1.308669924736023, "learning_rate": 1.7488269179173745e-05, "loss": 0.5508, "step": 9279 }, { "epoch": 0.2537737912929337, "grad_norm": 1.592851161956787, "learning_rate": 1.7487682135549852e-05, "loss": 0.6071, "step": 9280 }, { "epoch": 0.25380113760665063, "grad_norm": 2.092078447341919, "learning_rate": 1.7487095033187195e-05, "loss": 0.4098, "step": 9281 }, { "epoch": 0.25382848392036755, "grad_norm": 2.15736722946167, "learning_rate": 1.748650787209039e-05, "loss": 0.5668, "step": 9282 }, { "epoch": 0.2538558302340844, "grad_norm": 5.786008358001709, "learning_rate": 1.7485920652264036e-05, "loss": 0.5817, "step": 9283 }, { "epoch": 0.25388317654780135, "grad_norm": 1.5649456977844238, "learning_rate": 1.748533337371274e-05, "loss": 0.5363, "step": 9284 }, { "epoch": 0.2539105228615183, "grad_norm": 1.401413917541504, "learning_rate": 1.748474603644111e-05, "loss": 0.5203, "step": 9285 }, { "epoch": 0.2539378691752352, "grad_norm": 1.3047579526901245, "learning_rate": 1.7484158640453754e-05, "loss": 0.5495, "step": 9286 }, { "epoch": 0.25396521548895207, "grad_norm": 1.2226463556289673, "learning_rate": 1.748357118575528e-05, "loss": 0.5626, "step": 9287 }, { "epoch": 0.253992561802669, "grad_norm": 1.7666319608688354, "learning_rate": 1.7482983672350297e-05, "loss": 0.4095, "step": 9288 }, { "epoch": 0.2540199081163859, "grad_norm": 1.632870078086853, "learning_rate": 1.748239610024341e-05, "loss": 0.5987, "step": 9289 }, { "epoch": 0.25404725443010284, "grad_norm": 1.784332036972046, "learning_rate": 1.7481808469439237e-05, "loss": 0.5534, "step": 9290 }, { "epoch": 0.2540746007438197, "grad_norm": 1.2854247093200684, "learning_rate": 1.7481220779942377e-05, "loss": 0.5349, "step": 9291 }, { "epoch": 0.25410194705753664, "grad_norm": 1.6269375085830688, "learning_rate": 1.7480633031757447e-05, "loss": 0.5586, "step": 9292 }, { "epoch": 0.25412929337125356, "grad_norm": 1.647945761680603, "learning_rate": 1.7480045224889055e-05, "loss": 0.5385, "step": 9293 }, { "epoch": 0.2541566396849705, "grad_norm": 1.5786595344543457, "learning_rate": 1.7479457359341815e-05, "loss": 0.5271, "step": 9294 }, { "epoch": 0.25418398599868736, "grad_norm": 1.64309823513031, "learning_rate": 1.747886943512034e-05, "loss": 0.5577, "step": 9295 }, { "epoch": 0.2542113323124043, "grad_norm": 1.9439620971679688, "learning_rate": 1.7478281452229233e-05, "loss": 0.9019, "step": 9296 }, { "epoch": 0.2542386786261212, "grad_norm": 1.281418800354004, "learning_rate": 1.7477693410673114e-05, "loss": 0.5823, "step": 9297 }, { "epoch": 0.25426602493983813, "grad_norm": 1.2851340770721436, "learning_rate": 1.7477105310456596e-05, "loss": 0.5409, "step": 9298 }, { "epoch": 0.254293371253555, "grad_norm": 1.4562036991119385, "learning_rate": 1.747651715158429e-05, "loss": 0.5494, "step": 9299 }, { "epoch": 0.2543207175672719, "grad_norm": 1.5158250331878662, "learning_rate": 1.7475928934060812e-05, "loss": 0.502, "step": 9300 }, { "epoch": 0.25434806388098885, "grad_norm": 1.932973027229309, "learning_rate": 1.7475340657890773e-05, "loss": 0.5595, "step": 9301 }, { "epoch": 0.2543754101947058, "grad_norm": 1.5139240026474, "learning_rate": 1.7474752323078793e-05, "loss": 0.5572, "step": 9302 }, { "epoch": 0.25440275650842264, "grad_norm": 1.4788031578063965, "learning_rate": 1.7474163929629486e-05, "loss": 0.5602, "step": 9303 }, { "epoch": 0.25443010282213957, "grad_norm": 1.4227814674377441, "learning_rate": 1.7473575477547465e-05, "loss": 0.5858, "step": 9304 }, { "epoch": 0.2544574491358565, "grad_norm": 1.465468406677246, "learning_rate": 1.7472986966837347e-05, "loss": 0.5888, "step": 9305 }, { "epoch": 0.2544847954495734, "grad_norm": 1.3238531351089478, "learning_rate": 1.7472398397503746e-05, "loss": 0.6078, "step": 9306 }, { "epoch": 0.2545121417632903, "grad_norm": 1.3723206520080566, "learning_rate": 1.747180976955129e-05, "loss": 0.5676, "step": 9307 }, { "epoch": 0.2545394880770072, "grad_norm": 1.26853346824646, "learning_rate": 1.747122108298458e-05, "loss": 0.4067, "step": 9308 }, { "epoch": 0.25456683439072414, "grad_norm": 1.3356693983078003, "learning_rate": 1.7470632337808248e-05, "loss": 0.5635, "step": 9309 }, { "epoch": 0.25459418070444106, "grad_norm": 1.4410195350646973, "learning_rate": 1.747004353402691e-05, "loss": 0.5851, "step": 9310 }, { "epoch": 0.25462152701815793, "grad_norm": 1.4596306085586548, "learning_rate": 1.746945467164518e-05, "loss": 0.4706, "step": 9311 }, { "epoch": 0.25464887333187486, "grad_norm": 1.311335802078247, "learning_rate": 1.746886575066768e-05, "loss": 0.5491, "step": 9312 }, { "epoch": 0.2546762196455918, "grad_norm": 1.4159369468688965, "learning_rate": 1.7468276771099033e-05, "loss": 0.8834, "step": 9313 }, { "epoch": 0.2547035659593087, "grad_norm": 3.0032002925872803, "learning_rate": 1.7467687732943854e-05, "loss": 0.4647, "step": 9314 }, { "epoch": 0.2547309122730256, "grad_norm": 1.6637465953826904, "learning_rate": 1.7467098636206764e-05, "loss": 0.5689, "step": 9315 }, { "epoch": 0.2547582585867425, "grad_norm": 1.307065486907959, "learning_rate": 1.746650948089239e-05, "loss": 0.5274, "step": 9316 }, { "epoch": 0.25478560490045943, "grad_norm": 1.5354934930801392, "learning_rate": 1.746592026700535e-05, "loss": 0.618, "step": 9317 }, { "epoch": 0.25481295121417635, "grad_norm": 1.5033330917358398, "learning_rate": 1.7465330994550266e-05, "loss": 0.5552, "step": 9318 }, { "epoch": 0.2548402975278932, "grad_norm": 1.6348334550857544, "learning_rate": 1.7464741663531765e-05, "loss": 0.4662, "step": 9319 }, { "epoch": 0.25486764384161015, "grad_norm": 1.479927897453308, "learning_rate": 1.746415227395446e-05, "loss": 0.8749, "step": 9320 }, { "epoch": 0.2548949901553271, "grad_norm": 1.7060023546218872, "learning_rate": 1.7463562825822987e-05, "loss": 0.5494, "step": 9321 }, { "epoch": 0.254922336469044, "grad_norm": 1.7862671613693237, "learning_rate": 1.746297331914196e-05, "loss": 0.543, "step": 9322 }, { "epoch": 0.25494968278276087, "grad_norm": 2.4661900997161865, "learning_rate": 1.7462383753916007e-05, "loss": 0.4119, "step": 9323 }, { "epoch": 0.2549770290964778, "grad_norm": 1.3198540210723877, "learning_rate": 1.7461794130149753e-05, "loss": 0.5633, "step": 9324 }, { "epoch": 0.2550043754101947, "grad_norm": 1.4901759624481201, "learning_rate": 1.746120444784783e-05, "loss": 0.5558, "step": 9325 }, { "epoch": 0.25503172172391164, "grad_norm": 2.3957808017730713, "learning_rate": 1.7460614707014852e-05, "loss": 0.5547, "step": 9326 }, { "epoch": 0.2550590680376285, "grad_norm": 1.5872408151626587, "learning_rate": 1.7460024907655452e-05, "loss": 0.5592, "step": 9327 }, { "epoch": 0.25508641435134544, "grad_norm": 2.184852361679077, "learning_rate": 1.7459435049774258e-05, "loss": 0.5717, "step": 9328 }, { "epoch": 0.25511376066506236, "grad_norm": 1.377456784248352, "learning_rate": 1.7458845133375898e-05, "loss": 0.8991, "step": 9329 }, { "epoch": 0.2551411069787793, "grad_norm": 2.249058485031128, "learning_rate": 1.7458255158464993e-05, "loss": 0.8736, "step": 9330 }, { "epoch": 0.25516845329249616, "grad_norm": 1.5616010427474976, "learning_rate": 1.745766512504617e-05, "loss": 0.5817, "step": 9331 }, { "epoch": 0.2551957996062131, "grad_norm": 1.6129846572875977, "learning_rate": 1.7457075033124075e-05, "loss": 0.5554, "step": 9332 }, { "epoch": 0.25522314591993, "grad_norm": 2.0218958854675293, "learning_rate": 1.7456484882703315e-05, "loss": 0.5486, "step": 9333 }, { "epoch": 0.25525049223364693, "grad_norm": 1.3806166648864746, "learning_rate": 1.7455894673788533e-05, "loss": 0.5563, "step": 9334 }, { "epoch": 0.2552778385473638, "grad_norm": 1.2594386339187622, "learning_rate": 1.7455304406384358e-05, "loss": 0.9154, "step": 9335 }, { "epoch": 0.2553051848610807, "grad_norm": 1.3758749961853027, "learning_rate": 1.7454714080495414e-05, "loss": 0.6112, "step": 9336 }, { "epoch": 0.25533253117479765, "grad_norm": 1.1348662376403809, "learning_rate": 1.7454123696126336e-05, "loss": 0.5492, "step": 9337 }, { "epoch": 0.2553598774885146, "grad_norm": 1.3890407085418701, "learning_rate": 1.745353325328176e-05, "loss": 0.5686, "step": 9338 }, { "epoch": 0.25538722380223144, "grad_norm": 1.396441102027893, "learning_rate": 1.7452942751966308e-05, "loss": 0.528, "step": 9339 }, { "epoch": 0.25541457011594837, "grad_norm": 2.1665947437286377, "learning_rate": 1.745235219218462e-05, "loss": 0.5246, "step": 9340 }, { "epoch": 0.2554419164296653, "grad_norm": 2.0801286697387695, "learning_rate": 1.7451761573941327e-05, "loss": 0.573, "step": 9341 }, { "epoch": 0.2554692627433822, "grad_norm": 2.6176064014434814, "learning_rate": 1.745117089724106e-05, "loss": 0.5602, "step": 9342 }, { "epoch": 0.2554966090570991, "grad_norm": 1.5319344997406006, "learning_rate": 1.7450580162088456e-05, "loss": 0.5783, "step": 9343 }, { "epoch": 0.255523955370816, "grad_norm": 1.1733877658843994, "learning_rate": 1.7449989368488145e-05, "loss": 0.5552, "step": 9344 }, { "epoch": 0.25555130168453294, "grad_norm": 1.5204898118972778, "learning_rate": 1.7449398516444767e-05, "loss": 0.5749, "step": 9345 }, { "epoch": 0.2555786479982498, "grad_norm": 1.5399335622787476, "learning_rate": 1.7448807605962953e-05, "loss": 0.5591, "step": 9346 }, { "epoch": 0.25560599431196673, "grad_norm": 1.6008802652359009, "learning_rate": 1.744821663704734e-05, "loss": 0.8821, "step": 9347 }, { "epoch": 0.25563334062568366, "grad_norm": 2.160815477371216, "learning_rate": 1.744762560970256e-05, "loss": 0.9163, "step": 9348 }, { "epoch": 0.2556606869394006, "grad_norm": 1.3721507787704468, "learning_rate": 1.7447034523933257e-05, "loss": 0.4167, "step": 9349 }, { "epoch": 0.25568803325311745, "grad_norm": 1.3263907432556152, "learning_rate": 1.744644337974406e-05, "loss": 0.5686, "step": 9350 }, { "epoch": 0.2557153795668344, "grad_norm": 1.5683507919311523, "learning_rate": 1.7445852177139615e-05, "loss": 0.5428, "step": 9351 }, { "epoch": 0.2557427258805513, "grad_norm": 2.010651111602783, "learning_rate": 1.7445260916124555e-05, "loss": 0.5178, "step": 9352 }, { "epoch": 0.2557700721942682, "grad_norm": 3.020104169845581, "learning_rate": 1.7444669596703515e-05, "loss": 0.3933, "step": 9353 }, { "epoch": 0.2557974185079851, "grad_norm": 2.8527591228485107, "learning_rate": 1.7444078218881138e-05, "loss": 0.4266, "step": 9354 }, { "epoch": 0.255824764821702, "grad_norm": 1.406326174736023, "learning_rate": 1.7443486782662063e-05, "loss": 0.5607, "step": 9355 }, { "epoch": 0.25585211113541895, "grad_norm": 1.4798372983932495, "learning_rate": 1.744289528805093e-05, "loss": 0.5605, "step": 9356 }, { "epoch": 0.25587945744913587, "grad_norm": 2.087251663208008, "learning_rate": 1.7442303735052375e-05, "loss": 0.5487, "step": 9357 }, { "epoch": 0.25590680376285274, "grad_norm": 1.2718497514724731, "learning_rate": 1.7441712123671044e-05, "loss": 0.5263, "step": 9358 }, { "epoch": 0.25593415007656967, "grad_norm": 1.3714193105697632, "learning_rate": 1.7441120453911576e-05, "loss": 0.5375, "step": 9359 }, { "epoch": 0.2559614963902866, "grad_norm": 1.3236644268035889, "learning_rate": 1.7440528725778612e-05, "loss": 0.5376, "step": 9360 }, { "epoch": 0.2559888427040035, "grad_norm": 1.2414194345474243, "learning_rate": 1.743993693927679e-05, "loss": 0.5811, "step": 9361 }, { "epoch": 0.2560161890177204, "grad_norm": 1.2962030172348022, "learning_rate": 1.7439345094410764e-05, "loss": 0.5483, "step": 9362 }, { "epoch": 0.2560435353314373, "grad_norm": 1.4521087408065796, "learning_rate": 1.7438753191185163e-05, "loss": 0.5348, "step": 9363 }, { "epoch": 0.25607088164515424, "grad_norm": 1.5883309841156006, "learning_rate": 1.743816122960464e-05, "loss": 0.8778, "step": 9364 }, { "epoch": 0.25609822795887116, "grad_norm": 1.3543084859848022, "learning_rate": 1.7437569209673833e-05, "loss": 0.5355, "step": 9365 }, { "epoch": 0.25612557427258803, "grad_norm": 1.5797226428985596, "learning_rate": 1.743697713139739e-05, "loss": 0.5615, "step": 9366 }, { "epoch": 0.25615292058630496, "grad_norm": 5.361927032470703, "learning_rate": 1.7436384994779956e-05, "loss": 0.4538, "step": 9367 }, { "epoch": 0.2561802669000219, "grad_norm": 1.2328572273254395, "learning_rate": 1.7435792799826173e-05, "loss": 0.5685, "step": 9368 }, { "epoch": 0.2562076132137388, "grad_norm": 1.7171757221221924, "learning_rate": 1.7435200546540686e-05, "loss": 0.598, "step": 9369 }, { "epoch": 0.2562349595274557, "grad_norm": 1.4217331409454346, "learning_rate": 1.7434608234928148e-05, "loss": 0.5752, "step": 9370 }, { "epoch": 0.2562623058411726, "grad_norm": 1.0678722858428955, "learning_rate": 1.7434015864993197e-05, "loss": 0.5693, "step": 9371 }, { "epoch": 0.2562896521548895, "grad_norm": 1.3884702920913696, "learning_rate": 1.7433423436740485e-05, "loss": 0.5494, "step": 9372 }, { "epoch": 0.25631699846860645, "grad_norm": 1.3555843830108643, "learning_rate": 1.7432830950174654e-05, "loss": 0.5465, "step": 9373 }, { "epoch": 0.2563443447823233, "grad_norm": 1.4359551668167114, "learning_rate": 1.7432238405300362e-05, "loss": 0.5137, "step": 9374 }, { "epoch": 0.25637169109604024, "grad_norm": 8.201171875, "learning_rate": 1.7431645802122248e-05, "loss": 0.5217, "step": 9375 }, { "epoch": 0.25639903740975717, "grad_norm": 1.3205413818359375, "learning_rate": 1.7431053140644967e-05, "loss": 0.5313, "step": 9376 }, { "epoch": 0.2564263837234741, "grad_norm": 1.4640618562698364, "learning_rate": 1.743046042087316e-05, "loss": 0.5791, "step": 9377 }, { "epoch": 0.25645373003719096, "grad_norm": 1.3995869159698486, "learning_rate": 1.7429867642811488e-05, "loss": 0.5869, "step": 9378 }, { "epoch": 0.2564810763509079, "grad_norm": 1.5369720458984375, "learning_rate": 1.742927480646459e-05, "loss": 0.5547, "step": 9379 }, { "epoch": 0.2565084226646248, "grad_norm": 1.7315077781677246, "learning_rate": 1.7428681911837127e-05, "loss": 0.5035, "step": 9380 }, { "epoch": 0.25653576897834174, "grad_norm": 1.5287784337997437, "learning_rate": 1.7428088958933742e-05, "loss": 0.57, "step": 9381 }, { "epoch": 0.2565631152920586, "grad_norm": 1.2477664947509766, "learning_rate": 1.742749594775909e-05, "loss": 0.5505, "step": 9382 }, { "epoch": 0.25659046160577553, "grad_norm": 1.3016867637634277, "learning_rate": 1.7426902878317828e-05, "loss": 0.551, "step": 9383 }, { "epoch": 0.25661780791949246, "grad_norm": 1.6209378242492676, "learning_rate": 1.7426309750614595e-05, "loss": 0.6076, "step": 9384 }, { "epoch": 0.2566451542332094, "grad_norm": 2.7065513134002686, "learning_rate": 1.7425716564654055e-05, "loss": 0.4324, "step": 9385 }, { "epoch": 0.25667250054692625, "grad_norm": 1.670219898223877, "learning_rate": 1.7425123320440858e-05, "loss": 0.5382, "step": 9386 }, { "epoch": 0.2566998468606432, "grad_norm": 1.6762869358062744, "learning_rate": 1.7424530017979658e-05, "loss": 0.5635, "step": 9387 }, { "epoch": 0.2567271931743601, "grad_norm": 2.1456360816955566, "learning_rate": 1.742393665727511e-05, "loss": 0.3905, "step": 9388 }, { "epoch": 0.256754539488077, "grad_norm": 1.4781244993209839, "learning_rate": 1.7423343238331867e-05, "loss": 0.5486, "step": 9389 }, { "epoch": 0.2567818858017939, "grad_norm": 1.6373893022537231, "learning_rate": 1.7422749761154587e-05, "loss": 0.592, "step": 9390 }, { "epoch": 0.2568092321155108, "grad_norm": 1.4799655675888062, "learning_rate": 1.7422156225747925e-05, "loss": 0.5886, "step": 9391 }, { "epoch": 0.25683657842922775, "grad_norm": 1.7357429265975952, "learning_rate": 1.7421562632116533e-05, "loss": 0.5386, "step": 9392 }, { "epoch": 0.25686392474294467, "grad_norm": 1.1709719896316528, "learning_rate": 1.742096898026507e-05, "loss": 0.5432, "step": 9393 }, { "epoch": 0.25689127105666154, "grad_norm": 1.6562983989715576, "learning_rate": 1.7420375270198196e-05, "loss": 0.4202, "step": 9394 }, { "epoch": 0.25691861737037847, "grad_norm": 1.797215223312378, "learning_rate": 1.741978150192057e-05, "loss": 0.5621, "step": 9395 }, { "epoch": 0.2569459636840954, "grad_norm": 1.4662652015686035, "learning_rate": 1.741918767543684e-05, "loss": 0.5324, "step": 9396 }, { "epoch": 0.2569733099978123, "grad_norm": 1.2598843574523926, "learning_rate": 1.7418593790751674e-05, "loss": 0.5463, "step": 9397 }, { "epoch": 0.2570006563115292, "grad_norm": 1.72127103805542, "learning_rate": 1.741799984786973e-05, "loss": 0.5734, "step": 9398 }, { "epoch": 0.2570280026252461, "grad_norm": 1.8498337268829346, "learning_rate": 1.741740584679566e-05, "loss": 0.4084, "step": 9399 }, { "epoch": 0.25705534893896304, "grad_norm": 1.524253010749817, "learning_rate": 1.741681178753413e-05, "loss": 0.567, "step": 9400 }, { "epoch": 0.25708269525267996, "grad_norm": 1.7422634363174438, "learning_rate": 1.74162176700898e-05, "loss": 0.5095, "step": 9401 }, { "epoch": 0.25711004156639683, "grad_norm": 1.3163557052612305, "learning_rate": 1.7415623494467327e-05, "loss": 0.5799, "step": 9402 }, { "epoch": 0.25713738788011375, "grad_norm": 1.3868283033370972, "learning_rate": 1.7415029260671375e-05, "loss": 0.5873, "step": 9403 }, { "epoch": 0.2571647341938307, "grad_norm": 1.492263913154602, "learning_rate": 1.7414434968706604e-05, "loss": 0.4766, "step": 9404 }, { "epoch": 0.2571920805075476, "grad_norm": 1.3150454759597778, "learning_rate": 1.7413840618577684e-05, "loss": 0.571, "step": 9405 }, { "epoch": 0.2572194268212645, "grad_norm": 1.8253062963485718, "learning_rate": 1.7413246210289265e-05, "loss": 0.4343, "step": 9406 }, { "epoch": 0.2572467731349814, "grad_norm": 1.7171498537063599, "learning_rate": 1.7412651743846017e-05, "loss": 0.5831, "step": 9407 }, { "epoch": 0.2572741194486983, "grad_norm": 1.4156728982925415, "learning_rate": 1.7412057219252603e-05, "loss": 0.5817, "step": 9408 }, { "epoch": 0.25730146576241525, "grad_norm": 1.573980689048767, "learning_rate": 1.7411462636513686e-05, "loss": 0.536, "step": 9409 }, { "epoch": 0.2573288120761321, "grad_norm": 1.4380625486373901, "learning_rate": 1.741086799563393e-05, "loss": 0.5514, "step": 9410 }, { "epoch": 0.25735615838984904, "grad_norm": 1.9060776233673096, "learning_rate": 1.7410273296618e-05, "loss": 0.9211, "step": 9411 }, { "epoch": 0.25738350470356597, "grad_norm": 1.4319226741790771, "learning_rate": 1.740967853947056e-05, "loss": 0.4984, "step": 9412 }, { "epoch": 0.2574108510172829, "grad_norm": 1.6898378133773804, "learning_rate": 1.740908372419628e-05, "loss": 0.511, "step": 9413 }, { "epoch": 0.25743819733099976, "grad_norm": 1.5860280990600586, "learning_rate": 1.740848885079982e-05, "loss": 0.5541, "step": 9414 }, { "epoch": 0.2574655436447167, "grad_norm": 1.8953043222427368, "learning_rate": 1.740789391928585e-05, "loss": 0.5379, "step": 9415 }, { "epoch": 0.2574928899584336, "grad_norm": 1.2348661422729492, "learning_rate": 1.740729892965904e-05, "loss": 0.576, "step": 9416 }, { "epoch": 0.25752023627215054, "grad_norm": 1.4803249835968018, "learning_rate": 1.7406703881924053e-05, "loss": 0.4857, "step": 9417 }, { "epoch": 0.2575475825858674, "grad_norm": 1.4731401205062866, "learning_rate": 1.740610877608556e-05, "loss": 0.5779, "step": 9418 }, { "epoch": 0.25757492889958433, "grad_norm": 1.3436583280563354, "learning_rate": 1.7405513612148228e-05, "loss": 0.9497, "step": 9419 }, { "epoch": 0.25760227521330126, "grad_norm": 1.2901997566223145, "learning_rate": 1.7404918390116724e-05, "loss": 0.5732, "step": 9420 }, { "epoch": 0.2576296215270182, "grad_norm": 1.639438271522522, "learning_rate": 1.7404323109995717e-05, "loss": 0.5277, "step": 9421 }, { "epoch": 0.25765696784073505, "grad_norm": 1.6273363828659058, "learning_rate": 1.7403727771789884e-05, "loss": 0.4534, "step": 9422 }, { "epoch": 0.257684314154452, "grad_norm": 1.5653409957885742, "learning_rate": 1.7403132375503885e-05, "loss": 0.5787, "step": 9423 }, { "epoch": 0.2577116604681689, "grad_norm": 1.291133165359497, "learning_rate": 1.7402536921142397e-05, "loss": 0.4592, "step": 9424 }, { "epoch": 0.2577390067818858, "grad_norm": 1.3110120296478271, "learning_rate": 1.7401941408710096e-05, "loss": 0.5723, "step": 9425 }, { "epoch": 0.2577663530956027, "grad_norm": 1.4782586097717285, "learning_rate": 1.7401345838211643e-05, "loss": 0.5586, "step": 9426 }, { "epoch": 0.2577936994093196, "grad_norm": 2.084688186645508, "learning_rate": 1.7400750209651714e-05, "loss": 0.5559, "step": 9427 }, { "epoch": 0.25782104572303655, "grad_norm": 1.2200583219528198, "learning_rate": 1.7400154523034982e-05, "loss": 0.5257, "step": 9428 }, { "epoch": 0.25784839203675347, "grad_norm": 1.1885138750076294, "learning_rate": 1.739955877836612e-05, "loss": 0.5514, "step": 9429 }, { "epoch": 0.25787573835047034, "grad_norm": 2.004546880722046, "learning_rate": 1.7398962975649808e-05, "loss": 0.5263, "step": 9430 }, { "epoch": 0.25790308466418727, "grad_norm": 1.7344470024108887, "learning_rate": 1.7398367114890708e-05, "loss": 0.4948, "step": 9431 }, { "epoch": 0.2579304309779042, "grad_norm": 1.2143279314041138, "learning_rate": 1.73977711960935e-05, "loss": 0.5376, "step": 9432 }, { "epoch": 0.2579577772916211, "grad_norm": 1.687629222869873, "learning_rate": 1.7397175219262862e-05, "loss": 0.5774, "step": 9433 }, { "epoch": 0.257985123605338, "grad_norm": 1.1832983493804932, "learning_rate": 1.7396579184403465e-05, "loss": 0.5357, "step": 9434 }, { "epoch": 0.2580124699190549, "grad_norm": 1.5033751726150513, "learning_rate": 1.7395983091519983e-05, "loss": 0.5257, "step": 9435 }, { "epoch": 0.25803981623277183, "grad_norm": 1.6373146772384644, "learning_rate": 1.73953869406171e-05, "loss": 0.5581, "step": 9436 }, { "epoch": 0.25806716254648876, "grad_norm": 1.9617232084274292, "learning_rate": 1.7394790731699485e-05, "loss": 0.4939, "step": 9437 }, { "epoch": 0.25809450886020563, "grad_norm": 1.341557502746582, "learning_rate": 1.7394194464771817e-05, "loss": 0.5806, "step": 9438 }, { "epoch": 0.25812185517392255, "grad_norm": 1.6169105768203735, "learning_rate": 1.7393598139838773e-05, "loss": 0.5405, "step": 9439 }, { "epoch": 0.2581492014876395, "grad_norm": 1.343425989151001, "learning_rate": 1.7393001756905037e-05, "loss": 0.5372, "step": 9440 }, { "epoch": 0.2581765478013564, "grad_norm": 1.595600962638855, "learning_rate": 1.739240531597528e-05, "loss": 0.6166, "step": 9441 }, { "epoch": 0.2582038941150733, "grad_norm": 2.436476230621338, "learning_rate": 1.7391808817054185e-05, "loss": 0.4711, "step": 9442 }, { "epoch": 0.2582312404287902, "grad_norm": 1.291091799736023, "learning_rate": 1.7391212260146427e-05, "loss": 0.5537, "step": 9443 }, { "epoch": 0.2582585867425071, "grad_norm": 1.3659610748291016, "learning_rate": 1.7390615645256692e-05, "loss": 0.938, "step": 9444 }, { "epoch": 0.25828593305622405, "grad_norm": 2.0568528175354004, "learning_rate": 1.7390018972389656e-05, "loss": 0.531, "step": 9445 }, { "epoch": 0.2583132793699409, "grad_norm": 2.18117094039917, "learning_rate": 1.7389422241550002e-05, "loss": 0.4729, "step": 9446 }, { "epoch": 0.25834062568365784, "grad_norm": 2.7122838497161865, "learning_rate": 1.7388825452742407e-05, "loss": 0.5677, "step": 9447 }, { "epoch": 0.25836797199737477, "grad_norm": 1.385600209236145, "learning_rate": 1.738822860597156e-05, "loss": 0.5513, "step": 9448 }, { "epoch": 0.25839531831109164, "grad_norm": 1.5888049602508545, "learning_rate": 1.7387631701242135e-05, "loss": 0.5711, "step": 9449 }, { "epoch": 0.25842266462480856, "grad_norm": 1.459027647972107, "learning_rate": 1.7387034738558823e-05, "loss": 0.5607, "step": 9450 }, { "epoch": 0.2584500109385255, "grad_norm": 1.3612632751464844, "learning_rate": 1.7386437717926296e-05, "loss": 0.563, "step": 9451 }, { "epoch": 0.2584773572522424, "grad_norm": 1.6051955223083496, "learning_rate": 1.738584063934925e-05, "loss": 0.5942, "step": 9452 }, { "epoch": 0.2585047035659593, "grad_norm": 1.1551945209503174, "learning_rate": 1.738524350283236e-05, "loss": 0.5547, "step": 9453 }, { "epoch": 0.2585320498796762, "grad_norm": 1.8512396812438965, "learning_rate": 1.7384646308380314e-05, "loss": 0.898, "step": 9454 }, { "epoch": 0.25855939619339313, "grad_norm": 1.3733127117156982, "learning_rate": 1.7384049055997794e-05, "loss": 0.522, "step": 9455 }, { "epoch": 0.25858674250711006, "grad_norm": 2.7933719158172607, "learning_rate": 1.7383451745689487e-05, "loss": 0.5569, "step": 9456 }, { "epoch": 0.2586140888208269, "grad_norm": 1.474387764930725, "learning_rate": 1.738285437746008e-05, "loss": 0.5388, "step": 9457 }, { "epoch": 0.25864143513454385, "grad_norm": 1.5049378871917725, "learning_rate": 1.7382256951314257e-05, "loss": 0.5368, "step": 9458 }, { "epoch": 0.2586687814482608, "grad_norm": 2.0418143272399902, "learning_rate": 1.7381659467256707e-05, "loss": 0.548, "step": 9459 }, { "epoch": 0.2586961277619777, "grad_norm": 1.410070776939392, "learning_rate": 1.7381061925292116e-05, "loss": 0.5653, "step": 9460 }, { "epoch": 0.25872347407569457, "grad_norm": 1.298155426979065, "learning_rate": 1.738046432542517e-05, "loss": 0.5472, "step": 9461 }, { "epoch": 0.2587508203894115, "grad_norm": 1.4984852075576782, "learning_rate": 1.737986666766056e-05, "loss": 0.557, "step": 9462 }, { "epoch": 0.2587781667031284, "grad_norm": 1.6408257484436035, "learning_rate": 1.7379268952002973e-05, "loss": 0.545, "step": 9463 }, { "epoch": 0.25880551301684535, "grad_norm": 2.7006137371063232, "learning_rate": 1.7378671178457097e-05, "loss": 0.5984, "step": 9464 }, { "epoch": 0.2588328593305622, "grad_norm": 2.093614101409912, "learning_rate": 1.7378073347027625e-05, "loss": 0.588, "step": 9465 }, { "epoch": 0.25886020564427914, "grad_norm": 1.2026883363723755, "learning_rate": 1.737747545771924e-05, "loss": 0.8756, "step": 9466 }, { "epoch": 0.25888755195799606, "grad_norm": 1.6460477113723755, "learning_rate": 1.7376877510536637e-05, "loss": 0.5327, "step": 9467 }, { "epoch": 0.258914898271713, "grad_norm": 1.9023218154907227, "learning_rate": 1.7376279505484507e-05, "loss": 0.5588, "step": 9468 }, { "epoch": 0.25894224458542986, "grad_norm": 1.3357545137405396, "learning_rate": 1.7375681442567544e-05, "loss": 0.5722, "step": 9469 }, { "epoch": 0.2589695908991468, "grad_norm": 1.7931451797485352, "learning_rate": 1.7375083321790428e-05, "loss": 0.5323, "step": 9470 }, { "epoch": 0.2589969372128637, "grad_norm": 1.6882596015930176, "learning_rate": 1.7374485143157862e-05, "loss": 0.5308, "step": 9471 }, { "epoch": 0.25902428352658063, "grad_norm": 1.7216731309890747, "learning_rate": 1.7373886906674538e-05, "loss": 0.5776, "step": 9472 }, { "epoch": 0.2590516298402975, "grad_norm": 1.5971752405166626, "learning_rate": 1.737328861234514e-05, "loss": 0.5513, "step": 9473 }, { "epoch": 0.25907897615401443, "grad_norm": 1.6437203884124756, "learning_rate": 1.7372690260174376e-05, "loss": 0.5862, "step": 9474 }, { "epoch": 0.25910632246773135, "grad_norm": 2.2247252464294434, "learning_rate": 1.7372091850166926e-05, "loss": 0.582, "step": 9475 }, { "epoch": 0.2591336687814483, "grad_norm": 1.3860470056533813, "learning_rate": 1.7371493382327493e-05, "loss": 0.5593, "step": 9476 }, { "epoch": 0.25916101509516515, "grad_norm": 1.6724010705947876, "learning_rate": 1.7370894856660768e-05, "loss": 0.5866, "step": 9477 }, { "epoch": 0.2591883614088821, "grad_norm": 1.3979215621948242, "learning_rate": 1.7370296273171447e-05, "loss": 0.5755, "step": 9478 }, { "epoch": 0.259215707722599, "grad_norm": 1.2359174489974976, "learning_rate": 1.736969763186423e-05, "loss": 0.5525, "step": 9479 }, { "epoch": 0.2592430540363159, "grad_norm": 1.937940001487732, "learning_rate": 1.7369098932743802e-05, "loss": 0.5882, "step": 9480 }, { "epoch": 0.2592704003500328, "grad_norm": 1.5764607191085815, "learning_rate": 1.736850017581487e-05, "loss": 0.5264, "step": 9481 }, { "epoch": 0.2592977466637497, "grad_norm": 1.5439982414245605, "learning_rate": 1.7367901361082125e-05, "loss": 0.492, "step": 9482 }, { "epoch": 0.25932509297746664, "grad_norm": 1.4465595483779907, "learning_rate": 1.7367302488550272e-05, "loss": 0.5731, "step": 9483 }, { "epoch": 0.25935243929118357, "grad_norm": 1.4126068353652954, "learning_rate": 1.7366703558224002e-05, "loss": 0.5481, "step": 9484 }, { "epoch": 0.25937978560490044, "grad_norm": 1.7097833156585693, "learning_rate": 1.7366104570108014e-05, "loss": 0.5766, "step": 9485 }, { "epoch": 0.25940713191861736, "grad_norm": 1.7658414840698242, "learning_rate": 1.7365505524207012e-05, "loss": 0.5501, "step": 9486 }, { "epoch": 0.2594344782323343, "grad_norm": 1.6230871677398682, "learning_rate": 1.7364906420525692e-05, "loss": 0.5759, "step": 9487 }, { "epoch": 0.2594618245460512, "grad_norm": 1.3562939167022705, "learning_rate": 1.7364307259068752e-05, "loss": 0.5569, "step": 9488 }, { "epoch": 0.2594891708597681, "grad_norm": 1.9451225996017456, "learning_rate": 1.7363708039840896e-05, "loss": 0.8783, "step": 9489 }, { "epoch": 0.259516517173485, "grad_norm": 1.777320146560669, "learning_rate": 1.7363108762846822e-05, "loss": 0.5298, "step": 9490 }, { "epoch": 0.25954386348720193, "grad_norm": 1.6567002534866333, "learning_rate": 1.736250942809123e-05, "loss": 0.5602, "step": 9491 }, { "epoch": 0.25957120980091886, "grad_norm": 1.6523826122283936, "learning_rate": 1.7361910035578824e-05, "loss": 0.5573, "step": 9492 }, { "epoch": 0.2595985561146357, "grad_norm": 1.2623143196105957, "learning_rate": 1.7361310585314305e-05, "loss": 0.5651, "step": 9493 }, { "epoch": 0.25962590242835265, "grad_norm": 1.4064970016479492, "learning_rate": 1.736071107730238e-05, "loss": 0.567, "step": 9494 }, { "epoch": 0.2596532487420696, "grad_norm": 1.375557780265808, "learning_rate": 1.7360111511547747e-05, "loss": 0.5598, "step": 9495 }, { "epoch": 0.2596805950557865, "grad_norm": 1.528522253036499, "learning_rate": 1.7359511888055104e-05, "loss": 0.5778, "step": 9496 }, { "epoch": 0.25970794136950337, "grad_norm": 1.3063116073608398, "learning_rate": 1.735891220682917e-05, "loss": 0.5435, "step": 9497 }, { "epoch": 0.2597352876832203, "grad_norm": 1.6919710636138916, "learning_rate": 1.7358312467874636e-05, "loss": 0.5482, "step": 9498 }, { "epoch": 0.2597626339969372, "grad_norm": 1.3775534629821777, "learning_rate": 1.7357712671196213e-05, "loss": 0.5309, "step": 9499 }, { "epoch": 0.25978998031065414, "grad_norm": 1.2358776330947876, "learning_rate": 1.7357112816798604e-05, "loss": 0.5808, "step": 9500 }, { "epoch": 0.259817326624371, "grad_norm": 1.3910229206085205, "learning_rate": 1.7356512904686514e-05, "loss": 0.5578, "step": 9501 }, { "epoch": 0.25984467293808794, "grad_norm": 1.3836321830749512, "learning_rate": 1.7355912934864654e-05, "loss": 0.5469, "step": 9502 }, { "epoch": 0.25987201925180486, "grad_norm": 2.0736145973205566, "learning_rate": 1.7355312907337728e-05, "loss": 0.6071, "step": 9503 }, { "epoch": 0.2598993655655218, "grad_norm": 1.482366919517517, "learning_rate": 1.7354712822110437e-05, "loss": 0.5533, "step": 9504 }, { "epoch": 0.25992671187923866, "grad_norm": 1.2520099878311157, "learning_rate": 1.7354112679187497e-05, "loss": 0.562, "step": 9505 }, { "epoch": 0.2599540581929556, "grad_norm": 1.1622236967086792, "learning_rate": 1.735351247857361e-05, "loss": 0.8815, "step": 9506 }, { "epoch": 0.2599814045066725, "grad_norm": 1.259545922279358, "learning_rate": 1.7352912220273494e-05, "loss": 0.5431, "step": 9507 }, { "epoch": 0.26000875082038943, "grad_norm": 4.701207637786865, "learning_rate": 1.7352311904291843e-05, "loss": 0.4428, "step": 9508 }, { "epoch": 0.2600360971341063, "grad_norm": 1.6319059133529663, "learning_rate": 1.7351711530633377e-05, "loss": 0.4193, "step": 9509 }, { "epoch": 0.26006344344782323, "grad_norm": 1.322055459022522, "learning_rate": 1.7351111099302803e-05, "loss": 0.5795, "step": 9510 }, { "epoch": 0.26009078976154015, "grad_norm": 1.6869680881500244, "learning_rate": 1.7350510610304833e-05, "loss": 0.5848, "step": 9511 }, { "epoch": 0.2601181360752571, "grad_norm": 1.4104267358779907, "learning_rate": 1.7349910063644174e-05, "loss": 0.5564, "step": 9512 }, { "epoch": 0.26014548238897395, "grad_norm": 1.3221375942230225, "learning_rate": 1.7349309459325538e-05, "loss": 0.5318, "step": 9513 }, { "epoch": 0.2601728287026909, "grad_norm": 2.301718235015869, "learning_rate": 1.7348708797353642e-05, "loss": 0.5635, "step": 9514 }, { "epoch": 0.2602001750164078, "grad_norm": 1.359674096107483, "learning_rate": 1.734810807773319e-05, "loss": 0.5424, "step": 9515 }, { "epoch": 0.2602275213301247, "grad_norm": 1.4937021732330322, "learning_rate": 1.7347507300468897e-05, "loss": 0.5787, "step": 9516 }, { "epoch": 0.2602548676438416, "grad_norm": 1.297330379486084, "learning_rate": 1.7346906465565477e-05, "loss": 0.538, "step": 9517 }, { "epoch": 0.2602822139575585, "grad_norm": 1.2036583423614502, "learning_rate": 1.7346305573027646e-05, "loss": 0.5754, "step": 9518 }, { "epoch": 0.26030956027127544, "grad_norm": 1.384143590927124, "learning_rate": 1.7345704622860114e-05, "loss": 0.5269, "step": 9519 }, { "epoch": 0.26033690658499237, "grad_norm": 1.4334492683410645, "learning_rate": 1.7345103615067597e-05, "loss": 0.5693, "step": 9520 }, { "epoch": 0.26036425289870924, "grad_norm": 1.3923972845077515, "learning_rate": 1.734450254965481e-05, "loss": 0.4765, "step": 9521 }, { "epoch": 0.26039159921242616, "grad_norm": 1.4084478616714478, "learning_rate": 1.7343901426626465e-05, "loss": 0.5432, "step": 9522 }, { "epoch": 0.2604189455261431, "grad_norm": 1.5245403051376343, "learning_rate": 1.7343300245987283e-05, "loss": 0.5674, "step": 9523 }, { "epoch": 0.26044629183986, "grad_norm": 1.2783373594284058, "learning_rate": 1.7342699007741976e-05, "loss": 0.9163, "step": 9524 }, { "epoch": 0.2604736381535769, "grad_norm": 2.0009241104125977, "learning_rate": 1.734209771189526e-05, "loss": 0.5168, "step": 9525 }, { "epoch": 0.2605009844672938, "grad_norm": 1.3189928531646729, "learning_rate": 1.7341496358451854e-05, "loss": 0.901, "step": 9526 }, { "epoch": 0.26052833078101073, "grad_norm": 1.358212947845459, "learning_rate": 1.7340894947416477e-05, "loss": 0.5559, "step": 9527 }, { "epoch": 0.26055567709472766, "grad_norm": 3.6588594913482666, "learning_rate": 1.7340293478793845e-05, "loss": 0.5834, "step": 9528 }, { "epoch": 0.2605830234084445, "grad_norm": 1.4551035165786743, "learning_rate": 1.7339691952588677e-05, "loss": 0.5927, "step": 9529 }, { "epoch": 0.26061036972216145, "grad_norm": 1.3221758604049683, "learning_rate": 1.7339090368805688e-05, "loss": 0.4524, "step": 9530 }, { "epoch": 0.2606377160358784, "grad_norm": 1.4423397779464722, "learning_rate": 1.7338488727449602e-05, "loss": 0.5448, "step": 9531 }, { "epoch": 0.2606650623495953, "grad_norm": 1.403279423713684, "learning_rate": 1.733788702852514e-05, "loss": 0.5374, "step": 9532 }, { "epoch": 0.26069240866331217, "grad_norm": 1.5623935461044312, "learning_rate": 1.7337285272037014e-05, "loss": 0.5367, "step": 9533 }, { "epoch": 0.2607197549770291, "grad_norm": 1.6643686294555664, "learning_rate": 1.7336683457989952e-05, "loss": 0.5304, "step": 9534 }, { "epoch": 0.260747101290746, "grad_norm": 1.9212939739227295, "learning_rate": 1.7336081586388674e-05, "loss": 0.501, "step": 9535 }, { "epoch": 0.26077444760446294, "grad_norm": 1.716685175895691, "learning_rate": 1.7335479657237898e-05, "loss": 0.5879, "step": 9536 }, { "epoch": 0.2608017939181798, "grad_norm": 1.283296823501587, "learning_rate": 1.7334877670542353e-05, "loss": 0.5789, "step": 9537 }, { "epoch": 0.26082914023189674, "grad_norm": 1.1741660833358765, "learning_rate": 1.7334275626306755e-05, "loss": 0.5662, "step": 9538 }, { "epoch": 0.26085648654561366, "grad_norm": 1.517313003540039, "learning_rate": 1.7333673524535828e-05, "loss": 0.4634, "step": 9539 }, { "epoch": 0.2608838328593306, "grad_norm": 1.4783557653427124, "learning_rate": 1.7333071365234294e-05, "loss": 0.4326, "step": 9540 }, { "epoch": 0.26091117917304746, "grad_norm": 1.4020400047302246, "learning_rate": 1.7332469148406883e-05, "loss": 0.5382, "step": 9541 }, { "epoch": 0.2609385254867644, "grad_norm": 1.5073814392089844, "learning_rate": 1.7331866874058316e-05, "loss": 0.5489, "step": 9542 }, { "epoch": 0.2609658718004813, "grad_norm": 2.1474215984344482, "learning_rate": 1.733126454219331e-05, "loss": 0.5301, "step": 9543 }, { "epoch": 0.26099321811419823, "grad_norm": 1.5207213163375854, "learning_rate": 1.73306621528166e-05, "loss": 0.5777, "step": 9544 }, { "epoch": 0.2610205644279151, "grad_norm": 1.518623948097229, "learning_rate": 1.733005970593291e-05, "loss": 0.5822, "step": 9545 }, { "epoch": 0.261047910741632, "grad_norm": 1.2819910049438477, "learning_rate": 1.7329457201546963e-05, "loss": 0.5695, "step": 9546 }, { "epoch": 0.26107525705534895, "grad_norm": 1.5147215127944946, "learning_rate": 1.732885463966349e-05, "loss": 0.9733, "step": 9547 }, { "epoch": 0.2611026033690659, "grad_norm": 1.7406480312347412, "learning_rate": 1.7328252020287212e-05, "loss": 0.5414, "step": 9548 }, { "epoch": 0.26112994968278275, "grad_norm": 1.478896141052246, "learning_rate": 1.7327649343422858e-05, "loss": 0.5616, "step": 9549 }, { "epoch": 0.26115729599649967, "grad_norm": 1.351505160331726, "learning_rate": 1.732704660907516e-05, "loss": 0.5788, "step": 9550 }, { "epoch": 0.2611846423102166, "grad_norm": 1.6545510292053223, "learning_rate": 1.732644381724884e-05, "loss": 0.5484, "step": 9551 }, { "epoch": 0.26121198862393347, "grad_norm": 1.7497278451919556, "learning_rate": 1.7325840967948634e-05, "loss": 0.5468, "step": 9552 }, { "epoch": 0.2612393349376504, "grad_norm": 1.2082324028015137, "learning_rate": 1.7325238061179267e-05, "loss": 0.5611, "step": 9553 }, { "epoch": 0.2612666812513673, "grad_norm": 1.5484788417816162, "learning_rate": 1.7324635096945468e-05, "loss": 0.6103, "step": 9554 }, { "epoch": 0.26129402756508424, "grad_norm": 1.4609564542770386, "learning_rate": 1.7324032075251965e-05, "loss": 0.5618, "step": 9555 }, { "epoch": 0.2613213738788011, "grad_norm": 1.8919090032577515, "learning_rate": 1.7323428996103496e-05, "loss": 0.5751, "step": 9556 }, { "epoch": 0.26134872019251804, "grad_norm": 1.5085198879241943, "learning_rate": 1.7322825859504784e-05, "loss": 0.5818, "step": 9557 }, { "epoch": 0.26137606650623496, "grad_norm": 1.4331244230270386, "learning_rate": 1.7322222665460568e-05, "loss": 0.5617, "step": 9558 }, { "epoch": 0.2614034128199519, "grad_norm": 1.2629624605178833, "learning_rate": 1.732161941397557e-05, "loss": 0.5059, "step": 9559 }, { "epoch": 0.26143075913366876, "grad_norm": 1.2317190170288086, "learning_rate": 1.732101610505453e-05, "loss": 0.5459, "step": 9560 }, { "epoch": 0.2614581054473857, "grad_norm": 1.6368781328201294, "learning_rate": 1.7320412738702184e-05, "loss": 0.4656, "step": 9561 }, { "epoch": 0.2614854517611026, "grad_norm": 1.3933953046798706, "learning_rate": 1.7319809314923256e-05, "loss": 0.5198, "step": 9562 }, { "epoch": 0.26151279807481953, "grad_norm": 1.4011746644973755, "learning_rate": 1.7319205833722484e-05, "loss": 0.5645, "step": 9563 }, { "epoch": 0.2615401443885364, "grad_norm": 1.4278186559677124, "learning_rate": 1.7318602295104604e-05, "loss": 0.574, "step": 9564 }, { "epoch": 0.2615674907022533, "grad_norm": 1.4423493146896362, "learning_rate": 1.7317998699074344e-05, "loss": 0.8954, "step": 9565 }, { "epoch": 0.26159483701597025, "grad_norm": 1.3230705261230469, "learning_rate": 1.731739504563645e-05, "loss": 0.5296, "step": 9566 }, { "epoch": 0.2616221833296872, "grad_norm": 1.2675672769546509, "learning_rate": 1.7316791334795646e-05, "loss": 0.5222, "step": 9567 }, { "epoch": 0.26164952964340404, "grad_norm": 1.30001699924469, "learning_rate": 1.7316187566556675e-05, "loss": 0.5349, "step": 9568 }, { "epoch": 0.26167687595712097, "grad_norm": 1.3003462553024292, "learning_rate": 1.7315583740924274e-05, "loss": 0.5449, "step": 9569 }, { "epoch": 0.2617042222708379, "grad_norm": 1.287636637687683, "learning_rate": 1.7314979857903174e-05, "loss": 0.8889, "step": 9570 }, { "epoch": 0.2617315685845548, "grad_norm": 1.4343132972717285, "learning_rate": 1.7314375917498116e-05, "loss": 0.5807, "step": 9571 }, { "epoch": 0.2617589148982717, "grad_norm": 1.5882740020751953, "learning_rate": 1.731377191971384e-05, "loss": 0.5687, "step": 9572 }, { "epoch": 0.2617862612119886, "grad_norm": 1.4608373641967773, "learning_rate": 1.731316786455508e-05, "loss": 0.8861, "step": 9573 }, { "epoch": 0.26181360752570554, "grad_norm": 1.1956517696380615, "learning_rate": 1.7312563752026574e-05, "loss": 0.8887, "step": 9574 }, { "epoch": 0.26184095383942246, "grad_norm": 1.5063927173614502, "learning_rate": 1.7311959582133064e-05, "loss": 0.9094, "step": 9575 }, { "epoch": 0.26186830015313933, "grad_norm": 1.1970582008361816, "learning_rate": 1.7311355354879292e-05, "loss": 0.5796, "step": 9576 }, { "epoch": 0.26189564646685626, "grad_norm": 1.3097347021102905, "learning_rate": 1.7310751070269992e-05, "loss": 0.6017, "step": 9577 }, { "epoch": 0.2619229927805732, "grad_norm": 1.3405230045318604, "learning_rate": 1.7310146728309907e-05, "loss": 0.5319, "step": 9578 }, { "epoch": 0.2619503390942901, "grad_norm": 1.5385578870773315, "learning_rate": 1.7309542329003778e-05, "loss": 0.546, "step": 9579 }, { "epoch": 0.261977685408007, "grad_norm": 1.4862840175628662, "learning_rate": 1.730893787235635e-05, "loss": 0.4228, "step": 9580 }, { "epoch": 0.2620050317217239, "grad_norm": 1.5732827186584473, "learning_rate": 1.7308333358372357e-05, "loss": 0.4806, "step": 9581 }, { "epoch": 0.2620323780354408, "grad_norm": 1.6616652011871338, "learning_rate": 1.7307728787056547e-05, "loss": 0.5746, "step": 9582 }, { "epoch": 0.26205972434915775, "grad_norm": 1.0728658437728882, "learning_rate": 1.7307124158413665e-05, "loss": 0.5368, "step": 9583 }, { "epoch": 0.2620870706628746, "grad_norm": 1.62201726436615, "learning_rate": 1.7306519472448445e-05, "loss": 0.5596, "step": 9584 }, { "epoch": 0.26211441697659155, "grad_norm": 1.0974267721176147, "learning_rate": 1.7305914729165637e-05, "loss": 0.5397, "step": 9585 }, { "epoch": 0.26214176329030847, "grad_norm": 1.5693167448043823, "learning_rate": 1.7305309928569985e-05, "loss": 0.5503, "step": 9586 }, { "epoch": 0.2621691096040254, "grad_norm": 1.8690145015716553, "learning_rate": 1.7304705070666235e-05, "loss": 0.4516, "step": 9587 }, { "epoch": 0.26219645591774227, "grad_norm": 1.39262056350708, "learning_rate": 1.7304100155459127e-05, "loss": 0.5472, "step": 9588 }, { "epoch": 0.2622238022314592, "grad_norm": 1.3495254516601562, "learning_rate": 1.730349518295341e-05, "loss": 0.5734, "step": 9589 }, { "epoch": 0.2622511485451761, "grad_norm": 1.967430591583252, "learning_rate": 1.7302890153153826e-05, "loss": 0.9267, "step": 9590 }, { "epoch": 0.26227849485889304, "grad_norm": 1.1491262912750244, "learning_rate": 1.7302285066065125e-05, "loss": 0.5772, "step": 9591 }, { "epoch": 0.2623058411726099, "grad_norm": 1.4340957403182983, "learning_rate": 1.7301679921692054e-05, "loss": 0.5926, "step": 9592 }, { "epoch": 0.26233318748632684, "grad_norm": 1.2943607568740845, "learning_rate": 1.730107472003936e-05, "loss": 0.544, "step": 9593 }, { "epoch": 0.26236053380004376, "grad_norm": 1.657319188117981, "learning_rate": 1.7300469461111787e-05, "loss": 0.5662, "step": 9594 }, { "epoch": 0.2623878801137607, "grad_norm": 1.2595202922821045, "learning_rate": 1.7299864144914085e-05, "loss": 0.5617, "step": 9595 }, { "epoch": 0.26241522642747755, "grad_norm": 1.460777759552002, "learning_rate": 1.7299258771451005e-05, "loss": 0.9094, "step": 9596 }, { "epoch": 0.2624425727411945, "grad_norm": 1.0902448892593384, "learning_rate": 1.7298653340727293e-05, "loss": 0.571, "step": 9597 }, { "epoch": 0.2624699190549114, "grad_norm": 1.4405369758605957, "learning_rate": 1.72980478527477e-05, "loss": 0.5035, "step": 9598 }, { "epoch": 0.26249726536862833, "grad_norm": 1.4078965187072754, "learning_rate": 1.7297442307516975e-05, "loss": 0.8752, "step": 9599 }, { "epoch": 0.2625246116823452, "grad_norm": 1.4885878562927246, "learning_rate": 1.729683670503987e-05, "loss": 0.5621, "step": 9600 }, { "epoch": 0.2625519579960621, "grad_norm": 1.419532299041748, "learning_rate": 1.7296231045321132e-05, "loss": 0.544, "step": 9601 }, { "epoch": 0.26257930430977905, "grad_norm": 1.27424955368042, "learning_rate": 1.7295625328365518e-05, "loss": 0.5496, "step": 9602 }, { "epoch": 0.262606650623496, "grad_norm": 1.420396327972412, "learning_rate": 1.7295019554177775e-05, "loss": 0.9099, "step": 9603 }, { "epoch": 0.26263399693721284, "grad_norm": 1.276180386543274, "learning_rate": 1.7294413722762654e-05, "loss": 0.5616, "step": 9604 }, { "epoch": 0.26266134325092977, "grad_norm": 1.4041247367858887, "learning_rate": 1.7293807834124917e-05, "loss": 0.5693, "step": 9605 }, { "epoch": 0.2626886895646467, "grad_norm": 1.5174859762191772, "learning_rate": 1.7293201888269308e-05, "loss": 0.5567, "step": 9606 }, { "epoch": 0.2627160358783636, "grad_norm": 1.9791895151138306, "learning_rate": 1.7292595885200578e-05, "loss": 0.5456, "step": 9607 }, { "epoch": 0.2627433821920805, "grad_norm": 1.4316086769104004, "learning_rate": 1.7291989824923486e-05, "loss": 0.5686, "step": 9608 }, { "epoch": 0.2627707285057974, "grad_norm": 1.316809892654419, "learning_rate": 1.729138370744279e-05, "loss": 0.5627, "step": 9609 }, { "epoch": 0.26279807481951434, "grad_norm": 1.6099604368209839, "learning_rate": 1.729077753276324e-05, "loss": 0.5727, "step": 9610 }, { "epoch": 0.26282542113323126, "grad_norm": 1.468556523323059, "learning_rate": 1.7290171300889593e-05, "loss": 0.587, "step": 9611 }, { "epoch": 0.26285276744694813, "grad_norm": 1.3563544750213623, "learning_rate": 1.7289565011826598e-05, "loss": 0.4945, "step": 9612 }, { "epoch": 0.26288011376066506, "grad_norm": 1.5756983757019043, "learning_rate": 1.728895866557902e-05, "loss": 0.5876, "step": 9613 }, { "epoch": 0.262907460074382, "grad_norm": 1.5795985460281372, "learning_rate": 1.7288352262151616e-05, "loss": 0.5958, "step": 9614 }, { "epoch": 0.2629348063880989, "grad_norm": 1.3118500709533691, "learning_rate": 1.7287745801549135e-05, "loss": 0.4846, "step": 9615 }, { "epoch": 0.2629621527018158, "grad_norm": 1.6951689720153809, "learning_rate": 1.7287139283776345e-05, "loss": 0.5824, "step": 9616 }, { "epoch": 0.2629894990155327, "grad_norm": 1.368864893913269, "learning_rate": 1.728653270883799e-05, "loss": 0.5518, "step": 9617 }, { "epoch": 0.2630168453292496, "grad_norm": 1.1490658521652222, "learning_rate": 1.728592607673884e-05, "loss": 0.5333, "step": 9618 }, { "epoch": 0.26304419164296655, "grad_norm": 1.9105217456817627, "learning_rate": 1.7285319387483655e-05, "loss": 0.9391, "step": 9619 }, { "epoch": 0.2630715379566834, "grad_norm": 1.3715490102767944, "learning_rate": 1.7284712641077184e-05, "loss": 0.4957, "step": 9620 }, { "epoch": 0.26309888427040035, "grad_norm": 1.4221655130386353, "learning_rate": 1.7284105837524196e-05, "loss": 0.6008, "step": 9621 }, { "epoch": 0.26312623058411727, "grad_norm": 1.3338279724121094, "learning_rate": 1.7283498976829446e-05, "loss": 0.522, "step": 9622 }, { "epoch": 0.2631535768978342, "grad_norm": 1.315550446510315, "learning_rate": 1.7282892058997696e-05, "loss": 0.5201, "step": 9623 }, { "epoch": 0.26318092321155107, "grad_norm": 1.4494692087173462, "learning_rate": 1.7282285084033706e-05, "loss": 0.5935, "step": 9624 }, { "epoch": 0.263208269525268, "grad_norm": 1.5790657997131348, "learning_rate": 1.728167805194224e-05, "loss": 0.4179, "step": 9625 }, { "epoch": 0.2632356158389849, "grad_norm": 4.321783542633057, "learning_rate": 1.728107096272806e-05, "loss": 0.5696, "step": 9626 }, { "epoch": 0.26326296215270184, "grad_norm": 1.3420475721359253, "learning_rate": 1.728046381639593e-05, "loss": 0.5661, "step": 9627 }, { "epoch": 0.2632903084664187, "grad_norm": 1.3217593431472778, "learning_rate": 1.7279856612950605e-05, "loss": 0.5371, "step": 9628 }, { "epoch": 0.26331765478013563, "grad_norm": 1.8495122194290161, "learning_rate": 1.7279249352396856e-05, "loss": 0.5679, "step": 9629 }, { "epoch": 0.26334500109385256, "grad_norm": 1.4233349561691284, "learning_rate": 1.7278642034739443e-05, "loss": 0.4034, "step": 9630 }, { "epoch": 0.2633723474075695, "grad_norm": 1.2581582069396973, "learning_rate": 1.7278034659983135e-05, "loss": 0.5397, "step": 9631 }, { "epoch": 0.26339969372128635, "grad_norm": 5.114449977874756, "learning_rate": 1.7277427228132687e-05, "loss": 0.5466, "step": 9632 }, { "epoch": 0.2634270400350033, "grad_norm": 1.5800557136535645, "learning_rate": 1.7276819739192875e-05, "loss": 0.5547, "step": 9633 }, { "epoch": 0.2634543863487202, "grad_norm": 1.2690765857696533, "learning_rate": 1.727621219316846e-05, "loss": 0.5703, "step": 9634 }, { "epoch": 0.26348173266243713, "grad_norm": 1.7172998189926147, "learning_rate": 1.727560459006421e-05, "loss": 0.5093, "step": 9635 }, { "epoch": 0.263509078976154, "grad_norm": 1.4339503049850464, "learning_rate": 1.7274996929884884e-05, "loss": 0.555, "step": 9636 }, { "epoch": 0.2635364252898709, "grad_norm": 1.7215343713760376, "learning_rate": 1.727438921263526e-05, "loss": 0.8935, "step": 9637 }, { "epoch": 0.26356377160358785, "grad_norm": 1.62638258934021, "learning_rate": 1.7273781438320097e-05, "loss": 0.5638, "step": 9638 }, { "epoch": 0.2635911179173048, "grad_norm": 4.678657531738281, "learning_rate": 1.7273173606944164e-05, "loss": 0.5713, "step": 9639 }, { "epoch": 0.26361846423102164, "grad_norm": 1.481799840927124, "learning_rate": 1.7272565718512234e-05, "loss": 0.4175, "step": 9640 }, { "epoch": 0.26364581054473857, "grad_norm": 1.248876929283142, "learning_rate": 1.727195777302907e-05, "loss": 0.5806, "step": 9641 }, { "epoch": 0.2636731568584555, "grad_norm": 1.393800973892212, "learning_rate": 1.7271349770499447e-05, "loss": 0.5478, "step": 9642 }, { "epoch": 0.2637005031721724, "grad_norm": 1.3218291997909546, "learning_rate": 1.7270741710928128e-05, "loss": 0.5219, "step": 9643 }, { "epoch": 0.2637278494858893, "grad_norm": 1.1659188270568848, "learning_rate": 1.7270133594319888e-05, "loss": 0.5421, "step": 9644 }, { "epoch": 0.2637551957996062, "grad_norm": 1.3219730854034424, "learning_rate": 1.7269525420679496e-05, "loss": 0.5545, "step": 9645 }, { "epoch": 0.26378254211332314, "grad_norm": 1.3627790212631226, "learning_rate": 1.7268917190011723e-05, "loss": 0.5551, "step": 9646 }, { "epoch": 0.26380988842704006, "grad_norm": 1.3490915298461914, "learning_rate": 1.726830890232134e-05, "loss": 0.5588, "step": 9647 }, { "epoch": 0.26383723474075693, "grad_norm": 1.4277406930923462, "learning_rate": 1.726770055761312e-05, "loss": 0.8939, "step": 9648 }, { "epoch": 0.26386458105447386, "grad_norm": 3.720284938812256, "learning_rate": 1.7267092155891834e-05, "loss": 0.5573, "step": 9649 }, { "epoch": 0.2638919273681908, "grad_norm": 1.4559763669967651, "learning_rate": 1.7266483697162256e-05, "loss": 0.8611, "step": 9650 }, { "epoch": 0.26391927368190765, "grad_norm": 1.7231431007385254, "learning_rate": 1.7265875181429157e-05, "loss": 0.5957, "step": 9651 }, { "epoch": 0.2639466199956246, "grad_norm": 1.6357028484344482, "learning_rate": 1.726526660869731e-05, "loss": 0.4094, "step": 9652 }, { "epoch": 0.2639739663093415, "grad_norm": 1.5369138717651367, "learning_rate": 1.7264657978971495e-05, "loss": 0.9049, "step": 9653 }, { "epoch": 0.2640013126230584, "grad_norm": 1.262152910232544, "learning_rate": 1.726404929225648e-05, "loss": 0.565, "step": 9654 }, { "epoch": 0.2640286589367753, "grad_norm": 1.301846981048584, "learning_rate": 1.7263440548557043e-05, "loss": 0.557, "step": 9655 }, { "epoch": 0.2640560052504922, "grad_norm": 1.4407200813293457, "learning_rate": 1.726283174787796e-05, "loss": 0.5578, "step": 9656 }, { "epoch": 0.26408335156420915, "grad_norm": 1.4446594715118408, "learning_rate": 1.7262222890224007e-05, "loss": 0.5373, "step": 9657 }, { "epoch": 0.26411069787792607, "grad_norm": 1.5250016450881958, "learning_rate": 1.7261613975599955e-05, "loss": 0.5033, "step": 9658 }, { "epoch": 0.26413804419164294, "grad_norm": 1.4350050687789917, "learning_rate": 1.7261005004010586e-05, "loss": 0.4983, "step": 9659 }, { "epoch": 0.26416539050535986, "grad_norm": 1.2964235544204712, "learning_rate": 1.726039597546068e-05, "loss": 0.5536, "step": 9660 }, { "epoch": 0.2641927368190768, "grad_norm": 1.7356617450714111, "learning_rate": 1.725978688995501e-05, "loss": 0.9212, "step": 9661 }, { "epoch": 0.2642200831327937, "grad_norm": 1.3164737224578857, "learning_rate": 1.7259177747498352e-05, "loss": 0.5573, "step": 9662 }, { "epoch": 0.2642474294465106, "grad_norm": 1.7690389156341553, "learning_rate": 1.7258568548095488e-05, "loss": 0.5761, "step": 9663 }, { "epoch": 0.2642747757602275, "grad_norm": 1.6052918434143066, "learning_rate": 1.72579592917512e-05, "loss": 0.5568, "step": 9664 }, { "epoch": 0.26430212207394443, "grad_norm": 1.1345123052597046, "learning_rate": 1.725734997847026e-05, "loss": 0.537, "step": 9665 }, { "epoch": 0.26432946838766136, "grad_norm": 1.4894163608551025, "learning_rate": 1.7256740608257453e-05, "loss": 0.559, "step": 9666 }, { "epoch": 0.26435681470137823, "grad_norm": 1.191743016242981, "learning_rate": 1.725613118111756e-05, "loss": 0.5534, "step": 9667 }, { "epoch": 0.26438416101509515, "grad_norm": 1.432770013809204, "learning_rate": 1.7255521697055357e-05, "loss": 0.5324, "step": 9668 }, { "epoch": 0.2644115073288121, "grad_norm": 1.3498704433441162, "learning_rate": 1.725491215607563e-05, "loss": 0.9212, "step": 9669 }, { "epoch": 0.264438853642529, "grad_norm": 1.513982892036438, "learning_rate": 1.725430255818316e-05, "loss": 0.8875, "step": 9670 }, { "epoch": 0.2644661999562459, "grad_norm": 1.8189178705215454, "learning_rate": 1.7253692903382724e-05, "loss": 0.5467, "step": 9671 }, { "epoch": 0.2644935462699628, "grad_norm": 3.0820493698120117, "learning_rate": 1.725308319167911e-05, "loss": 0.5884, "step": 9672 }, { "epoch": 0.2645208925836797, "grad_norm": 1.9196382761001587, "learning_rate": 1.72524734230771e-05, "loss": 0.5346, "step": 9673 }, { "epoch": 0.26454823889739665, "grad_norm": 1.4708049297332764, "learning_rate": 1.725186359758148e-05, "loss": 0.4294, "step": 9674 }, { "epoch": 0.2645755852111135, "grad_norm": 1.103170394897461, "learning_rate": 1.7251253715197026e-05, "loss": 0.514, "step": 9675 }, { "epoch": 0.26460293152483044, "grad_norm": 1.4158114194869995, "learning_rate": 1.725064377592853e-05, "loss": 0.5873, "step": 9676 }, { "epoch": 0.26463027783854737, "grad_norm": 1.68473482131958, "learning_rate": 1.7250033779780774e-05, "loss": 0.5422, "step": 9677 }, { "epoch": 0.2646576241522643, "grad_norm": 1.3057695627212524, "learning_rate": 1.7249423726758542e-05, "loss": 0.5692, "step": 9678 }, { "epoch": 0.26468497046598116, "grad_norm": 1.3957889080047607, "learning_rate": 1.7248813616866623e-05, "loss": 0.5717, "step": 9679 }, { "epoch": 0.2647123167796981, "grad_norm": 1.2418630123138428, "learning_rate": 1.7248203450109798e-05, "loss": 0.5249, "step": 9680 }, { "epoch": 0.264739663093415, "grad_norm": 1.8038499355316162, "learning_rate": 1.724759322649286e-05, "loss": 0.5049, "step": 9681 }, { "epoch": 0.26476700940713194, "grad_norm": 2.4028186798095703, "learning_rate": 1.7246982946020596e-05, "loss": 0.4512, "step": 9682 }, { "epoch": 0.2647943557208488, "grad_norm": 1.4779582023620605, "learning_rate": 1.7246372608697783e-05, "loss": 0.5855, "step": 9683 }, { "epoch": 0.26482170203456573, "grad_norm": 1.6048921346664429, "learning_rate": 1.7245762214529217e-05, "loss": 0.5796, "step": 9684 }, { "epoch": 0.26484904834828266, "grad_norm": 1.6790273189544678, "learning_rate": 1.724515176351969e-05, "loss": 0.5807, "step": 9685 }, { "epoch": 0.2648763946619996, "grad_norm": 2.26849365234375, "learning_rate": 1.7244541255673984e-05, "loss": 0.5464, "step": 9686 }, { "epoch": 0.26490374097571645, "grad_norm": 1.8388786315917969, "learning_rate": 1.724393069099689e-05, "loss": 0.8923, "step": 9687 }, { "epoch": 0.2649310872894334, "grad_norm": 1.9976974725723267, "learning_rate": 1.7243320069493198e-05, "loss": 0.5647, "step": 9688 }, { "epoch": 0.2649584336031503, "grad_norm": 1.4565861225128174, "learning_rate": 1.72427093911677e-05, "loss": 0.5604, "step": 9689 }, { "epoch": 0.2649857799168672, "grad_norm": 1.6090151071548462, "learning_rate": 1.7242098656025185e-05, "loss": 0.5303, "step": 9690 }, { "epoch": 0.2650131262305841, "grad_norm": 1.8271381855010986, "learning_rate": 1.7241487864070444e-05, "loss": 0.4765, "step": 9691 }, { "epoch": 0.265040472544301, "grad_norm": 1.3143478631973267, "learning_rate": 1.7240877015308267e-05, "loss": 0.5403, "step": 9692 }, { "epoch": 0.26506781885801795, "grad_norm": 1.260244607925415, "learning_rate": 1.724026610974345e-05, "loss": 0.5449, "step": 9693 }, { "epoch": 0.26509516517173487, "grad_norm": 1.3073359727859497, "learning_rate": 1.7239655147380782e-05, "loss": 0.5491, "step": 9694 }, { "epoch": 0.26512251148545174, "grad_norm": 1.5435727834701538, "learning_rate": 1.7239044128225056e-05, "loss": 0.5618, "step": 9695 }, { "epoch": 0.26514985779916866, "grad_norm": 1.6128380298614502, "learning_rate": 1.723843305228107e-05, "loss": 0.5053, "step": 9696 }, { "epoch": 0.2651772041128856, "grad_norm": 1.8617095947265625, "learning_rate": 1.723782191955361e-05, "loss": 0.5658, "step": 9697 }, { "epoch": 0.2652045504266025, "grad_norm": 1.337043046951294, "learning_rate": 1.7237210730047474e-05, "loss": 0.5524, "step": 9698 }, { "epoch": 0.2652318967403194, "grad_norm": 1.7552636861801147, "learning_rate": 1.7236599483767456e-05, "loss": 0.5491, "step": 9699 }, { "epoch": 0.2652592430540363, "grad_norm": 1.5620133876800537, "learning_rate": 1.723598818071835e-05, "loss": 0.5621, "step": 9700 }, { "epoch": 0.26528658936775323, "grad_norm": 1.2688063383102417, "learning_rate": 1.723537682090496e-05, "loss": 0.5596, "step": 9701 }, { "epoch": 0.26531393568147016, "grad_norm": 1.595786690711975, "learning_rate": 1.723476540433207e-05, "loss": 0.5428, "step": 9702 }, { "epoch": 0.26534128199518703, "grad_norm": 2.402918577194214, "learning_rate": 1.7234153931004483e-05, "loss": 0.5094, "step": 9703 }, { "epoch": 0.26536862830890395, "grad_norm": 1.546566128730774, "learning_rate": 1.723354240092699e-05, "loss": 0.5445, "step": 9704 }, { "epoch": 0.2653959746226209, "grad_norm": 1.6245359182357788, "learning_rate": 1.7232930814104397e-05, "loss": 0.5557, "step": 9705 }, { "epoch": 0.2654233209363378, "grad_norm": 1.5325119495391846, "learning_rate": 1.7232319170541498e-05, "loss": 0.8936, "step": 9706 }, { "epoch": 0.2654506672500547, "grad_norm": 2.3940300941467285, "learning_rate": 1.7231707470243092e-05, "loss": 0.5296, "step": 9707 }, { "epoch": 0.2654780135637716, "grad_norm": 2.1165902614593506, "learning_rate": 1.723109571321397e-05, "loss": 0.5772, "step": 9708 }, { "epoch": 0.2655053598774885, "grad_norm": 1.8682940006256104, "learning_rate": 1.723048389945894e-05, "loss": 0.5401, "step": 9709 }, { "epoch": 0.26553270619120545, "grad_norm": 1.4049841165542603, "learning_rate": 1.72298720289828e-05, "loss": 0.5199, "step": 9710 }, { "epoch": 0.2655600525049223, "grad_norm": 1.635320782661438, "learning_rate": 1.722926010179035e-05, "loss": 0.5691, "step": 9711 }, { "epoch": 0.26558739881863924, "grad_norm": 1.994271159172058, "learning_rate": 1.7228648117886384e-05, "loss": 0.6395, "step": 9712 }, { "epoch": 0.26561474513235617, "grad_norm": 2.696995496749878, "learning_rate": 1.722803607727571e-05, "loss": 0.5766, "step": 9713 }, { "epoch": 0.2656420914460731, "grad_norm": 1.4619299173355103, "learning_rate": 1.7227423979963127e-05, "loss": 0.5512, "step": 9714 }, { "epoch": 0.26566943775978996, "grad_norm": 1.593145728111267, "learning_rate": 1.7226811825953437e-05, "loss": 0.5567, "step": 9715 }, { "epoch": 0.2656967840735069, "grad_norm": 1.5815925598144531, "learning_rate": 1.7226199615251443e-05, "loss": 0.5763, "step": 9716 }, { "epoch": 0.2657241303872238, "grad_norm": 1.6281018257141113, "learning_rate": 1.7225587347861945e-05, "loss": 0.481, "step": 9717 }, { "epoch": 0.26575147670094074, "grad_norm": 1.285194993019104, "learning_rate": 1.7224975023789747e-05, "loss": 0.543, "step": 9718 }, { "epoch": 0.2657788230146576, "grad_norm": 1.8378726243972778, "learning_rate": 1.7224362643039655e-05, "loss": 0.5598, "step": 9719 }, { "epoch": 0.26580616932837453, "grad_norm": 1.4721980094909668, "learning_rate": 1.722375020561647e-05, "loss": 0.5822, "step": 9720 }, { "epoch": 0.26583351564209146, "grad_norm": 1.6604212522506714, "learning_rate": 1.7223137711524998e-05, "loss": 0.5364, "step": 9721 }, { "epoch": 0.2658608619558084, "grad_norm": 1.41288161277771, "learning_rate": 1.7222525160770047e-05, "loss": 0.5353, "step": 9722 }, { "epoch": 0.26588820826952525, "grad_norm": 1.6870180368423462, "learning_rate": 1.7221912553356417e-05, "loss": 0.5583, "step": 9723 }, { "epoch": 0.2659155545832422, "grad_norm": 1.4405258893966675, "learning_rate": 1.722129988928891e-05, "loss": 0.5639, "step": 9724 }, { "epoch": 0.2659429008969591, "grad_norm": 1.6043013334274292, "learning_rate": 1.7220687168572344e-05, "loss": 0.9214, "step": 9725 }, { "epoch": 0.265970247210676, "grad_norm": 1.5682507753372192, "learning_rate": 1.7220074391211518e-05, "loss": 0.5509, "step": 9726 }, { "epoch": 0.2659975935243929, "grad_norm": 1.7163678407669067, "learning_rate": 1.721946155721124e-05, "loss": 0.5622, "step": 9727 }, { "epoch": 0.2660249398381098, "grad_norm": 1.5430290699005127, "learning_rate": 1.7218848666576315e-05, "loss": 0.5645, "step": 9728 }, { "epoch": 0.26605228615182674, "grad_norm": 1.773331642150879, "learning_rate": 1.7218235719311556e-05, "loss": 0.8619, "step": 9729 }, { "epoch": 0.26607963246554367, "grad_norm": 1.5879758596420288, "learning_rate": 1.721762271542177e-05, "loss": 0.526, "step": 9730 }, { "epoch": 0.26610697877926054, "grad_norm": 1.6641063690185547, "learning_rate": 1.7217009654911763e-05, "loss": 0.5622, "step": 9731 }, { "epoch": 0.26613432509297746, "grad_norm": 1.7201224565505981, "learning_rate": 1.7216396537786347e-05, "loss": 0.8719, "step": 9732 }, { "epoch": 0.2661616714066944, "grad_norm": 1.4141769409179688, "learning_rate": 1.721578336405033e-05, "loss": 0.5667, "step": 9733 }, { "epoch": 0.2661890177204113, "grad_norm": 1.756833791732788, "learning_rate": 1.7215170133708527e-05, "loss": 0.8474, "step": 9734 }, { "epoch": 0.2662163640341282, "grad_norm": 1.6784378290176392, "learning_rate": 1.721455684676574e-05, "loss": 0.5489, "step": 9735 }, { "epoch": 0.2662437103478451, "grad_norm": 1.4204647541046143, "learning_rate": 1.721394350322679e-05, "loss": 0.4863, "step": 9736 }, { "epoch": 0.26627105666156203, "grad_norm": 1.5757677555084229, "learning_rate": 1.721333010309648e-05, "loss": 0.5168, "step": 9737 }, { "epoch": 0.26629840297527896, "grad_norm": 1.5501980781555176, "learning_rate": 1.7212716646379627e-05, "loss": 0.5589, "step": 9738 }, { "epoch": 0.26632574928899583, "grad_norm": 1.1964221000671387, "learning_rate": 1.721210313308104e-05, "loss": 0.5766, "step": 9739 }, { "epoch": 0.26635309560271275, "grad_norm": 1.5029996633529663, "learning_rate": 1.7211489563205535e-05, "loss": 0.5646, "step": 9740 }, { "epoch": 0.2663804419164297, "grad_norm": 1.2818934917449951, "learning_rate": 1.721087593675792e-05, "loss": 0.5709, "step": 9741 }, { "epoch": 0.2664077882301466, "grad_norm": 1.4916280508041382, "learning_rate": 1.7210262253743017e-05, "loss": 0.6149, "step": 9742 }, { "epoch": 0.26643513454386347, "grad_norm": 3.9509117603302, "learning_rate": 1.7209648514165635e-05, "loss": 0.5529, "step": 9743 }, { "epoch": 0.2664624808575804, "grad_norm": 1.586880087852478, "learning_rate": 1.720903471803059e-05, "loss": 0.4831, "step": 9744 }, { "epoch": 0.2664898271712973, "grad_norm": 1.5299986600875854, "learning_rate": 1.7208420865342693e-05, "loss": 0.5847, "step": 9745 }, { "epoch": 0.26651717348501425, "grad_norm": 1.442094326019287, "learning_rate": 1.7207806956106767e-05, "loss": 0.5426, "step": 9746 }, { "epoch": 0.2665445197987311, "grad_norm": 1.283051609992981, "learning_rate": 1.720719299032762e-05, "loss": 0.5919, "step": 9747 }, { "epoch": 0.26657186611244804, "grad_norm": 1.6033068895339966, "learning_rate": 1.7206578968010074e-05, "loss": 0.551, "step": 9748 }, { "epoch": 0.26659921242616497, "grad_norm": 1.2980979681015015, "learning_rate": 1.7205964889158948e-05, "loss": 0.5626, "step": 9749 }, { "epoch": 0.2666265587398819, "grad_norm": 1.4160993099212646, "learning_rate": 1.720535075377905e-05, "loss": 0.5174, "step": 9750 }, { "epoch": 0.26665390505359876, "grad_norm": 1.254503846168518, "learning_rate": 1.7204736561875206e-05, "loss": 0.5516, "step": 9751 }, { "epoch": 0.2666812513673157, "grad_norm": 1.7382256984710693, "learning_rate": 1.720412231345223e-05, "loss": 0.5761, "step": 9752 }, { "epoch": 0.2667085976810326, "grad_norm": 1.62142813205719, "learning_rate": 1.7203508008514945e-05, "loss": 0.5602, "step": 9753 }, { "epoch": 0.2667359439947495, "grad_norm": 2.0179595947265625, "learning_rate": 1.7202893647068162e-05, "loss": 0.5256, "step": 9754 }, { "epoch": 0.2667632903084664, "grad_norm": 2.6032421588897705, "learning_rate": 1.720227922911671e-05, "loss": 0.3917, "step": 9755 }, { "epoch": 0.26679063662218333, "grad_norm": 1.4209219217300415, "learning_rate": 1.72016647546654e-05, "loss": 0.575, "step": 9756 }, { "epoch": 0.26681798293590026, "grad_norm": 1.2264060974121094, "learning_rate": 1.7201050223719057e-05, "loss": 0.5359, "step": 9757 }, { "epoch": 0.2668453292496171, "grad_norm": 1.309441328048706, "learning_rate": 1.7200435636282505e-05, "loss": 0.5515, "step": 9758 }, { "epoch": 0.26687267556333405, "grad_norm": 1.4312111139297485, "learning_rate": 1.7199820992360557e-05, "loss": 0.5617, "step": 9759 }, { "epoch": 0.266900021877051, "grad_norm": 1.4182742834091187, "learning_rate": 1.7199206291958043e-05, "loss": 0.5783, "step": 9760 }, { "epoch": 0.2669273681907679, "grad_norm": 1.3887147903442383, "learning_rate": 1.7198591535079783e-05, "loss": 0.563, "step": 9761 }, { "epoch": 0.26695471450448477, "grad_norm": 1.459121584892273, "learning_rate": 1.7197976721730596e-05, "loss": 0.5486, "step": 9762 }, { "epoch": 0.2669820608182017, "grad_norm": 1.8530404567718506, "learning_rate": 1.7197361851915305e-05, "loss": 0.5704, "step": 9763 }, { "epoch": 0.2670094071319186, "grad_norm": 1.4913603067398071, "learning_rate": 1.719674692563874e-05, "loss": 0.8558, "step": 9764 }, { "epoch": 0.26703675344563554, "grad_norm": 1.3872143030166626, "learning_rate": 1.719613194290572e-05, "loss": 0.5477, "step": 9765 }, { "epoch": 0.2670640997593524, "grad_norm": 1.9176188707351685, "learning_rate": 1.7195516903721066e-05, "loss": 0.9016, "step": 9766 }, { "epoch": 0.26709144607306934, "grad_norm": 1.2631467580795288, "learning_rate": 1.719490180808961e-05, "loss": 0.5537, "step": 9767 }, { "epoch": 0.26711879238678626, "grad_norm": 1.8598135709762573, "learning_rate": 1.7194286656016176e-05, "loss": 0.5471, "step": 9768 }, { "epoch": 0.2671461387005032, "grad_norm": 1.3082939386367798, "learning_rate": 1.719367144750559e-05, "loss": 0.5858, "step": 9769 }, { "epoch": 0.26717348501422006, "grad_norm": 1.4387892484664917, "learning_rate": 1.719305618256267e-05, "loss": 0.5644, "step": 9770 }, { "epoch": 0.267200831327937, "grad_norm": 1.5809437036514282, "learning_rate": 1.719244086119225e-05, "loss": 0.5451, "step": 9771 }, { "epoch": 0.2672281776416539, "grad_norm": 1.1346659660339355, "learning_rate": 1.7191825483399157e-05, "loss": 0.5679, "step": 9772 }, { "epoch": 0.26725552395537083, "grad_norm": 1.3691035509109497, "learning_rate": 1.7191210049188213e-05, "loss": 0.561, "step": 9773 }, { "epoch": 0.2672828702690877, "grad_norm": 1.34015953540802, "learning_rate": 1.7190594558564254e-05, "loss": 0.5616, "step": 9774 }, { "epoch": 0.2673102165828046, "grad_norm": 1.4314534664154053, "learning_rate": 1.7189979011532102e-05, "loss": 0.8519, "step": 9775 }, { "epoch": 0.26733756289652155, "grad_norm": 1.34898841381073, "learning_rate": 1.7189363408096592e-05, "loss": 0.5695, "step": 9776 }, { "epoch": 0.2673649092102385, "grad_norm": 1.8440368175506592, "learning_rate": 1.7188747748262548e-05, "loss": 0.8802, "step": 9777 }, { "epoch": 0.26739225552395535, "grad_norm": 1.4292865991592407, "learning_rate": 1.71881320320348e-05, "loss": 0.9006, "step": 9778 }, { "epoch": 0.26741960183767227, "grad_norm": 2.0335583686828613, "learning_rate": 1.7187516259418177e-05, "loss": 0.9496, "step": 9779 }, { "epoch": 0.2674469481513892, "grad_norm": 1.4493166208267212, "learning_rate": 1.7186900430417515e-05, "loss": 0.5375, "step": 9780 }, { "epoch": 0.2674742944651061, "grad_norm": 1.625836730003357, "learning_rate": 1.718628454503764e-05, "loss": 0.8721, "step": 9781 }, { "epoch": 0.267501640778823, "grad_norm": 1.4362725019454956, "learning_rate": 1.7185668603283387e-05, "loss": 0.5583, "step": 9782 }, { "epoch": 0.2675289870925399, "grad_norm": 1.2976490259170532, "learning_rate": 1.7185052605159583e-05, "loss": 0.534, "step": 9783 }, { "epoch": 0.26755633340625684, "grad_norm": 1.4998798370361328, "learning_rate": 1.7184436550671067e-05, "loss": 0.5762, "step": 9784 }, { "epoch": 0.26758367971997377, "grad_norm": 1.5761148929595947, "learning_rate": 1.7183820439822665e-05, "loss": 0.5788, "step": 9785 }, { "epoch": 0.26761102603369064, "grad_norm": 1.5566737651824951, "learning_rate": 1.7183204272619214e-05, "loss": 0.5463, "step": 9786 }, { "epoch": 0.26763837234740756, "grad_norm": 1.5884740352630615, "learning_rate": 1.7182588049065547e-05, "loss": 0.5647, "step": 9787 }, { "epoch": 0.2676657186611245, "grad_norm": 1.4404826164245605, "learning_rate": 1.71819717691665e-05, "loss": 0.5665, "step": 9788 }, { "epoch": 0.2676930649748414, "grad_norm": 1.4779895544052124, "learning_rate": 1.7181355432926903e-05, "loss": 0.5743, "step": 9789 }, { "epoch": 0.2677204112885583, "grad_norm": 1.3787692785263062, "learning_rate": 1.7180739040351592e-05, "loss": 0.5325, "step": 9790 }, { "epoch": 0.2677477576022752, "grad_norm": 1.8164228200912476, "learning_rate": 1.718012259144541e-05, "loss": 0.5654, "step": 9791 }, { "epoch": 0.26777510391599213, "grad_norm": 1.2487726211547852, "learning_rate": 1.7179506086213185e-05, "loss": 0.54, "step": 9792 }, { "epoch": 0.26780245022970905, "grad_norm": 2.0748636722564697, "learning_rate": 1.7178889524659752e-05, "loss": 0.5287, "step": 9793 }, { "epoch": 0.2678297965434259, "grad_norm": 1.4786441326141357, "learning_rate": 1.717827290678995e-05, "loss": 0.5913, "step": 9794 }, { "epoch": 0.26785714285714285, "grad_norm": 1.1753188371658325, "learning_rate": 1.7177656232608623e-05, "loss": 0.572, "step": 9795 }, { "epoch": 0.2678844891708598, "grad_norm": 1.287445068359375, "learning_rate": 1.71770395021206e-05, "loss": 0.5906, "step": 9796 }, { "epoch": 0.2679118354845767, "grad_norm": 1.2576184272766113, "learning_rate": 1.7176422715330722e-05, "loss": 0.5814, "step": 9797 }, { "epoch": 0.26793918179829357, "grad_norm": 1.274922251701355, "learning_rate": 1.717580587224383e-05, "loss": 0.5482, "step": 9798 }, { "epoch": 0.2679665281120105, "grad_norm": 1.6633899211883545, "learning_rate": 1.7175188972864755e-05, "loss": 0.5986, "step": 9799 }, { "epoch": 0.2679938744257274, "grad_norm": 1.3899574279785156, "learning_rate": 1.7174572017198347e-05, "loss": 0.5496, "step": 9800 }, { "epoch": 0.26802122073944434, "grad_norm": 1.6206998825073242, "learning_rate": 1.7173955005249438e-05, "loss": 0.5206, "step": 9801 }, { "epoch": 0.2680485670531612, "grad_norm": 1.631654143333435, "learning_rate": 1.7173337937022868e-05, "loss": 0.5555, "step": 9802 }, { "epoch": 0.26807591336687814, "grad_norm": 2.5484566688537598, "learning_rate": 1.7172720812523484e-05, "loss": 0.5941, "step": 9803 }, { "epoch": 0.26810325968059506, "grad_norm": 1.7479978799819946, "learning_rate": 1.7172103631756125e-05, "loss": 0.5777, "step": 9804 }, { "epoch": 0.268130605994312, "grad_norm": 1.8152471780776978, "learning_rate": 1.717148639472563e-05, "loss": 0.5419, "step": 9805 }, { "epoch": 0.26815795230802886, "grad_norm": 1.5767077207565308, "learning_rate": 1.717086910143684e-05, "loss": 0.4799, "step": 9806 }, { "epoch": 0.2681852986217458, "grad_norm": 1.6097408533096313, "learning_rate": 1.7170251751894603e-05, "loss": 0.549, "step": 9807 }, { "epoch": 0.2682126449354627, "grad_norm": 1.3298311233520508, "learning_rate": 1.716963434610376e-05, "loss": 0.55, "step": 9808 }, { "epoch": 0.26823999124917963, "grad_norm": 2.160139560699463, "learning_rate": 1.7169016884069153e-05, "loss": 0.5301, "step": 9809 }, { "epoch": 0.2682673375628965, "grad_norm": 1.4885759353637695, "learning_rate": 1.7168399365795624e-05, "loss": 0.5561, "step": 9810 }, { "epoch": 0.2682946838766134, "grad_norm": 1.2450648546218872, "learning_rate": 1.7167781791288018e-05, "loss": 0.561, "step": 9811 }, { "epoch": 0.26832203019033035, "grad_norm": 1.6887123584747314, "learning_rate": 1.7167164160551184e-05, "loss": 0.5827, "step": 9812 }, { "epoch": 0.2683493765040473, "grad_norm": 1.4846347570419312, "learning_rate": 1.7166546473589963e-05, "loss": 0.5916, "step": 9813 }, { "epoch": 0.26837672281776415, "grad_norm": 1.917984962463379, "learning_rate": 1.71659287304092e-05, "loss": 0.468, "step": 9814 }, { "epoch": 0.26840406913148107, "grad_norm": 1.459330439567566, "learning_rate": 1.7165310931013747e-05, "loss": 0.4183, "step": 9815 }, { "epoch": 0.268431415445198, "grad_norm": 1.4404106140136719, "learning_rate": 1.7164693075408442e-05, "loss": 0.5308, "step": 9816 }, { "epoch": 0.2684587617589149, "grad_norm": 2.240755319595337, "learning_rate": 1.716407516359814e-05, "loss": 0.9679, "step": 9817 }, { "epoch": 0.2684861080726318, "grad_norm": 1.3343061208724976, "learning_rate": 1.716345719558768e-05, "loss": 0.5738, "step": 9818 }, { "epoch": 0.2685134543863487, "grad_norm": 1.340572714805603, "learning_rate": 1.716283917138192e-05, "loss": 0.5455, "step": 9819 }, { "epoch": 0.26854080070006564, "grad_norm": 1.3749200105667114, "learning_rate": 1.7162221090985698e-05, "loss": 0.5881, "step": 9820 }, { "epoch": 0.26856814701378257, "grad_norm": 2.6456425189971924, "learning_rate": 1.7161602954403867e-05, "loss": 0.867, "step": 9821 }, { "epoch": 0.26859549332749943, "grad_norm": 1.5204155445098877, "learning_rate": 1.716098476164128e-05, "loss": 0.565, "step": 9822 }, { "epoch": 0.26862283964121636, "grad_norm": 1.074317216873169, "learning_rate": 1.716036651270278e-05, "loss": 0.6044, "step": 9823 }, { "epoch": 0.2686501859549333, "grad_norm": 1.3205082416534424, "learning_rate": 1.715974820759322e-05, "loss": 0.5441, "step": 9824 }, { "epoch": 0.2686775322686502, "grad_norm": 1.7391587495803833, "learning_rate": 1.715912984631745e-05, "loss": 0.5348, "step": 9825 }, { "epoch": 0.2687048785823671, "grad_norm": 1.222179651260376, "learning_rate": 1.715851142888032e-05, "loss": 0.5459, "step": 9826 }, { "epoch": 0.268732224896084, "grad_norm": 2.3350658416748047, "learning_rate": 1.7157892955286687e-05, "loss": 0.5813, "step": 9827 }, { "epoch": 0.26875957120980093, "grad_norm": 1.4763661623001099, "learning_rate": 1.7157274425541395e-05, "loss": 0.5664, "step": 9828 }, { "epoch": 0.26878691752351785, "grad_norm": 1.681452989578247, "learning_rate": 1.7156655839649298e-05, "loss": 0.5408, "step": 9829 }, { "epoch": 0.2688142638372347, "grad_norm": 1.4856888055801392, "learning_rate": 1.715603719761525e-05, "loss": 0.5413, "step": 9830 }, { "epoch": 0.26884161015095165, "grad_norm": 1.6109986305236816, "learning_rate": 1.7155418499444108e-05, "loss": 0.5601, "step": 9831 }, { "epoch": 0.2688689564646686, "grad_norm": 1.170913577079773, "learning_rate": 1.715479974514072e-05, "loss": 0.5322, "step": 9832 }, { "epoch": 0.2688963027783855, "grad_norm": 1.4225285053253174, "learning_rate": 1.715418093470994e-05, "loss": 0.5912, "step": 9833 }, { "epoch": 0.26892364909210237, "grad_norm": 1.8292979001998901, "learning_rate": 1.7153562068156625e-05, "loss": 0.5876, "step": 9834 }, { "epoch": 0.2689509954058193, "grad_norm": 1.4570742845535278, "learning_rate": 1.715294314548563e-05, "loss": 0.5581, "step": 9835 }, { "epoch": 0.2689783417195362, "grad_norm": 1.2891513109207153, "learning_rate": 1.7152324166701807e-05, "loss": 0.5431, "step": 9836 }, { "epoch": 0.26900568803325314, "grad_norm": 1.5973013639450073, "learning_rate": 1.7151705131810013e-05, "loss": 0.5627, "step": 9837 }, { "epoch": 0.26903303434697, "grad_norm": 1.54258394241333, "learning_rate": 1.7151086040815107e-05, "loss": 0.5383, "step": 9838 }, { "epoch": 0.26906038066068694, "grad_norm": 1.4713339805603027, "learning_rate": 1.7150466893721944e-05, "loss": 0.5339, "step": 9839 }, { "epoch": 0.26908772697440386, "grad_norm": 1.9453058242797852, "learning_rate": 1.714984769053538e-05, "loss": 0.6112, "step": 9840 }, { "epoch": 0.2691150732881208, "grad_norm": 1.7229149341583252, "learning_rate": 1.714922843126027e-05, "loss": 0.5073, "step": 9841 }, { "epoch": 0.26914241960183766, "grad_norm": 1.8661879301071167, "learning_rate": 1.7148609115901478e-05, "loss": 0.5966, "step": 9842 }, { "epoch": 0.2691697659155546, "grad_norm": 1.7083582878112793, "learning_rate": 1.7147989744463856e-05, "loss": 0.507, "step": 9843 }, { "epoch": 0.2691971122292715, "grad_norm": 1.7211527824401855, "learning_rate": 1.714737031695227e-05, "loss": 0.5312, "step": 9844 }, { "epoch": 0.26922445854298843, "grad_norm": 1.5866169929504395, "learning_rate": 1.7146750833371576e-05, "loss": 0.5546, "step": 9845 }, { "epoch": 0.2692518048567053, "grad_norm": 1.4278513193130493, "learning_rate": 1.7146131293726628e-05, "loss": 0.585, "step": 9846 }, { "epoch": 0.2692791511704222, "grad_norm": 1.2528879642486572, "learning_rate": 1.7145511698022295e-05, "loss": 0.5576, "step": 9847 }, { "epoch": 0.26930649748413915, "grad_norm": 1.8356671333312988, "learning_rate": 1.7144892046263432e-05, "loss": 0.5653, "step": 9848 }, { "epoch": 0.2693338437978561, "grad_norm": 1.6766899824142456, "learning_rate": 1.7144272338454902e-05, "loss": 0.5208, "step": 9849 }, { "epoch": 0.26936119011157295, "grad_norm": 1.437586784362793, "learning_rate": 1.714365257460157e-05, "loss": 0.5856, "step": 9850 }, { "epoch": 0.26938853642528987, "grad_norm": 1.8141943216323853, "learning_rate": 1.7143032754708286e-05, "loss": 0.533, "step": 9851 }, { "epoch": 0.2694158827390068, "grad_norm": 1.6079094409942627, "learning_rate": 1.7142412878779925e-05, "loss": 0.5626, "step": 9852 }, { "epoch": 0.2694432290527237, "grad_norm": 1.7681560516357422, "learning_rate": 1.7141792946821344e-05, "loss": 0.5642, "step": 9853 }, { "epoch": 0.2694705753664406, "grad_norm": 1.412996768951416, "learning_rate": 1.7141172958837408e-05, "loss": 0.5664, "step": 9854 }, { "epoch": 0.2694979216801575, "grad_norm": 1.4919105768203735, "learning_rate": 1.714055291483298e-05, "loss": 0.5293, "step": 9855 }, { "epoch": 0.26952526799387444, "grad_norm": 2.08026123046875, "learning_rate": 1.7139932814812924e-05, "loss": 0.5761, "step": 9856 }, { "epoch": 0.2695526143075913, "grad_norm": 3.0202527046203613, "learning_rate": 1.7139312658782104e-05, "loss": 0.9656, "step": 9857 }, { "epoch": 0.26957996062130823, "grad_norm": 1.7478405237197876, "learning_rate": 1.7138692446745384e-05, "loss": 0.5572, "step": 9858 }, { "epoch": 0.26960730693502516, "grad_norm": 1.8017393350601196, "learning_rate": 1.7138072178707632e-05, "loss": 0.5054, "step": 9859 }, { "epoch": 0.2696346532487421, "grad_norm": 1.806063175201416, "learning_rate": 1.7137451854673712e-05, "loss": 0.551, "step": 9860 }, { "epoch": 0.26966199956245895, "grad_norm": 1.456319808959961, "learning_rate": 1.7136831474648495e-05, "loss": 0.5192, "step": 9861 }, { "epoch": 0.2696893458761759, "grad_norm": 1.6327300071716309, "learning_rate": 1.713621103863684e-05, "loss": 0.5614, "step": 9862 }, { "epoch": 0.2697166921898928, "grad_norm": 1.334517478942871, "learning_rate": 1.7135590546643614e-05, "loss": 0.543, "step": 9863 }, { "epoch": 0.26974403850360973, "grad_norm": 1.4368237257003784, "learning_rate": 1.713496999867369e-05, "loss": 0.5572, "step": 9864 }, { "epoch": 0.2697713848173266, "grad_norm": 3.5603652000427246, "learning_rate": 1.713434939473194e-05, "loss": 0.9042, "step": 9865 }, { "epoch": 0.2697987311310435, "grad_norm": 1.4460372924804688, "learning_rate": 1.713372873482322e-05, "loss": 0.5475, "step": 9866 }, { "epoch": 0.26982607744476045, "grad_norm": 1.377009391784668, "learning_rate": 1.7133108018952412e-05, "loss": 0.5328, "step": 9867 }, { "epoch": 0.2698534237584774, "grad_norm": 1.5069547891616821, "learning_rate": 1.7132487247124373e-05, "loss": 0.9227, "step": 9868 }, { "epoch": 0.26988077007219424, "grad_norm": 1.3847770690917969, "learning_rate": 1.7131866419343984e-05, "loss": 0.5578, "step": 9869 }, { "epoch": 0.26990811638591117, "grad_norm": 1.607893943786621, "learning_rate": 1.7131245535616106e-05, "loss": 0.5713, "step": 9870 }, { "epoch": 0.2699354626996281, "grad_norm": 1.7075984477996826, "learning_rate": 1.7130624595945616e-05, "loss": 0.5411, "step": 9871 }, { "epoch": 0.269962809013345, "grad_norm": 1.334245204925537, "learning_rate": 1.713000360033738e-05, "loss": 0.5662, "step": 9872 }, { "epoch": 0.2699901553270619, "grad_norm": 1.2174780368804932, "learning_rate": 1.7129382548796275e-05, "loss": 0.5584, "step": 9873 }, { "epoch": 0.2700175016407788, "grad_norm": 1.1978590488433838, "learning_rate": 1.7128761441327172e-05, "loss": 0.552, "step": 9874 }, { "epoch": 0.27004484795449574, "grad_norm": 1.272934079170227, "learning_rate": 1.7128140277934938e-05, "loss": 0.5636, "step": 9875 }, { "epoch": 0.27007219426821266, "grad_norm": 1.2633090019226074, "learning_rate": 1.712751905862445e-05, "loss": 0.5651, "step": 9876 }, { "epoch": 0.27009954058192953, "grad_norm": 1.1758676767349243, "learning_rate": 1.7126897783400585e-05, "loss": 0.5403, "step": 9877 }, { "epoch": 0.27012688689564646, "grad_norm": 1.3032119274139404, "learning_rate": 1.7126276452268207e-05, "loss": 0.5577, "step": 9878 }, { "epoch": 0.2701542332093634, "grad_norm": 1.359978437423706, "learning_rate": 1.71256550652322e-05, "loss": 0.5783, "step": 9879 }, { "epoch": 0.2701815795230803, "grad_norm": 1.4162671566009521, "learning_rate": 1.712503362229743e-05, "loss": 0.5615, "step": 9880 }, { "epoch": 0.2702089258367972, "grad_norm": 2.0620057582855225, "learning_rate": 1.7124412123468782e-05, "loss": 0.5514, "step": 9881 }, { "epoch": 0.2702362721505141, "grad_norm": 1.6117526292800903, "learning_rate": 1.712379056875112e-05, "loss": 0.5403, "step": 9882 }, { "epoch": 0.270263618464231, "grad_norm": 1.3761228322982788, "learning_rate": 1.712316895814933e-05, "loss": 0.5545, "step": 9883 }, { "epoch": 0.27029096477794795, "grad_norm": 4.511586666107178, "learning_rate": 1.7122547291668283e-05, "loss": 0.4204, "step": 9884 }, { "epoch": 0.2703183110916648, "grad_norm": 1.6025428771972656, "learning_rate": 1.7121925569312858e-05, "loss": 0.6113, "step": 9885 }, { "epoch": 0.27034565740538175, "grad_norm": 1.5638792514801025, "learning_rate": 1.7121303791087925e-05, "loss": 0.5223, "step": 9886 }, { "epoch": 0.27037300371909867, "grad_norm": 1.6398495435714722, "learning_rate": 1.7120681956998378e-05, "loss": 0.571, "step": 9887 }, { "epoch": 0.2704003500328156, "grad_norm": 1.2516645193099976, "learning_rate": 1.7120060067049078e-05, "loss": 0.5446, "step": 9888 }, { "epoch": 0.27042769634653246, "grad_norm": 1.4554548263549805, "learning_rate": 1.711943812124491e-05, "loss": 0.5781, "step": 9889 }, { "epoch": 0.2704550426602494, "grad_norm": 1.7058498859405518, "learning_rate": 1.7118816119590756e-05, "loss": 0.5111, "step": 9890 }, { "epoch": 0.2704823889739663, "grad_norm": 1.8438745737075806, "learning_rate": 1.7118194062091495e-05, "loss": 0.9239, "step": 9891 }, { "epoch": 0.27050973528768324, "grad_norm": 1.9525628089904785, "learning_rate": 1.7117571948752e-05, "loss": 0.8999, "step": 9892 }, { "epoch": 0.2705370816014001, "grad_norm": 1.4021588563919067, "learning_rate": 1.711694977957716e-05, "loss": 0.4957, "step": 9893 }, { "epoch": 0.27056442791511703, "grad_norm": 1.2259010076522827, "learning_rate": 1.711632755457185e-05, "loss": 0.5867, "step": 9894 }, { "epoch": 0.27059177422883396, "grad_norm": 1.472230076789856, "learning_rate": 1.7115705273740953e-05, "loss": 0.8726, "step": 9895 }, { "epoch": 0.2706191205425509, "grad_norm": 1.1940933465957642, "learning_rate": 1.7115082937089354e-05, "loss": 0.5355, "step": 9896 }, { "epoch": 0.27064646685626775, "grad_norm": 1.3268412351608276, "learning_rate": 1.7114460544621928e-05, "loss": 0.5477, "step": 9897 }, { "epoch": 0.2706738131699847, "grad_norm": 7.356316089630127, "learning_rate": 1.711383809634356e-05, "loss": 0.9005, "step": 9898 }, { "epoch": 0.2707011594837016, "grad_norm": 1.3475701808929443, "learning_rate": 1.7113215592259137e-05, "loss": 0.5697, "step": 9899 }, { "epoch": 0.27072850579741853, "grad_norm": 1.6220366954803467, "learning_rate": 1.711259303237354e-05, "loss": 0.5423, "step": 9900 }, { "epoch": 0.2707558521111354, "grad_norm": 1.3586275577545166, "learning_rate": 1.711197041669165e-05, "loss": 0.5391, "step": 9901 }, { "epoch": 0.2707831984248523, "grad_norm": 1.3211575746536255, "learning_rate": 1.7111347745218356e-05, "loss": 0.5394, "step": 9902 }, { "epoch": 0.27081054473856925, "grad_norm": 1.4143599271774292, "learning_rate": 1.7110725017958535e-05, "loss": 0.5381, "step": 9903 }, { "epoch": 0.2708378910522862, "grad_norm": 1.4723472595214844, "learning_rate": 1.7110102234917083e-05, "loss": 0.496, "step": 9904 }, { "epoch": 0.27086523736600304, "grad_norm": 1.4021185636520386, "learning_rate": 1.710947939609888e-05, "loss": 0.5268, "step": 9905 }, { "epoch": 0.27089258367971997, "grad_norm": 1.3706021308898926, "learning_rate": 1.710885650150881e-05, "loss": 0.5464, "step": 9906 }, { "epoch": 0.2709199299934369, "grad_norm": 1.5726087093353271, "learning_rate": 1.710823355115176e-05, "loss": 0.9314, "step": 9907 }, { "epoch": 0.2709472763071538, "grad_norm": 1.441566824913025, "learning_rate": 1.710761054503262e-05, "loss": 0.4448, "step": 9908 }, { "epoch": 0.2709746226208707, "grad_norm": 1.5706099271774292, "learning_rate": 1.7106987483156275e-05, "loss": 0.5583, "step": 9909 }, { "epoch": 0.2710019689345876, "grad_norm": 1.3625491857528687, "learning_rate": 1.7106364365527613e-05, "loss": 0.5405, "step": 9910 }, { "epoch": 0.27102931524830454, "grad_norm": 1.1972142457962036, "learning_rate": 1.7105741192151525e-05, "loss": 0.5541, "step": 9911 }, { "epoch": 0.27105666156202146, "grad_norm": 1.581276297569275, "learning_rate": 1.7105117963032893e-05, "loss": 0.5763, "step": 9912 }, { "epoch": 0.27108400787573833, "grad_norm": 3.937044382095337, "learning_rate": 1.7104494678176613e-05, "loss": 0.391, "step": 9913 }, { "epoch": 0.27111135418945526, "grad_norm": 1.4357824325561523, "learning_rate": 1.710387133758757e-05, "loss": 0.5353, "step": 9914 }, { "epoch": 0.2711387005031722, "grad_norm": 1.4843480587005615, "learning_rate": 1.7103247941270656e-05, "loss": 0.5493, "step": 9915 }, { "epoch": 0.2711660468168891, "grad_norm": 1.475054383277893, "learning_rate": 1.710262448923076e-05, "loss": 0.5474, "step": 9916 }, { "epoch": 0.271193393130606, "grad_norm": 1.609309196472168, "learning_rate": 1.7102000981472776e-05, "loss": 0.555, "step": 9917 }, { "epoch": 0.2712207394443229, "grad_norm": 1.3942651748657227, "learning_rate": 1.7101377418001594e-05, "loss": 0.5693, "step": 9918 }, { "epoch": 0.2712480857580398, "grad_norm": 1.6855053901672363, "learning_rate": 1.71007537988221e-05, "loss": 0.4958, "step": 9919 }, { "epoch": 0.27127543207175675, "grad_norm": 1.3600252866744995, "learning_rate": 1.7100130123939194e-05, "loss": 0.5514, "step": 9920 }, { "epoch": 0.2713027783854736, "grad_norm": 1.3917381763458252, "learning_rate": 1.7099506393357765e-05, "loss": 0.545, "step": 9921 }, { "epoch": 0.27133012469919054, "grad_norm": 1.4138152599334717, "learning_rate": 1.7098882607082707e-05, "loss": 0.5709, "step": 9922 }, { "epoch": 0.27135747101290747, "grad_norm": 1.401074767112732, "learning_rate": 1.7098258765118912e-05, "loss": 0.5286, "step": 9923 }, { "epoch": 0.2713848173266244, "grad_norm": 1.5925877094268799, "learning_rate": 1.7097634867471273e-05, "loss": 0.495, "step": 9924 }, { "epoch": 0.27141216364034126, "grad_norm": 1.5052285194396973, "learning_rate": 1.709701091414469e-05, "loss": 0.5526, "step": 9925 }, { "epoch": 0.2714395099540582, "grad_norm": 1.3589277267456055, "learning_rate": 1.709638690514405e-05, "loss": 0.576, "step": 9926 }, { "epoch": 0.2714668562677751, "grad_norm": 1.5429280996322632, "learning_rate": 1.7095762840474252e-05, "loss": 0.924, "step": 9927 }, { "epoch": 0.27149420258149204, "grad_norm": 1.5641794204711914, "learning_rate": 1.709513872014019e-05, "loss": 0.5413, "step": 9928 }, { "epoch": 0.2715215488952089, "grad_norm": 1.1992369890213013, "learning_rate": 1.7094514544146764e-05, "loss": 0.5239, "step": 9929 }, { "epoch": 0.27154889520892583, "grad_norm": 1.275985598564148, "learning_rate": 1.709389031249887e-05, "loss": 0.5495, "step": 9930 }, { "epoch": 0.27157624152264276, "grad_norm": 1.539015769958496, "learning_rate": 1.70932660252014e-05, "loss": 0.5275, "step": 9931 }, { "epoch": 0.2716035878363597, "grad_norm": 1.2089710235595703, "learning_rate": 1.7092641682259254e-05, "loss": 0.5319, "step": 9932 }, { "epoch": 0.27163093415007655, "grad_norm": 1.473157525062561, "learning_rate": 1.7092017283677333e-05, "loss": 0.5494, "step": 9933 }, { "epoch": 0.2716582804637935, "grad_norm": 1.1164621114730835, "learning_rate": 1.709139282946053e-05, "loss": 0.5502, "step": 9934 }, { "epoch": 0.2716856267775104, "grad_norm": 1.543480396270752, "learning_rate": 1.7090768319613746e-05, "loss": 0.5607, "step": 9935 }, { "epoch": 0.27171297309122733, "grad_norm": 1.6661039590835571, "learning_rate": 1.7090143754141878e-05, "loss": 0.3943, "step": 9936 }, { "epoch": 0.2717403194049442, "grad_norm": 1.68058180809021, "learning_rate": 1.708951913304983e-05, "loss": 0.9078, "step": 9937 }, { "epoch": 0.2717676657186611, "grad_norm": 1.1431227922439575, "learning_rate": 1.70888944563425e-05, "loss": 0.5325, "step": 9938 }, { "epoch": 0.27179501203237805, "grad_norm": 1.5774480104446411, "learning_rate": 1.7088269724024785e-05, "loss": 0.5459, "step": 9939 }, { "epoch": 0.27182235834609497, "grad_norm": 1.4941569566726685, "learning_rate": 1.708764493610159e-05, "loss": 0.513, "step": 9940 }, { "epoch": 0.27184970465981184, "grad_norm": 2.024275541305542, "learning_rate": 1.708702009257782e-05, "loss": 0.4392, "step": 9941 }, { "epoch": 0.27187705097352877, "grad_norm": 1.2135268449783325, "learning_rate": 1.7086395193458364e-05, "loss": 0.5593, "step": 9942 }, { "epoch": 0.2719043972872457, "grad_norm": 1.3090068101882935, "learning_rate": 1.7085770238748138e-05, "loss": 0.3743, "step": 9943 }, { "epoch": 0.2719317436009626, "grad_norm": 1.5884538888931274, "learning_rate": 1.7085145228452036e-05, "loss": 0.5571, "step": 9944 }, { "epoch": 0.2719590899146795, "grad_norm": 1.2905364036560059, "learning_rate": 1.7084520162574967e-05, "loss": 0.5396, "step": 9945 }, { "epoch": 0.2719864362283964, "grad_norm": 1.386725664138794, "learning_rate": 1.7083895041121827e-05, "loss": 0.5176, "step": 9946 }, { "epoch": 0.27201378254211334, "grad_norm": 2.3873894214630127, "learning_rate": 1.7083269864097525e-05, "loss": 0.4925, "step": 9947 }, { "epoch": 0.27204112885583026, "grad_norm": 1.3354699611663818, "learning_rate": 1.7082644631506964e-05, "loss": 0.5102, "step": 9948 }, { "epoch": 0.27206847516954713, "grad_norm": 1.3213684558868408, "learning_rate": 1.708201934335505e-05, "loss": 0.5764, "step": 9949 }, { "epoch": 0.27209582148326406, "grad_norm": 1.6479592323303223, "learning_rate": 1.7081393999646688e-05, "loss": 0.4083, "step": 9950 }, { "epoch": 0.272123167796981, "grad_norm": 1.515986442565918, "learning_rate": 1.7080768600386782e-05, "loss": 0.5241, "step": 9951 }, { "epoch": 0.2721505141106979, "grad_norm": 1.3174768686294556, "learning_rate": 1.7080143145580237e-05, "loss": 0.5495, "step": 9952 }, { "epoch": 0.2721778604244148, "grad_norm": 1.20112144947052, "learning_rate": 1.7079517635231964e-05, "loss": 0.5674, "step": 9953 }, { "epoch": 0.2722052067381317, "grad_norm": 1.307328462600708, "learning_rate": 1.7078892069346867e-05, "loss": 0.5584, "step": 9954 }, { "epoch": 0.2722325530518486, "grad_norm": 1.7499557733535767, "learning_rate": 1.7078266447929853e-05, "loss": 0.5828, "step": 9955 }, { "epoch": 0.2722598993655655, "grad_norm": 1.375014305114746, "learning_rate": 1.707764077098583e-05, "loss": 0.5316, "step": 9956 }, { "epoch": 0.2722872456792824, "grad_norm": 1.4694393873214722, "learning_rate": 1.707701503851971e-05, "loss": 0.5523, "step": 9957 }, { "epoch": 0.27231459199299934, "grad_norm": 1.2117384672164917, "learning_rate": 1.70763892505364e-05, "loss": 0.5462, "step": 9958 }, { "epoch": 0.27234193830671627, "grad_norm": 2.4317636489868164, "learning_rate": 1.70757634070408e-05, "loss": 0.5687, "step": 9959 }, { "epoch": 0.27236928462043314, "grad_norm": 1.3673193454742432, "learning_rate": 1.7075137508037836e-05, "loss": 0.5264, "step": 9960 }, { "epoch": 0.27239663093415006, "grad_norm": 1.265288233757019, "learning_rate": 1.7074511553532403e-05, "loss": 0.5674, "step": 9961 }, { "epoch": 0.272423977247867, "grad_norm": 1.314154863357544, "learning_rate": 1.7073885543529423e-05, "loss": 0.5616, "step": 9962 }, { "epoch": 0.2724513235615839, "grad_norm": 1.2403483390808105, "learning_rate": 1.70732594780338e-05, "loss": 0.5835, "step": 9963 }, { "epoch": 0.2724786698753008, "grad_norm": 1.3434921503067017, "learning_rate": 1.7072633357050445e-05, "loss": 0.5677, "step": 9964 }, { "epoch": 0.2725060161890177, "grad_norm": 1.2364753484725952, "learning_rate": 1.7072007180584274e-05, "loss": 0.5655, "step": 9965 }, { "epoch": 0.27253336250273463, "grad_norm": 1.3263710737228394, "learning_rate": 1.7071380948640196e-05, "loss": 0.5499, "step": 9966 }, { "epoch": 0.27256070881645156, "grad_norm": 1.8904473781585693, "learning_rate": 1.7070754661223124e-05, "loss": 0.8885, "step": 9967 }, { "epoch": 0.2725880551301684, "grad_norm": 1.7853535413742065, "learning_rate": 1.7070128318337974e-05, "loss": 0.5712, "step": 9968 }, { "epoch": 0.27261540144388535, "grad_norm": 2.2367124557495117, "learning_rate": 1.7069501919989653e-05, "loss": 0.5386, "step": 9969 }, { "epoch": 0.2726427477576023, "grad_norm": 2.3515138626098633, "learning_rate": 1.7068875466183082e-05, "loss": 0.5441, "step": 9970 }, { "epoch": 0.2726700940713192, "grad_norm": 2.9727210998535156, "learning_rate": 1.706824895692317e-05, "loss": 0.871, "step": 9971 }, { "epoch": 0.27269744038503607, "grad_norm": 2.2070302963256836, "learning_rate": 1.7067622392214834e-05, "loss": 0.5793, "step": 9972 }, { "epoch": 0.272724786698753, "grad_norm": 1.887603998184204, "learning_rate": 1.7066995772062994e-05, "loss": 0.56, "step": 9973 }, { "epoch": 0.2727521330124699, "grad_norm": 1.5742857456207275, "learning_rate": 1.7066369096472555e-05, "loss": 0.5396, "step": 9974 }, { "epoch": 0.27277947932618685, "grad_norm": 1.6949020624160767, "learning_rate": 1.706574236544844e-05, "loss": 0.4819, "step": 9975 }, { "epoch": 0.2728068256399037, "grad_norm": 1.4999728202819824, "learning_rate": 1.706511557899557e-05, "loss": 0.5592, "step": 9976 }, { "epoch": 0.27283417195362064, "grad_norm": 2.0004916191101074, "learning_rate": 1.706448873711885e-05, "loss": 0.5458, "step": 9977 }, { "epoch": 0.27286151826733757, "grad_norm": 2.0883982181549072, "learning_rate": 1.7063861839823207e-05, "loss": 0.5469, "step": 9978 }, { "epoch": 0.2728888645810545, "grad_norm": 1.566479206085205, "learning_rate": 1.7063234887113556e-05, "loss": 0.5023, "step": 9979 }, { "epoch": 0.27291621089477136, "grad_norm": 1.4019322395324707, "learning_rate": 1.7062607878994818e-05, "loss": 0.5728, "step": 9980 }, { "epoch": 0.2729435572084883, "grad_norm": 1.3233240842819214, "learning_rate": 1.7061980815471906e-05, "loss": 0.4085, "step": 9981 }, { "epoch": 0.2729709035222052, "grad_norm": 1.4466179609298706, "learning_rate": 1.7061353696549742e-05, "loss": 0.5234, "step": 9982 }, { "epoch": 0.27299824983592214, "grad_norm": 1.3705272674560547, "learning_rate": 1.7060726522233243e-05, "loss": 0.5622, "step": 9983 }, { "epoch": 0.273025596149639, "grad_norm": 1.4860423803329468, "learning_rate": 1.7060099292527335e-05, "loss": 0.5626, "step": 9984 }, { "epoch": 0.27305294246335593, "grad_norm": 1.4551351070404053, "learning_rate": 1.7059472007436934e-05, "loss": 0.5382, "step": 9985 }, { "epoch": 0.27308028877707285, "grad_norm": 1.4440147876739502, "learning_rate": 1.7058844666966957e-05, "loss": 0.5454, "step": 9986 }, { "epoch": 0.2731076350907898, "grad_norm": 1.1920028924942017, "learning_rate": 1.7058217271122336e-05, "loss": 0.5348, "step": 9987 }, { "epoch": 0.27313498140450665, "grad_norm": 1.9543293714523315, "learning_rate": 1.7057589819907985e-05, "loss": 0.5808, "step": 9988 }, { "epoch": 0.2731623277182236, "grad_norm": 1.3780789375305176, "learning_rate": 1.705696231332883e-05, "loss": 0.5468, "step": 9989 }, { "epoch": 0.2731896740319405, "grad_norm": 1.4195700883865356, "learning_rate": 1.705633475138979e-05, "loss": 0.6154, "step": 9990 }, { "epoch": 0.2732170203456574, "grad_norm": 1.2144532203674316, "learning_rate": 1.705570713409579e-05, "loss": 0.5785, "step": 9991 }, { "epoch": 0.2732443666593743, "grad_norm": 1.5749740600585938, "learning_rate": 1.705507946145175e-05, "loss": 0.4144, "step": 9992 }, { "epoch": 0.2732717129730912, "grad_norm": 1.4567621946334839, "learning_rate": 1.7054451733462604e-05, "loss": 0.5577, "step": 9993 }, { "epoch": 0.27329905928680814, "grad_norm": 1.212842583656311, "learning_rate": 1.7053823950133264e-05, "loss": 0.5376, "step": 9994 }, { "epoch": 0.27332640560052507, "grad_norm": 1.5082064867019653, "learning_rate": 1.705319611146866e-05, "loss": 0.5819, "step": 9995 }, { "epoch": 0.27335375191424194, "grad_norm": 1.3715592622756958, "learning_rate": 1.705256821747372e-05, "loss": 0.5257, "step": 9996 }, { "epoch": 0.27338109822795886, "grad_norm": 1.470634937286377, "learning_rate": 1.7051940268153363e-05, "loss": 0.5574, "step": 9997 }, { "epoch": 0.2734084445416758, "grad_norm": 1.7242621183395386, "learning_rate": 1.7051312263512526e-05, "loss": 0.5606, "step": 9998 }, { "epoch": 0.2734357908553927, "grad_norm": 1.5283658504486084, "learning_rate": 1.7050684203556124e-05, "loss": 0.5542, "step": 9999 }, { "epoch": 0.2734631371691096, "grad_norm": 1.1928327083587646, "learning_rate": 1.705005608828909e-05, "loss": 0.5494, "step": 10000 }, { "epoch": 0.2734904834828265, "grad_norm": 1.3719911575317383, "learning_rate": 1.704942791771635e-05, "loss": 0.5573, "step": 10001 }, { "epoch": 0.27351782979654343, "grad_norm": 1.482125163078308, "learning_rate": 1.7048799691842832e-05, "loss": 0.5332, "step": 10002 }, { "epoch": 0.27354517611026036, "grad_norm": 1.4446885585784912, "learning_rate": 1.7048171410673463e-05, "loss": 0.566, "step": 10003 }, { "epoch": 0.2735725224239772, "grad_norm": 2.3648183345794678, "learning_rate": 1.7047543074213174e-05, "loss": 0.5077, "step": 10004 }, { "epoch": 0.27359986873769415, "grad_norm": 1.3481757640838623, "learning_rate": 1.704691468246689e-05, "loss": 0.555, "step": 10005 }, { "epoch": 0.2736272150514111, "grad_norm": 1.1600431203842163, "learning_rate": 1.7046286235439548e-05, "loss": 0.5519, "step": 10006 }, { "epoch": 0.273654561365128, "grad_norm": 1.4676234722137451, "learning_rate": 1.7045657733136073e-05, "loss": 0.5543, "step": 10007 }, { "epoch": 0.27368190767884487, "grad_norm": 1.4631767272949219, "learning_rate": 1.704502917556139e-05, "loss": 0.5668, "step": 10008 }, { "epoch": 0.2737092539925618, "grad_norm": 1.1436998844146729, "learning_rate": 1.7044400562720442e-05, "loss": 0.5527, "step": 10009 }, { "epoch": 0.2737366003062787, "grad_norm": 1.306404709815979, "learning_rate": 1.7043771894618152e-05, "loss": 0.5519, "step": 10010 }, { "epoch": 0.27376394661999565, "grad_norm": 1.2062950134277344, "learning_rate": 1.7043143171259454e-05, "loss": 0.564, "step": 10011 }, { "epoch": 0.2737912929337125, "grad_norm": 1.249045729637146, "learning_rate": 1.704251439264928e-05, "loss": 0.5741, "step": 10012 }, { "epoch": 0.27381863924742944, "grad_norm": 1.6127848625183105, "learning_rate": 1.7041885558792564e-05, "loss": 0.5262, "step": 10013 }, { "epoch": 0.27384598556114637, "grad_norm": 1.1443443298339844, "learning_rate": 1.7041256669694235e-05, "loss": 0.5331, "step": 10014 }, { "epoch": 0.2738733318748633, "grad_norm": 1.3170336484909058, "learning_rate": 1.704062772535923e-05, "loss": 0.5685, "step": 10015 }, { "epoch": 0.27390067818858016, "grad_norm": 1.2922663688659668, "learning_rate": 1.703999872579248e-05, "loss": 0.5524, "step": 10016 }, { "epoch": 0.2739280245022971, "grad_norm": 1.6591424942016602, "learning_rate": 1.7039369670998925e-05, "loss": 0.9255, "step": 10017 }, { "epoch": 0.273955370816014, "grad_norm": 1.2890806198120117, "learning_rate": 1.7038740560983493e-05, "loss": 0.5558, "step": 10018 }, { "epoch": 0.27398271712973093, "grad_norm": 1.4038987159729004, "learning_rate": 1.7038111395751125e-05, "loss": 0.5974, "step": 10019 }, { "epoch": 0.2740100634434478, "grad_norm": 1.444597840309143, "learning_rate": 1.703748217530675e-05, "loss": 0.5691, "step": 10020 }, { "epoch": 0.27403740975716473, "grad_norm": 1.3538252115249634, "learning_rate": 1.703685289965531e-05, "loss": 0.551, "step": 10021 }, { "epoch": 0.27406475607088165, "grad_norm": 1.4172115325927734, "learning_rate": 1.703622356880174e-05, "loss": 0.5323, "step": 10022 }, { "epoch": 0.2740921023845986, "grad_norm": 1.3831113576889038, "learning_rate": 1.7035594182750977e-05, "loss": 0.5444, "step": 10023 }, { "epoch": 0.27411944869831545, "grad_norm": 1.5086380243301392, "learning_rate": 1.7034964741507957e-05, "loss": 0.855, "step": 10024 }, { "epoch": 0.2741467950120324, "grad_norm": 1.3073197603225708, "learning_rate": 1.7034335245077612e-05, "loss": 0.8704, "step": 10025 }, { "epoch": 0.2741741413257493, "grad_norm": 1.364353060722351, "learning_rate": 1.7033705693464894e-05, "loss": 0.578, "step": 10026 }, { "epoch": 0.2742014876394662, "grad_norm": 1.6310523748397827, "learning_rate": 1.7033076086674732e-05, "loss": 0.6182, "step": 10027 }, { "epoch": 0.2742288339531831, "grad_norm": 1.3546802997589111, "learning_rate": 1.7032446424712066e-05, "loss": 0.5515, "step": 10028 }, { "epoch": 0.2742561802669, "grad_norm": 1.4028456211090088, "learning_rate": 1.7031816707581843e-05, "loss": 0.5302, "step": 10029 }, { "epoch": 0.27428352658061694, "grad_norm": 1.3128280639648438, "learning_rate": 1.7031186935288992e-05, "loss": 0.5525, "step": 10030 }, { "epoch": 0.27431087289433387, "grad_norm": 1.4594577550888062, "learning_rate": 1.703055710783846e-05, "loss": 0.5139, "step": 10031 }, { "epoch": 0.27433821920805074, "grad_norm": 3.0301599502563477, "learning_rate": 1.7029927225235183e-05, "loss": 0.5451, "step": 10032 }, { "epoch": 0.27436556552176766, "grad_norm": 1.3865182399749756, "learning_rate": 1.702929728748411e-05, "loss": 0.4898, "step": 10033 }, { "epoch": 0.2743929118354846, "grad_norm": 2.0523269176483154, "learning_rate": 1.7028667294590173e-05, "loss": 0.5433, "step": 10034 }, { "epoch": 0.2744202581492015, "grad_norm": 1.591990351676941, "learning_rate": 1.7028037246558323e-05, "loss": 0.5039, "step": 10035 }, { "epoch": 0.2744476044629184, "grad_norm": 1.3857698440551758, "learning_rate": 1.7027407143393494e-05, "loss": 0.5477, "step": 10036 }, { "epoch": 0.2744749507766353, "grad_norm": 3.1811304092407227, "learning_rate": 1.7026776985100638e-05, "loss": 0.5454, "step": 10037 }, { "epoch": 0.27450229709035223, "grad_norm": 1.8630491495132446, "learning_rate": 1.702614677168469e-05, "loss": 0.5006, "step": 10038 }, { "epoch": 0.27452964340406916, "grad_norm": 1.4263383150100708, "learning_rate": 1.7025516503150603e-05, "loss": 0.5818, "step": 10039 }, { "epoch": 0.274556989717786, "grad_norm": 1.4284292459487915, "learning_rate": 1.7024886179503312e-05, "loss": 0.5378, "step": 10040 }, { "epoch": 0.27458433603150295, "grad_norm": 1.561217188835144, "learning_rate": 1.702425580074777e-05, "loss": 0.5539, "step": 10041 }, { "epoch": 0.2746116823452199, "grad_norm": 1.575221300125122, "learning_rate": 1.7023625366888915e-05, "loss": 0.8613, "step": 10042 }, { "epoch": 0.2746390286589368, "grad_norm": 1.3942288160324097, "learning_rate": 1.7022994877931696e-05, "loss": 0.5706, "step": 10043 }, { "epoch": 0.27466637497265367, "grad_norm": 1.334065556526184, "learning_rate": 1.7022364333881058e-05, "loss": 0.5453, "step": 10044 }, { "epoch": 0.2746937212863706, "grad_norm": 1.394698143005371, "learning_rate": 1.702173373474195e-05, "loss": 0.5497, "step": 10045 }, { "epoch": 0.2747210676000875, "grad_norm": 1.285051703453064, "learning_rate": 1.7021103080519318e-05, "loss": 0.542, "step": 10046 }, { "epoch": 0.27474841391380445, "grad_norm": 1.535361409187317, "learning_rate": 1.7020472371218106e-05, "loss": 0.5519, "step": 10047 }, { "epoch": 0.2747757602275213, "grad_norm": 1.303873896598816, "learning_rate": 1.7019841606843265e-05, "loss": 0.5457, "step": 10048 }, { "epoch": 0.27480310654123824, "grad_norm": 1.7135858535766602, "learning_rate": 1.701921078739974e-05, "loss": 0.5458, "step": 10049 }, { "epoch": 0.27483045285495517, "grad_norm": 1.2019977569580078, "learning_rate": 1.7018579912892482e-05, "loss": 0.5562, "step": 10050 }, { "epoch": 0.2748577991686721, "grad_norm": 1.389573097229004, "learning_rate": 1.701794898332644e-05, "loss": 0.4944, "step": 10051 }, { "epoch": 0.27488514548238896, "grad_norm": 1.5647873878479004, "learning_rate": 1.7017317998706564e-05, "loss": 0.6084, "step": 10052 }, { "epoch": 0.2749124917961059, "grad_norm": 1.1684156656265259, "learning_rate": 1.7016686959037802e-05, "loss": 0.5705, "step": 10053 }, { "epoch": 0.2749398381098228, "grad_norm": 1.3736575841903687, "learning_rate": 1.701605586432511e-05, "loss": 0.5447, "step": 10054 }, { "epoch": 0.27496718442353973, "grad_norm": 1.3576924800872803, "learning_rate": 1.701542471457343e-05, "loss": 0.5266, "step": 10055 }, { "epoch": 0.2749945307372566, "grad_norm": 1.34206223487854, "learning_rate": 1.7014793509787717e-05, "loss": 0.5568, "step": 10056 }, { "epoch": 0.27502187705097353, "grad_norm": 1.239453673362732, "learning_rate": 1.7014162249972924e-05, "loss": 0.5731, "step": 10057 }, { "epoch": 0.27504922336469045, "grad_norm": 1.474970817565918, "learning_rate": 1.7013530935134004e-05, "loss": 0.5687, "step": 10058 }, { "epoch": 0.2750765696784073, "grad_norm": 1.5274863243103027, "learning_rate": 1.7012899565275906e-05, "loss": 0.5683, "step": 10059 }, { "epoch": 0.27510391599212425, "grad_norm": 1.3532514572143555, "learning_rate": 1.7012268140403584e-05, "loss": 0.551, "step": 10060 }, { "epoch": 0.2751312623058412, "grad_norm": 1.200112223625183, "learning_rate": 1.701163666052199e-05, "loss": 0.5504, "step": 10061 }, { "epoch": 0.2751586086195581, "grad_norm": 1.2525594234466553, "learning_rate": 1.7011005125636085e-05, "loss": 0.581, "step": 10062 }, { "epoch": 0.27518595493327497, "grad_norm": 1.800798773765564, "learning_rate": 1.7010373535750814e-05, "loss": 0.8996, "step": 10063 }, { "epoch": 0.2752133012469919, "grad_norm": 1.533705234527588, "learning_rate": 1.7009741890871137e-05, "loss": 0.5734, "step": 10064 }, { "epoch": 0.2752406475607088, "grad_norm": 1.1968929767608643, "learning_rate": 1.7009110191002006e-05, "loss": 0.5473, "step": 10065 }, { "epoch": 0.27526799387442574, "grad_norm": 1.3512295484542847, "learning_rate": 1.7008478436148382e-05, "loss": 0.5549, "step": 10066 }, { "epoch": 0.2752953401881426, "grad_norm": 1.2358763217926025, "learning_rate": 1.7007846626315215e-05, "loss": 0.5233, "step": 10067 }, { "epoch": 0.27532268650185954, "grad_norm": 1.5788716077804565, "learning_rate": 1.7007214761507462e-05, "loss": 0.5153, "step": 10068 }, { "epoch": 0.27535003281557646, "grad_norm": 1.5130970478057861, "learning_rate": 1.7006582841730082e-05, "loss": 0.591, "step": 10069 }, { "epoch": 0.2753773791292934, "grad_norm": 3.624772071838379, "learning_rate": 1.7005950866988026e-05, "loss": 0.8783, "step": 10070 }, { "epoch": 0.27540472544301026, "grad_norm": 1.4052942991256714, "learning_rate": 1.7005318837286265e-05, "loss": 0.5593, "step": 10071 }, { "epoch": 0.2754320717567272, "grad_norm": 1.7111587524414062, "learning_rate": 1.7004686752629748e-05, "loss": 0.5884, "step": 10072 }, { "epoch": 0.2754594180704441, "grad_norm": 1.430942177772522, "learning_rate": 1.7004054613023432e-05, "loss": 0.5669, "step": 10073 }, { "epoch": 0.27548676438416103, "grad_norm": 1.6728960275650024, "learning_rate": 1.7003422418472278e-05, "loss": 0.5449, "step": 10074 }, { "epoch": 0.2755141106978779, "grad_norm": 1.5929367542266846, "learning_rate": 1.700279016898125e-05, "loss": 0.5754, "step": 10075 }, { "epoch": 0.2755414570115948, "grad_norm": 1.3425899744033813, "learning_rate": 1.70021578645553e-05, "loss": 0.4767, "step": 10076 }, { "epoch": 0.27556880332531175, "grad_norm": 1.1949436664581299, "learning_rate": 1.7001525505199396e-05, "loss": 0.5513, "step": 10077 }, { "epoch": 0.2755961496390287, "grad_norm": 1.468401551246643, "learning_rate": 1.7000893090918493e-05, "loss": 0.5514, "step": 10078 }, { "epoch": 0.27562349595274555, "grad_norm": 1.6174333095550537, "learning_rate": 1.7000260621717553e-05, "loss": 0.5671, "step": 10079 }, { "epoch": 0.27565084226646247, "grad_norm": 1.185217261314392, "learning_rate": 1.699962809760154e-05, "loss": 0.5597, "step": 10080 }, { "epoch": 0.2756781885801794, "grad_norm": 1.159021258354187, "learning_rate": 1.6998995518575414e-05, "loss": 0.5681, "step": 10081 }, { "epoch": 0.2757055348938963, "grad_norm": 1.2265522480010986, "learning_rate": 1.6998362884644137e-05, "loss": 0.5599, "step": 10082 }, { "epoch": 0.2757328812076132, "grad_norm": 1.3339022397994995, "learning_rate": 1.6997730195812672e-05, "loss": 0.5603, "step": 10083 }, { "epoch": 0.2757602275213301, "grad_norm": 1.3681448698043823, "learning_rate": 1.6997097452085986e-05, "loss": 0.4127, "step": 10084 }, { "epoch": 0.27578757383504704, "grad_norm": 1.267539620399475, "learning_rate": 1.6996464653469036e-05, "loss": 0.551, "step": 10085 }, { "epoch": 0.27581492014876396, "grad_norm": 1.3821064233779907, "learning_rate": 1.699583179996679e-05, "loss": 0.6028, "step": 10086 }, { "epoch": 0.27584226646248083, "grad_norm": 1.2943147420883179, "learning_rate": 1.6995198891584215e-05, "loss": 0.4374, "step": 10087 }, { "epoch": 0.27586961277619776, "grad_norm": 1.1184008121490479, "learning_rate": 1.6994565928326273e-05, "loss": 0.5151, "step": 10088 }, { "epoch": 0.2758969590899147, "grad_norm": 1.3533854484558105, "learning_rate": 1.6993932910197926e-05, "loss": 0.5395, "step": 10089 }, { "epoch": 0.2759243054036316, "grad_norm": 2.379249095916748, "learning_rate": 1.6993299837204146e-05, "loss": 0.9033, "step": 10090 }, { "epoch": 0.2759516517173485, "grad_norm": 1.1850085258483887, "learning_rate": 1.6992666709349898e-05, "loss": 0.529, "step": 10091 }, { "epoch": 0.2759789980310654, "grad_norm": 1.2298026084899902, "learning_rate": 1.699203352664015e-05, "loss": 0.5625, "step": 10092 }, { "epoch": 0.27600634434478233, "grad_norm": 1.290968418121338, "learning_rate": 1.699140028907986e-05, "loss": 0.5557, "step": 10093 }, { "epoch": 0.27603369065849925, "grad_norm": 1.287154197692871, "learning_rate": 1.699076699667401e-05, "loss": 0.3858, "step": 10094 }, { "epoch": 0.2760610369722161, "grad_norm": 1.4890447854995728, "learning_rate": 1.6990133649427552e-05, "loss": 0.5516, "step": 10095 }, { "epoch": 0.27608838328593305, "grad_norm": 1.528443455696106, "learning_rate": 1.698950024734547e-05, "loss": 0.5727, "step": 10096 }, { "epoch": 0.27611572959965, "grad_norm": 1.3319077491760254, "learning_rate": 1.698886679043272e-05, "loss": 0.5109, "step": 10097 }, { "epoch": 0.2761430759133669, "grad_norm": 1.0623992681503296, "learning_rate": 1.6988233278694278e-05, "loss": 0.5489, "step": 10098 }, { "epoch": 0.27617042222708377, "grad_norm": 1.3644654750823975, "learning_rate": 1.6987599712135117e-05, "loss": 0.5565, "step": 10099 }, { "epoch": 0.2761977685408007, "grad_norm": 1.5022788047790527, "learning_rate": 1.69869660907602e-05, "loss": 0.5412, "step": 10100 }, { "epoch": 0.2762251148545176, "grad_norm": 1.4364769458770752, "learning_rate": 1.6986332414574497e-05, "loss": 0.5586, "step": 10101 }, { "epoch": 0.27625246116823454, "grad_norm": 1.6283217668533325, "learning_rate": 1.698569868358299e-05, "loss": 0.5098, "step": 10102 }, { "epoch": 0.2762798074819514, "grad_norm": 1.3518284559249878, "learning_rate": 1.6985064897790634e-05, "loss": 0.5693, "step": 10103 }, { "epoch": 0.27630715379566834, "grad_norm": 1.2552523612976074, "learning_rate": 1.6984431057202416e-05, "loss": 0.5544, "step": 10104 }, { "epoch": 0.27633450010938526, "grad_norm": 1.44459068775177, "learning_rate": 1.69837971618233e-05, "loss": 0.5279, "step": 10105 }, { "epoch": 0.2763618464231022, "grad_norm": 1.3908909559249878, "learning_rate": 1.698316321165826e-05, "loss": 0.551, "step": 10106 }, { "epoch": 0.27638919273681906, "grad_norm": 1.2589266300201416, "learning_rate": 1.698252920671227e-05, "loss": 0.5863, "step": 10107 }, { "epoch": 0.276416539050536, "grad_norm": 1.3640103340148926, "learning_rate": 1.6981895146990303e-05, "loss": 0.4816, "step": 10108 }, { "epoch": 0.2764438853642529, "grad_norm": 1.1893826723098755, "learning_rate": 1.6981261032497333e-05, "loss": 0.5727, "step": 10109 }, { "epoch": 0.27647123167796983, "grad_norm": 1.0523793697357178, "learning_rate": 1.6980626863238337e-05, "loss": 0.5544, "step": 10110 }, { "epoch": 0.2764985779916867, "grad_norm": 1.0294697284698486, "learning_rate": 1.6979992639218287e-05, "loss": 0.557, "step": 10111 }, { "epoch": 0.2765259243054036, "grad_norm": 1.3950484991073608, "learning_rate": 1.697935836044216e-05, "loss": 0.5325, "step": 10112 }, { "epoch": 0.27655327061912055, "grad_norm": 1.5846738815307617, "learning_rate": 1.6978724026914925e-05, "loss": 0.8816, "step": 10113 }, { "epoch": 0.2765806169328375, "grad_norm": 1.2580316066741943, "learning_rate": 1.6978089638641567e-05, "loss": 0.5519, "step": 10114 }, { "epoch": 0.27660796324655434, "grad_norm": 1.4544715881347656, "learning_rate": 1.6977455195627065e-05, "loss": 0.5468, "step": 10115 }, { "epoch": 0.27663530956027127, "grad_norm": 1.8429173231124878, "learning_rate": 1.6976820697876386e-05, "loss": 0.5417, "step": 10116 }, { "epoch": 0.2766626558739882, "grad_norm": 1.3182650804519653, "learning_rate": 1.6976186145394513e-05, "loss": 0.5609, "step": 10117 }, { "epoch": 0.2766900021877051, "grad_norm": 1.2121760845184326, "learning_rate": 1.697555153818642e-05, "loss": 0.5399, "step": 10118 }, { "epoch": 0.276717348501422, "grad_norm": 1.4734150171279907, "learning_rate": 1.6974916876257095e-05, "loss": 0.5647, "step": 10119 }, { "epoch": 0.2767446948151389, "grad_norm": 1.4757877588272095, "learning_rate": 1.6974282159611506e-05, "loss": 0.5657, "step": 10120 }, { "epoch": 0.27677204112885584, "grad_norm": 1.174253225326538, "learning_rate": 1.6973647388254636e-05, "loss": 0.5372, "step": 10121 }, { "epoch": 0.27679938744257276, "grad_norm": 1.3689255714416504, "learning_rate": 1.6973012562191465e-05, "loss": 0.5259, "step": 10122 }, { "epoch": 0.27682673375628963, "grad_norm": 1.460742712020874, "learning_rate": 1.697237768142697e-05, "loss": 0.5618, "step": 10123 }, { "epoch": 0.27685408007000656, "grad_norm": 1.4012115001678467, "learning_rate": 1.697174274596614e-05, "loss": 0.5259, "step": 10124 }, { "epoch": 0.2768814263837235, "grad_norm": 1.6477885246276855, "learning_rate": 1.6971107755813946e-05, "loss": 0.5455, "step": 10125 }, { "epoch": 0.2769087726974404, "grad_norm": 1.1442914009094238, "learning_rate": 1.6970472710975373e-05, "loss": 0.5613, "step": 10126 }, { "epoch": 0.2769361190111573, "grad_norm": 1.1240781545639038, "learning_rate": 1.6969837611455408e-05, "loss": 0.5555, "step": 10127 }, { "epoch": 0.2769634653248742, "grad_norm": 1.289411187171936, "learning_rate": 1.6969202457259023e-05, "loss": 0.5309, "step": 10128 }, { "epoch": 0.27699081163859113, "grad_norm": 1.4673768281936646, "learning_rate": 1.696856724839121e-05, "loss": 0.577, "step": 10129 }, { "epoch": 0.27701815795230805, "grad_norm": 1.2109954357147217, "learning_rate": 1.6967931984856946e-05, "loss": 0.5279, "step": 10130 }, { "epoch": 0.2770455042660249, "grad_norm": 1.2009894847869873, "learning_rate": 1.696729666666122e-05, "loss": 0.5259, "step": 10131 }, { "epoch": 0.27707285057974185, "grad_norm": 1.605635166168213, "learning_rate": 1.696666129380901e-05, "loss": 0.5667, "step": 10132 }, { "epoch": 0.2771001968934588, "grad_norm": 1.3851118087768555, "learning_rate": 1.69660258663053e-05, "loss": 0.5165, "step": 10133 }, { "epoch": 0.2771275432071757, "grad_norm": 2.0753822326660156, "learning_rate": 1.6965390384155084e-05, "loss": 0.9237, "step": 10134 }, { "epoch": 0.27715488952089257, "grad_norm": 1.2871510982513428, "learning_rate": 1.6964754847363332e-05, "loss": 0.48, "step": 10135 }, { "epoch": 0.2771822358346095, "grad_norm": 1.4180604219436646, "learning_rate": 1.6964119255935045e-05, "loss": 0.8798, "step": 10136 }, { "epoch": 0.2772095821483264, "grad_norm": 1.1384531259536743, "learning_rate": 1.69634836098752e-05, "loss": 0.5979, "step": 10137 }, { "epoch": 0.27723692846204334, "grad_norm": 1.432326078414917, "learning_rate": 1.6962847909188786e-05, "loss": 0.5702, "step": 10138 }, { "epoch": 0.2772642747757602, "grad_norm": 1.1657956838607788, "learning_rate": 1.696221215388079e-05, "loss": 0.573, "step": 10139 }, { "epoch": 0.27729162108947714, "grad_norm": 1.3383331298828125, "learning_rate": 1.6961576343956195e-05, "loss": 0.5636, "step": 10140 }, { "epoch": 0.27731896740319406, "grad_norm": 1.5339858531951904, "learning_rate": 1.696094047942e-05, "loss": 0.8922, "step": 10141 }, { "epoch": 0.277346313716911, "grad_norm": 1.4527560472488403, "learning_rate": 1.696030456027718e-05, "loss": 0.5685, "step": 10142 }, { "epoch": 0.27737366003062786, "grad_norm": 1.6663216352462769, "learning_rate": 1.695966858653273e-05, "loss": 0.5621, "step": 10143 }, { "epoch": 0.2774010063443448, "grad_norm": 1.6231868267059326, "learning_rate": 1.695903255819164e-05, "loss": 0.8863, "step": 10144 }, { "epoch": 0.2774283526580617, "grad_norm": 1.1130399703979492, "learning_rate": 1.6958396475258897e-05, "loss": 0.5258, "step": 10145 }, { "epoch": 0.27745569897177863, "grad_norm": 1.0774685144424438, "learning_rate": 1.6957760337739496e-05, "loss": 0.576, "step": 10146 }, { "epoch": 0.2774830452854955, "grad_norm": 1.384926676750183, "learning_rate": 1.695712414563842e-05, "loss": 0.5316, "step": 10147 }, { "epoch": 0.2775103915992124, "grad_norm": 1.2290749549865723, "learning_rate": 1.6956487898960662e-05, "loss": 0.5583, "step": 10148 }, { "epoch": 0.27753773791292935, "grad_norm": 1.257099986076355, "learning_rate": 1.6955851597711216e-05, "loss": 0.5547, "step": 10149 }, { "epoch": 0.2775650842266463, "grad_norm": 1.1939436197280884, "learning_rate": 1.6955215241895074e-05, "loss": 0.4526, "step": 10150 }, { "epoch": 0.27759243054036314, "grad_norm": 1.0257858037948608, "learning_rate": 1.695457883151722e-05, "loss": 0.5487, "step": 10151 }, { "epoch": 0.27761977685408007, "grad_norm": 1.460661768913269, "learning_rate": 1.6953942366582658e-05, "loss": 0.5033, "step": 10152 }, { "epoch": 0.277647123167797, "grad_norm": 1.2895567417144775, "learning_rate": 1.695330584709637e-05, "loss": 0.5466, "step": 10153 }, { "epoch": 0.2776744694815139, "grad_norm": 1.3347009420394897, "learning_rate": 1.695266927306336e-05, "loss": 0.5367, "step": 10154 }, { "epoch": 0.2777018157952308, "grad_norm": 2.285407781600952, "learning_rate": 1.6952032644488613e-05, "loss": 0.8741, "step": 10155 }, { "epoch": 0.2777291621089477, "grad_norm": 1.5849159955978394, "learning_rate": 1.6951395961377125e-05, "loss": 0.5335, "step": 10156 }, { "epoch": 0.27775650842266464, "grad_norm": 1.1591187715530396, "learning_rate": 1.6950759223733895e-05, "loss": 0.5468, "step": 10157 }, { "epoch": 0.2777838547363815, "grad_norm": 1.3088233470916748, "learning_rate": 1.6950122431563912e-05, "loss": 0.5676, "step": 10158 }, { "epoch": 0.27781120105009843, "grad_norm": 1.233266830444336, "learning_rate": 1.6949485584872177e-05, "loss": 0.4928, "step": 10159 }, { "epoch": 0.27783854736381536, "grad_norm": 1.208345651626587, "learning_rate": 1.694884868366368e-05, "loss": 0.5334, "step": 10160 }, { "epoch": 0.2778658936775323, "grad_norm": 1.4085689783096313, "learning_rate": 1.6948211727943423e-05, "loss": 0.5621, "step": 10161 }, { "epoch": 0.27789323999124915, "grad_norm": 1.1303911209106445, "learning_rate": 1.6947574717716403e-05, "loss": 0.5377, "step": 10162 }, { "epoch": 0.2779205863049661, "grad_norm": 1.2652995586395264, "learning_rate": 1.694693765298761e-05, "loss": 0.52, "step": 10163 }, { "epoch": 0.277947932618683, "grad_norm": 1.2841016054153442, "learning_rate": 1.6946300533762048e-05, "loss": 0.557, "step": 10164 }, { "epoch": 0.2779752789323999, "grad_norm": 2.354759693145752, "learning_rate": 1.6945663360044712e-05, "loss": 0.4191, "step": 10165 }, { "epoch": 0.2780026252461168, "grad_norm": 1.09615159034729, "learning_rate": 1.6945026131840606e-05, "loss": 0.5139, "step": 10166 }, { "epoch": 0.2780299715598337, "grad_norm": 1.2969895601272583, "learning_rate": 1.694438884915472e-05, "loss": 0.5599, "step": 10167 }, { "epoch": 0.27805731787355065, "grad_norm": 1.6632394790649414, "learning_rate": 1.6943751511992058e-05, "loss": 0.4193, "step": 10168 }, { "epoch": 0.27808466418726757, "grad_norm": 1.4372830390930176, "learning_rate": 1.694311412035762e-05, "loss": 0.5774, "step": 10169 }, { "epoch": 0.27811201050098444, "grad_norm": 1.2378963232040405, "learning_rate": 1.694247667425641e-05, "loss": 0.561, "step": 10170 }, { "epoch": 0.27813935681470137, "grad_norm": 1.3250200748443604, "learning_rate": 1.6941839173693417e-05, "loss": 0.5513, "step": 10171 }, { "epoch": 0.2781667031284183, "grad_norm": 1.1291178464889526, "learning_rate": 1.6941201618673657e-05, "loss": 0.3673, "step": 10172 }, { "epoch": 0.2781940494421352, "grad_norm": 1.405946969985962, "learning_rate": 1.6940564009202115e-05, "loss": 0.5585, "step": 10173 }, { "epoch": 0.2782213957558521, "grad_norm": 1.490336298942566, "learning_rate": 1.6939926345283807e-05, "loss": 0.572, "step": 10174 }, { "epoch": 0.278248742069569, "grad_norm": 1.3713945150375366, "learning_rate": 1.6939288626923727e-05, "loss": 0.5855, "step": 10175 }, { "epoch": 0.27827608838328594, "grad_norm": 1.0369973182678223, "learning_rate": 1.6938650854126886e-05, "loss": 0.5337, "step": 10176 }, { "epoch": 0.27830343469700286, "grad_norm": 1.2822784185409546, "learning_rate": 1.693801302689828e-05, "loss": 0.5023, "step": 10177 }, { "epoch": 0.27833078101071973, "grad_norm": 1.1394575834274292, "learning_rate": 1.693737514524291e-05, "loss": 0.5437, "step": 10178 }, { "epoch": 0.27835812732443665, "grad_norm": 1.1190825700759888, "learning_rate": 1.693673720916579e-05, "loss": 0.5447, "step": 10179 }, { "epoch": 0.2783854736381536, "grad_norm": 1.566301941871643, "learning_rate": 1.6936099218671912e-05, "loss": 0.484, "step": 10180 }, { "epoch": 0.2784128199518705, "grad_norm": 1.1501288414001465, "learning_rate": 1.6935461173766296e-05, "loss": 0.546, "step": 10181 }, { "epoch": 0.2784401662655874, "grad_norm": 1.3595061302185059, "learning_rate": 1.693482307445393e-05, "loss": 0.5777, "step": 10182 }, { "epoch": 0.2784675125793043, "grad_norm": 1.5995064973831177, "learning_rate": 1.6934184920739834e-05, "loss": 0.8726, "step": 10183 }, { "epoch": 0.2784948588930212, "grad_norm": 1.2784582376480103, "learning_rate": 1.6933546712629007e-05, "loss": 0.5257, "step": 10184 }, { "epoch": 0.27852220520673815, "grad_norm": 1.2567776441574097, "learning_rate": 1.693290845012646e-05, "loss": 0.5613, "step": 10185 }, { "epoch": 0.278549551520455, "grad_norm": 1.3308796882629395, "learning_rate": 1.6932270133237193e-05, "loss": 0.5506, "step": 10186 }, { "epoch": 0.27857689783417194, "grad_norm": 1.5038493871688843, "learning_rate": 1.693163176196622e-05, "loss": 0.434, "step": 10187 }, { "epoch": 0.27860424414788887, "grad_norm": 1.2669545412063599, "learning_rate": 1.693099333631855e-05, "loss": 0.5399, "step": 10188 }, { "epoch": 0.2786315904616058, "grad_norm": 1.1820604801177979, "learning_rate": 1.6930354856299183e-05, "loss": 0.5486, "step": 10189 }, { "epoch": 0.27865893677532266, "grad_norm": 1.582870364189148, "learning_rate": 1.6929716321913136e-05, "loss": 0.559, "step": 10190 }, { "epoch": 0.2786862830890396, "grad_norm": 1.2709344625473022, "learning_rate": 1.692907773316541e-05, "loss": 0.5464, "step": 10191 }, { "epoch": 0.2787136294027565, "grad_norm": 1.2265678644180298, "learning_rate": 1.6928439090061024e-05, "loss": 0.5593, "step": 10192 }, { "epoch": 0.27874097571647344, "grad_norm": 1.1559051275253296, "learning_rate": 1.692780039260498e-05, "loss": 0.5958, "step": 10193 }, { "epoch": 0.2787683220301903, "grad_norm": 1.3465323448181152, "learning_rate": 1.692716164080229e-05, "loss": 0.9149, "step": 10194 }, { "epoch": 0.27879566834390723, "grad_norm": 1.3755632638931274, "learning_rate": 1.692652283465797e-05, "loss": 0.4348, "step": 10195 }, { "epoch": 0.27882301465762416, "grad_norm": 1.1331580877304077, "learning_rate": 1.6925883974177028e-05, "loss": 0.5481, "step": 10196 }, { "epoch": 0.2788503609713411, "grad_norm": 1.5333871841430664, "learning_rate": 1.6925245059364472e-05, "loss": 0.4849, "step": 10197 }, { "epoch": 0.27887770728505795, "grad_norm": 1.5696450471878052, "learning_rate": 1.6924606090225323e-05, "loss": 0.5871, "step": 10198 }, { "epoch": 0.2789050535987749, "grad_norm": 1.2448244094848633, "learning_rate": 1.6923967066764583e-05, "loss": 0.4972, "step": 10199 }, { "epoch": 0.2789323999124918, "grad_norm": 1.4807463884353638, "learning_rate": 1.692332798898727e-05, "loss": 0.5807, "step": 10200 }, { "epoch": 0.2789597462262087, "grad_norm": 1.1747639179229736, "learning_rate": 1.6922688856898398e-05, "loss": 0.5447, "step": 10201 }, { "epoch": 0.2789870925399256, "grad_norm": 1.09153413772583, "learning_rate": 1.692204967050298e-05, "loss": 0.522, "step": 10202 }, { "epoch": 0.2790144388536425, "grad_norm": 1.1121073961257935, "learning_rate": 1.692141042980603e-05, "loss": 0.5524, "step": 10203 }, { "epoch": 0.27904178516735945, "grad_norm": 1.0999432802200317, "learning_rate": 1.6920771134812568e-05, "loss": 0.364, "step": 10204 }, { "epoch": 0.27906913148107637, "grad_norm": 1.3323085308074951, "learning_rate": 1.69201317855276e-05, "loss": 0.8706, "step": 10205 }, { "epoch": 0.27909647779479324, "grad_norm": 1.0296648740768433, "learning_rate": 1.6919492381956146e-05, "loss": 0.3828, "step": 10206 }, { "epoch": 0.27912382410851017, "grad_norm": 1.5009758472442627, "learning_rate": 1.691885292410322e-05, "loss": 0.5182, "step": 10207 }, { "epoch": 0.2791511704222271, "grad_norm": 1.5213388204574585, "learning_rate": 1.6918213411973843e-05, "loss": 0.5706, "step": 10208 }, { "epoch": 0.279178516735944, "grad_norm": 1.4993321895599365, "learning_rate": 1.6917573845573027e-05, "loss": 0.5276, "step": 10209 }, { "epoch": 0.2792058630496609, "grad_norm": 1.2917401790618896, "learning_rate": 1.691693422490579e-05, "loss": 0.5176, "step": 10210 }, { "epoch": 0.2792332093633778, "grad_norm": 1.4457979202270508, "learning_rate": 1.6916294549977154e-05, "loss": 0.4979, "step": 10211 }, { "epoch": 0.27926055567709474, "grad_norm": 1.414915680885315, "learning_rate": 1.691565482079213e-05, "loss": 0.4374, "step": 10212 }, { "epoch": 0.27928790199081166, "grad_norm": 1.3281478881835938, "learning_rate": 1.691501503735574e-05, "loss": 0.5416, "step": 10213 }, { "epoch": 0.27931524830452853, "grad_norm": 1.5363413095474243, "learning_rate": 1.6914375199673005e-05, "loss": 0.5727, "step": 10214 }, { "epoch": 0.27934259461824545, "grad_norm": 1.147581696510315, "learning_rate": 1.6913735307748943e-05, "loss": 0.5695, "step": 10215 }, { "epoch": 0.2793699409319624, "grad_norm": 1.5279690027236938, "learning_rate": 1.6913095361588572e-05, "loss": 0.5535, "step": 10216 }, { "epoch": 0.2793972872456793, "grad_norm": 1.4964994192123413, "learning_rate": 1.6912455361196914e-05, "loss": 0.5662, "step": 10217 }, { "epoch": 0.2794246335593962, "grad_norm": 1.3441969156265259, "learning_rate": 1.691181530657899e-05, "loss": 0.5443, "step": 10218 }, { "epoch": 0.2794519798731131, "grad_norm": 1.7483899593353271, "learning_rate": 1.6911175197739814e-05, "loss": 0.5339, "step": 10219 }, { "epoch": 0.27947932618683, "grad_norm": 1.1947208642959595, "learning_rate": 1.691053503468442e-05, "loss": 0.5524, "step": 10220 }, { "epoch": 0.27950667250054695, "grad_norm": 1.0823307037353516, "learning_rate": 1.690989481741782e-05, "loss": 0.5328, "step": 10221 }, { "epoch": 0.2795340188142638, "grad_norm": 1.3127082586288452, "learning_rate": 1.6909254545945045e-05, "loss": 0.5516, "step": 10222 }, { "epoch": 0.27956136512798074, "grad_norm": 1.4651319980621338, "learning_rate": 1.690861422027111e-05, "loss": 0.4834, "step": 10223 }, { "epoch": 0.27958871144169767, "grad_norm": 1.3253190517425537, "learning_rate": 1.690797384040104e-05, "loss": 0.5471, "step": 10224 }, { "epoch": 0.2796160577554146, "grad_norm": 1.5269206762313843, "learning_rate": 1.690733340633986e-05, "loss": 0.5427, "step": 10225 }, { "epoch": 0.27964340406913146, "grad_norm": 1.8680139780044556, "learning_rate": 1.6906692918092588e-05, "loss": 0.4146, "step": 10226 }, { "epoch": 0.2796707503828484, "grad_norm": 1.4027771949768066, "learning_rate": 1.6906052375664258e-05, "loss": 0.5666, "step": 10227 }, { "epoch": 0.2796980966965653, "grad_norm": 1.395301103591919, "learning_rate": 1.6905411779059894e-05, "loss": 0.5465, "step": 10228 }, { "epoch": 0.27972544301028224, "grad_norm": 1.2615660429000854, "learning_rate": 1.690477112828451e-05, "loss": 0.5594, "step": 10229 }, { "epoch": 0.2797527893239991, "grad_norm": 1.1890413761138916, "learning_rate": 1.690413042334315e-05, "loss": 0.5443, "step": 10230 }, { "epoch": 0.27978013563771603, "grad_norm": 0.9702029824256897, "learning_rate": 1.6903489664240823e-05, "loss": 0.5583, "step": 10231 }, { "epoch": 0.27980748195143296, "grad_norm": 1.3735430240631104, "learning_rate": 1.6902848850982562e-05, "loss": 0.5815, "step": 10232 }, { "epoch": 0.2798348282651499, "grad_norm": 1.3822821378707886, "learning_rate": 1.6902207983573396e-05, "loss": 0.4315, "step": 10233 }, { "epoch": 0.27986217457886675, "grad_norm": 2.918278694152832, "learning_rate": 1.690156706201835e-05, "loss": 0.5396, "step": 10234 }, { "epoch": 0.2798895208925837, "grad_norm": 1.2237162590026855, "learning_rate": 1.6900926086322456e-05, "loss": 0.5367, "step": 10235 }, { "epoch": 0.2799168672063006, "grad_norm": 1.2268651723861694, "learning_rate": 1.6900285056490735e-05, "loss": 0.5351, "step": 10236 }, { "epoch": 0.2799442135200175, "grad_norm": 1.0826114416122437, "learning_rate": 1.689964397252822e-05, "loss": 0.5409, "step": 10237 }, { "epoch": 0.2799715598337344, "grad_norm": 1.781418800354004, "learning_rate": 1.689900283443994e-05, "loss": 0.4548, "step": 10238 }, { "epoch": 0.2799989061474513, "grad_norm": 1.5550111532211304, "learning_rate": 1.689836164223093e-05, "loss": 0.5586, "step": 10239 }, { "epoch": 0.28002625246116825, "grad_norm": 1.9193332195281982, "learning_rate": 1.689772039590621e-05, "loss": 0.5645, "step": 10240 }, { "epoch": 0.28005359877488517, "grad_norm": 1.1720390319824219, "learning_rate": 1.6897079095470812e-05, "loss": 0.4247, "step": 10241 }, { "epoch": 0.28008094508860204, "grad_norm": 1.6179935932159424, "learning_rate": 1.6896437740929773e-05, "loss": 0.5432, "step": 10242 }, { "epoch": 0.28010829140231897, "grad_norm": 1.4467227458953857, "learning_rate": 1.689579633228812e-05, "loss": 0.5493, "step": 10243 }, { "epoch": 0.2801356377160359, "grad_norm": 1.363998532295227, "learning_rate": 1.689515486955088e-05, "loss": 0.9171, "step": 10244 }, { "epoch": 0.2801629840297528, "grad_norm": 1.3082938194274902, "learning_rate": 1.6894513352723098e-05, "loss": 0.5494, "step": 10245 }, { "epoch": 0.2801903303434697, "grad_norm": 1.6983554363250732, "learning_rate": 1.6893871781809792e-05, "loss": 0.5574, "step": 10246 }, { "epoch": 0.2802176766571866, "grad_norm": 1.3428781032562256, "learning_rate": 1.689323015681601e-05, "loss": 0.5556, "step": 10247 }, { "epoch": 0.28024502297090353, "grad_norm": 1.033881664276123, "learning_rate": 1.6892588477746774e-05, "loss": 0.5061, "step": 10248 }, { "epoch": 0.28027236928462046, "grad_norm": 1.0925378799438477, "learning_rate": 1.689194674460712e-05, "loss": 0.5738, "step": 10249 }, { "epoch": 0.28029971559833733, "grad_norm": 1.27777099609375, "learning_rate": 1.6891304957402085e-05, "loss": 0.5821, "step": 10250 }, { "epoch": 0.28032706191205425, "grad_norm": 1.2951833009719849, "learning_rate": 1.68906631161367e-05, "loss": 0.5574, "step": 10251 }, { "epoch": 0.2803544082257712, "grad_norm": 1.6319974660873413, "learning_rate": 1.6890021220816004e-05, "loss": 0.5429, "step": 10252 }, { "epoch": 0.2803817545394881, "grad_norm": 1.5121604204177856, "learning_rate": 1.6889379271445027e-05, "loss": 0.5418, "step": 10253 }, { "epoch": 0.280409100853205, "grad_norm": 1.1671189069747925, "learning_rate": 1.688873726802881e-05, "loss": 0.5468, "step": 10254 }, { "epoch": 0.2804364471669219, "grad_norm": 1.19471275806427, "learning_rate": 1.6888095210572392e-05, "loss": 0.5663, "step": 10255 }, { "epoch": 0.2804637934806388, "grad_norm": 1.0180832147598267, "learning_rate": 1.68874530990808e-05, "loss": 0.5385, "step": 10256 }, { "epoch": 0.28049113979435575, "grad_norm": 1.1896892786026, "learning_rate": 1.6886810933559084e-05, "loss": 0.5371, "step": 10257 }, { "epoch": 0.2805184861080726, "grad_norm": 1.4500529766082764, "learning_rate": 1.688616871401227e-05, "loss": 0.8963, "step": 10258 }, { "epoch": 0.28054583242178954, "grad_norm": 1.1237313747406006, "learning_rate": 1.6885526440445397e-05, "loss": 0.5559, "step": 10259 }, { "epoch": 0.28057317873550647, "grad_norm": 1.1065435409545898, "learning_rate": 1.688488411286351e-05, "loss": 0.5121, "step": 10260 }, { "epoch": 0.28060052504922334, "grad_norm": 1.4754629135131836, "learning_rate": 1.6884241731271643e-05, "loss": 0.5743, "step": 10261 }, { "epoch": 0.28062787136294026, "grad_norm": 1.053484559059143, "learning_rate": 1.688359929567484e-05, "loss": 0.5292, "step": 10262 }, { "epoch": 0.2806552176766572, "grad_norm": 3.5149827003479004, "learning_rate": 1.6882956806078136e-05, "loss": 0.5491, "step": 10263 }, { "epoch": 0.2806825639903741, "grad_norm": 1.2513161897659302, "learning_rate": 1.6882314262486573e-05, "loss": 0.5808, "step": 10264 }, { "epoch": 0.280709910304091, "grad_norm": 1.3551639318466187, "learning_rate": 1.6881671664905192e-05, "loss": 0.3988, "step": 10265 }, { "epoch": 0.2807372566178079, "grad_norm": 1.1572390794754028, "learning_rate": 1.6881029013339033e-05, "loss": 0.5742, "step": 10266 }, { "epoch": 0.28076460293152483, "grad_norm": 1.4928221702575684, "learning_rate": 1.688038630779314e-05, "loss": 0.875, "step": 10267 }, { "epoch": 0.28079194924524176, "grad_norm": 1.3639321327209473, "learning_rate": 1.687974354827255e-05, "loss": 0.594, "step": 10268 }, { "epoch": 0.2808192955589586, "grad_norm": 3.112607002258301, "learning_rate": 1.6879100734782307e-05, "loss": 0.4169, "step": 10269 }, { "epoch": 0.28084664187267555, "grad_norm": 1.3928452730178833, "learning_rate": 1.6878457867327457e-05, "loss": 0.5397, "step": 10270 }, { "epoch": 0.2808739881863925, "grad_norm": 1.1779447793960571, "learning_rate": 1.6877814945913037e-05, "loss": 0.5605, "step": 10271 }, { "epoch": 0.2809013345001094, "grad_norm": 1.3629398345947266, "learning_rate": 1.68771719705441e-05, "loss": 0.8881, "step": 10272 }, { "epoch": 0.28092868081382627, "grad_norm": 1.3056609630584717, "learning_rate": 1.687652894122568e-05, "loss": 0.5301, "step": 10273 }, { "epoch": 0.2809560271275432, "grad_norm": 1.5083626508712769, "learning_rate": 1.687588585796283e-05, "loss": 0.5465, "step": 10274 }, { "epoch": 0.2809833734412601, "grad_norm": 1.2912641763687134, "learning_rate": 1.6875242720760588e-05, "loss": 0.4832, "step": 10275 }, { "epoch": 0.28101071975497705, "grad_norm": 1.5628154277801514, "learning_rate": 1.6874599529624003e-05, "loss": 0.8947, "step": 10276 }, { "epoch": 0.2810380660686939, "grad_norm": 1.4923639297485352, "learning_rate": 1.6873956284558114e-05, "loss": 0.5644, "step": 10277 }, { "epoch": 0.28106541238241084, "grad_norm": 1.3298417329788208, "learning_rate": 1.687331298556798e-05, "loss": 0.5645, "step": 10278 }, { "epoch": 0.28109275869612776, "grad_norm": 1.0713919401168823, "learning_rate": 1.6872669632658633e-05, "loss": 0.513, "step": 10279 }, { "epoch": 0.2811201050098447, "grad_norm": 1.3752219676971436, "learning_rate": 1.6872026225835133e-05, "loss": 0.4053, "step": 10280 }, { "epoch": 0.28114745132356156, "grad_norm": 1.3238880634307861, "learning_rate": 1.6871382765102516e-05, "loss": 0.5397, "step": 10281 }, { "epoch": 0.2811747976372785, "grad_norm": 1.9583443403244019, "learning_rate": 1.687073925046584e-05, "loss": 0.8586, "step": 10282 }, { "epoch": 0.2812021439509954, "grad_norm": 1.2707288265228271, "learning_rate": 1.687009568193014e-05, "loss": 0.5478, "step": 10283 }, { "epoch": 0.28122949026471233, "grad_norm": 1.6043106317520142, "learning_rate": 1.6869452059500484e-05, "loss": 0.5655, "step": 10284 }, { "epoch": 0.2812568365784292, "grad_norm": 1.082548975944519, "learning_rate": 1.6868808383181902e-05, "loss": 0.56, "step": 10285 }, { "epoch": 0.28128418289214613, "grad_norm": 1.3076838254928589, "learning_rate": 1.6868164652979454e-05, "loss": 0.505, "step": 10286 }, { "epoch": 0.28131152920586305, "grad_norm": 1.3218625783920288, "learning_rate": 1.6867520868898187e-05, "loss": 0.5395, "step": 10287 }, { "epoch": 0.28133887551958, "grad_norm": 1.2072789669036865, "learning_rate": 1.686687703094315e-05, "loss": 0.5344, "step": 10288 }, { "epoch": 0.28136622183329685, "grad_norm": 1.3096861839294434, "learning_rate": 1.6866233139119397e-05, "loss": 0.5437, "step": 10289 }, { "epoch": 0.2813935681470138, "grad_norm": 1.4535092115402222, "learning_rate": 1.6865589193431977e-05, "loss": 0.5301, "step": 10290 }, { "epoch": 0.2814209144607307, "grad_norm": 1.3111709356307983, "learning_rate": 1.686494519388594e-05, "loss": 0.5633, "step": 10291 }, { "epoch": 0.2814482607744476, "grad_norm": 1.1003479957580566, "learning_rate": 1.6864301140486343e-05, "loss": 0.5267, "step": 10292 }, { "epoch": 0.2814756070881645, "grad_norm": 1.3108516931533813, "learning_rate": 1.6863657033238235e-05, "loss": 0.4655, "step": 10293 }, { "epoch": 0.2815029534018814, "grad_norm": 1.386549711227417, "learning_rate": 1.6863012872146668e-05, "loss": 0.5293, "step": 10294 }, { "epoch": 0.28153029971559834, "grad_norm": 1.1910570859909058, "learning_rate": 1.6862368657216698e-05, "loss": 0.5665, "step": 10295 }, { "epoch": 0.28155764602931527, "grad_norm": 1.1884595155715942, "learning_rate": 1.6861724388453376e-05, "loss": 0.5577, "step": 10296 }, { "epoch": 0.28158499234303214, "grad_norm": 1.2569488286972046, "learning_rate": 1.6861080065861757e-05, "loss": 0.4815, "step": 10297 }, { "epoch": 0.28161233865674906, "grad_norm": 1.411734938621521, "learning_rate": 1.6860435689446893e-05, "loss": 0.536, "step": 10298 }, { "epoch": 0.281639684970466, "grad_norm": 1.8225140571594238, "learning_rate": 1.6859791259213843e-05, "loss": 0.5308, "step": 10299 }, { "epoch": 0.2816670312841829, "grad_norm": 1.1840931177139282, "learning_rate": 1.6859146775167662e-05, "loss": 0.5178, "step": 10300 }, { "epoch": 0.2816943775978998, "grad_norm": 1.4559862613677979, "learning_rate": 1.6858502237313407e-05, "loss": 0.5443, "step": 10301 }, { "epoch": 0.2817217239116167, "grad_norm": 1.4364701509475708, "learning_rate": 1.685785764565613e-05, "loss": 0.6012, "step": 10302 }, { "epoch": 0.28174907022533363, "grad_norm": 1.2039271593093872, "learning_rate": 1.685721300020089e-05, "loss": 0.5105, "step": 10303 }, { "epoch": 0.28177641653905056, "grad_norm": 1.1746759414672852, "learning_rate": 1.6856568300952744e-05, "loss": 0.5334, "step": 10304 }, { "epoch": 0.2818037628527674, "grad_norm": 1.408406138420105, "learning_rate": 1.6855923547916752e-05, "loss": 0.5388, "step": 10305 }, { "epoch": 0.28183110916648435, "grad_norm": 1.3596564531326294, "learning_rate": 1.6855278741097966e-05, "loss": 0.5491, "step": 10306 }, { "epoch": 0.2818584554802013, "grad_norm": 1.4976351261138916, "learning_rate": 1.6854633880501445e-05, "loss": 0.8832, "step": 10307 }, { "epoch": 0.2818858017939182, "grad_norm": 1.3073581457138062, "learning_rate": 1.6853988966132253e-05, "loss": 0.5164, "step": 10308 }, { "epoch": 0.28191314810763507, "grad_norm": 1.1836167573928833, "learning_rate": 1.685334399799545e-05, "loss": 0.5631, "step": 10309 }, { "epoch": 0.281940494421352, "grad_norm": 1.401478886604309, "learning_rate": 1.6852698976096086e-05, "loss": 0.5515, "step": 10310 }, { "epoch": 0.2819678407350689, "grad_norm": 1.5678144693374634, "learning_rate": 1.685205390043923e-05, "loss": 0.505, "step": 10311 }, { "epoch": 0.28199518704878584, "grad_norm": 1.1704418659210205, "learning_rate": 1.685140877102994e-05, "loss": 0.5534, "step": 10312 }, { "epoch": 0.2820225333625027, "grad_norm": 1.319467544555664, "learning_rate": 1.6850763587873276e-05, "loss": 0.5775, "step": 10313 }, { "epoch": 0.28204987967621964, "grad_norm": 1.1511121988296509, "learning_rate": 1.6850118350974298e-05, "loss": 0.5463, "step": 10314 }, { "epoch": 0.28207722598993656, "grad_norm": 1.1375584602355957, "learning_rate": 1.684947306033807e-05, "loss": 0.555, "step": 10315 }, { "epoch": 0.2821045723036535, "grad_norm": 0.9465241432189941, "learning_rate": 1.6848827715969656e-05, "loss": 0.5503, "step": 10316 }, { "epoch": 0.28213191861737036, "grad_norm": 1.4127920866012573, "learning_rate": 1.6848182317874114e-05, "loss": 0.5493, "step": 10317 }, { "epoch": 0.2821592649310873, "grad_norm": 1.3119982481002808, "learning_rate": 1.6847536866056507e-05, "loss": 0.5216, "step": 10318 }, { "epoch": 0.2821866112448042, "grad_norm": 1.2975372076034546, "learning_rate": 1.6846891360521903e-05, "loss": 0.5198, "step": 10319 }, { "epoch": 0.28221395755852113, "grad_norm": 1.3284436464309692, "learning_rate": 1.684624580127536e-05, "loss": 0.412, "step": 10320 }, { "epoch": 0.282241303872238, "grad_norm": 1.2306640148162842, "learning_rate": 1.684560018832195e-05, "loss": 0.5805, "step": 10321 }, { "epoch": 0.28226865018595493, "grad_norm": 1.3444218635559082, "learning_rate": 1.684495452166673e-05, "loss": 0.5014, "step": 10322 }, { "epoch": 0.28229599649967185, "grad_norm": 1.4023228883743286, "learning_rate": 1.684430880131477e-05, "loss": 0.53, "step": 10323 }, { "epoch": 0.2823233428133888, "grad_norm": 1.4596091508865356, "learning_rate": 1.6843663027271132e-05, "loss": 0.5722, "step": 10324 }, { "epoch": 0.28235068912710565, "grad_norm": 1.4359086751937866, "learning_rate": 1.6843017199540885e-05, "loss": 0.8486, "step": 10325 }, { "epoch": 0.2823780354408226, "grad_norm": 1.2969098091125488, "learning_rate": 1.6842371318129092e-05, "loss": 0.5866, "step": 10326 }, { "epoch": 0.2824053817545395, "grad_norm": 1.809889316558838, "learning_rate": 1.684172538304082e-05, "loss": 0.5621, "step": 10327 }, { "epoch": 0.2824327280682564, "grad_norm": 1.4157094955444336, "learning_rate": 1.6841079394281145e-05, "loss": 0.3864, "step": 10328 }, { "epoch": 0.2824600743819733, "grad_norm": 1.1697921752929688, "learning_rate": 1.684043335185512e-05, "loss": 0.5451, "step": 10329 }, { "epoch": 0.2824874206956902, "grad_norm": 1.3667376041412354, "learning_rate": 1.6839787255767825e-05, "loss": 0.5288, "step": 10330 }, { "epoch": 0.28251476700940714, "grad_norm": 1.090110421180725, "learning_rate": 1.6839141106024322e-05, "loss": 0.55, "step": 10331 }, { "epoch": 0.28254211332312407, "grad_norm": 1.484368085861206, "learning_rate": 1.683849490262968e-05, "loss": 0.5604, "step": 10332 }, { "epoch": 0.28256945963684094, "grad_norm": 1.3066728115081787, "learning_rate": 1.683784864558897e-05, "loss": 0.5388, "step": 10333 }, { "epoch": 0.28259680595055786, "grad_norm": 1.343448519706726, "learning_rate": 1.6837202334907267e-05, "loss": 0.5494, "step": 10334 }, { "epoch": 0.2826241522642748, "grad_norm": 1.5363950729370117, "learning_rate": 1.683655597058963e-05, "loss": 0.5948, "step": 10335 }, { "epoch": 0.2826514985779917, "grad_norm": 1.3476911783218384, "learning_rate": 1.6835909552641138e-05, "loss": 0.522, "step": 10336 }, { "epoch": 0.2826788448917086, "grad_norm": 1.3456487655639648, "learning_rate": 1.683526308106686e-05, "loss": 0.5517, "step": 10337 }, { "epoch": 0.2827061912054255, "grad_norm": 1.9108295440673828, "learning_rate": 1.6834616555871867e-05, "loss": 0.5361, "step": 10338 }, { "epoch": 0.28273353751914243, "grad_norm": 1.1159223318099976, "learning_rate": 1.683396997706123e-05, "loss": 0.5312, "step": 10339 }, { "epoch": 0.28276088383285936, "grad_norm": 1.5139248371124268, "learning_rate": 1.683332334464002e-05, "loss": 0.5199, "step": 10340 }, { "epoch": 0.2827882301465762, "grad_norm": 1.4523380994796753, "learning_rate": 1.683267665861331e-05, "loss": 0.8247, "step": 10341 }, { "epoch": 0.28281557646029315, "grad_norm": 1.480206847190857, "learning_rate": 1.6832029918986177e-05, "loss": 0.4952, "step": 10342 }, { "epoch": 0.2828429227740101, "grad_norm": 1.3105894327163696, "learning_rate": 1.6831383125763694e-05, "loss": 0.5404, "step": 10343 }, { "epoch": 0.282870269087727, "grad_norm": 1.1474957466125488, "learning_rate": 1.683073627895093e-05, "loss": 0.5639, "step": 10344 }, { "epoch": 0.28289761540144387, "grad_norm": 1.0696665048599243, "learning_rate": 1.6830089378552963e-05, "loss": 0.5117, "step": 10345 }, { "epoch": 0.2829249617151608, "grad_norm": 1.2839771509170532, "learning_rate": 1.682944242457487e-05, "loss": 0.5305, "step": 10346 }, { "epoch": 0.2829523080288777, "grad_norm": 1.5019596815109253, "learning_rate": 1.682879541702172e-05, "loss": 0.491, "step": 10347 }, { "epoch": 0.28297965434259464, "grad_norm": 1.3246805667877197, "learning_rate": 1.682814835589859e-05, "loss": 0.4785, "step": 10348 }, { "epoch": 0.2830070006563115, "grad_norm": 1.6804618835449219, "learning_rate": 1.682750124121056e-05, "loss": 0.5137, "step": 10349 }, { "epoch": 0.28303434697002844, "grad_norm": 1.450364112854004, "learning_rate": 1.6826854072962704e-05, "loss": 0.8858, "step": 10350 }, { "epoch": 0.28306169328374536, "grad_norm": 1.4646975994110107, "learning_rate": 1.68262068511601e-05, "loss": 0.4029, "step": 10351 }, { "epoch": 0.2830890395974623, "grad_norm": 1.508525013923645, "learning_rate": 1.6825559575807822e-05, "loss": 0.5661, "step": 10352 }, { "epoch": 0.28311638591117916, "grad_norm": 1.6052957773208618, "learning_rate": 1.6824912246910953e-05, "loss": 0.574, "step": 10353 }, { "epoch": 0.2831437322248961, "grad_norm": 1.475233554840088, "learning_rate": 1.6824264864474564e-05, "loss": 0.4929, "step": 10354 }, { "epoch": 0.283171078538613, "grad_norm": 1.5862345695495605, "learning_rate": 1.6823617428503742e-05, "loss": 0.5201, "step": 10355 }, { "epoch": 0.28319842485232993, "grad_norm": 1.32169771194458, "learning_rate": 1.682296993900356e-05, "loss": 0.5467, "step": 10356 }, { "epoch": 0.2832257711660468, "grad_norm": 1.1874427795410156, "learning_rate": 1.6822322395979097e-05, "loss": 0.5629, "step": 10357 }, { "epoch": 0.2832531174797637, "grad_norm": 1.7300125360488892, "learning_rate": 1.6821674799435436e-05, "loss": 0.5542, "step": 10358 }, { "epoch": 0.28328046379348065, "grad_norm": 1.2960494756698608, "learning_rate": 1.6821027149377656e-05, "loss": 0.5236, "step": 10359 }, { "epoch": 0.2833078101071976, "grad_norm": 2.202214479446411, "learning_rate": 1.682037944581084e-05, "loss": 0.5697, "step": 10360 }, { "epoch": 0.28333515642091445, "grad_norm": 1.3423916101455688, "learning_rate": 1.6819731688740063e-05, "loss": 0.5192, "step": 10361 }, { "epoch": 0.28336250273463137, "grad_norm": 1.3635708093643188, "learning_rate": 1.6819083878170414e-05, "loss": 0.5342, "step": 10362 }, { "epoch": 0.2833898490483483, "grad_norm": 1.2691290378570557, "learning_rate": 1.6818436014106968e-05, "loss": 0.4347, "step": 10363 }, { "epoch": 0.28341719536206517, "grad_norm": 1.3823379278182983, "learning_rate": 1.681778809655481e-05, "loss": 0.5327, "step": 10364 }, { "epoch": 0.2834445416757821, "grad_norm": 1.3868942260742188, "learning_rate": 1.6817140125519025e-05, "loss": 0.5614, "step": 10365 }, { "epoch": 0.283471887989499, "grad_norm": 1.3686896562576294, "learning_rate": 1.6816492101004693e-05, "loss": 0.6123, "step": 10366 }, { "epoch": 0.28349923430321594, "grad_norm": 2.6996421813964844, "learning_rate": 1.68158440230169e-05, "loss": 0.8919, "step": 10367 }, { "epoch": 0.2835265806169328, "grad_norm": 1.588752031326294, "learning_rate": 1.6815195891560728e-05, "loss": 0.5441, "step": 10368 }, { "epoch": 0.28355392693064974, "grad_norm": 1.5290412902832031, "learning_rate": 1.6814547706641263e-05, "loss": 0.5788, "step": 10369 }, { "epoch": 0.28358127324436666, "grad_norm": 1.4772130250930786, "learning_rate": 1.6813899468263587e-05, "loss": 0.5668, "step": 10370 }, { "epoch": 0.2836086195580836, "grad_norm": 1.4959434270858765, "learning_rate": 1.681325117643279e-05, "loss": 0.8696, "step": 10371 }, { "epoch": 0.28363596587180046, "grad_norm": 2.192436933517456, "learning_rate": 1.6812602831153954e-05, "loss": 0.4601, "step": 10372 }, { "epoch": 0.2836633121855174, "grad_norm": 1.3456724882125854, "learning_rate": 1.681195443243216e-05, "loss": 0.5511, "step": 10373 }, { "epoch": 0.2836906584992343, "grad_norm": 1.2827244997024536, "learning_rate": 1.681130598027251e-05, "loss": 0.5012, "step": 10374 }, { "epoch": 0.28371800481295123, "grad_norm": 1.975533127784729, "learning_rate": 1.681065747468008e-05, "loss": 0.6045, "step": 10375 }, { "epoch": 0.2837453511266681, "grad_norm": 1.1733649969100952, "learning_rate": 1.6810008915659955e-05, "loss": 0.505, "step": 10376 }, { "epoch": 0.283772697440385, "grad_norm": 1.3724098205566406, "learning_rate": 1.6809360303217225e-05, "loss": 0.5431, "step": 10377 }, { "epoch": 0.28380004375410195, "grad_norm": 1.649776816368103, "learning_rate": 1.6808711637356982e-05, "loss": 0.8986, "step": 10378 }, { "epoch": 0.2838273900678189, "grad_norm": 1.4701157808303833, "learning_rate": 1.6808062918084316e-05, "loss": 0.5229, "step": 10379 }, { "epoch": 0.28385473638153574, "grad_norm": 1.1993294954299927, "learning_rate": 1.6807414145404308e-05, "loss": 0.5358, "step": 10380 }, { "epoch": 0.28388208269525267, "grad_norm": 1.3387593030929565, "learning_rate": 1.6806765319322057e-05, "loss": 0.8652, "step": 10381 }, { "epoch": 0.2839094290089696, "grad_norm": 1.5099430084228516, "learning_rate": 1.6806116439842643e-05, "loss": 0.5528, "step": 10382 }, { "epoch": 0.2839367753226865, "grad_norm": 1.2860065698623657, "learning_rate": 1.6805467506971157e-05, "loss": 0.5166, "step": 10383 }, { "epoch": 0.2839641216364034, "grad_norm": 1.7801122665405273, "learning_rate": 1.68048185207127e-05, "loss": 0.5271, "step": 10384 }, { "epoch": 0.2839914679501203, "grad_norm": 1.6909406185150146, "learning_rate": 1.6804169481072355e-05, "loss": 0.4882, "step": 10385 }, { "epoch": 0.28401881426383724, "grad_norm": 1.2479817867279053, "learning_rate": 1.6803520388055216e-05, "loss": 0.5253, "step": 10386 }, { "epoch": 0.28404616057755416, "grad_norm": 1.4224971532821655, "learning_rate": 1.6802871241666373e-05, "loss": 0.5448, "step": 10387 }, { "epoch": 0.28407350689127103, "grad_norm": 1.472076177597046, "learning_rate": 1.680222204191092e-05, "loss": 0.8979, "step": 10388 }, { "epoch": 0.28410085320498796, "grad_norm": 1.3875086307525635, "learning_rate": 1.6801572788793952e-05, "loss": 0.4928, "step": 10389 }, { "epoch": 0.2841281995187049, "grad_norm": 1.2266497611999512, "learning_rate": 1.6800923482320553e-05, "loss": 0.5472, "step": 10390 }, { "epoch": 0.2841555458324218, "grad_norm": 1.4383643865585327, "learning_rate": 1.680027412249583e-05, "loss": 0.5625, "step": 10391 }, { "epoch": 0.2841828921461387, "grad_norm": 1.2472888231277466, "learning_rate": 1.6799624709324865e-05, "loss": 0.5491, "step": 10392 }, { "epoch": 0.2842102384598556, "grad_norm": 1.5089778900146484, "learning_rate": 1.6798975242812763e-05, "loss": 0.5341, "step": 10393 }, { "epoch": 0.2842375847735725, "grad_norm": 1.30355703830719, "learning_rate": 1.6798325722964613e-05, "loss": 0.8527, "step": 10394 }, { "epoch": 0.28426493108728945, "grad_norm": 1.612130045890808, "learning_rate": 1.6797676149785506e-05, "loss": 0.5759, "step": 10395 }, { "epoch": 0.2842922774010063, "grad_norm": 1.837967038154602, "learning_rate": 1.6797026523280546e-05, "loss": 0.498, "step": 10396 }, { "epoch": 0.28431962371472325, "grad_norm": 1.0863828659057617, "learning_rate": 1.6796376843454828e-05, "loss": 0.559, "step": 10397 }, { "epoch": 0.28434697002844017, "grad_norm": 2.2193117141723633, "learning_rate": 1.679572711031344e-05, "loss": 0.3991, "step": 10398 }, { "epoch": 0.2843743163421571, "grad_norm": 1.5227442979812622, "learning_rate": 1.6795077323861494e-05, "loss": 0.4646, "step": 10399 }, { "epoch": 0.28440166265587397, "grad_norm": 1.3041621446609497, "learning_rate": 1.679442748410407e-05, "loss": 0.5689, "step": 10400 }, { "epoch": 0.2844290089695909, "grad_norm": 1.453355312347412, "learning_rate": 1.6793777591046277e-05, "loss": 0.5685, "step": 10401 }, { "epoch": 0.2844563552833078, "grad_norm": 1.4352984428405762, "learning_rate": 1.6793127644693216e-05, "loss": 0.885, "step": 10402 }, { "epoch": 0.28448370159702474, "grad_norm": 1.5424084663391113, "learning_rate": 1.6792477645049975e-05, "loss": 0.5438, "step": 10403 }, { "epoch": 0.2845110479107416, "grad_norm": 1.324398398399353, "learning_rate": 1.679182759212166e-05, "loss": 0.5598, "step": 10404 }, { "epoch": 0.28453839422445854, "grad_norm": 2.7713124752044678, "learning_rate": 1.679117748591337e-05, "loss": 0.909, "step": 10405 }, { "epoch": 0.28456574053817546, "grad_norm": 1.2465126514434814, "learning_rate": 1.6790527326430203e-05, "loss": 0.5225, "step": 10406 }, { "epoch": 0.2845930868518924, "grad_norm": 1.5631232261657715, "learning_rate": 1.6789877113677256e-05, "loss": 0.5493, "step": 10407 }, { "epoch": 0.28462043316560925, "grad_norm": 1.3720539808273315, "learning_rate": 1.678922684765964e-05, "loss": 0.561, "step": 10408 }, { "epoch": 0.2846477794793262, "grad_norm": 2.961350440979004, "learning_rate": 1.6788576528382447e-05, "loss": 0.5403, "step": 10409 }, { "epoch": 0.2846751257930431, "grad_norm": 1.3048274517059326, "learning_rate": 1.6787926155850783e-05, "loss": 0.5596, "step": 10410 }, { "epoch": 0.28470247210676003, "grad_norm": 1.747758150100708, "learning_rate": 1.6787275730069745e-05, "loss": 0.5271, "step": 10411 }, { "epoch": 0.2847298184204769, "grad_norm": 1.3735781908035278, "learning_rate": 1.6786625251044443e-05, "loss": 0.5508, "step": 10412 }, { "epoch": 0.2847571647341938, "grad_norm": 1.2579983472824097, "learning_rate": 1.6785974718779972e-05, "loss": 0.5652, "step": 10413 }, { "epoch": 0.28478451104791075, "grad_norm": 1.4569119215011597, "learning_rate": 1.6785324133281443e-05, "loss": 0.5437, "step": 10414 }, { "epoch": 0.2848118573616277, "grad_norm": 1.251456618309021, "learning_rate": 1.6784673494553954e-05, "loss": 0.5598, "step": 10415 }, { "epoch": 0.28483920367534454, "grad_norm": 1.218013048171997, "learning_rate": 1.6784022802602614e-05, "loss": 0.5613, "step": 10416 }, { "epoch": 0.28486654998906147, "grad_norm": 1.3719875812530518, "learning_rate": 1.678337205743252e-05, "loss": 0.3716, "step": 10417 }, { "epoch": 0.2848938963027784, "grad_norm": 1.3141810894012451, "learning_rate": 1.6782721259048783e-05, "loss": 0.5682, "step": 10418 }, { "epoch": 0.2849212426164953, "grad_norm": 1.8515164852142334, "learning_rate": 1.6782070407456506e-05, "loss": 0.5419, "step": 10419 }, { "epoch": 0.2849485889302122, "grad_norm": 1.7043943405151367, "learning_rate": 1.6781419502660796e-05, "loss": 0.5479, "step": 10420 }, { "epoch": 0.2849759352439291, "grad_norm": 1.206255555152893, "learning_rate": 1.678076854466676e-05, "loss": 0.5676, "step": 10421 }, { "epoch": 0.28500328155764604, "grad_norm": 1.226538062095642, "learning_rate": 1.67801175334795e-05, "loss": 0.5463, "step": 10422 }, { "epoch": 0.28503062787136296, "grad_norm": 1.4055215120315552, "learning_rate": 1.6779466469104127e-05, "loss": 0.5473, "step": 10423 }, { "epoch": 0.28505797418507983, "grad_norm": 1.4523277282714844, "learning_rate": 1.6778815351545748e-05, "loss": 0.5402, "step": 10424 }, { "epoch": 0.28508532049879676, "grad_norm": 1.5697684288024902, "learning_rate": 1.677816418080947e-05, "loss": 0.5563, "step": 10425 }, { "epoch": 0.2851126668125137, "grad_norm": 1.7608609199523926, "learning_rate": 1.6777512956900398e-05, "loss": 0.5771, "step": 10426 }, { "epoch": 0.2851400131262306, "grad_norm": 1.4512475728988647, "learning_rate": 1.677686167982365e-05, "loss": 0.5805, "step": 10427 }, { "epoch": 0.2851673594399475, "grad_norm": 2.1688411235809326, "learning_rate": 1.6776210349584325e-05, "loss": 0.5525, "step": 10428 }, { "epoch": 0.2851947057536644, "grad_norm": 1.2866597175598145, "learning_rate": 1.677555896618754e-05, "loss": 0.5168, "step": 10429 }, { "epoch": 0.2852220520673813, "grad_norm": 1.5138999223709106, "learning_rate": 1.67749075296384e-05, "loss": 0.527, "step": 10430 }, { "epoch": 0.28524939838109825, "grad_norm": 1.3769198656082153, "learning_rate": 1.6774256039942017e-05, "loss": 0.5523, "step": 10431 }, { "epoch": 0.2852767446948151, "grad_norm": 1.422656774520874, "learning_rate": 1.67736044971035e-05, "loss": 0.4779, "step": 10432 }, { "epoch": 0.28530409100853205, "grad_norm": 1.3232773542404175, "learning_rate": 1.6772952901127967e-05, "loss": 0.5527, "step": 10433 }, { "epoch": 0.28533143732224897, "grad_norm": 1.5170037746429443, "learning_rate": 1.6772301252020523e-05, "loss": 0.572, "step": 10434 }, { "epoch": 0.2853587836359659, "grad_norm": 1.4858070611953735, "learning_rate": 1.6771649549786282e-05, "loss": 0.5378, "step": 10435 }, { "epoch": 0.28538612994968277, "grad_norm": 1.48519766330719, "learning_rate": 1.6770997794430352e-05, "loss": 0.4817, "step": 10436 }, { "epoch": 0.2854134762633997, "grad_norm": 1.709630012512207, "learning_rate": 1.677034598595785e-05, "loss": 0.5316, "step": 10437 }, { "epoch": 0.2854408225771166, "grad_norm": 1.2336790561676025, "learning_rate": 1.6769694124373893e-05, "loss": 0.5192, "step": 10438 }, { "epoch": 0.28546816889083354, "grad_norm": 1.7828357219696045, "learning_rate": 1.6769042209683592e-05, "loss": 0.5707, "step": 10439 }, { "epoch": 0.2854955152045504, "grad_norm": 1.5979926586151123, "learning_rate": 1.6768390241892057e-05, "loss": 0.6082, "step": 10440 }, { "epoch": 0.28552286151826733, "grad_norm": 1.7447431087493896, "learning_rate": 1.67677382210044e-05, "loss": 0.5699, "step": 10441 }, { "epoch": 0.28555020783198426, "grad_norm": 1.355482816696167, "learning_rate": 1.676708614702575e-05, "loss": 0.5659, "step": 10442 }, { "epoch": 0.2855775541457012, "grad_norm": 1.4604865312576294, "learning_rate": 1.6766434019961208e-05, "loss": 0.6183, "step": 10443 }, { "epoch": 0.28560490045941805, "grad_norm": 1.4094892740249634, "learning_rate": 1.6765781839815897e-05, "loss": 0.3927, "step": 10444 }, { "epoch": 0.285632246773135, "grad_norm": 1.3893171548843384, "learning_rate": 1.6765129606594933e-05, "loss": 0.522, "step": 10445 }, { "epoch": 0.2856595930868519, "grad_norm": 1.2236839532852173, "learning_rate": 1.676447732030343e-05, "loss": 0.5733, "step": 10446 }, { "epoch": 0.28568693940056883, "grad_norm": 1.7011239528656006, "learning_rate": 1.6763824980946508e-05, "loss": 0.5604, "step": 10447 }, { "epoch": 0.2857142857142857, "grad_norm": 1.526938796043396, "learning_rate": 1.676317258852928e-05, "loss": 0.5236, "step": 10448 }, { "epoch": 0.2857416320280026, "grad_norm": 1.1400114297866821, "learning_rate": 1.6762520143056867e-05, "loss": 0.5358, "step": 10449 }, { "epoch": 0.28576897834171955, "grad_norm": 1.2891596555709839, "learning_rate": 1.6761867644534386e-05, "loss": 0.4775, "step": 10450 }, { "epoch": 0.2857963246554365, "grad_norm": 1.2589141130447388, "learning_rate": 1.6761215092966957e-05, "loss": 0.5389, "step": 10451 }, { "epoch": 0.28582367096915334, "grad_norm": 1.280501365661621, "learning_rate": 1.6760562488359698e-05, "loss": 0.556, "step": 10452 }, { "epoch": 0.28585101728287027, "grad_norm": 1.233774185180664, "learning_rate": 1.675990983071773e-05, "loss": 0.5247, "step": 10453 }, { "epoch": 0.2858783635965872, "grad_norm": 2.551875352859497, "learning_rate": 1.675925712004617e-05, "loss": 0.5466, "step": 10454 }, { "epoch": 0.2859057099103041, "grad_norm": 1.1998094320297241, "learning_rate": 1.6758604356350142e-05, "loss": 0.5639, "step": 10455 }, { "epoch": 0.285933056224021, "grad_norm": 1.3454041481018066, "learning_rate": 1.6757951539634764e-05, "loss": 0.4603, "step": 10456 }, { "epoch": 0.2859604025377379, "grad_norm": 1.5953112840652466, "learning_rate": 1.675729866990516e-05, "loss": 0.5551, "step": 10457 }, { "epoch": 0.28598774885145484, "grad_norm": 1.3872863054275513, "learning_rate": 1.6756645747166446e-05, "loss": 0.5369, "step": 10458 }, { "epoch": 0.28601509516517176, "grad_norm": 1.170827031135559, "learning_rate": 1.675599277142375e-05, "loss": 0.5414, "step": 10459 }, { "epoch": 0.28604244147888863, "grad_norm": 1.593265414237976, "learning_rate": 1.675533974268219e-05, "loss": 0.5896, "step": 10460 }, { "epoch": 0.28606978779260556, "grad_norm": 1.484606146812439, "learning_rate": 1.6754686660946892e-05, "loss": 0.8987, "step": 10461 }, { "epoch": 0.2860971341063225, "grad_norm": 1.3002570867538452, "learning_rate": 1.675403352622298e-05, "loss": 0.5213, "step": 10462 }, { "epoch": 0.28612448042003935, "grad_norm": 1.3649466037750244, "learning_rate": 1.6753380338515572e-05, "loss": 0.5767, "step": 10463 }, { "epoch": 0.2861518267337563, "grad_norm": 1.6638917922973633, "learning_rate": 1.67527270978298e-05, "loss": 0.3833, "step": 10464 }, { "epoch": 0.2861791730474732, "grad_norm": 1.3358160257339478, "learning_rate": 1.6752073804170777e-05, "loss": 0.5452, "step": 10465 }, { "epoch": 0.2862065193611901, "grad_norm": 1.738091230392456, "learning_rate": 1.6751420457543642e-05, "loss": 0.9047, "step": 10466 }, { "epoch": 0.286233865674907, "grad_norm": 1.395881175994873, "learning_rate": 1.6750767057953512e-05, "loss": 0.5557, "step": 10467 }, { "epoch": 0.2862612119886239, "grad_norm": 1.433942437171936, "learning_rate": 1.6750113605405513e-05, "loss": 0.5323, "step": 10468 }, { "epoch": 0.28628855830234085, "grad_norm": 1.6645079851150513, "learning_rate": 1.6749460099904775e-05, "loss": 0.5748, "step": 10469 }, { "epoch": 0.28631590461605777, "grad_norm": 1.411359190940857, "learning_rate": 1.674880654145642e-05, "loss": 0.5741, "step": 10470 }, { "epoch": 0.28634325092977464, "grad_norm": 1.7100199460983276, "learning_rate": 1.6748152930065577e-05, "loss": 0.5714, "step": 10471 }, { "epoch": 0.28637059724349156, "grad_norm": 1.3239455223083496, "learning_rate": 1.6747499265737372e-05, "loss": 0.5384, "step": 10472 }, { "epoch": 0.2863979435572085, "grad_norm": 1.6256195306777954, "learning_rate": 1.6746845548476937e-05, "loss": 0.5475, "step": 10473 }, { "epoch": 0.2864252898709254, "grad_norm": 1.2196978330612183, "learning_rate": 1.6746191778289394e-05, "loss": 0.5368, "step": 10474 }, { "epoch": 0.2864526361846423, "grad_norm": 1.1851561069488525, "learning_rate": 1.674553795517988e-05, "loss": 0.5232, "step": 10475 }, { "epoch": 0.2864799824983592, "grad_norm": 1.4933135509490967, "learning_rate": 1.6744884079153517e-05, "loss": 0.8647, "step": 10476 }, { "epoch": 0.28650732881207613, "grad_norm": 1.4537789821624756, "learning_rate": 1.674423015021544e-05, "loss": 0.8962, "step": 10477 }, { "epoch": 0.28653467512579306, "grad_norm": 1.672027587890625, "learning_rate": 1.6743576168370768e-05, "loss": 0.5708, "step": 10478 }, { "epoch": 0.28656202143950993, "grad_norm": 2.3260738849639893, "learning_rate": 1.674292213362464e-05, "loss": 0.5783, "step": 10479 }, { "epoch": 0.28658936775322685, "grad_norm": 1.304091453552246, "learning_rate": 1.674226804598219e-05, "loss": 0.5668, "step": 10480 }, { "epoch": 0.2866167140669438, "grad_norm": 1.7782528400421143, "learning_rate": 1.6741613905448542e-05, "loss": 0.5904, "step": 10481 }, { "epoch": 0.2866440603806607, "grad_norm": 1.4835177659988403, "learning_rate": 1.674095971202883e-05, "loss": 0.5591, "step": 10482 }, { "epoch": 0.2866714066943776, "grad_norm": 1.0949586629867554, "learning_rate": 1.6740305465728188e-05, "loss": 0.5382, "step": 10483 }, { "epoch": 0.2866987530080945, "grad_norm": 2.1200356483459473, "learning_rate": 1.6739651166551744e-05, "loss": 0.5915, "step": 10484 }, { "epoch": 0.2867260993218114, "grad_norm": 1.1023277044296265, "learning_rate": 1.6738996814504635e-05, "loss": 0.5736, "step": 10485 }, { "epoch": 0.28675344563552835, "grad_norm": 1.4159644842147827, "learning_rate": 1.673834240959199e-05, "loss": 0.5522, "step": 10486 }, { "epoch": 0.2867807919492452, "grad_norm": 1.2305141687393188, "learning_rate": 1.6737687951818948e-05, "loss": 0.5589, "step": 10487 }, { "epoch": 0.28680813826296214, "grad_norm": 1.350594162940979, "learning_rate": 1.673703344119064e-05, "loss": 0.5323, "step": 10488 }, { "epoch": 0.28683548457667907, "grad_norm": 1.4306843280792236, "learning_rate": 1.6736378877712197e-05, "loss": 0.4842, "step": 10489 }, { "epoch": 0.286862830890396, "grad_norm": 1.3355563879013062, "learning_rate": 1.6735724261388762e-05, "loss": 0.4626, "step": 10490 }, { "epoch": 0.28689017720411286, "grad_norm": 1.1020526885986328, "learning_rate": 1.6735069592225464e-05, "loss": 0.5663, "step": 10491 }, { "epoch": 0.2869175235178298, "grad_norm": 1.1103137731552124, "learning_rate": 1.673441487022744e-05, "loss": 0.5589, "step": 10492 }, { "epoch": 0.2869448698315467, "grad_norm": 1.4937176704406738, "learning_rate": 1.6733760095399826e-05, "loss": 0.5568, "step": 10493 }, { "epoch": 0.28697221614526364, "grad_norm": 1.2901450395584106, "learning_rate": 1.6733105267747763e-05, "loss": 0.5178, "step": 10494 }, { "epoch": 0.2869995624589805, "grad_norm": 1.403582215309143, "learning_rate": 1.673245038727638e-05, "loss": 0.4818, "step": 10495 }, { "epoch": 0.28702690877269743, "grad_norm": 3.4182355403900146, "learning_rate": 1.673179545399082e-05, "loss": 0.4516, "step": 10496 }, { "epoch": 0.28705425508641436, "grad_norm": 2.2216620445251465, "learning_rate": 1.6731140467896215e-05, "loss": 0.9143, "step": 10497 }, { "epoch": 0.2870816014001313, "grad_norm": 1.6665581464767456, "learning_rate": 1.6730485428997713e-05, "loss": 0.8525, "step": 10498 }, { "epoch": 0.28710894771384815, "grad_norm": 1.4880681037902832, "learning_rate": 1.6729830337300445e-05, "loss": 0.5396, "step": 10499 }, { "epoch": 0.2871362940275651, "grad_norm": 1.5175001621246338, "learning_rate": 1.672917519280955e-05, "loss": 0.5968, "step": 10500 }, { "epoch": 0.287163640341282, "grad_norm": 1.517865777015686, "learning_rate": 1.6728519995530176e-05, "loss": 0.5671, "step": 10501 }, { "epoch": 0.2871909866549989, "grad_norm": 1.2062870264053345, "learning_rate": 1.672786474546745e-05, "loss": 0.5456, "step": 10502 }, { "epoch": 0.2872183329687158, "grad_norm": 1.4760252237319946, "learning_rate": 1.672720944262652e-05, "loss": 0.5564, "step": 10503 }, { "epoch": 0.2872456792824327, "grad_norm": 1.0679850578308105, "learning_rate": 1.6726554087012524e-05, "loss": 0.5166, "step": 10504 }, { "epoch": 0.28727302559614964, "grad_norm": 2.4235031604766846, "learning_rate": 1.6725898678630607e-05, "loss": 0.9271, "step": 10505 }, { "epoch": 0.28730037190986657, "grad_norm": 1.184462070465088, "learning_rate": 1.6725243217485905e-05, "loss": 0.5741, "step": 10506 }, { "epoch": 0.28732771822358344, "grad_norm": 1.4422739744186401, "learning_rate": 1.6724587703583564e-05, "loss": 0.5838, "step": 10507 }, { "epoch": 0.28735506453730036, "grad_norm": 1.2163021564483643, "learning_rate": 1.6723932136928726e-05, "loss": 0.5377, "step": 10508 }, { "epoch": 0.2873824108510173, "grad_norm": 2.391043186187744, "learning_rate": 1.672327651752653e-05, "loss": 0.8708, "step": 10509 }, { "epoch": 0.2874097571647342, "grad_norm": 1.6989853382110596, "learning_rate": 1.6722620845382124e-05, "loss": 0.9064, "step": 10510 }, { "epoch": 0.2874371034784511, "grad_norm": 1.2122424840927124, "learning_rate": 1.6721965120500646e-05, "loss": 0.5545, "step": 10511 }, { "epoch": 0.287464449792168, "grad_norm": 1.380607008934021, "learning_rate": 1.6721309342887248e-05, "loss": 0.8711, "step": 10512 }, { "epoch": 0.28749179610588493, "grad_norm": 1.410367727279663, "learning_rate": 1.672065351254707e-05, "loss": 0.5919, "step": 10513 }, { "epoch": 0.28751914241960186, "grad_norm": 1.6004146337509155, "learning_rate": 1.6719997629485248e-05, "loss": 0.4489, "step": 10514 }, { "epoch": 0.28754648873331873, "grad_norm": 1.5528464317321777, "learning_rate": 1.6719341693706943e-05, "loss": 0.526, "step": 10515 }, { "epoch": 0.28757383504703565, "grad_norm": 1.5508522987365723, "learning_rate": 1.6718685705217293e-05, "loss": 0.5508, "step": 10516 }, { "epoch": 0.2876011813607526, "grad_norm": 1.3244739770889282, "learning_rate": 1.671802966402144e-05, "loss": 0.567, "step": 10517 }, { "epoch": 0.2876285276744695, "grad_norm": 1.220579743385315, "learning_rate": 1.671737357012454e-05, "loss": 0.5274, "step": 10518 }, { "epoch": 0.2876558739881864, "grad_norm": 2.596437931060791, "learning_rate": 1.6716717423531736e-05, "loss": 0.9333, "step": 10519 }, { "epoch": 0.2876832203019033, "grad_norm": 1.3351752758026123, "learning_rate": 1.671606122424817e-05, "loss": 0.4285, "step": 10520 }, { "epoch": 0.2877105666156202, "grad_norm": 1.439258337020874, "learning_rate": 1.6715404972278996e-05, "loss": 0.5654, "step": 10521 }, { "epoch": 0.28773791292933715, "grad_norm": 1.6960393190383911, "learning_rate": 1.6714748667629362e-05, "loss": 0.5286, "step": 10522 }, { "epoch": 0.287765259243054, "grad_norm": 1.3140244483947754, "learning_rate": 1.671409231030441e-05, "loss": 0.5108, "step": 10523 }, { "epoch": 0.28779260555677094, "grad_norm": 1.435113787651062, "learning_rate": 1.6713435900309294e-05, "loss": 0.4106, "step": 10524 }, { "epoch": 0.28781995187048787, "grad_norm": 1.3961073160171509, "learning_rate": 1.6712779437649164e-05, "loss": 0.5296, "step": 10525 }, { "epoch": 0.2878472981842048, "grad_norm": 1.9699335098266602, "learning_rate": 1.6712122922329168e-05, "loss": 0.5087, "step": 10526 }, { "epoch": 0.28787464449792166, "grad_norm": 1.4030983448028564, "learning_rate": 1.6711466354354456e-05, "loss": 0.5462, "step": 10527 }, { "epoch": 0.2879019908116386, "grad_norm": 1.3422819375991821, "learning_rate": 1.671080973373018e-05, "loss": 0.5341, "step": 10528 }, { "epoch": 0.2879293371253555, "grad_norm": 1.3204983472824097, "learning_rate": 1.671015306046149e-05, "loss": 0.5451, "step": 10529 }, { "epoch": 0.28795668343907244, "grad_norm": 1.3166433572769165, "learning_rate": 1.6709496334553537e-05, "loss": 0.569, "step": 10530 }, { "epoch": 0.2879840297527893, "grad_norm": 1.3480610847473145, "learning_rate": 1.6708839556011473e-05, "loss": 0.5061, "step": 10531 }, { "epoch": 0.28801137606650623, "grad_norm": 1.1811671257019043, "learning_rate": 1.6708182724840453e-05, "loss": 0.5548, "step": 10532 }, { "epoch": 0.28803872238022316, "grad_norm": 1.5637218952178955, "learning_rate": 1.6707525841045628e-05, "loss": 0.4939, "step": 10533 }, { "epoch": 0.2880660686939401, "grad_norm": 1.5100518465042114, "learning_rate": 1.6706868904632147e-05, "loss": 0.5323, "step": 10534 }, { "epoch": 0.28809341500765695, "grad_norm": 1.2631878852844238, "learning_rate": 1.6706211915605166e-05, "loss": 0.5531, "step": 10535 }, { "epoch": 0.2881207613213739, "grad_norm": 1.3112002611160278, "learning_rate": 1.6705554873969844e-05, "loss": 0.5443, "step": 10536 }, { "epoch": 0.2881481076350908, "grad_norm": 1.2719610929489136, "learning_rate": 1.670489777973133e-05, "loss": 0.5372, "step": 10537 }, { "epoch": 0.2881754539488077, "grad_norm": 1.6264762878417969, "learning_rate": 1.670424063289478e-05, "loss": 0.4336, "step": 10538 }, { "epoch": 0.2882028002625246, "grad_norm": 1.4231148958206177, "learning_rate": 1.6703583433465347e-05, "loss": 0.552, "step": 10539 }, { "epoch": 0.2882301465762415, "grad_norm": 1.6128463745117188, "learning_rate": 1.6702926181448193e-05, "loss": 0.5388, "step": 10540 }, { "epoch": 0.28825749288995844, "grad_norm": 1.110979437828064, "learning_rate": 1.6702268876848463e-05, "loss": 0.5645, "step": 10541 }, { "epoch": 0.28828483920367537, "grad_norm": 1.2956011295318604, "learning_rate": 1.6701611519671325e-05, "loss": 0.5411, "step": 10542 }, { "epoch": 0.28831218551739224, "grad_norm": 1.7197320461273193, "learning_rate": 1.6700954109921928e-05, "loss": 0.8906, "step": 10543 }, { "epoch": 0.28833953183110916, "grad_norm": 1.4821972846984863, "learning_rate": 1.6700296647605432e-05, "loss": 0.5916, "step": 10544 }, { "epoch": 0.2883668781448261, "grad_norm": 1.478506326675415, "learning_rate": 1.6699639132726995e-05, "loss": 0.589, "step": 10545 }, { "epoch": 0.288394224458543, "grad_norm": 1.2076019048690796, "learning_rate": 1.6698981565291773e-05, "loss": 0.5429, "step": 10546 }, { "epoch": 0.2884215707722599, "grad_norm": 1.2239092588424683, "learning_rate": 1.669832394530493e-05, "loss": 0.5505, "step": 10547 }, { "epoch": 0.2884489170859768, "grad_norm": 1.005505084991455, "learning_rate": 1.6697666272771614e-05, "loss": 0.5438, "step": 10548 }, { "epoch": 0.28847626339969373, "grad_norm": 1.5219744443893433, "learning_rate": 1.6697008547696997e-05, "loss": 0.5777, "step": 10549 }, { "epoch": 0.28850360971341066, "grad_norm": 1.2929048538208008, "learning_rate": 1.6696350770086233e-05, "loss": 0.5801, "step": 10550 }, { "epoch": 0.2885309560271275, "grad_norm": 1.2150766849517822, "learning_rate": 1.6695692939944477e-05, "loss": 0.5729, "step": 10551 }, { "epoch": 0.28855830234084445, "grad_norm": 1.3365345001220703, "learning_rate": 1.6695035057276897e-05, "loss": 0.5729, "step": 10552 }, { "epoch": 0.2885856486545614, "grad_norm": 1.246795892715454, "learning_rate": 1.669437712208865e-05, "loss": 0.5562, "step": 10553 }, { "epoch": 0.2886129949682783, "grad_norm": 1.2549514770507812, "learning_rate": 1.66937191343849e-05, "loss": 0.5565, "step": 10554 }, { "epoch": 0.28864034128199517, "grad_norm": 1.3864843845367432, "learning_rate": 1.6693061094170807e-05, "loss": 0.5363, "step": 10555 }, { "epoch": 0.2886676875957121, "grad_norm": 1.4425601959228516, "learning_rate": 1.669240300145153e-05, "loss": 0.5023, "step": 10556 }, { "epoch": 0.288695033909429, "grad_norm": 1.2714320421218872, "learning_rate": 1.669174485623224e-05, "loss": 0.5148, "step": 10557 }, { "epoch": 0.28872238022314595, "grad_norm": 1.400854468345642, "learning_rate": 1.6691086658518093e-05, "loss": 0.4621, "step": 10558 }, { "epoch": 0.2887497265368628, "grad_norm": 1.3428943157196045, "learning_rate": 1.6690428408314253e-05, "loss": 0.5528, "step": 10559 }, { "epoch": 0.28877707285057974, "grad_norm": 1.3786929845809937, "learning_rate": 1.6689770105625885e-05, "loss": 0.5396, "step": 10560 }, { "epoch": 0.28880441916429667, "grad_norm": 1.3767350912094116, "learning_rate": 1.6689111750458153e-05, "loss": 0.5446, "step": 10561 }, { "epoch": 0.2888317654780136, "grad_norm": 1.4678584337234497, "learning_rate": 1.6688453342816222e-05, "loss": 0.5654, "step": 10562 }, { "epoch": 0.28885911179173046, "grad_norm": 1.9785906076431274, "learning_rate": 1.668779488270526e-05, "loss": 0.5517, "step": 10563 }, { "epoch": 0.2888864581054474, "grad_norm": 1.4952374696731567, "learning_rate": 1.6687136370130423e-05, "loss": 0.5624, "step": 10564 }, { "epoch": 0.2889138044191643, "grad_norm": 1.3361997604370117, "learning_rate": 1.668647780509689e-05, "loss": 0.5698, "step": 10565 }, { "epoch": 0.2889411507328812, "grad_norm": 1.3773449659347534, "learning_rate": 1.6685819187609816e-05, "loss": 0.573, "step": 10566 }, { "epoch": 0.2889684970465981, "grad_norm": 1.8579243421554565, "learning_rate": 1.6685160517674372e-05, "loss": 0.9206, "step": 10567 }, { "epoch": 0.28899584336031503, "grad_norm": 1.3473637104034424, "learning_rate": 1.6684501795295726e-05, "loss": 0.5437, "step": 10568 }, { "epoch": 0.28902318967403196, "grad_norm": 1.5280671119689941, "learning_rate": 1.6683843020479046e-05, "loss": 0.5789, "step": 10569 }, { "epoch": 0.2890505359877488, "grad_norm": 1.3137743473052979, "learning_rate": 1.6683184193229495e-05, "loss": 0.3764, "step": 10570 }, { "epoch": 0.28907788230146575, "grad_norm": 1.3786529302597046, "learning_rate": 1.6682525313552247e-05, "loss": 0.5512, "step": 10571 }, { "epoch": 0.2891052286151827, "grad_norm": 1.2325832843780518, "learning_rate": 1.668186638145247e-05, "loss": 0.5289, "step": 10572 }, { "epoch": 0.2891325749288996, "grad_norm": 1.3880503177642822, "learning_rate": 1.6681207396935327e-05, "loss": 0.5474, "step": 10573 }, { "epoch": 0.28915992124261647, "grad_norm": 1.2290451526641846, "learning_rate": 1.6680548360006e-05, "loss": 0.5529, "step": 10574 }, { "epoch": 0.2891872675563334, "grad_norm": 1.1648447513580322, "learning_rate": 1.6679889270669645e-05, "loss": 0.5887, "step": 10575 }, { "epoch": 0.2892146138700503, "grad_norm": 1.0755735635757446, "learning_rate": 1.6679230128931442e-05, "loss": 0.5493, "step": 10576 }, { "epoch": 0.28924196018376724, "grad_norm": 1.353110432624817, "learning_rate": 1.6678570934796553e-05, "loss": 0.4061, "step": 10577 }, { "epoch": 0.2892693064974841, "grad_norm": 1.4059621095657349, "learning_rate": 1.6677911688270162e-05, "loss": 0.5456, "step": 10578 }, { "epoch": 0.28929665281120104, "grad_norm": 1.7259190082550049, "learning_rate": 1.6677252389357426e-05, "loss": 0.5544, "step": 10579 }, { "epoch": 0.28932399912491796, "grad_norm": 1.9392799139022827, "learning_rate": 1.667659303806353e-05, "loss": 0.8905, "step": 10580 }, { "epoch": 0.2893513454386349, "grad_norm": 1.2201275825500488, "learning_rate": 1.6675933634393638e-05, "loss": 0.5234, "step": 10581 }, { "epoch": 0.28937869175235176, "grad_norm": 1.0778199434280396, "learning_rate": 1.6675274178352926e-05, "loss": 0.5596, "step": 10582 }, { "epoch": 0.2894060380660687, "grad_norm": 1.8048211336135864, "learning_rate": 1.6674614669946564e-05, "loss": 0.5868, "step": 10583 }, { "epoch": 0.2894333843797856, "grad_norm": 1.3934192657470703, "learning_rate": 1.6673955109179728e-05, "loss": 0.5672, "step": 10584 }, { "epoch": 0.28946073069350253, "grad_norm": 1.3367407321929932, "learning_rate": 1.6673295496057596e-05, "loss": 0.5775, "step": 10585 }, { "epoch": 0.2894880770072194, "grad_norm": 1.7428703308105469, "learning_rate": 1.667263583058534e-05, "loss": 0.8856, "step": 10586 }, { "epoch": 0.2895154233209363, "grad_norm": 1.6767390966415405, "learning_rate": 1.6671976112768132e-05, "loss": 0.6002, "step": 10587 }, { "epoch": 0.28954276963465325, "grad_norm": 1.356984257698059, "learning_rate": 1.6671316342611147e-05, "loss": 0.5147, "step": 10588 }, { "epoch": 0.2895701159483702, "grad_norm": 1.3860340118408203, "learning_rate": 1.667065652011956e-05, "loss": 0.5627, "step": 10589 }, { "epoch": 0.28959746226208705, "grad_norm": 1.3792110681533813, "learning_rate": 1.6669996645298556e-05, "loss": 0.5298, "step": 10590 }, { "epoch": 0.28962480857580397, "grad_norm": 1.396689772605896, "learning_rate": 1.6669336718153305e-05, "loss": 0.5436, "step": 10591 }, { "epoch": 0.2896521548895209, "grad_norm": 1.389389991760254, "learning_rate": 1.6668676738688983e-05, "loss": 0.5823, "step": 10592 }, { "epoch": 0.2896795012032378, "grad_norm": 1.1082731485366821, "learning_rate": 1.666801670691077e-05, "loss": 0.5773, "step": 10593 }, { "epoch": 0.2897068475169547, "grad_norm": 1.0625786781311035, "learning_rate": 1.666735662282384e-05, "loss": 0.5773, "step": 10594 }, { "epoch": 0.2897341938306716, "grad_norm": 1.4488754272460938, "learning_rate": 1.6666696486433373e-05, "loss": 0.5658, "step": 10595 }, { "epoch": 0.28976154014438854, "grad_norm": 1.453230619430542, "learning_rate": 1.666603629774455e-05, "loss": 0.5058, "step": 10596 }, { "epoch": 0.28978888645810547, "grad_norm": 1.3408818244934082, "learning_rate": 1.666537605676255e-05, "loss": 0.5193, "step": 10597 }, { "epoch": 0.28981623277182234, "grad_norm": 1.2976760864257812, "learning_rate": 1.666471576349255e-05, "loss": 0.5525, "step": 10598 }, { "epoch": 0.28984357908553926, "grad_norm": 1.421027421951294, "learning_rate": 1.6664055417939726e-05, "loss": 0.5387, "step": 10599 }, { "epoch": 0.2898709253992562, "grad_norm": 1.0227354764938354, "learning_rate": 1.6663395020109267e-05, "loss": 0.5377, "step": 10600 }, { "epoch": 0.2898982717129731, "grad_norm": 1.218638300895691, "learning_rate": 1.6662734570006348e-05, "loss": 0.5352, "step": 10601 }, { "epoch": 0.28992561802669, "grad_norm": 1.2688288688659668, "learning_rate": 1.666207406763615e-05, "loss": 0.561, "step": 10602 }, { "epoch": 0.2899529643404069, "grad_norm": 1.1509196758270264, "learning_rate": 1.666141351300386e-05, "loss": 0.5751, "step": 10603 }, { "epoch": 0.28998031065412383, "grad_norm": 1.4933011531829834, "learning_rate": 1.666075290611465e-05, "loss": 0.5005, "step": 10604 }, { "epoch": 0.29000765696784075, "grad_norm": 1.4535279273986816, "learning_rate": 1.666009224697371e-05, "loss": 0.4916, "step": 10605 }, { "epoch": 0.2900350032815576, "grad_norm": 1.5145378112792969, "learning_rate": 1.6659431535586224e-05, "loss": 0.4943, "step": 10606 }, { "epoch": 0.29006234959527455, "grad_norm": 1.110383152961731, "learning_rate": 1.665877077195737e-05, "loss": 0.5368, "step": 10607 }, { "epoch": 0.2900896959089915, "grad_norm": 3.3236265182495117, "learning_rate": 1.665810995609233e-05, "loss": 0.9445, "step": 10608 }, { "epoch": 0.2901170422227084, "grad_norm": 1.3594533205032349, "learning_rate": 1.6657449087996296e-05, "loss": 0.6022, "step": 10609 }, { "epoch": 0.29014438853642527, "grad_norm": 1.6404927968978882, "learning_rate": 1.6656788167674444e-05, "loss": 0.5468, "step": 10610 }, { "epoch": 0.2901717348501422, "grad_norm": 1.2063082456588745, "learning_rate": 1.6656127195131964e-05, "loss": 0.563, "step": 10611 }, { "epoch": 0.2901990811638591, "grad_norm": 1.7455068826675415, "learning_rate": 1.665546617037404e-05, "loss": 0.5623, "step": 10612 }, { "epoch": 0.29022642747757604, "grad_norm": 1.1697580814361572, "learning_rate": 1.6654805093405856e-05, "loss": 0.5454, "step": 10613 }, { "epoch": 0.2902537737912929, "grad_norm": 1.098612666130066, "learning_rate": 1.6654143964232598e-05, "loss": 0.5171, "step": 10614 }, { "epoch": 0.29028112010500984, "grad_norm": 1.2060259580612183, "learning_rate": 1.6653482782859454e-05, "loss": 0.5196, "step": 10615 }, { "epoch": 0.29030846641872676, "grad_norm": 1.1728694438934326, "learning_rate": 1.6652821549291608e-05, "loss": 0.5631, "step": 10616 }, { "epoch": 0.2903358127324437, "grad_norm": 3.8076164722442627, "learning_rate": 1.665216026353425e-05, "loss": 0.5706, "step": 10617 }, { "epoch": 0.29036315904616056, "grad_norm": 1.0496118068695068, "learning_rate": 1.665149892559257e-05, "loss": 0.5404, "step": 10618 }, { "epoch": 0.2903905053598775, "grad_norm": 1.4984853267669678, "learning_rate": 1.665083753547175e-05, "loss": 0.5416, "step": 10619 }, { "epoch": 0.2904178516735944, "grad_norm": 1.136637568473816, "learning_rate": 1.6650176093176978e-05, "loss": 0.4043, "step": 10620 }, { "epoch": 0.29044519798731133, "grad_norm": 1.6735730171203613, "learning_rate": 1.6649514598713454e-05, "loss": 0.5395, "step": 10621 }, { "epoch": 0.2904725443010282, "grad_norm": 1.1757367849349976, "learning_rate": 1.6648853052086354e-05, "loss": 0.5475, "step": 10622 }, { "epoch": 0.2904998906147451, "grad_norm": 1.3113958835601807, "learning_rate": 1.6648191453300872e-05, "loss": 0.5664, "step": 10623 }, { "epoch": 0.29052723692846205, "grad_norm": 1.9371981620788574, "learning_rate": 1.6647529802362204e-05, "loss": 0.6038, "step": 10624 }, { "epoch": 0.290554583242179, "grad_norm": 1.2672858238220215, "learning_rate": 1.664686809927553e-05, "loss": 0.5456, "step": 10625 }, { "epoch": 0.29058192955589585, "grad_norm": 1.517547607421875, "learning_rate": 1.6646206344046053e-05, "loss": 0.5626, "step": 10626 }, { "epoch": 0.29060927586961277, "grad_norm": 1.2138314247131348, "learning_rate": 1.6645544536678953e-05, "loss": 0.5447, "step": 10627 }, { "epoch": 0.2906366221833297, "grad_norm": 1.4460391998291016, "learning_rate": 1.6644882677179427e-05, "loss": 0.5452, "step": 10628 }, { "epoch": 0.2906639684970466, "grad_norm": 1.1450119018554688, "learning_rate": 1.6644220765552664e-05, "loss": 0.5447, "step": 10629 }, { "epoch": 0.2906913148107635, "grad_norm": 1.7214537858963013, "learning_rate": 1.6643558801803864e-05, "loss": 0.5058, "step": 10630 }, { "epoch": 0.2907186611244804, "grad_norm": 2.22672176361084, "learning_rate": 1.664289678593821e-05, "loss": 0.5458, "step": 10631 }, { "epoch": 0.29074600743819734, "grad_norm": 1.3757188320159912, "learning_rate": 1.6642234717960902e-05, "loss": 0.5706, "step": 10632 }, { "epoch": 0.29077335375191427, "grad_norm": 1.4207940101623535, "learning_rate": 1.6641572597877135e-05, "loss": 0.5581, "step": 10633 }, { "epoch": 0.29080070006563113, "grad_norm": 1.2636181116104126, "learning_rate": 1.6640910425692097e-05, "loss": 0.5647, "step": 10634 }, { "epoch": 0.29082804637934806, "grad_norm": 1.3820258378982544, "learning_rate": 1.6640248201410986e-05, "loss": 0.5346, "step": 10635 }, { "epoch": 0.290855392693065, "grad_norm": 1.0789028406143188, "learning_rate": 1.6639585925038994e-05, "loss": 0.5901, "step": 10636 }, { "epoch": 0.2908827390067819, "grad_norm": 1.3083381652832031, "learning_rate": 1.6638923596581324e-05, "loss": 0.5381, "step": 10637 }, { "epoch": 0.2909100853204988, "grad_norm": 1.7542132139205933, "learning_rate": 1.6638261216043163e-05, "loss": 0.5653, "step": 10638 }, { "epoch": 0.2909374316342157, "grad_norm": 1.0969520807266235, "learning_rate": 1.663759878342971e-05, "loss": 0.5415, "step": 10639 }, { "epoch": 0.29096477794793263, "grad_norm": 1.1360458135604858, "learning_rate": 1.6636936298746167e-05, "loss": 0.5937, "step": 10640 }, { "epoch": 0.29099212426164955, "grad_norm": 1.215889573097229, "learning_rate": 1.6636273761997722e-05, "loss": 0.5366, "step": 10641 }, { "epoch": 0.2910194705753664, "grad_norm": 1.4214849472045898, "learning_rate": 1.663561117318958e-05, "loss": 0.569, "step": 10642 }, { "epoch": 0.29104681688908335, "grad_norm": 1.2887495756149292, "learning_rate": 1.6634948532326936e-05, "loss": 0.5372, "step": 10643 }, { "epoch": 0.2910741632028003, "grad_norm": 1.1170231103897095, "learning_rate": 1.663428583941499e-05, "loss": 0.5292, "step": 10644 }, { "epoch": 0.2911015095165172, "grad_norm": 1.4061949253082275, "learning_rate": 1.663362309445893e-05, "loss": 0.5506, "step": 10645 }, { "epoch": 0.29112885583023407, "grad_norm": 2.863750696182251, "learning_rate": 1.6632960297463973e-05, "loss": 0.9122, "step": 10646 }, { "epoch": 0.291156202143951, "grad_norm": 1.3519219160079956, "learning_rate": 1.6632297448435307e-05, "loss": 0.5474, "step": 10647 }, { "epoch": 0.2911835484576679, "grad_norm": 1.3913730382919312, "learning_rate": 1.6631634547378133e-05, "loss": 0.5382, "step": 10648 }, { "epoch": 0.29121089477138484, "grad_norm": 1.7739492654800415, "learning_rate": 1.663097159429765e-05, "loss": 0.5408, "step": 10649 }, { "epoch": 0.2912382410851017, "grad_norm": 1.5110406875610352, "learning_rate": 1.6630308589199063e-05, "loss": 0.5071, "step": 10650 }, { "epoch": 0.29126558739881864, "grad_norm": 1.313956379890442, "learning_rate": 1.6629645532087572e-05, "loss": 0.5539, "step": 10651 }, { "epoch": 0.29129293371253556, "grad_norm": 1.2679767608642578, "learning_rate": 1.6628982422968376e-05, "loss": 0.8342, "step": 10652 }, { "epoch": 0.2913202800262525, "grad_norm": 1.5852304697036743, "learning_rate": 1.6628319261846676e-05, "loss": 0.5852, "step": 10653 }, { "epoch": 0.29134762633996936, "grad_norm": 1.2637431621551514, "learning_rate": 1.662765604872768e-05, "loss": 0.5483, "step": 10654 }, { "epoch": 0.2913749726536863, "grad_norm": 1.2623971700668335, "learning_rate": 1.6626992783616587e-05, "loss": 0.573, "step": 10655 }, { "epoch": 0.2914023189674032, "grad_norm": 1.5370374917984009, "learning_rate": 1.66263294665186e-05, "loss": 0.5882, "step": 10656 }, { "epoch": 0.29142966528112013, "grad_norm": 1.5514110326766968, "learning_rate": 1.6625666097438922e-05, "loss": 0.5801, "step": 10657 }, { "epoch": 0.291457011594837, "grad_norm": 1.374186396598816, "learning_rate": 1.662500267638276e-05, "loss": 0.555, "step": 10658 }, { "epoch": 0.2914843579085539, "grad_norm": 1.2196987867355347, "learning_rate": 1.662433920335532e-05, "loss": 0.569, "step": 10659 }, { "epoch": 0.29151170422227085, "grad_norm": 10.237060546875, "learning_rate": 1.6623675678361796e-05, "loss": 0.8779, "step": 10660 }, { "epoch": 0.2915390505359878, "grad_norm": 1.6200251579284668, "learning_rate": 1.6623012101407407e-05, "loss": 0.5064, "step": 10661 }, { "epoch": 0.29156639684970465, "grad_norm": 1.3213489055633545, "learning_rate": 1.6622348472497346e-05, "loss": 0.5538, "step": 10662 }, { "epoch": 0.29159374316342157, "grad_norm": 1.6086008548736572, "learning_rate": 1.662168479163683e-05, "loss": 0.5242, "step": 10663 }, { "epoch": 0.2916210894771385, "grad_norm": 1.541509985923767, "learning_rate": 1.662102105883106e-05, "loss": 0.554, "step": 10664 }, { "epoch": 0.2916484357908554, "grad_norm": 1.4060168266296387, "learning_rate": 1.662035727408524e-05, "loss": 0.5675, "step": 10665 }, { "epoch": 0.2916757821045723, "grad_norm": 1.0496782064437866, "learning_rate": 1.6619693437404582e-05, "loss": 0.5149, "step": 10666 }, { "epoch": 0.2917031284182892, "grad_norm": 1.4926857948303223, "learning_rate": 1.661902954879429e-05, "loss": 0.579, "step": 10667 }, { "epoch": 0.29173047473200614, "grad_norm": 1.5947290658950806, "learning_rate": 1.6618365608259575e-05, "loss": 0.569, "step": 10668 }, { "epoch": 0.291757821045723, "grad_norm": 1.3495466709136963, "learning_rate": 1.6617701615805642e-05, "loss": 0.9296, "step": 10669 }, { "epoch": 0.29178516735943993, "grad_norm": 1.326930284500122, "learning_rate": 1.6617037571437708e-05, "loss": 0.5342, "step": 10670 }, { "epoch": 0.29181251367315686, "grad_norm": 1.5823426246643066, "learning_rate": 1.6616373475160972e-05, "loss": 0.5753, "step": 10671 }, { "epoch": 0.2918398599868738, "grad_norm": 1.7025961875915527, "learning_rate": 1.661570932698065e-05, "loss": 0.5641, "step": 10672 }, { "epoch": 0.29186720630059065, "grad_norm": 1.3911340236663818, "learning_rate": 1.661504512690195e-05, "loss": 0.5243, "step": 10673 }, { "epoch": 0.2918945526143076, "grad_norm": 1.6407124996185303, "learning_rate": 1.661438087493008e-05, "loss": 0.552, "step": 10674 }, { "epoch": 0.2919218989280245, "grad_norm": 1.3387534618377686, "learning_rate": 1.6613716571070257e-05, "loss": 0.3842, "step": 10675 }, { "epoch": 0.29194924524174143, "grad_norm": 1.2531875371932983, "learning_rate": 1.661305221532769e-05, "loss": 0.5513, "step": 10676 }, { "epoch": 0.2919765915554583, "grad_norm": 1.4009792804718018, "learning_rate": 1.6612387807707584e-05, "loss": 0.5424, "step": 10677 }, { "epoch": 0.2920039378691752, "grad_norm": 1.4406239986419678, "learning_rate": 1.6611723348215162e-05, "loss": 0.4009, "step": 10678 }, { "epoch": 0.29203128418289215, "grad_norm": 1.2163945436477661, "learning_rate": 1.6611058836855626e-05, "loss": 0.5392, "step": 10679 }, { "epoch": 0.2920586304966091, "grad_norm": 1.2510982751846313, "learning_rate": 1.66103942736342e-05, "loss": 0.5363, "step": 10680 }, { "epoch": 0.29208597681032594, "grad_norm": 1.3624238967895508, "learning_rate": 1.6609729658556086e-05, "loss": 0.58, "step": 10681 }, { "epoch": 0.29211332312404287, "grad_norm": 1.2487843036651611, "learning_rate": 1.6609064991626504e-05, "loss": 0.5709, "step": 10682 }, { "epoch": 0.2921406694377598, "grad_norm": 1.1966639757156372, "learning_rate": 1.6608400272850666e-05, "loss": 0.5705, "step": 10683 }, { "epoch": 0.2921680157514767, "grad_norm": 1.8166431188583374, "learning_rate": 1.660773550223379e-05, "loss": 0.5802, "step": 10684 }, { "epoch": 0.2921953620651936, "grad_norm": 1.1376925706863403, "learning_rate": 1.660707067978109e-05, "loss": 0.5743, "step": 10685 }, { "epoch": 0.2922227083789105, "grad_norm": 1.185102939605713, "learning_rate": 1.6606405805497776e-05, "loss": 0.5405, "step": 10686 }, { "epoch": 0.29225005469262744, "grad_norm": 1.2434098720550537, "learning_rate": 1.660574087938907e-05, "loss": 0.5208, "step": 10687 }, { "epoch": 0.29227740100634436, "grad_norm": 1.5641295909881592, "learning_rate": 1.6605075901460184e-05, "loss": 0.491, "step": 10688 }, { "epoch": 0.29230474732006123, "grad_norm": 1.3707280158996582, "learning_rate": 1.6604410871716338e-05, "loss": 0.558, "step": 10689 }, { "epoch": 0.29233209363377816, "grad_norm": 8.043241500854492, "learning_rate": 1.660374579016275e-05, "loss": 0.4401, "step": 10690 }, { "epoch": 0.2923594399474951, "grad_norm": 1.6640946865081787, "learning_rate": 1.660308065680463e-05, "loss": 0.4848, "step": 10691 }, { "epoch": 0.292386786261212, "grad_norm": 1.8744601011276245, "learning_rate": 1.6602415471647203e-05, "loss": 0.4678, "step": 10692 }, { "epoch": 0.2924141325749289, "grad_norm": 1.2435811758041382, "learning_rate": 1.6601750234695683e-05, "loss": 0.4904, "step": 10693 }, { "epoch": 0.2924414788886458, "grad_norm": 1.183467149734497, "learning_rate": 1.660108494595529e-05, "loss": 0.5503, "step": 10694 }, { "epoch": 0.2924688252023627, "grad_norm": 1.2475292682647705, "learning_rate": 1.6600419605431246e-05, "loss": 0.5654, "step": 10695 }, { "epoch": 0.29249617151607965, "grad_norm": 1.1461291313171387, "learning_rate": 1.6599754213128764e-05, "loss": 0.4756, "step": 10696 }, { "epoch": 0.2925235178297965, "grad_norm": 1.3070498704910278, "learning_rate": 1.6599088769053074e-05, "loss": 0.5612, "step": 10697 }, { "epoch": 0.29255086414351344, "grad_norm": 1.6322590112686157, "learning_rate": 1.6598423273209383e-05, "loss": 0.5907, "step": 10698 }, { "epoch": 0.29257821045723037, "grad_norm": 1.316680669784546, "learning_rate": 1.6597757725602922e-05, "loss": 0.5571, "step": 10699 }, { "epoch": 0.2926055567709473, "grad_norm": 1.1194735765457153, "learning_rate": 1.6597092126238906e-05, "loss": 0.5628, "step": 10700 }, { "epoch": 0.29263290308466416, "grad_norm": 1.77286958694458, "learning_rate": 1.6596426475122563e-05, "loss": 0.5458, "step": 10701 }, { "epoch": 0.2926602493983811, "grad_norm": 1.1425046920776367, "learning_rate": 1.6595760772259107e-05, "loss": 0.5101, "step": 10702 }, { "epoch": 0.292687595712098, "grad_norm": 1.3616087436676025, "learning_rate": 1.6595095017653765e-05, "loss": 0.59, "step": 10703 }, { "epoch": 0.29271494202581494, "grad_norm": 1.1816328763961792, "learning_rate": 1.6594429211311756e-05, "loss": 0.5499, "step": 10704 }, { "epoch": 0.2927422883395318, "grad_norm": 1.6404231786727905, "learning_rate": 1.659376335323831e-05, "loss": 0.5756, "step": 10705 }, { "epoch": 0.29276963465324873, "grad_norm": 1.6222494840621948, "learning_rate": 1.6593097443438645e-05, "loss": 0.6037, "step": 10706 }, { "epoch": 0.29279698096696566, "grad_norm": 1.2921321392059326, "learning_rate": 1.6592431481917985e-05, "loss": 0.4685, "step": 10707 }, { "epoch": 0.2928243272806826, "grad_norm": 1.1917271614074707, "learning_rate": 1.6591765468681555e-05, "loss": 0.8892, "step": 10708 }, { "epoch": 0.29285167359439945, "grad_norm": 1.260805368423462, "learning_rate": 1.659109940373458e-05, "loss": 0.5745, "step": 10709 }, { "epoch": 0.2928790199081164, "grad_norm": 1.2745734453201294, "learning_rate": 1.6590433287082285e-05, "loss": 0.8616, "step": 10710 }, { "epoch": 0.2929063662218333, "grad_norm": 1.1194349527359009, "learning_rate": 1.6589767118729897e-05, "loss": 0.554, "step": 10711 }, { "epoch": 0.29293371253555023, "grad_norm": 1.2184333801269531, "learning_rate": 1.6589100898682638e-05, "loss": 0.5431, "step": 10712 }, { "epoch": 0.2929610588492671, "grad_norm": 1.2559597492218018, "learning_rate": 1.658843462694574e-05, "loss": 0.5605, "step": 10713 }, { "epoch": 0.292988405162984, "grad_norm": 1.051541805267334, "learning_rate": 1.6587768303524424e-05, "loss": 0.4197, "step": 10714 }, { "epoch": 0.29301575147670095, "grad_norm": 1.2679520845413208, "learning_rate": 1.658710192842392e-05, "loss": 0.8745, "step": 10715 }, { "epoch": 0.2930430977904179, "grad_norm": 1.209001898765564, "learning_rate": 1.658643550164946e-05, "loss": 0.5457, "step": 10716 }, { "epoch": 0.29307044410413474, "grad_norm": 1.3971139192581177, "learning_rate": 1.658576902320626e-05, "loss": 0.841, "step": 10717 }, { "epoch": 0.29309779041785167, "grad_norm": 1.4411166906356812, "learning_rate": 1.658510249309956e-05, "loss": 0.554, "step": 10718 }, { "epoch": 0.2931251367315686, "grad_norm": 1.4594740867614746, "learning_rate": 1.658443591133458e-05, "loss": 0.5353, "step": 10719 }, { "epoch": 0.2931524830452855, "grad_norm": 1.1237775087356567, "learning_rate": 1.6583769277916555e-05, "loss": 0.5522, "step": 10720 }, { "epoch": 0.2931798293590024, "grad_norm": 1.9263160228729248, "learning_rate": 1.6583102592850716e-05, "loss": 0.88, "step": 10721 }, { "epoch": 0.2932071756727193, "grad_norm": 1.268813133239746, "learning_rate": 1.6582435856142287e-05, "loss": 0.5253, "step": 10722 }, { "epoch": 0.29323452198643624, "grad_norm": 1.5030314922332764, "learning_rate": 1.65817690677965e-05, "loss": 0.4943, "step": 10723 }, { "epoch": 0.29326186830015316, "grad_norm": 1.3160467147827148, "learning_rate": 1.658110222781859e-05, "loss": 0.5696, "step": 10724 }, { "epoch": 0.29328921461387003, "grad_norm": 1.2651631832122803, "learning_rate": 1.6580435336213784e-05, "loss": 0.5364, "step": 10725 }, { "epoch": 0.29331656092758696, "grad_norm": 1.2993581295013428, "learning_rate": 1.657976839298731e-05, "loss": 0.5355, "step": 10726 }, { "epoch": 0.2933439072413039, "grad_norm": 1.2926472425460815, "learning_rate": 1.6579101398144413e-05, "loss": 0.5829, "step": 10727 }, { "epoch": 0.2933712535550208, "grad_norm": 1.294622778892517, "learning_rate": 1.6578434351690313e-05, "loss": 0.5459, "step": 10728 }, { "epoch": 0.2933985998687377, "grad_norm": 1.262956976890564, "learning_rate": 1.6577767253630245e-05, "loss": 0.5482, "step": 10729 }, { "epoch": 0.2934259461824546, "grad_norm": 1.2189356088638306, "learning_rate": 1.6577100103969446e-05, "loss": 0.5601, "step": 10730 }, { "epoch": 0.2934532924961715, "grad_norm": 1.351679801940918, "learning_rate": 1.6576432902713146e-05, "loss": 0.5384, "step": 10731 }, { "epoch": 0.29348063880988845, "grad_norm": 1.4017527103424072, "learning_rate": 1.6575765649866587e-05, "loss": 0.4373, "step": 10732 }, { "epoch": 0.2935079851236053, "grad_norm": 1.1807315349578857, "learning_rate": 1.657509834543499e-05, "loss": 0.5717, "step": 10733 }, { "epoch": 0.29353533143732224, "grad_norm": 1.5273070335388184, "learning_rate": 1.65744309894236e-05, "loss": 0.8565, "step": 10734 }, { "epoch": 0.29356267775103917, "grad_norm": 1.3135253190994263, "learning_rate": 1.6573763581837647e-05, "loss": 0.5558, "step": 10735 }, { "epoch": 0.2935900240647561, "grad_norm": 1.0645174980163574, "learning_rate": 1.657309612268237e-05, "loss": 0.5421, "step": 10736 }, { "epoch": 0.29361737037847296, "grad_norm": 1.1492300033569336, "learning_rate": 1.6572428611963002e-05, "loss": 0.5603, "step": 10737 }, { "epoch": 0.2936447166921899, "grad_norm": 1.5795989036560059, "learning_rate": 1.6571761049684784e-05, "loss": 0.5908, "step": 10738 }, { "epoch": 0.2936720630059068, "grad_norm": 1.2550585269927979, "learning_rate": 1.6571093435852948e-05, "loss": 0.5569, "step": 10739 }, { "epoch": 0.29369940931962374, "grad_norm": 0.9963013529777527, "learning_rate": 1.6570425770472734e-05, "loss": 0.5456, "step": 10740 }, { "epoch": 0.2937267556333406, "grad_norm": 1.4778050184249878, "learning_rate": 1.656975805354938e-05, "loss": 0.5039, "step": 10741 }, { "epoch": 0.29375410194705753, "grad_norm": 1.453424334526062, "learning_rate": 1.656909028508812e-05, "loss": 0.5083, "step": 10742 }, { "epoch": 0.29378144826077446, "grad_norm": 1.3236085176467896, "learning_rate": 1.65684224650942e-05, "loss": 0.5252, "step": 10743 }, { "epoch": 0.2938087945744914, "grad_norm": 1.3360984325408936, "learning_rate": 1.6567754593572847e-05, "loss": 0.5448, "step": 10744 }, { "epoch": 0.29383614088820825, "grad_norm": 1.28253173828125, "learning_rate": 1.6567086670529314e-05, "loss": 0.5378, "step": 10745 }, { "epoch": 0.2938634872019252, "grad_norm": 1.1681110858917236, "learning_rate": 1.656641869596883e-05, "loss": 0.5554, "step": 10746 }, { "epoch": 0.2938908335156421, "grad_norm": 1.2808995246887207, "learning_rate": 1.6565750669896643e-05, "loss": 0.5163, "step": 10747 }, { "epoch": 0.293918179829359, "grad_norm": 1.4685025215148926, "learning_rate": 1.656508259231799e-05, "loss": 0.5476, "step": 10748 }, { "epoch": 0.2939455261430759, "grad_norm": 1.1428260803222656, "learning_rate": 1.656441446323811e-05, "loss": 0.5312, "step": 10749 }, { "epoch": 0.2939728724567928, "grad_norm": 1.3624744415283203, "learning_rate": 1.6563746282662248e-05, "loss": 0.5699, "step": 10750 }, { "epoch": 0.29400021877050975, "grad_norm": 1.6186747550964355, "learning_rate": 1.656307805059564e-05, "loss": 0.5612, "step": 10751 }, { "epoch": 0.29402756508422667, "grad_norm": 1.8958768844604492, "learning_rate": 1.6562409767043534e-05, "loss": 0.4245, "step": 10752 }, { "epoch": 0.29405491139794354, "grad_norm": 1.2401676177978516, "learning_rate": 1.656174143201117e-05, "loss": 0.5482, "step": 10753 }, { "epoch": 0.29408225771166047, "grad_norm": 2.0273704528808594, "learning_rate": 1.6561073045503788e-05, "loss": 0.4344, "step": 10754 }, { "epoch": 0.2941096040253774, "grad_norm": 1.8599915504455566, "learning_rate": 1.656040460752664e-05, "loss": 0.5965, "step": 10755 }, { "epoch": 0.2941369503390943, "grad_norm": 1.8260819911956787, "learning_rate": 1.655973611808496e-05, "loss": 0.5716, "step": 10756 }, { "epoch": 0.2941642966528112, "grad_norm": 1.7452325820922852, "learning_rate": 1.6559067577183997e-05, "loss": 0.8813, "step": 10757 }, { "epoch": 0.2941916429665281, "grad_norm": 1.2285397052764893, "learning_rate": 1.6558398984828997e-05, "loss": 0.539, "step": 10758 }, { "epoch": 0.29421898928024504, "grad_norm": 1.435985803604126, "learning_rate": 1.65577303410252e-05, "loss": 0.5726, "step": 10759 }, { "epoch": 0.29424633559396196, "grad_norm": 1.5926003456115723, "learning_rate": 1.6557061645777852e-05, "loss": 0.55, "step": 10760 }, { "epoch": 0.29427368190767883, "grad_norm": 1.224678874015808, "learning_rate": 1.6556392899092203e-05, "loss": 0.5351, "step": 10761 }, { "epoch": 0.29430102822139576, "grad_norm": 1.6582238674163818, "learning_rate": 1.6555724100973497e-05, "loss": 0.5587, "step": 10762 }, { "epoch": 0.2943283745351127, "grad_norm": 1.2112208604812622, "learning_rate": 1.6555055251426982e-05, "loss": 0.5428, "step": 10763 }, { "epoch": 0.2943557208488296, "grad_norm": 1.1425117254257202, "learning_rate": 1.65543863504579e-05, "loss": 0.5718, "step": 10764 }, { "epoch": 0.2943830671625465, "grad_norm": 1.110718846321106, "learning_rate": 1.6553717398071506e-05, "loss": 0.5294, "step": 10765 }, { "epoch": 0.2944104134762634, "grad_norm": 1.6011426448822021, "learning_rate": 1.6553048394273037e-05, "loss": 0.5887, "step": 10766 }, { "epoch": 0.2944377597899803, "grad_norm": 1.2205383777618408, "learning_rate": 1.6552379339067753e-05, "loss": 0.5552, "step": 10767 }, { "epoch": 0.2944651061036972, "grad_norm": 1.4045201539993286, "learning_rate": 1.6551710232460898e-05, "loss": 0.3961, "step": 10768 }, { "epoch": 0.2944924524174141, "grad_norm": 1.4426052570343018, "learning_rate": 1.6551041074457716e-05, "loss": 0.6036, "step": 10769 }, { "epoch": 0.29451979873113104, "grad_norm": 2.0027079582214355, "learning_rate": 1.6550371865063464e-05, "loss": 0.5663, "step": 10770 }, { "epoch": 0.29454714504484797, "grad_norm": 1.3702374696731567, "learning_rate": 1.6549702604283385e-05, "loss": 0.5352, "step": 10771 }, { "epoch": 0.29457449135856484, "grad_norm": 1.1097558736801147, "learning_rate": 1.6549033292122735e-05, "loss": 0.5163, "step": 10772 }, { "epoch": 0.29460183767228176, "grad_norm": 1.364200472831726, "learning_rate": 1.6548363928586758e-05, "loss": 0.4411, "step": 10773 }, { "epoch": 0.2946291839859987, "grad_norm": 1.263552188873291, "learning_rate": 1.6547694513680713e-05, "loss": 0.5421, "step": 10774 }, { "epoch": 0.2946565302997156, "grad_norm": 1.2088013887405396, "learning_rate": 1.6547025047409845e-05, "loss": 0.5401, "step": 10775 }, { "epoch": 0.2946838766134325, "grad_norm": 2.0898854732513428, "learning_rate": 1.6546355529779408e-05, "loss": 0.8887, "step": 10776 }, { "epoch": 0.2947112229271494, "grad_norm": 1.4018700122833252, "learning_rate": 1.654568596079465e-05, "loss": 0.5375, "step": 10777 }, { "epoch": 0.29473856924086633, "grad_norm": 1.6130287647247314, "learning_rate": 1.6545016340460836e-05, "loss": 0.5287, "step": 10778 }, { "epoch": 0.29476591555458326, "grad_norm": 2.3092174530029297, "learning_rate": 1.6544346668783208e-05, "loss": 0.5625, "step": 10779 }, { "epoch": 0.2947932618683001, "grad_norm": 1.2811291217803955, "learning_rate": 1.654367694576702e-05, "loss": 0.5411, "step": 10780 }, { "epoch": 0.29482060818201705, "grad_norm": 1.312915563583374, "learning_rate": 1.654300717141753e-05, "loss": 0.5327, "step": 10781 }, { "epoch": 0.294847954495734, "grad_norm": 1.5153905153274536, "learning_rate": 1.6542337345739988e-05, "loss": 0.4986, "step": 10782 }, { "epoch": 0.2948753008094509, "grad_norm": 1.4215444326400757, "learning_rate": 1.6541667468739652e-05, "loss": 0.5263, "step": 10783 }, { "epoch": 0.29490264712316777, "grad_norm": 1.3897442817687988, "learning_rate": 1.6540997540421777e-05, "loss": 0.5355, "step": 10784 }, { "epoch": 0.2949299934368847, "grad_norm": 1.3037444353103638, "learning_rate": 1.6540327560791616e-05, "loss": 0.5286, "step": 10785 }, { "epoch": 0.2949573397506016, "grad_norm": 1.4162204265594482, "learning_rate": 1.6539657529854426e-05, "loss": 0.5489, "step": 10786 }, { "epoch": 0.29498468606431855, "grad_norm": 1.4561209678649902, "learning_rate": 1.6538987447615462e-05, "loss": 0.5548, "step": 10787 }, { "epoch": 0.2950120323780354, "grad_norm": 1.434470295906067, "learning_rate": 1.653831731407998e-05, "loss": 0.5577, "step": 10788 }, { "epoch": 0.29503937869175234, "grad_norm": 1.3625884056091309, "learning_rate": 1.6537647129253244e-05, "loss": 0.525, "step": 10789 }, { "epoch": 0.29506672500546927, "grad_norm": 1.2992500066757202, "learning_rate": 1.65369768931405e-05, "loss": 0.542, "step": 10790 }, { "epoch": 0.2950940713191862, "grad_norm": 1.2598886489868164, "learning_rate": 1.653630660574702e-05, "loss": 0.5381, "step": 10791 }, { "epoch": 0.29512141763290306, "grad_norm": 2.1860721111297607, "learning_rate": 1.6535636267078047e-05, "loss": 0.4827, "step": 10792 }, { "epoch": 0.29514876394662, "grad_norm": 1.398563265800476, "learning_rate": 1.6534965877138847e-05, "loss": 0.5672, "step": 10793 }, { "epoch": 0.2951761102603369, "grad_norm": 1.386193871498108, "learning_rate": 1.6534295435934683e-05, "loss": 0.5326, "step": 10794 }, { "epoch": 0.29520345657405384, "grad_norm": 1.5452628135681152, "learning_rate": 1.653362494347081e-05, "loss": 0.5562, "step": 10795 }, { "epoch": 0.2952308028877707, "grad_norm": 1.4842782020568848, "learning_rate": 1.6532954399752483e-05, "loss": 0.5883, "step": 10796 }, { "epoch": 0.29525814920148763, "grad_norm": 1.8488872051239014, "learning_rate": 1.653228380478497e-05, "loss": 0.88, "step": 10797 }, { "epoch": 0.29528549551520455, "grad_norm": 1.101341724395752, "learning_rate": 1.653161315857353e-05, "loss": 0.5421, "step": 10798 }, { "epoch": 0.2953128418289215, "grad_norm": 1.543434977531433, "learning_rate": 1.653094246112342e-05, "loss": 0.5708, "step": 10799 }, { "epoch": 0.29534018814263835, "grad_norm": 1.1481698751449585, "learning_rate": 1.653027171243991e-05, "loss": 0.5292, "step": 10800 }, { "epoch": 0.2953675344563553, "grad_norm": 1.2864091396331787, "learning_rate": 1.652960091252825e-05, "loss": 0.5298, "step": 10801 }, { "epoch": 0.2953948807700722, "grad_norm": 1.8213595151901245, "learning_rate": 1.652893006139371e-05, "loss": 0.5504, "step": 10802 }, { "epoch": 0.2954222270837891, "grad_norm": 2.0167322158813477, "learning_rate": 1.6528259159041556e-05, "loss": 0.4055, "step": 10803 }, { "epoch": 0.295449573397506, "grad_norm": 2.168727397918701, "learning_rate": 1.652758820547704e-05, "loss": 0.4129, "step": 10804 }, { "epoch": 0.2954769197112229, "grad_norm": 1.3265488147735596, "learning_rate": 1.652691720070544e-05, "loss": 0.5355, "step": 10805 }, { "epoch": 0.29550426602493984, "grad_norm": 1.4209516048431396, "learning_rate": 1.6526246144732e-05, "loss": 0.5764, "step": 10806 }, { "epoch": 0.29553161233865677, "grad_norm": 1.2916233539581299, "learning_rate": 1.6525575037562e-05, "loss": 0.5718, "step": 10807 }, { "epoch": 0.29555895865237364, "grad_norm": 1.3336793184280396, "learning_rate": 1.6524903879200704e-05, "loss": 0.5507, "step": 10808 }, { "epoch": 0.29558630496609056, "grad_norm": 1.3783786296844482, "learning_rate": 1.652423266965337e-05, "loss": 0.5486, "step": 10809 }, { "epoch": 0.2956136512798075, "grad_norm": 1.192344069480896, "learning_rate": 1.6523561408925266e-05, "loss": 0.5383, "step": 10810 }, { "epoch": 0.2956409975935244, "grad_norm": 1.5719777345657349, "learning_rate": 1.652289009702166e-05, "loss": 0.5821, "step": 10811 }, { "epoch": 0.2956683439072413, "grad_norm": 1.4508389234542847, "learning_rate": 1.6522218733947816e-05, "loss": 0.5456, "step": 10812 }, { "epoch": 0.2956956902209582, "grad_norm": 1.416597843170166, "learning_rate": 1.6521547319709004e-05, "loss": 0.5708, "step": 10813 }, { "epoch": 0.29572303653467513, "grad_norm": 1.3304044008255005, "learning_rate": 1.6520875854310486e-05, "loss": 0.5239, "step": 10814 }, { "epoch": 0.29575038284839206, "grad_norm": 1.2464871406555176, "learning_rate": 1.6520204337757534e-05, "loss": 0.5346, "step": 10815 }, { "epoch": 0.2957777291621089, "grad_norm": 1.4165254831314087, "learning_rate": 1.651953277005541e-05, "loss": 0.5199, "step": 10816 }, { "epoch": 0.29580507547582585, "grad_norm": 4.350482940673828, "learning_rate": 1.651886115120939e-05, "loss": 0.4332, "step": 10817 }, { "epoch": 0.2958324217895428, "grad_norm": 2.269085168838501, "learning_rate": 1.651818948122474e-05, "loss": 0.9101, "step": 10818 }, { "epoch": 0.2958597681032597, "grad_norm": 1.2975276708602905, "learning_rate": 1.6517517760106725e-05, "loss": 0.5535, "step": 10819 }, { "epoch": 0.29588711441697657, "grad_norm": 1.4126712083816528, "learning_rate": 1.651684598786062e-05, "loss": 0.5729, "step": 10820 }, { "epoch": 0.2959144607306935, "grad_norm": 1.4164725542068481, "learning_rate": 1.651617416449169e-05, "loss": 0.5395, "step": 10821 }, { "epoch": 0.2959418070444104, "grad_norm": 1.667287826538086, "learning_rate": 1.6515502290005206e-05, "loss": 0.5148, "step": 10822 }, { "epoch": 0.29596915335812735, "grad_norm": 2.49621844291687, "learning_rate": 1.651483036440644e-05, "loss": 0.5486, "step": 10823 }, { "epoch": 0.2959964996718442, "grad_norm": 1.5746551752090454, "learning_rate": 1.651415838770067e-05, "loss": 0.5296, "step": 10824 }, { "epoch": 0.29602384598556114, "grad_norm": 1.3821227550506592, "learning_rate": 1.6513486359893154e-05, "loss": 0.5355, "step": 10825 }, { "epoch": 0.29605119229927807, "grad_norm": 11.726079940795898, "learning_rate": 1.6512814280989174e-05, "loss": 0.9302, "step": 10826 }, { "epoch": 0.296078538612995, "grad_norm": 1.251147985458374, "learning_rate": 1.6512142150993996e-05, "loss": 0.5841, "step": 10827 }, { "epoch": 0.29610588492671186, "grad_norm": 1.7726435661315918, "learning_rate": 1.65114699699129e-05, "loss": 0.8653, "step": 10828 }, { "epoch": 0.2961332312404288, "grad_norm": 1.2502361536026, "learning_rate": 1.6510797737751148e-05, "loss": 0.5168, "step": 10829 }, { "epoch": 0.2961605775541457, "grad_norm": 1.5045522451400757, "learning_rate": 1.6510125454514025e-05, "loss": 0.4964, "step": 10830 }, { "epoch": 0.29618792386786263, "grad_norm": 2.8718318939208984, "learning_rate": 1.65094531202068e-05, "loss": 0.5706, "step": 10831 }, { "epoch": 0.2962152701815795, "grad_norm": 2.121948480606079, "learning_rate": 1.650878073483474e-05, "loss": 0.5215, "step": 10832 }, { "epoch": 0.29624261649529643, "grad_norm": 1.2050788402557373, "learning_rate": 1.6508108298403135e-05, "loss": 0.5612, "step": 10833 }, { "epoch": 0.29626996280901335, "grad_norm": 1.425260305404663, "learning_rate": 1.650743581091725e-05, "loss": 0.5365, "step": 10834 }, { "epoch": 0.2962973091227303, "grad_norm": 1.3742461204528809, "learning_rate": 1.650676327238236e-05, "loss": 0.5001, "step": 10835 }, { "epoch": 0.29632465543644715, "grad_norm": 1.4356838464736938, "learning_rate": 1.6506090682803745e-05, "loss": 0.4865, "step": 10836 }, { "epoch": 0.2963520017501641, "grad_norm": 1.6235121488571167, "learning_rate": 1.650541804218668e-05, "loss": 0.5605, "step": 10837 }, { "epoch": 0.296379348063881, "grad_norm": 1.5414841175079346, "learning_rate": 1.650474535053644e-05, "loss": 0.5892, "step": 10838 }, { "epoch": 0.2964066943775979, "grad_norm": 1.6318529844284058, "learning_rate": 1.6504072607858307e-05, "loss": 0.5396, "step": 10839 }, { "epoch": 0.2964340406913148, "grad_norm": 1.277346134185791, "learning_rate": 1.650339981415755e-05, "loss": 0.5504, "step": 10840 }, { "epoch": 0.2964613870050317, "grad_norm": 1.3337559700012207, "learning_rate": 1.6502726969439455e-05, "loss": 0.5319, "step": 10841 }, { "epoch": 0.29648873331874864, "grad_norm": 1.3803515434265137, "learning_rate": 1.6502054073709298e-05, "loss": 0.5511, "step": 10842 }, { "epoch": 0.29651607963246557, "grad_norm": 1.1722921133041382, "learning_rate": 1.6501381126972352e-05, "loss": 0.5763, "step": 10843 }, { "epoch": 0.29654342594618244, "grad_norm": 1.325094223022461, "learning_rate": 1.6500708129233905e-05, "loss": 0.5332, "step": 10844 }, { "epoch": 0.29657077225989936, "grad_norm": 1.976484775543213, "learning_rate": 1.650003508049923e-05, "loss": 0.5424, "step": 10845 }, { "epoch": 0.2965981185736163, "grad_norm": 1.6486555337905884, "learning_rate": 1.649936198077361e-05, "loss": 0.8653, "step": 10846 }, { "epoch": 0.2966254648873332, "grad_norm": 1.32275390625, "learning_rate": 1.6498688830062323e-05, "loss": 0.5632, "step": 10847 }, { "epoch": 0.2966528112010501, "grad_norm": 1.704854130744934, "learning_rate": 1.649801562837065e-05, "loss": 0.5535, "step": 10848 }, { "epoch": 0.296680157514767, "grad_norm": 1.5132821798324585, "learning_rate": 1.6497342375703876e-05, "loss": 0.5433, "step": 10849 }, { "epoch": 0.29670750382848393, "grad_norm": 1.6103535890579224, "learning_rate": 1.6496669072067277e-05, "loss": 0.5472, "step": 10850 }, { "epoch": 0.29673485014220086, "grad_norm": 1.7578948736190796, "learning_rate": 1.649599571746614e-05, "loss": 0.464, "step": 10851 }, { "epoch": 0.2967621964559177, "grad_norm": 1.4074163436889648, "learning_rate": 1.6495322311905742e-05, "loss": 0.5037, "step": 10852 }, { "epoch": 0.29678954276963465, "grad_norm": 1.3986701965332031, "learning_rate": 1.649464885539137e-05, "loss": 0.5577, "step": 10853 }, { "epoch": 0.2968168890833516, "grad_norm": 1.3263888359069824, "learning_rate": 1.6493975347928307e-05, "loss": 0.5802, "step": 10854 }, { "epoch": 0.2968442353970685, "grad_norm": 2.2162044048309326, "learning_rate": 1.6493301789521832e-05, "loss": 0.882, "step": 10855 }, { "epoch": 0.29687158171078537, "grad_norm": 1.2136801481246948, "learning_rate": 1.6492628180177235e-05, "loss": 0.5552, "step": 10856 }, { "epoch": 0.2968989280245023, "grad_norm": 1.4354915618896484, "learning_rate": 1.6491954519899795e-05, "loss": 0.4973, "step": 10857 }, { "epoch": 0.2969262743382192, "grad_norm": 1.2884302139282227, "learning_rate": 1.6491280808694797e-05, "loss": 0.5272, "step": 10858 }, { "epoch": 0.29695362065193615, "grad_norm": 1.554856777191162, "learning_rate": 1.649060704656753e-05, "loss": 0.5518, "step": 10859 }, { "epoch": 0.296980966965653, "grad_norm": 1.2775312662124634, "learning_rate": 1.648993323352328e-05, "loss": 0.5486, "step": 10860 }, { "epoch": 0.29700831327936994, "grad_norm": 1.940763235092163, "learning_rate": 1.6489259369567325e-05, "loss": 0.5724, "step": 10861 }, { "epoch": 0.29703565959308686, "grad_norm": 1.421979546546936, "learning_rate": 1.648858545470496e-05, "loss": 0.8473, "step": 10862 }, { "epoch": 0.2970630059068038, "grad_norm": 1.2531782388687134, "learning_rate": 1.6487911488941466e-05, "loss": 0.5673, "step": 10863 }, { "epoch": 0.29709035222052066, "grad_norm": 1.490623950958252, "learning_rate": 1.6487237472282134e-05, "loss": 0.4863, "step": 10864 }, { "epoch": 0.2971176985342376, "grad_norm": 1.551571249961853, "learning_rate": 1.648656340473225e-05, "loss": 0.5015, "step": 10865 }, { "epoch": 0.2971450448479545, "grad_norm": 1.3965498208999634, "learning_rate": 1.64858892862971e-05, "loss": 0.8554, "step": 10866 }, { "epoch": 0.29717239116167143, "grad_norm": 2.0211386680603027, "learning_rate": 1.6485215116981972e-05, "loss": 0.4628, "step": 10867 }, { "epoch": 0.2971997374753883, "grad_norm": 1.7159916162490845, "learning_rate": 1.648454089679216e-05, "loss": 0.3834, "step": 10868 }, { "epoch": 0.29722708378910523, "grad_norm": 1.8950114250183105, "learning_rate": 1.648386662573295e-05, "loss": 0.5276, "step": 10869 }, { "epoch": 0.29725443010282215, "grad_norm": 1.3378335237503052, "learning_rate": 1.6483192303809627e-05, "loss": 0.59, "step": 10870 }, { "epoch": 0.297281776416539, "grad_norm": 1.6075266599655151, "learning_rate": 1.648251793102749e-05, "loss": 0.5327, "step": 10871 }, { "epoch": 0.29730912273025595, "grad_norm": 1.3876975774765015, "learning_rate": 1.648184350739182e-05, "loss": 0.4792, "step": 10872 }, { "epoch": 0.2973364690439729, "grad_norm": 1.2498239278793335, "learning_rate": 1.6481169032907917e-05, "loss": 0.5332, "step": 10873 }, { "epoch": 0.2973638153576898, "grad_norm": 1.9911307096481323, "learning_rate": 1.648049450758106e-05, "loss": 0.5758, "step": 10874 }, { "epoch": 0.29739116167140667, "grad_norm": 1.514870285987854, "learning_rate": 1.647981993141655e-05, "loss": 0.571, "step": 10875 }, { "epoch": 0.2974185079851236, "grad_norm": 1.4030417203903198, "learning_rate": 1.647914530441968e-05, "loss": 0.4543, "step": 10876 }, { "epoch": 0.2974458542988405, "grad_norm": 1.9220716953277588, "learning_rate": 1.6478470626595732e-05, "loss": 0.5777, "step": 10877 }, { "epoch": 0.29747320061255744, "grad_norm": 1.5637774467468262, "learning_rate": 1.647779589795001e-05, "loss": 0.4831, "step": 10878 }, { "epoch": 0.2975005469262743, "grad_norm": 1.358088493347168, "learning_rate": 1.64771211184878e-05, "loss": 0.5386, "step": 10879 }, { "epoch": 0.29752789323999124, "grad_norm": 1.4329783916473389, "learning_rate": 1.6476446288214398e-05, "loss": 0.5683, "step": 10880 }, { "epoch": 0.29755523955370816, "grad_norm": 1.6333526372909546, "learning_rate": 1.64757714071351e-05, "loss": 0.8686, "step": 10881 }, { "epoch": 0.2975825858674251, "grad_norm": 1.4561065435409546, "learning_rate": 1.6475096475255194e-05, "loss": 0.8701, "step": 10882 }, { "epoch": 0.29760993218114196, "grad_norm": 1.519486904144287, "learning_rate": 1.647442149257998e-05, "loss": 0.5856, "step": 10883 }, { "epoch": 0.2976372784948589, "grad_norm": 1.5074501037597656, "learning_rate": 1.647374645911475e-05, "loss": 0.5377, "step": 10884 }, { "epoch": 0.2976646248085758, "grad_norm": 2.0695130825042725, "learning_rate": 1.6473071374864802e-05, "loss": 0.5359, "step": 10885 }, { "epoch": 0.29769197112229273, "grad_norm": 1.6135647296905518, "learning_rate": 1.647239623983543e-05, "loss": 0.8888, "step": 10886 }, { "epoch": 0.2977193174360096, "grad_norm": 1.286232352256775, "learning_rate": 1.6471721054031935e-05, "loss": 0.5592, "step": 10887 }, { "epoch": 0.2977466637497265, "grad_norm": 1.216063380241394, "learning_rate": 1.6471045817459607e-05, "loss": 0.5318, "step": 10888 }, { "epoch": 0.29777401006344345, "grad_norm": 1.2789663076400757, "learning_rate": 1.6470370530123744e-05, "loss": 0.5183, "step": 10889 }, { "epoch": 0.2978013563771604, "grad_norm": 1.462863564491272, "learning_rate": 1.6469695192029646e-05, "loss": 0.4806, "step": 10890 }, { "epoch": 0.29782870269087725, "grad_norm": 1.679720163345337, "learning_rate": 1.6469019803182614e-05, "loss": 0.548, "step": 10891 }, { "epoch": 0.29785604900459417, "grad_norm": 1.3428488969802856, "learning_rate": 1.6468344363587937e-05, "loss": 0.5809, "step": 10892 }, { "epoch": 0.2978833953183111, "grad_norm": 1.0480692386627197, "learning_rate": 1.646766887325092e-05, "loss": 0.5596, "step": 10893 }, { "epoch": 0.297910741632028, "grad_norm": 1.1661137342453003, "learning_rate": 1.6466993332176863e-05, "loss": 0.5498, "step": 10894 }, { "epoch": 0.2979380879457449, "grad_norm": 1.364641547203064, "learning_rate": 1.646631774037106e-05, "loss": 0.5254, "step": 10895 }, { "epoch": 0.2979654342594618, "grad_norm": 1.7657006978988647, "learning_rate": 1.6465642097838816e-05, "loss": 0.5837, "step": 10896 }, { "epoch": 0.29799278057317874, "grad_norm": 1.4140923023223877, "learning_rate": 1.646496640458543e-05, "loss": 0.4764, "step": 10897 }, { "epoch": 0.29802012688689566, "grad_norm": 1.333984375, "learning_rate": 1.6464290660616202e-05, "loss": 0.5731, "step": 10898 }, { "epoch": 0.29804747320061253, "grad_norm": 1.2170439958572388, "learning_rate": 1.646361486593643e-05, "loss": 0.5392, "step": 10899 }, { "epoch": 0.29807481951432946, "grad_norm": 1.221440076828003, "learning_rate": 1.646293902055142e-05, "loss": 0.5543, "step": 10900 }, { "epoch": 0.2981021658280464, "grad_norm": 1.3543850183486938, "learning_rate": 1.6462263124466472e-05, "loss": 0.4551, "step": 10901 }, { "epoch": 0.2981295121417633, "grad_norm": 1.4937509298324585, "learning_rate": 1.6461587177686888e-05, "loss": 0.5435, "step": 10902 }, { "epoch": 0.2981568584554802, "grad_norm": 1.4425848722457886, "learning_rate": 1.6460911180217973e-05, "loss": 0.885, "step": 10903 }, { "epoch": 0.2981842047691971, "grad_norm": 1.486072301864624, "learning_rate": 1.6460235132065028e-05, "loss": 0.5502, "step": 10904 }, { "epoch": 0.29821155108291403, "grad_norm": 1.4064522981643677, "learning_rate": 1.6459559033233353e-05, "loss": 0.5477, "step": 10905 }, { "epoch": 0.29823889739663095, "grad_norm": 1.6140118837356567, "learning_rate": 1.645888288372826e-05, "loss": 0.4692, "step": 10906 }, { "epoch": 0.2982662437103478, "grad_norm": 1.37277352809906, "learning_rate": 1.6458206683555044e-05, "loss": 0.4646, "step": 10907 }, { "epoch": 0.29829359002406475, "grad_norm": 1.59768545627594, "learning_rate": 1.6457530432719017e-05, "loss": 0.5602, "step": 10908 }, { "epoch": 0.2983209363377817, "grad_norm": 1.1108839511871338, "learning_rate": 1.645685413122548e-05, "loss": 0.5254, "step": 10909 }, { "epoch": 0.2983482826514986, "grad_norm": 1.3120263814926147, "learning_rate": 1.6456177779079743e-05, "loss": 0.8662, "step": 10910 }, { "epoch": 0.29837562896521547, "grad_norm": 1.3473923206329346, "learning_rate": 1.6455501376287104e-05, "loss": 0.8972, "step": 10911 }, { "epoch": 0.2984029752789324, "grad_norm": 1.397741436958313, "learning_rate": 1.6454824922852875e-05, "loss": 0.4626, "step": 10912 }, { "epoch": 0.2984303215926493, "grad_norm": 2.0322225093841553, "learning_rate": 1.645414841878236e-05, "loss": 0.8387, "step": 10913 }, { "epoch": 0.29845766790636624, "grad_norm": 1.3424004316329956, "learning_rate": 1.645347186408087e-05, "loss": 0.6144, "step": 10914 }, { "epoch": 0.2984850142200831, "grad_norm": 1.3019388914108276, "learning_rate": 1.6452795258753704e-05, "loss": 0.4365, "step": 10915 }, { "epoch": 0.29851236053380004, "grad_norm": 1.3194077014923096, "learning_rate": 1.6452118602806176e-05, "loss": 0.5078, "step": 10916 }, { "epoch": 0.29853970684751696, "grad_norm": 1.7616204023361206, "learning_rate": 1.6451441896243598e-05, "loss": 0.5388, "step": 10917 }, { "epoch": 0.2985670531612339, "grad_norm": 1.1679527759552002, "learning_rate": 1.6450765139071273e-05, "loss": 0.5628, "step": 10918 }, { "epoch": 0.29859439947495076, "grad_norm": 1.4282360076904297, "learning_rate": 1.645008833129451e-05, "loss": 0.5182, "step": 10919 }, { "epoch": 0.2986217457886677, "grad_norm": 1.6199188232421875, "learning_rate": 1.644941147291862e-05, "loss": 0.4934, "step": 10920 }, { "epoch": 0.2986490921023846, "grad_norm": 1.4485154151916504, "learning_rate": 1.6448734563948905e-05, "loss": 0.554, "step": 10921 }, { "epoch": 0.29867643841610153, "grad_norm": 1.1539357900619507, "learning_rate": 1.644805760439069e-05, "loss": 0.5413, "step": 10922 }, { "epoch": 0.2987037847298184, "grad_norm": 1.8403021097183228, "learning_rate": 1.6447380594249276e-05, "loss": 0.3672, "step": 10923 }, { "epoch": 0.2987311310435353, "grad_norm": 1.7305186986923218, "learning_rate": 1.6446703533529975e-05, "loss": 0.8983, "step": 10924 }, { "epoch": 0.29875847735725225, "grad_norm": 1.413622498512268, "learning_rate": 1.6446026422238097e-05, "loss": 0.5596, "step": 10925 }, { "epoch": 0.2987858236709692, "grad_norm": 1.4216270446777344, "learning_rate": 1.644534926037896e-05, "loss": 0.5814, "step": 10926 }, { "epoch": 0.29881316998468604, "grad_norm": 1.2820653915405273, "learning_rate": 1.644467204795787e-05, "loss": 0.5292, "step": 10927 }, { "epoch": 0.29884051629840297, "grad_norm": 1.3445552587509155, "learning_rate": 1.644399478498014e-05, "loss": 0.5415, "step": 10928 }, { "epoch": 0.2988678626121199, "grad_norm": 1.332564353942871, "learning_rate": 1.6443317471451087e-05, "loss": 0.4862, "step": 10929 }, { "epoch": 0.2988952089258368, "grad_norm": 1.3667861223220825, "learning_rate": 1.644264010737602e-05, "loss": 0.5798, "step": 10930 }, { "epoch": 0.2989225552395537, "grad_norm": 1.1552289724349976, "learning_rate": 1.6441962692760257e-05, "loss": 0.3869, "step": 10931 }, { "epoch": 0.2989499015532706, "grad_norm": 1.289477825164795, "learning_rate": 1.6441285227609103e-05, "loss": 0.5636, "step": 10932 }, { "epoch": 0.29897724786698754, "grad_norm": 2.650052547454834, "learning_rate": 1.6440607711927884e-05, "loss": 0.5467, "step": 10933 }, { "epoch": 0.29900459418070446, "grad_norm": 1.8607800006866455, "learning_rate": 1.6439930145721912e-05, "loss": 0.4764, "step": 10934 }, { "epoch": 0.29903194049442133, "grad_norm": 1.369118094444275, "learning_rate": 1.6439252528996496e-05, "loss": 0.5397, "step": 10935 }, { "epoch": 0.29905928680813826, "grad_norm": 1.725738525390625, "learning_rate": 1.643857486175696e-05, "loss": 0.8476, "step": 10936 }, { "epoch": 0.2990866331218552, "grad_norm": 1.195898413658142, "learning_rate": 1.6437897144008614e-05, "loss": 0.8951, "step": 10937 }, { "epoch": 0.2991139794355721, "grad_norm": 1.6383389234542847, "learning_rate": 1.643721937575678e-05, "loss": 0.5201, "step": 10938 }, { "epoch": 0.299141325749289, "grad_norm": 1.3508459329605103, "learning_rate": 1.6436541557006765e-05, "loss": 0.542, "step": 10939 }, { "epoch": 0.2991686720630059, "grad_norm": 1.6363184452056885, "learning_rate": 1.6435863687763894e-05, "loss": 0.5988, "step": 10940 }, { "epoch": 0.2991960183767228, "grad_norm": 1.3922845125198364, "learning_rate": 1.643518576803349e-05, "loss": 0.4757, "step": 10941 }, { "epoch": 0.29922336469043975, "grad_norm": 1.1915137767791748, "learning_rate": 1.643450779782086e-05, "loss": 0.5421, "step": 10942 }, { "epoch": 0.2992507110041566, "grad_norm": 1.2485469579696655, "learning_rate": 1.643382977713133e-05, "loss": 0.5848, "step": 10943 }, { "epoch": 0.29927805731787355, "grad_norm": 2.0566186904907227, "learning_rate": 1.6433151705970215e-05, "loss": 0.5423, "step": 10944 }, { "epoch": 0.29930540363159047, "grad_norm": 1.199764609336853, "learning_rate": 1.643247358434283e-05, "loss": 0.5632, "step": 10945 }, { "epoch": 0.2993327499453074, "grad_norm": 1.1431719064712524, "learning_rate": 1.6431795412254507e-05, "loss": 0.5758, "step": 10946 }, { "epoch": 0.29936009625902427, "grad_norm": 1.6766579151153564, "learning_rate": 1.6431117189710554e-05, "loss": 0.559, "step": 10947 }, { "epoch": 0.2993874425727412, "grad_norm": 1.3437548875808716, "learning_rate": 1.64304389167163e-05, "loss": 0.5787, "step": 10948 }, { "epoch": 0.2994147888864581, "grad_norm": 1.3186204433441162, "learning_rate": 1.642976059327706e-05, "loss": 0.5529, "step": 10949 }, { "epoch": 0.29944213520017504, "grad_norm": 1.288969874382019, "learning_rate": 1.6429082219398158e-05, "loss": 0.5352, "step": 10950 }, { "epoch": 0.2994694815138919, "grad_norm": 1.286215901374817, "learning_rate": 1.6428403795084916e-05, "loss": 0.5084, "step": 10951 }, { "epoch": 0.29949682782760884, "grad_norm": 1.3259565830230713, "learning_rate": 1.6427725320342657e-05, "loss": 0.4966, "step": 10952 }, { "epoch": 0.29952417414132576, "grad_norm": 1.156646728515625, "learning_rate": 1.64270467951767e-05, "loss": 0.5433, "step": 10953 }, { "epoch": 0.2995515204550427, "grad_norm": 1.4552549123764038, "learning_rate": 1.642636821959237e-05, "loss": 0.5658, "step": 10954 }, { "epoch": 0.29957886676875956, "grad_norm": 1.4015272855758667, "learning_rate": 1.6425689593594988e-05, "loss": 0.5434, "step": 10955 }, { "epoch": 0.2996062130824765, "grad_norm": 1.8035286664962769, "learning_rate": 1.6425010917189884e-05, "loss": 0.5249, "step": 10956 }, { "epoch": 0.2996335593961934, "grad_norm": 1.5613945722579956, "learning_rate": 1.6424332190382373e-05, "loss": 0.4237, "step": 10957 }, { "epoch": 0.29966090570991033, "grad_norm": 1.3113083839416504, "learning_rate": 1.6423653413177784e-05, "loss": 0.5768, "step": 10958 }, { "epoch": 0.2996882520236272, "grad_norm": 1.1453946828842163, "learning_rate": 1.6422974585581446e-05, "loss": 0.5695, "step": 10959 }, { "epoch": 0.2997155983373441, "grad_norm": 1.303385615348816, "learning_rate": 1.6422295707598676e-05, "loss": 0.5441, "step": 10960 }, { "epoch": 0.29974294465106105, "grad_norm": 1.8957666158676147, "learning_rate": 1.6421616779234806e-05, "loss": 0.546, "step": 10961 }, { "epoch": 0.299770290964778, "grad_norm": 1.4529320001602173, "learning_rate": 1.6420937800495156e-05, "loss": 0.5712, "step": 10962 }, { "epoch": 0.29979763727849484, "grad_norm": 1.5064772367477417, "learning_rate": 1.6420258771385063e-05, "loss": 0.9143, "step": 10963 }, { "epoch": 0.29982498359221177, "grad_norm": 1.8292030096054077, "learning_rate": 1.641957969190984e-05, "loss": 0.5707, "step": 10964 }, { "epoch": 0.2998523299059287, "grad_norm": 1.3010730743408203, "learning_rate": 1.6418900562074824e-05, "loss": 0.5378, "step": 10965 }, { "epoch": 0.2998796762196456, "grad_norm": 1.5169053077697754, "learning_rate": 1.641822138188534e-05, "loss": 0.5513, "step": 10966 }, { "epoch": 0.2999070225333625, "grad_norm": 1.378220796585083, "learning_rate": 1.6417542151346714e-05, "loss": 0.5509, "step": 10967 }, { "epoch": 0.2999343688470794, "grad_norm": 1.4794864654541016, "learning_rate": 1.6416862870464275e-05, "loss": 0.5459, "step": 10968 }, { "epoch": 0.29996171516079634, "grad_norm": 1.0750539302825928, "learning_rate": 1.6416183539243354e-05, "loss": 0.5237, "step": 10969 }, { "epoch": 0.29998906147451326, "grad_norm": 1.2331035137176514, "learning_rate": 1.6415504157689278e-05, "loss": 0.5717, "step": 10970 }, { "epoch": 0.30001640778823013, "grad_norm": 1.1795625686645508, "learning_rate": 1.6414824725807377e-05, "loss": 0.844, "step": 10971 }, { "epoch": 0.30004375410194706, "grad_norm": 1.6597554683685303, "learning_rate": 1.641414524360298e-05, "loss": 0.527, "step": 10972 }, { "epoch": 0.300071100415664, "grad_norm": 1.294788122177124, "learning_rate": 1.641346571108142e-05, "loss": 0.5688, "step": 10973 }, { "epoch": 0.30009844672938085, "grad_norm": 1.4350489377975464, "learning_rate": 1.641278612824803e-05, "loss": 0.5841, "step": 10974 }, { "epoch": 0.3001257930430978, "grad_norm": 1.4993423223495483, "learning_rate": 1.6412106495108137e-05, "loss": 0.5037, "step": 10975 }, { "epoch": 0.3001531393568147, "grad_norm": 1.3314002752304077, "learning_rate": 1.641142681166707e-05, "loss": 0.5596, "step": 10976 }, { "epoch": 0.3001804856705316, "grad_norm": 1.345461130142212, "learning_rate": 1.6410747077930166e-05, "loss": 0.5182, "step": 10977 }, { "epoch": 0.3002078319842485, "grad_norm": 1.3175452947616577, "learning_rate": 1.641006729390275e-05, "loss": 0.5363, "step": 10978 }, { "epoch": 0.3002351782979654, "grad_norm": 2.192340135574341, "learning_rate": 1.6409387459590167e-05, "loss": 0.5562, "step": 10979 }, { "epoch": 0.30026252461168235, "grad_norm": 1.3195992708206177, "learning_rate": 1.640870757499774e-05, "loss": 0.513, "step": 10980 }, { "epoch": 0.30028987092539927, "grad_norm": 1.2139344215393066, "learning_rate": 1.6408027640130805e-05, "loss": 0.5455, "step": 10981 }, { "epoch": 0.30031721723911614, "grad_norm": 1.5277000665664673, "learning_rate": 1.6407347654994698e-05, "loss": 0.567, "step": 10982 }, { "epoch": 0.30034456355283307, "grad_norm": 1.2644754648208618, "learning_rate": 1.640666761959475e-05, "loss": 0.5591, "step": 10983 }, { "epoch": 0.30037190986655, "grad_norm": 1.3489596843719482, "learning_rate": 1.64059875339363e-05, "loss": 0.5444, "step": 10984 }, { "epoch": 0.3003992561802669, "grad_norm": 1.285942554473877, "learning_rate": 1.6405307398024678e-05, "loss": 0.5241, "step": 10985 }, { "epoch": 0.3004266024939838, "grad_norm": 1.2528575658798218, "learning_rate": 1.6404627211865227e-05, "loss": 0.5408, "step": 10986 }, { "epoch": 0.3004539488077007, "grad_norm": 1.2252596616744995, "learning_rate": 1.640394697546327e-05, "loss": 0.4729, "step": 10987 }, { "epoch": 0.30048129512141764, "grad_norm": 1.6900962591171265, "learning_rate": 1.6403266688824157e-05, "loss": 0.4663, "step": 10988 }, { "epoch": 0.30050864143513456, "grad_norm": 1.273650884628296, "learning_rate": 1.6402586351953217e-05, "loss": 0.5517, "step": 10989 }, { "epoch": 0.30053598774885143, "grad_norm": 1.5901532173156738, "learning_rate": 1.6401905964855793e-05, "loss": 0.9004, "step": 10990 }, { "epoch": 0.30056333406256835, "grad_norm": 1.3572322130203247, "learning_rate": 1.6401225527537215e-05, "loss": 0.5485, "step": 10991 }, { "epoch": 0.3005906803762853, "grad_norm": 1.2161285877227783, "learning_rate": 1.640054504000282e-05, "loss": 0.4236, "step": 10992 }, { "epoch": 0.3006180266900022, "grad_norm": 1.5204877853393555, "learning_rate": 1.6399864502257958e-05, "loss": 0.8702, "step": 10993 }, { "epoch": 0.3006453730037191, "grad_norm": 1.5240564346313477, "learning_rate": 1.6399183914307954e-05, "loss": 0.5285, "step": 10994 }, { "epoch": 0.300672719317436, "grad_norm": 1.7508234977722168, "learning_rate": 1.6398503276158156e-05, "loss": 0.5587, "step": 10995 }, { "epoch": 0.3007000656311529, "grad_norm": 1.6286993026733398, "learning_rate": 1.63978225878139e-05, "loss": 0.5348, "step": 10996 }, { "epoch": 0.30072741194486985, "grad_norm": 1.3202438354492188, "learning_rate": 1.6397141849280527e-05, "loss": 0.8391, "step": 10997 }, { "epoch": 0.3007547582585867, "grad_norm": 1.3999152183532715, "learning_rate": 1.6396461060563375e-05, "loss": 0.4455, "step": 10998 }, { "epoch": 0.30078210457230364, "grad_norm": 1.9070039987564087, "learning_rate": 1.639578022166779e-05, "loss": 0.9001, "step": 10999 }, { "epoch": 0.30080945088602057, "grad_norm": 1.4461286067962646, "learning_rate": 1.6395099332599104e-05, "loss": 0.5572, "step": 11000 }, { "epoch": 0.3008367971997375, "grad_norm": 2.1249749660491943, "learning_rate": 1.6394418393362666e-05, "loss": 0.5228, "step": 11001 }, { "epoch": 0.30086414351345436, "grad_norm": 1.5909212827682495, "learning_rate": 1.6393737403963816e-05, "loss": 0.5883, "step": 11002 }, { "epoch": 0.3008914898271713, "grad_norm": 1.455047607421875, "learning_rate": 1.6393056364407892e-05, "loss": 0.5561, "step": 11003 }, { "epoch": 0.3009188361408882, "grad_norm": 1.4289885759353638, "learning_rate": 1.6392375274700243e-05, "loss": 0.6087, "step": 11004 }, { "epoch": 0.30094618245460514, "grad_norm": 1.3059254884719849, "learning_rate": 1.6391694134846206e-05, "loss": 0.5414, "step": 11005 }, { "epoch": 0.300973528768322, "grad_norm": 1.4160033464431763, "learning_rate": 1.639101294485113e-05, "loss": 0.5255, "step": 11006 }, { "epoch": 0.30100087508203893, "grad_norm": 1.5158220529556274, "learning_rate": 1.6390331704720356e-05, "loss": 0.5545, "step": 11007 }, { "epoch": 0.30102822139575586, "grad_norm": 1.5548701286315918, "learning_rate": 1.6389650414459225e-05, "loss": 0.9095, "step": 11008 }, { "epoch": 0.3010555677094728, "grad_norm": 1.425439715385437, "learning_rate": 1.638896907407309e-05, "loss": 0.5398, "step": 11009 }, { "epoch": 0.30108291402318965, "grad_norm": 1.1456221342086792, "learning_rate": 1.638828768356729e-05, "loss": 0.5555, "step": 11010 }, { "epoch": 0.3011102603369066, "grad_norm": 2.3163838386535645, "learning_rate": 1.6387606242947164e-05, "loss": 0.5653, "step": 11011 }, { "epoch": 0.3011376066506235, "grad_norm": 1.406331181526184, "learning_rate": 1.638692475221807e-05, "loss": 0.5485, "step": 11012 }, { "epoch": 0.3011649529643404, "grad_norm": 1.4015530347824097, "learning_rate": 1.638624321138535e-05, "loss": 0.5565, "step": 11013 }, { "epoch": 0.3011922992780573, "grad_norm": 1.5608553886413574, "learning_rate": 1.638556162045435e-05, "loss": 0.5432, "step": 11014 }, { "epoch": 0.3012196455917742, "grad_norm": 1.2664681673049927, "learning_rate": 1.6384879979430413e-05, "loss": 0.5174, "step": 11015 }, { "epoch": 0.30124699190549115, "grad_norm": 1.112200379371643, "learning_rate": 1.6384198288318888e-05, "loss": 0.5458, "step": 11016 }, { "epoch": 0.30127433821920807, "grad_norm": 1.5474472045898438, "learning_rate": 1.638351654712513e-05, "loss": 0.5544, "step": 11017 }, { "epoch": 0.30130168453292494, "grad_norm": 1.2602335214614868, "learning_rate": 1.6382834755854477e-05, "loss": 0.4296, "step": 11018 }, { "epoch": 0.30132903084664187, "grad_norm": 1.545815110206604, "learning_rate": 1.638215291451228e-05, "loss": 0.536, "step": 11019 }, { "epoch": 0.3013563771603588, "grad_norm": 1.8325011730194092, "learning_rate": 1.638147102310389e-05, "loss": 0.4986, "step": 11020 }, { "epoch": 0.3013837234740757, "grad_norm": 1.5310899019241333, "learning_rate": 1.6380789081634658e-05, "loss": 0.5154, "step": 11021 }, { "epoch": 0.3014110697877926, "grad_norm": 1.6338179111480713, "learning_rate": 1.638010709010993e-05, "loss": 0.5718, "step": 11022 }, { "epoch": 0.3014384161015095, "grad_norm": 1.1777334213256836, "learning_rate": 1.637942504853506e-05, "loss": 0.4766, "step": 11023 }, { "epoch": 0.30146576241522643, "grad_norm": 1.2170857191085815, "learning_rate": 1.6378742956915392e-05, "loss": 0.5795, "step": 11024 }, { "epoch": 0.30149310872894336, "grad_norm": 2.0854148864746094, "learning_rate": 1.637806081525628e-05, "loss": 0.8504, "step": 11025 }, { "epoch": 0.30152045504266023, "grad_norm": 1.2348722219467163, "learning_rate": 1.637737862356308e-05, "loss": 0.5653, "step": 11026 }, { "epoch": 0.30154780135637715, "grad_norm": 1.2643215656280518, "learning_rate": 1.6376696381841136e-05, "loss": 0.5687, "step": 11027 }, { "epoch": 0.3015751476700941, "grad_norm": 1.4838844537734985, "learning_rate": 1.6376014090095807e-05, "loss": 0.5262, "step": 11028 }, { "epoch": 0.301602493983811, "grad_norm": 1.3813899755477905, "learning_rate": 1.637533174833244e-05, "loss": 0.5579, "step": 11029 }, { "epoch": 0.3016298402975279, "grad_norm": 1.6152637004852295, "learning_rate": 1.637464935655639e-05, "loss": 0.8457, "step": 11030 }, { "epoch": 0.3016571866112448, "grad_norm": 1.4188578128814697, "learning_rate": 1.637396691477301e-05, "loss": 0.8537, "step": 11031 }, { "epoch": 0.3016845329249617, "grad_norm": 1.0537875890731812, "learning_rate": 1.6373284422987655e-05, "loss": 0.5603, "step": 11032 }, { "epoch": 0.30171187923867865, "grad_norm": 1.3538577556610107, "learning_rate": 1.637260188120567e-05, "loss": 0.5478, "step": 11033 }, { "epoch": 0.3017392255523955, "grad_norm": 1.5191603899002075, "learning_rate": 1.6371919289432425e-05, "loss": 0.5811, "step": 11034 }, { "epoch": 0.30176657186611244, "grad_norm": 1.2577054500579834, "learning_rate": 1.6371236647673262e-05, "loss": 0.4145, "step": 11035 }, { "epoch": 0.30179391817982937, "grad_norm": 1.4677722454071045, "learning_rate": 1.637055395593354e-05, "loss": 0.5603, "step": 11036 }, { "epoch": 0.3018212644935463, "grad_norm": 1.370659351348877, "learning_rate": 1.6369871214218618e-05, "loss": 0.5416, "step": 11037 }, { "epoch": 0.30184861080726316, "grad_norm": 1.5981521606445312, "learning_rate": 1.6369188422533847e-05, "loss": 0.5795, "step": 11038 }, { "epoch": 0.3018759571209801, "grad_norm": 1.2388287782669067, "learning_rate": 1.6368505580884585e-05, "loss": 0.5506, "step": 11039 }, { "epoch": 0.301903303434697, "grad_norm": 3.4863288402557373, "learning_rate": 1.636782268927619e-05, "loss": 0.3669, "step": 11040 }, { "epoch": 0.30193064974841394, "grad_norm": 1.2521846294403076, "learning_rate": 1.636713974771402e-05, "loss": 0.8893, "step": 11041 }, { "epoch": 0.3019579960621308, "grad_norm": 1.2976752519607544, "learning_rate": 1.6366456756203425e-05, "loss": 0.5301, "step": 11042 }, { "epoch": 0.30198534237584773, "grad_norm": 1.437928557395935, "learning_rate": 1.6365773714749773e-05, "loss": 0.5338, "step": 11043 }, { "epoch": 0.30201268868956466, "grad_norm": 1.233338475227356, "learning_rate": 1.6365090623358417e-05, "loss": 0.5384, "step": 11044 }, { "epoch": 0.3020400350032816, "grad_norm": 1.226622223854065, "learning_rate": 1.6364407482034715e-05, "loss": 0.5625, "step": 11045 }, { "epoch": 0.30206738131699845, "grad_norm": 1.186915397644043, "learning_rate": 1.636372429078403e-05, "loss": 0.5484, "step": 11046 }, { "epoch": 0.3020947276307154, "grad_norm": 1.4544047117233276, "learning_rate": 1.6363041049611717e-05, "loss": 0.5583, "step": 11047 }, { "epoch": 0.3021220739444323, "grad_norm": 1.2143840789794922, "learning_rate": 1.6362357758523137e-05, "loss": 0.5312, "step": 11048 }, { "epoch": 0.3021494202581492, "grad_norm": 1.0952658653259277, "learning_rate": 1.6361674417523653e-05, "loss": 0.5465, "step": 11049 }, { "epoch": 0.3021767665718661, "grad_norm": 1.2744961977005005, "learning_rate": 1.6360991026618624e-05, "loss": 0.5385, "step": 11050 }, { "epoch": 0.302204112885583, "grad_norm": 1.361733078956604, "learning_rate": 1.6360307585813406e-05, "loss": 0.497, "step": 11051 }, { "epoch": 0.30223145919929995, "grad_norm": 1.6809308528900146, "learning_rate": 1.6359624095113368e-05, "loss": 0.5205, "step": 11052 }, { "epoch": 0.30225880551301687, "grad_norm": 1.2878656387329102, "learning_rate": 1.6358940554523868e-05, "loss": 0.4848, "step": 11053 }, { "epoch": 0.30228615182673374, "grad_norm": 1.2400463819503784, "learning_rate": 1.635825696405027e-05, "loss": 0.3965, "step": 11054 }, { "epoch": 0.30231349814045066, "grad_norm": 1.3975145816802979, "learning_rate": 1.6357573323697934e-05, "loss": 0.564, "step": 11055 }, { "epoch": 0.3023408444541676, "grad_norm": 1.5362447500228882, "learning_rate": 1.6356889633472227e-05, "loss": 0.586, "step": 11056 }, { "epoch": 0.3023681907678845, "grad_norm": 1.1855942010879517, "learning_rate": 1.635620589337851e-05, "loss": 0.5267, "step": 11057 }, { "epoch": 0.3023955370816014, "grad_norm": 3.5245442390441895, "learning_rate": 1.635552210342214e-05, "loss": 0.4357, "step": 11058 }, { "epoch": 0.3024228833953183, "grad_norm": 1.427855134010315, "learning_rate": 1.6354838263608493e-05, "loss": 0.8889, "step": 11059 }, { "epoch": 0.30245022970903523, "grad_norm": 1.6298248767852783, "learning_rate": 1.635415437394293e-05, "loss": 0.5663, "step": 11060 }, { "epoch": 0.30247757602275216, "grad_norm": 1.3402100801467896, "learning_rate": 1.635347043443081e-05, "loss": 0.555, "step": 11061 }, { "epoch": 0.30250492233646903, "grad_norm": 2.0194618701934814, "learning_rate": 1.6352786445077503e-05, "loss": 0.5567, "step": 11062 }, { "epoch": 0.30253226865018595, "grad_norm": 1.5477735996246338, "learning_rate": 1.6352102405888377e-05, "loss": 0.5441, "step": 11063 }, { "epoch": 0.3025596149639029, "grad_norm": 1.30868661403656, "learning_rate": 1.635141831686879e-05, "loss": 0.5684, "step": 11064 }, { "epoch": 0.3025869612776198, "grad_norm": 1.2754287719726562, "learning_rate": 1.6350734178024122e-05, "loss": 0.5282, "step": 11065 }, { "epoch": 0.3026143075913367, "grad_norm": 1.286697506904602, "learning_rate": 1.6350049989359726e-05, "loss": 0.5217, "step": 11066 }, { "epoch": 0.3026416539050536, "grad_norm": 1.835386037826538, "learning_rate": 1.6349365750880976e-05, "loss": 0.571, "step": 11067 }, { "epoch": 0.3026690002187705, "grad_norm": 1.4691078662872314, "learning_rate": 1.634868146259324e-05, "loss": 0.5134, "step": 11068 }, { "epoch": 0.30269634653248745, "grad_norm": 1.485634207725525, "learning_rate": 1.6347997124501878e-05, "loss": 0.5543, "step": 11069 }, { "epoch": 0.3027236928462043, "grad_norm": 1.2037745714187622, "learning_rate": 1.6347312736612273e-05, "loss": 0.5491, "step": 11070 }, { "epoch": 0.30275103915992124, "grad_norm": 1.6380220651626587, "learning_rate": 1.6346628298929782e-05, "loss": 0.4799, "step": 11071 }, { "epoch": 0.30277838547363817, "grad_norm": 1.5355013608932495, "learning_rate": 1.634594381145978e-05, "loss": 0.5082, "step": 11072 }, { "epoch": 0.30280573178735504, "grad_norm": 1.3848371505737305, "learning_rate": 1.6345259274207633e-05, "loss": 0.5496, "step": 11073 }, { "epoch": 0.30283307810107196, "grad_norm": 1.371416449546814, "learning_rate": 1.6344574687178712e-05, "loss": 0.5619, "step": 11074 }, { "epoch": 0.3028604244147889, "grad_norm": 1.0849181413650513, "learning_rate": 1.6343890050378393e-05, "loss": 0.5467, "step": 11075 }, { "epoch": 0.3028877707285058, "grad_norm": 1.176627516746521, "learning_rate": 1.634320536381204e-05, "loss": 0.5257, "step": 11076 }, { "epoch": 0.3029151170422227, "grad_norm": 1.2859410047531128, "learning_rate": 1.6342520627485022e-05, "loss": 0.5451, "step": 11077 }, { "epoch": 0.3029424633559396, "grad_norm": 1.3348791599273682, "learning_rate": 1.6341835841402715e-05, "loss": 0.5045, "step": 11078 }, { "epoch": 0.30296980966965653, "grad_norm": 1.13019859790802, "learning_rate": 1.6341151005570493e-05, "loss": 0.5408, "step": 11079 }, { "epoch": 0.30299715598337346, "grad_norm": 1.4740090370178223, "learning_rate": 1.6340466119993727e-05, "loss": 0.5339, "step": 11080 }, { "epoch": 0.3030245022970903, "grad_norm": 1.2374215126037598, "learning_rate": 1.6339781184677787e-05, "loss": 0.5527, "step": 11081 }, { "epoch": 0.30305184861080725, "grad_norm": 1.219241976737976, "learning_rate": 1.6339096199628047e-05, "loss": 0.5324, "step": 11082 }, { "epoch": 0.3030791949245242, "grad_norm": 1.4318090677261353, "learning_rate": 1.6338411164849884e-05, "loss": 0.5016, "step": 11083 }, { "epoch": 0.3031065412382411, "grad_norm": 1.5778307914733887, "learning_rate": 1.6337726080348667e-05, "loss": 0.5603, "step": 11084 }, { "epoch": 0.30313388755195797, "grad_norm": 1.1709924936294556, "learning_rate": 1.6337040946129774e-05, "loss": 0.5361, "step": 11085 }, { "epoch": 0.3031612338656749, "grad_norm": 1.2687938213348389, "learning_rate": 1.6336355762198578e-05, "loss": 0.5376, "step": 11086 }, { "epoch": 0.3031885801793918, "grad_norm": 1.483525276184082, "learning_rate": 1.6335670528560453e-05, "loss": 0.569, "step": 11087 }, { "epoch": 0.30321592649310875, "grad_norm": 1.2592639923095703, "learning_rate": 1.6334985245220772e-05, "loss": 0.5272, "step": 11088 }, { "epoch": 0.3032432728068256, "grad_norm": 1.663558840751648, "learning_rate": 1.633429991218492e-05, "loss": 0.9101, "step": 11089 }, { "epoch": 0.30327061912054254, "grad_norm": 1.285474181175232, "learning_rate": 1.6333614529458266e-05, "loss": 0.4757, "step": 11090 }, { "epoch": 0.30329796543425946, "grad_norm": 1.4560109376907349, "learning_rate": 1.633292909704619e-05, "loss": 0.5537, "step": 11091 }, { "epoch": 0.3033253117479764, "grad_norm": 1.2437405586242676, "learning_rate": 1.6332243614954065e-05, "loss": 0.5556, "step": 11092 }, { "epoch": 0.30335265806169326, "grad_norm": 1.3629804849624634, "learning_rate": 1.6331558083187275e-05, "loss": 0.5799, "step": 11093 }, { "epoch": 0.3033800043754102, "grad_norm": 1.100201964378357, "learning_rate": 1.6330872501751188e-05, "loss": 0.5059, "step": 11094 }, { "epoch": 0.3034073506891271, "grad_norm": 1.208375096321106, "learning_rate": 1.6330186870651194e-05, "loss": 0.4227, "step": 11095 }, { "epoch": 0.30343469700284403, "grad_norm": 1.2455247640609741, "learning_rate": 1.6329501189892664e-05, "loss": 0.5531, "step": 11096 }, { "epoch": 0.3034620433165609, "grad_norm": 2.166935920715332, "learning_rate": 1.6328815459480974e-05, "loss": 0.5495, "step": 11097 }, { "epoch": 0.30348938963027783, "grad_norm": 1.313511848449707, "learning_rate": 1.632812967942151e-05, "loss": 0.5381, "step": 11098 }, { "epoch": 0.30351673594399475, "grad_norm": 1.3740566968917847, "learning_rate": 1.6327443849719648e-05, "loss": 0.5297, "step": 11099 }, { "epoch": 0.3035440822577117, "grad_norm": 1.4085396528244019, "learning_rate": 1.6326757970380772e-05, "loss": 0.5578, "step": 11100 }, { "epoch": 0.30357142857142855, "grad_norm": 1.5005265474319458, "learning_rate": 1.632607204141026e-05, "loss": 0.8399, "step": 11101 }, { "epoch": 0.3035987748851455, "grad_norm": 1.148478388786316, "learning_rate": 1.6325386062813496e-05, "loss": 0.5643, "step": 11102 }, { "epoch": 0.3036261211988624, "grad_norm": 1.3772523403167725, "learning_rate": 1.6324700034595854e-05, "loss": 0.5106, "step": 11103 }, { "epoch": 0.3036534675125793, "grad_norm": 1.1733132600784302, "learning_rate": 1.6324013956762722e-05, "loss": 0.5426, "step": 11104 }, { "epoch": 0.3036808138262962, "grad_norm": 1.2405779361724854, "learning_rate": 1.6323327829319483e-05, "loss": 0.5558, "step": 11105 }, { "epoch": 0.3037081601400131, "grad_norm": 1.1934162378311157, "learning_rate": 1.6322641652271515e-05, "loss": 0.5591, "step": 11106 }, { "epoch": 0.30373550645373004, "grad_norm": 1.299358606338501, "learning_rate": 1.6321955425624197e-05, "loss": 0.5659, "step": 11107 }, { "epoch": 0.30376285276744697, "grad_norm": 1.2507814168930054, "learning_rate": 1.6321269149382925e-05, "loss": 0.5108, "step": 11108 }, { "epoch": 0.30379019908116384, "grad_norm": 1.513372540473938, "learning_rate": 1.632058282355307e-05, "loss": 0.5822, "step": 11109 }, { "epoch": 0.30381754539488076, "grad_norm": 1.5524016618728638, "learning_rate": 1.6319896448140028e-05, "loss": 0.5354, "step": 11110 }, { "epoch": 0.3038448917085977, "grad_norm": 1.2347393035888672, "learning_rate": 1.6319210023149172e-05, "loss": 0.5647, "step": 11111 }, { "epoch": 0.3038722380223146, "grad_norm": 1.2260102033615112, "learning_rate": 1.6318523548585892e-05, "loss": 0.5191, "step": 11112 }, { "epoch": 0.3038995843360315, "grad_norm": 1.3721983432769775, "learning_rate": 1.6317837024455575e-05, "loss": 0.5092, "step": 11113 }, { "epoch": 0.3039269306497484, "grad_norm": 1.3756439685821533, "learning_rate": 1.6317150450763603e-05, "loss": 0.5357, "step": 11114 }, { "epoch": 0.30395427696346533, "grad_norm": 1.154860019683838, "learning_rate": 1.6316463827515364e-05, "loss": 0.5124, "step": 11115 }, { "epoch": 0.30398162327718226, "grad_norm": 1.8710616827011108, "learning_rate": 1.631577715471624e-05, "loss": 0.8899, "step": 11116 }, { "epoch": 0.3040089695908991, "grad_norm": 1.1881076097488403, "learning_rate": 1.631509043237163e-05, "loss": 0.5492, "step": 11117 }, { "epoch": 0.30403631590461605, "grad_norm": 1.4099031686782837, "learning_rate": 1.6314403660486906e-05, "loss": 0.583, "step": 11118 }, { "epoch": 0.304063662218333, "grad_norm": 1.3776646852493286, "learning_rate": 1.6313716839067466e-05, "loss": 0.5331, "step": 11119 }, { "epoch": 0.3040910085320499, "grad_norm": 1.2672141790390015, "learning_rate": 1.631302996811869e-05, "loss": 0.5513, "step": 11120 }, { "epoch": 0.30411835484576677, "grad_norm": 1.3289235830307007, "learning_rate": 1.6312343047645977e-05, "loss": 0.5126, "step": 11121 }, { "epoch": 0.3041457011594837, "grad_norm": 1.220604658126831, "learning_rate": 1.63116560776547e-05, "loss": 0.5311, "step": 11122 }, { "epoch": 0.3041730474732006, "grad_norm": 1.255223274230957, "learning_rate": 1.6310969058150264e-05, "loss": 0.8873, "step": 11123 }, { "epoch": 0.30420039378691754, "grad_norm": 1.2145099639892578, "learning_rate": 1.6310281989138052e-05, "loss": 0.5645, "step": 11124 }, { "epoch": 0.3042277401006344, "grad_norm": 1.2990679740905762, "learning_rate": 1.6309594870623453e-05, "loss": 0.5459, "step": 11125 }, { "epoch": 0.30425508641435134, "grad_norm": 1.1034659147262573, "learning_rate": 1.6308907702611856e-05, "loss": 0.5249, "step": 11126 }, { "epoch": 0.30428243272806826, "grad_norm": 1.9062070846557617, "learning_rate": 1.6308220485108656e-05, "loss": 0.4854, "step": 11127 }, { "epoch": 0.3043097790417852, "grad_norm": 1.2532509565353394, "learning_rate": 1.6307533218119237e-05, "loss": 0.5517, "step": 11128 }, { "epoch": 0.30433712535550206, "grad_norm": 1.4055382013320923, "learning_rate": 1.6306845901649002e-05, "loss": 0.5583, "step": 11129 }, { "epoch": 0.304364471669219, "grad_norm": 1.1496562957763672, "learning_rate": 1.6306158535703333e-05, "loss": 0.5216, "step": 11130 }, { "epoch": 0.3043918179829359, "grad_norm": 1.3317649364471436, "learning_rate": 1.6305471120287622e-05, "loss": 0.5957, "step": 11131 }, { "epoch": 0.30441916429665283, "grad_norm": 1.7574636936187744, "learning_rate": 1.6304783655407268e-05, "loss": 0.5805, "step": 11132 }, { "epoch": 0.3044465106103697, "grad_norm": 1.2014646530151367, "learning_rate": 1.6304096141067657e-05, "loss": 0.5309, "step": 11133 }, { "epoch": 0.3044738569240866, "grad_norm": 1.225711703300476, "learning_rate": 1.630340857727419e-05, "loss": 0.522, "step": 11134 }, { "epoch": 0.30450120323780355, "grad_norm": 2.236804485321045, "learning_rate": 1.6302720964032253e-05, "loss": 0.5736, "step": 11135 }, { "epoch": 0.3045285495515205, "grad_norm": 1.385751724243164, "learning_rate": 1.6302033301347246e-05, "loss": 0.5337, "step": 11136 }, { "epoch": 0.30455589586523735, "grad_norm": 1.8148653507232666, "learning_rate": 1.630134558922456e-05, "loss": 0.5783, "step": 11137 }, { "epoch": 0.30458324217895427, "grad_norm": 1.2816952466964722, "learning_rate": 1.6300657827669595e-05, "loss": 0.5394, "step": 11138 }, { "epoch": 0.3046105884926712, "grad_norm": 1.3399274349212646, "learning_rate": 1.629997001668774e-05, "loss": 0.5566, "step": 11139 }, { "epoch": 0.3046379348063881, "grad_norm": 1.5292171239852905, "learning_rate": 1.629928215628439e-05, "loss": 0.5314, "step": 11140 }, { "epoch": 0.304665281120105, "grad_norm": 1.2321138381958008, "learning_rate": 1.6298594246464948e-05, "loss": 0.466, "step": 11141 }, { "epoch": 0.3046926274338219, "grad_norm": 2.0999293327331543, "learning_rate": 1.6297906287234803e-05, "loss": 0.4055, "step": 11142 }, { "epoch": 0.30471997374753884, "grad_norm": 1.5009773969650269, "learning_rate": 1.629721827859936e-05, "loss": 0.5277, "step": 11143 }, { "epoch": 0.30474732006125577, "grad_norm": 1.265998125076294, "learning_rate": 1.629653022056401e-05, "loss": 0.4126, "step": 11144 }, { "epoch": 0.30477466637497264, "grad_norm": 1.7670810222625732, "learning_rate": 1.629584211313415e-05, "loss": 0.4003, "step": 11145 }, { "epoch": 0.30480201268868956, "grad_norm": 1.13677978515625, "learning_rate": 1.6295153956315187e-05, "loss": 0.5351, "step": 11146 }, { "epoch": 0.3048293590024065, "grad_norm": 1.5699437856674194, "learning_rate": 1.629446575011251e-05, "loss": 0.5491, "step": 11147 }, { "epoch": 0.3048567053161234, "grad_norm": 1.2974047660827637, "learning_rate": 1.6293777494531516e-05, "loss": 0.5398, "step": 11148 }, { "epoch": 0.3048840516298403, "grad_norm": 1.2055147886276245, "learning_rate": 1.6293089189577613e-05, "loss": 0.5078, "step": 11149 }, { "epoch": 0.3049113979435572, "grad_norm": 1.4954965114593506, "learning_rate": 1.6292400835256198e-05, "loss": 0.5258, "step": 11150 }, { "epoch": 0.30493874425727413, "grad_norm": 1.3812227249145508, "learning_rate": 1.6291712431572663e-05, "loss": 0.509, "step": 11151 }, { "epoch": 0.30496609057099106, "grad_norm": 2.8143744468688965, "learning_rate": 1.629102397853242e-05, "loss": 0.5228, "step": 11152 }, { "epoch": 0.3049934368847079, "grad_norm": 1.478652834892273, "learning_rate": 1.629033547614086e-05, "loss": 0.5278, "step": 11153 }, { "epoch": 0.30502078319842485, "grad_norm": 1.355761170387268, "learning_rate": 1.6289646924403393e-05, "loss": 0.536, "step": 11154 }, { "epoch": 0.3050481295121418, "grad_norm": 1.4327337741851807, "learning_rate": 1.6288958323325416e-05, "loss": 0.881, "step": 11155 }, { "epoch": 0.3050754758258587, "grad_norm": 1.6173752546310425, "learning_rate": 1.6288269672912328e-05, "loss": 0.5523, "step": 11156 }, { "epoch": 0.30510282213957557, "grad_norm": 1.4805362224578857, "learning_rate": 1.6287580973169538e-05, "loss": 0.5788, "step": 11157 }, { "epoch": 0.3051301684532925, "grad_norm": 1.3400654792785645, "learning_rate": 1.6286892224102437e-05, "loss": 0.5546, "step": 11158 }, { "epoch": 0.3051575147670094, "grad_norm": 1.3467782735824585, "learning_rate": 1.6286203425716443e-05, "loss": 0.5533, "step": 11159 }, { "epoch": 0.30518486108072634, "grad_norm": 1.6829710006713867, "learning_rate": 1.6285514578016947e-05, "loss": 0.5375, "step": 11160 }, { "epoch": 0.3052122073944432, "grad_norm": 1.2954801321029663, "learning_rate": 1.6284825681009362e-05, "loss": 0.5429, "step": 11161 }, { "epoch": 0.30523955370816014, "grad_norm": 3.261314630508423, "learning_rate": 1.6284136734699086e-05, "loss": 0.5556, "step": 11162 }, { "epoch": 0.30526690002187706, "grad_norm": 1.348671317100525, "learning_rate": 1.6283447739091526e-05, "loss": 0.5225, "step": 11163 }, { "epoch": 0.305294246335594, "grad_norm": 1.4306340217590332, "learning_rate": 1.628275869419209e-05, "loss": 0.5462, "step": 11164 }, { "epoch": 0.30532159264931086, "grad_norm": 1.5279285907745361, "learning_rate": 1.628206960000618e-05, "loss": 0.893, "step": 11165 }, { "epoch": 0.3053489389630278, "grad_norm": 1.7866618633270264, "learning_rate": 1.6281380456539197e-05, "loss": 0.8524, "step": 11166 }, { "epoch": 0.3053762852767447, "grad_norm": 1.3671127557754517, "learning_rate": 1.6280691263796553e-05, "loss": 0.5702, "step": 11167 }, { "epoch": 0.30540363159046163, "grad_norm": 1.2018849849700928, "learning_rate": 1.628000202178366e-05, "loss": 0.54, "step": 11168 }, { "epoch": 0.3054309779041785, "grad_norm": 1.2992204427719116, "learning_rate": 1.627931273050591e-05, "loss": 0.8711, "step": 11169 }, { "epoch": 0.3054583242178954, "grad_norm": 1.508521318435669, "learning_rate": 1.6278623389968724e-05, "loss": 0.4808, "step": 11170 }, { "epoch": 0.30548567053161235, "grad_norm": 1.8209103345870972, "learning_rate": 1.6277934000177503e-05, "loss": 0.511, "step": 11171 }, { "epoch": 0.3055130168453293, "grad_norm": 1.2425836324691772, "learning_rate": 1.6277244561137658e-05, "loss": 0.8803, "step": 11172 }, { "epoch": 0.30554036315904615, "grad_norm": 1.7418417930603027, "learning_rate": 1.6276555072854594e-05, "loss": 0.5245, "step": 11173 }, { "epoch": 0.30556770947276307, "grad_norm": 1.6542704105377197, "learning_rate": 1.6275865535333722e-05, "loss": 0.5864, "step": 11174 }, { "epoch": 0.30559505578648, "grad_norm": 1.3453588485717773, "learning_rate": 1.6275175948580452e-05, "loss": 0.5656, "step": 11175 }, { "epoch": 0.30562240210019687, "grad_norm": 1.177824854850769, "learning_rate": 1.6274486312600192e-05, "loss": 0.5095, "step": 11176 }, { "epoch": 0.3056497484139138, "grad_norm": 1.320507287979126, "learning_rate": 1.627379662739835e-05, "loss": 0.5428, "step": 11177 }, { "epoch": 0.3056770947276307, "grad_norm": 1.8649240732192993, "learning_rate": 1.6273106892980342e-05, "loss": 0.6003, "step": 11178 }, { "epoch": 0.30570444104134764, "grad_norm": 1.3435028791427612, "learning_rate": 1.6272417109351578e-05, "loss": 0.5569, "step": 11179 }, { "epoch": 0.3057317873550645, "grad_norm": 1.294276475906372, "learning_rate": 1.6271727276517462e-05, "loss": 0.5451, "step": 11180 }, { "epoch": 0.30575913366878144, "grad_norm": 1.348167896270752, "learning_rate": 1.6271037394483415e-05, "loss": 0.5683, "step": 11181 }, { "epoch": 0.30578647998249836, "grad_norm": 1.7470110654830933, "learning_rate": 1.6270347463254842e-05, "loss": 0.6196, "step": 11182 }, { "epoch": 0.3058138262962153, "grad_norm": 1.1274598836898804, "learning_rate": 1.626965748283716e-05, "loss": 0.5246, "step": 11183 }, { "epoch": 0.30584117260993215, "grad_norm": 1.4635601043701172, "learning_rate": 1.6268967453235776e-05, "loss": 0.5299, "step": 11184 }, { "epoch": 0.3058685189236491, "grad_norm": 1.5378488302230835, "learning_rate": 1.626827737445611e-05, "loss": 0.5016, "step": 11185 }, { "epoch": 0.305895865237366, "grad_norm": 1.4658691883087158, "learning_rate": 1.626758724650357e-05, "loss": 0.5409, "step": 11186 }, { "epoch": 0.30592321155108293, "grad_norm": 1.4580347537994385, "learning_rate": 1.6266897069383572e-05, "loss": 0.6042, "step": 11187 }, { "epoch": 0.3059505578647998, "grad_norm": 1.3797266483306885, "learning_rate": 1.6266206843101532e-05, "loss": 0.8605, "step": 11188 }, { "epoch": 0.3059779041785167, "grad_norm": 1.4452331066131592, "learning_rate": 1.626551656766286e-05, "loss": 0.5576, "step": 11189 }, { "epoch": 0.30600525049223365, "grad_norm": 1.7261606454849243, "learning_rate": 1.626482624307298e-05, "loss": 0.5393, "step": 11190 }, { "epoch": 0.3060325968059506, "grad_norm": 1.9288581609725952, "learning_rate": 1.626413586933729e-05, "loss": 0.4366, "step": 11191 }, { "epoch": 0.30605994311966744, "grad_norm": 1.3797763586044312, "learning_rate": 1.6263445446461225e-05, "loss": 0.5772, "step": 11192 }, { "epoch": 0.30608728943338437, "grad_norm": 1.5000194311141968, "learning_rate": 1.6262754974450193e-05, "loss": 0.5644, "step": 11193 }, { "epoch": 0.3061146357471013, "grad_norm": 1.284091591835022, "learning_rate": 1.626206445330961e-05, "loss": 0.5156, "step": 11194 }, { "epoch": 0.3061419820608182, "grad_norm": 1.3571780920028687, "learning_rate": 1.626137388304489e-05, "loss": 0.4574, "step": 11195 }, { "epoch": 0.3061693283745351, "grad_norm": 1.2018404006958008, "learning_rate": 1.626068326366146e-05, "loss": 0.4807, "step": 11196 }, { "epoch": 0.306196674688252, "grad_norm": 1.2226580381393433, "learning_rate": 1.6259992595164727e-05, "loss": 0.5105, "step": 11197 }, { "epoch": 0.30622402100196894, "grad_norm": 1.2734266519546509, "learning_rate": 1.6259301877560118e-05, "loss": 0.5269, "step": 11198 }, { "epoch": 0.30625136731568586, "grad_norm": 1.1840554475784302, "learning_rate": 1.6258611110853042e-05, "loss": 0.5379, "step": 11199 }, { "epoch": 0.30627871362940273, "grad_norm": 1.5244086980819702, "learning_rate": 1.6257920295048925e-05, "loss": 0.5584, "step": 11200 }, { "epoch": 0.30630605994311966, "grad_norm": 2.5953309535980225, "learning_rate": 1.6257229430153184e-05, "loss": 0.8629, "step": 11201 }, { "epoch": 0.3063334062568366, "grad_norm": 1.3135725259780884, "learning_rate": 1.6256538516171243e-05, "loss": 0.5743, "step": 11202 }, { "epoch": 0.3063607525705535, "grad_norm": 1.5764001607894897, "learning_rate": 1.6255847553108516e-05, "loss": 0.4996, "step": 11203 }, { "epoch": 0.3063880988842704, "grad_norm": 1.9914509057998657, "learning_rate": 1.6255156540970425e-05, "loss": 0.508, "step": 11204 }, { "epoch": 0.3064154451979873, "grad_norm": 1.2370630502700806, "learning_rate": 1.6254465479762388e-05, "loss": 0.5545, "step": 11205 }, { "epoch": 0.3064427915117042, "grad_norm": 1.229900598526001, "learning_rate": 1.625377436948983e-05, "loss": 0.5633, "step": 11206 }, { "epoch": 0.30647013782542115, "grad_norm": 1.2601977586746216, "learning_rate": 1.625308321015818e-05, "loss": 0.5304, "step": 11207 }, { "epoch": 0.306497484139138, "grad_norm": 1.3500277996063232, "learning_rate": 1.6252392001772845e-05, "loss": 0.5337, "step": 11208 }, { "epoch": 0.30652483045285495, "grad_norm": 1.7145111560821533, "learning_rate": 1.6251700744339254e-05, "loss": 0.8647, "step": 11209 }, { "epoch": 0.30655217676657187, "grad_norm": 2.1034674644470215, "learning_rate": 1.6251009437862835e-05, "loss": 0.5643, "step": 11210 }, { "epoch": 0.3065795230802888, "grad_norm": 1.3826227188110352, "learning_rate": 1.6250318082349002e-05, "loss": 0.5549, "step": 11211 }, { "epoch": 0.30660686939400567, "grad_norm": 1.2500032186508179, "learning_rate": 1.6249626677803183e-05, "loss": 0.526, "step": 11212 }, { "epoch": 0.3066342157077226, "grad_norm": 1.5225878953933716, "learning_rate": 1.6248935224230806e-05, "loss": 0.5892, "step": 11213 }, { "epoch": 0.3066615620214395, "grad_norm": 1.1947728395462036, "learning_rate": 1.6248243721637287e-05, "loss": 0.5349, "step": 11214 }, { "epoch": 0.30668890833515644, "grad_norm": 1.5620824098587036, "learning_rate": 1.6247552170028053e-05, "loss": 0.4395, "step": 11215 }, { "epoch": 0.3067162546488733, "grad_norm": 1.2626850605010986, "learning_rate": 1.624686056940853e-05, "loss": 0.8779, "step": 11216 }, { "epoch": 0.30674360096259023, "grad_norm": 1.5199284553527832, "learning_rate": 1.6246168919784146e-05, "loss": 0.5395, "step": 11217 }, { "epoch": 0.30677094727630716, "grad_norm": 1.07772958278656, "learning_rate": 1.6245477221160326e-05, "loss": 0.5536, "step": 11218 }, { "epoch": 0.3067982935900241, "grad_norm": 1.8679128885269165, "learning_rate": 1.6244785473542493e-05, "loss": 0.5867, "step": 11219 }, { "epoch": 0.30682563990374095, "grad_norm": 1.3493555784225464, "learning_rate": 1.6244093676936073e-05, "loss": 0.5736, "step": 11220 }, { "epoch": 0.3068529862174579, "grad_norm": 1.3437416553497314, "learning_rate": 1.62434018313465e-05, "loss": 0.5414, "step": 11221 }, { "epoch": 0.3068803325311748, "grad_norm": 1.415787935256958, "learning_rate": 1.6242709936779196e-05, "loss": 0.928, "step": 11222 }, { "epoch": 0.30690767884489173, "grad_norm": 1.1307348012924194, "learning_rate": 1.6242017993239587e-05, "loss": 0.5723, "step": 11223 }, { "epoch": 0.3069350251586086, "grad_norm": 1.0930486917495728, "learning_rate": 1.6241326000733105e-05, "loss": 0.5121, "step": 11224 }, { "epoch": 0.3069623714723255, "grad_norm": 1.4303067922592163, "learning_rate": 1.6240633959265177e-05, "loss": 0.5556, "step": 11225 }, { "epoch": 0.30698971778604245, "grad_norm": 1.4834229946136475, "learning_rate": 1.6239941868841232e-05, "loss": 0.5321, "step": 11226 }, { "epoch": 0.3070170640997594, "grad_norm": 1.2274811267852783, "learning_rate": 1.6239249729466698e-05, "loss": 0.3803, "step": 11227 }, { "epoch": 0.30704441041347624, "grad_norm": 1.355447769165039, "learning_rate": 1.6238557541147005e-05, "loss": 0.53, "step": 11228 }, { "epoch": 0.30707175672719317, "grad_norm": 1.0739495754241943, "learning_rate": 1.6237865303887588e-05, "loss": 0.5293, "step": 11229 }, { "epoch": 0.3070991030409101, "grad_norm": 1.5866059064865112, "learning_rate": 1.623717301769387e-05, "loss": 0.5633, "step": 11230 }, { "epoch": 0.307126449354627, "grad_norm": 1.33127760887146, "learning_rate": 1.6236480682571283e-05, "loss": 0.5537, "step": 11231 }, { "epoch": 0.3071537956683439, "grad_norm": 1.4574754238128662, "learning_rate": 1.623578829852526e-05, "loss": 0.8431, "step": 11232 }, { "epoch": 0.3071811419820608, "grad_norm": 1.6043075323104858, "learning_rate": 1.6235095865561237e-05, "loss": 0.5157, "step": 11233 }, { "epoch": 0.30720848829577774, "grad_norm": 1.2583286762237549, "learning_rate": 1.623440338368464e-05, "loss": 0.5384, "step": 11234 }, { "epoch": 0.30723583460949466, "grad_norm": 1.54686439037323, "learning_rate": 1.6233710852900904e-05, "loss": 0.5332, "step": 11235 }, { "epoch": 0.30726318092321153, "grad_norm": 1.5042132139205933, "learning_rate": 1.623301827321546e-05, "loss": 0.5758, "step": 11236 }, { "epoch": 0.30729052723692846, "grad_norm": 1.5077062845230103, "learning_rate": 1.623232564463374e-05, "loss": 0.5155, "step": 11237 }, { "epoch": 0.3073178735506454, "grad_norm": 1.384319543838501, "learning_rate": 1.623163296716118e-05, "loss": 0.5451, "step": 11238 }, { "epoch": 0.3073452198643623, "grad_norm": 1.263561487197876, "learning_rate": 1.6230940240803214e-05, "loss": 0.487, "step": 11239 }, { "epoch": 0.3073725661780792, "grad_norm": 1.2178648710250854, "learning_rate": 1.6230247465565274e-05, "loss": 0.4597, "step": 11240 }, { "epoch": 0.3073999124917961, "grad_norm": 1.4574373960494995, "learning_rate": 1.6229554641452795e-05, "loss": 0.5308, "step": 11241 }, { "epoch": 0.307427258805513, "grad_norm": 1.1907292604446411, "learning_rate": 1.6228861768471217e-05, "loss": 0.5375, "step": 11242 }, { "epoch": 0.30745460511922995, "grad_norm": 1.6426080465316772, "learning_rate": 1.622816884662597e-05, "loss": 0.4643, "step": 11243 }, { "epoch": 0.3074819514329468, "grad_norm": 2.1464312076568604, "learning_rate": 1.6227475875922488e-05, "loss": 0.5123, "step": 11244 }, { "epoch": 0.30750929774666375, "grad_norm": 1.9180614948272705, "learning_rate": 1.6226782856366213e-05, "loss": 0.8702, "step": 11245 }, { "epoch": 0.30753664406038067, "grad_norm": 1.971198320388794, "learning_rate": 1.622608978796258e-05, "loss": 0.5358, "step": 11246 }, { "epoch": 0.3075639903740976, "grad_norm": 2.059333086013794, "learning_rate": 1.6225396670717023e-05, "loss": 0.6111, "step": 11247 }, { "epoch": 0.30759133668781447, "grad_norm": 3.479274272918701, "learning_rate": 1.622470350463498e-05, "loss": 0.5065, "step": 11248 }, { "epoch": 0.3076186830015314, "grad_norm": 1.8102437257766724, "learning_rate": 1.6224010289721894e-05, "loss": 0.5846, "step": 11249 }, { "epoch": 0.3076460293152483, "grad_norm": 1.5211544036865234, "learning_rate": 1.6223317025983194e-05, "loss": 0.5334, "step": 11250 }, { "epoch": 0.30767337562896524, "grad_norm": 1.3006906509399414, "learning_rate": 1.6222623713424327e-05, "loss": 0.5298, "step": 11251 }, { "epoch": 0.3077007219426821, "grad_norm": 1.411118984222412, "learning_rate": 1.6221930352050725e-05, "loss": 0.5425, "step": 11252 }, { "epoch": 0.30772806825639903, "grad_norm": 1.3775919675827026, "learning_rate": 1.6221236941867832e-05, "loss": 0.5765, "step": 11253 }, { "epoch": 0.30775541457011596, "grad_norm": 1.3798385858535767, "learning_rate": 1.6220543482881085e-05, "loss": 0.5486, "step": 11254 }, { "epoch": 0.3077827608838329, "grad_norm": 1.11368989944458, "learning_rate": 1.6219849975095927e-05, "loss": 0.5516, "step": 11255 }, { "epoch": 0.30781010719754975, "grad_norm": 1.367629885673523, "learning_rate": 1.6219156418517795e-05, "loss": 0.8753, "step": 11256 }, { "epoch": 0.3078374535112667, "grad_norm": 1.2031055688858032, "learning_rate": 1.621846281315213e-05, "loss": 0.5327, "step": 11257 }, { "epoch": 0.3078647998249836, "grad_norm": 1.4884620904922485, "learning_rate": 1.6217769159004375e-05, "loss": 0.5834, "step": 11258 }, { "epoch": 0.30789214613870053, "grad_norm": 1.5664981603622437, "learning_rate": 1.6217075456079973e-05, "loss": 0.5432, "step": 11259 }, { "epoch": 0.3079194924524174, "grad_norm": 1.5277084112167358, "learning_rate": 1.6216381704384363e-05, "loss": 0.5199, "step": 11260 }, { "epoch": 0.3079468387661343, "grad_norm": 1.5382537841796875, "learning_rate": 1.6215687903922986e-05, "loss": 0.5635, "step": 11261 }, { "epoch": 0.30797418507985125, "grad_norm": 1.0547453165054321, "learning_rate": 1.6214994054701286e-05, "loss": 0.5118, "step": 11262 }, { "epoch": 0.3080015313935682, "grad_norm": 1.0978269577026367, "learning_rate": 1.6214300156724712e-05, "loss": 0.5492, "step": 11263 }, { "epoch": 0.30802887770728504, "grad_norm": 1.1940243244171143, "learning_rate": 1.62136062099987e-05, "loss": 0.5482, "step": 11264 }, { "epoch": 0.30805622402100197, "grad_norm": 1.418912410736084, "learning_rate": 1.6212912214528693e-05, "loss": 0.5732, "step": 11265 }, { "epoch": 0.3080835703347189, "grad_norm": 1.328233003616333, "learning_rate": 1.621221817032014e-05, "loss": 0.4471, "step": 11266 }, { "epoch": 0.3081109166484358, "grad_norm": 1.724315881729126, "learning_rate": 1.6211524077378484e-05, "loss": 0.5064, "step": 11267 }, { "epoch": 0.3081382629621527, "grad_norm": 1.339294672012329, "learning_rate": 1.6210829935709165e-05, "loss": 0.3931, "step": 11268 }, { "epoch": 0.3081656092758696, "grad_norm": 1.5358612537384033, "learning_rate": 1.6210135745317638e-05, "loss": 0.5246, "step": 11269 }, { "epoch": 0.30819295558958654, "grad_norm": 1.2069107294082642, "learning_rate": 1.6209441506209342e-05, "loss": 0.5287, "step": 11270 }, { "epoch": 0.30822030190330346, "grad_norm": 1.4258289337158203, "learning_rate": 1.6208747218389727e-05, "loss": 0.4911, "step": 11271 }, { "epoch": 0.30824764821702033, "grad_norm": 1.3536267280578613, "learning_rate": 1.6208052881864237e-05, "loss": 0.5544, "step": 11272 }, { "epoch": 0.30827499453073726, "grad_norm": 1.6506726741790771, "learning_rate": 1.6207358496638316e-05, "loss": 0.5348, "step": 11273 }, { "epoch": 0.3083023408444542, "grad_norm": 1.2962915897369385, "learning_rate": 1.6206664062717416e-05, "loss": 0.5534, "step": 11274 }, { "epoch": 0.3083296871581711, "grad_norm": 1.7646703720092773, "learning_rate": 1.6205969580106982e-05, "loss": 0.5181, "step": 11275 }, { "epoch": 0.308357033471888, "grad_norm": 1.3871792554855347, "learning_rate": 1.6205275048812466e-05, "loss": 0.5802, "step": 11276 }, { "epoch": 0.3083843797856049, "grad_norm": 1.3917096853256226, "learning_rate": 1.6204580468839315e-05, "loss": 0.9359, "step": 11277 }, { "epoch": 0.3084117260993218, "grad_norm": 1.410659670829773, "learning_rate": 1.6203885840192978e-05, "loss": 0.5932, "step": 11278 }, { "epoch": 0.3084390724130387, "grad_norm": 1.3818953037261963, "learning_rate": 1.6203191162878896e-05, "loss": 0.5517, "step": 11279 }, { "epoch": 0.3084664187267556, "grad_norm": 1.3942824602127075, "learning_rate": 1.6202496436902527e-05, "loss": 0.5711, "step": 11280 }, { "epoch": 0.30849376504047255, "grad_norm": 1.5168644189834595, "learning_rate": 1.620180166226932e-05, "loss": 0.5153, "step": 11281 }, { "epoch": 0.30852111135418947, "grad_norm": 1.3630318641662598, "learning_rate": 1.6201106838984724e-05, "loss": 0.8932, "step": 11282 }, { "epoch": 0.30854845766790634, "grad_norm": 1.15726900100708, "learning_rate": 1.6200411967054187e-05, "loss": 0.5521, "step": 11283 }, { "epoch": 0.30857580398162326, "grad_norm": 1.4848968982696533, "learning_rate": 1.619971704648317e-05, "loss": 0.5119, "step": 11284 }, { "epoch": 0.3086031502953402, "grad_norm": 1.278547763824463, "learning_rate": 1.619902207727711e-05, "loss": 0.5463, "step": 11285 }, { "epoch": 0.3086304966090571, "grad_norm": 1.3777774572372437, "learning_rate": 1.619832705944147e-05, "loss": 0.534, "step": 11286 }, { "epoch": 0.308657842922774, "grad_norm": 1.7427674531936646, "learning_rate": 1.6197631992981698e-05, "loss": 0.5513, "step": 11287 }, { "epoch": 0.3086851892364909, "grad_norm": 1.3878568410873413, "learning_rate": 1.619693687790325e-05, "loss": 0.5567, "step": 11288 }, { "epoch": 0.30871253555020783, "grad_norm": 1.1857110261917114, "learning_rate": 1.6196241714211572e-05, "loss": 0.5437, "step": 11289 }, { "epoch": 0.30873988186392476, "grad_norm": 1.6475437879562378, "learning_rate": 1.6195546501912124e-05, "loss": 0.5452, "step": 11290 }, { "epoch": 0.30876722817764163, "grad_norm": 1.204929232597351, "learning_rate": 1.6194851241010353e-05, "loss": 0.577, "step": 11291 }, { "epoch": 0.30879457449135855, "grad_norm": 1.4604744911193848, "learning_rate": 1.6194155931511722e-05, "loss": 0.5186, "step": 11292 }, { "epoch": 0.3088219208050755, "grad_norm": 1.4430609941482544, "learning_rate": 1.6193460573421682e-05, "loss": 0.5195, "step": 11293 }, { "epoch": 0.3088492671187924, "grad_norm": 1.325767993927002, "learning_rate": 1.6192765166745682e-05, "loss": 0.5569, "step": 11294 }, { "epoch": 0.3088766134325093, "grad_norm": 1.3347115516662598, "learning_rate": 1.619206971148919e-05, "loss": 0.5468, "step": 11295 }, { "epoch": 0.3089039597462262, "grad_norm": 1.3021936416625977, "learning_rate": 1.6191374207657643e-05, "loss": 0.4746, "step": 11296 }, { "epoch": 0.3089313060599431, "grad_norm": 1.2167601585388184, "learning_rate": 1.6190678655256516e-05, "loss": 0.5353, "step": 11297 }, { "epoch": 0.30895865237366005, "grad_norm": 1.1597102880477905, "learning_rate": 1.6189983054291254e-05, "loss": 0.3698, "step": 11298 }, { "epoch": 0.3089859986873769, "grad_norm": 1.6478958129882812, "learning_rate": 1.6189287404767317e-05, "loss": 0.5426, "step": 11299 }, { "epoch": 0.30901334500109384, "grad_norm": 1.5874890089035034, "learning_rate": 1.6188591706690164e-05, "loss": 0.5342, "step": 11300 }, { "epoch": 0.30904069131481077, "grad_norm": 1.2710611820220947, "learning_rate": 1.6187895960065246e-05, "loss": 0.5374, "step": 11301 }, { "epoch": 0.3090680376285277, "grad_norm": 1.1630862951278687, "learning_rate": 1.6187200164898033e-05, "loss": 0.5627, "step": 11302 }, { "epoch": 0.30909538394224456, "grad_norm": 1.3834072351455688, "learning_rate": 1.618650432119397e-05, "loss": 0.5032, "step": 11303 }, { "epoch": 0.3091227302559615, "grad_norm": 1.3547571897506714, "learning_rate": 1.618580842895852e-05, "loss": 0.5268, "step": 11304 }, { "epoch": 0.3091500765696784, "grad_norm": 1.0459970235824585, "learning_rate": 1.6185112488197148e-05, "loss": 0.3903, "step": 11305 }, { "epoch": 0.30917742288339534, "grad_norm": 1.0963873863220215, "learning_rate": 1.618441649891531e-05, "loss": 0.4943, "step": 11306 }, { "epoch": 0.3092047691971122, "grad_norm": 1.3247827291488647, "learning_rate": 1.6183720461118462e-05, "loss": 0.5175, "step": 11307 }, { "epoch": 0.30923211551082913, "grad_norm": 1.13041090965271, "learning_rate": 1.618302437481207e-05, "loss": 0.4106, "step": 11308 }, { "epoch": 0.30925946182454606, "grad_norm": 1.5794093608856201, "learning_rate": 1.6182328240001587e-05, "loss": 0.3944, "step": 11309 }, { "epoch": 0.309286808138263, "grad_norm": 1.2052265405654907, "learning_rate": 1.6181632056692483e-05, "loss": 0.5531, "step": 11310 }, { "epoch": 0.30931415445197985, "grad_norm": 1.3818340301513672, "learning_rate": 1.6180935824890214e-05, "loss": 0.5596, "step": 11311 }, { "epoch": 0.3093415007656968, "grad_norm": 1.2118571996688843, "learning_rate": 1.618023954460024e-05, "loss": 0.5453, "step": 11312 }, { "epoch": 0.3093688470794137, "grad_norm": 1.188241720199585, "learning_rate": 1.6179543215828034e-05, "loss": 0.5325, "step": 11313 }, { "epoch": 0.3093961933931306, "grad_norm": 1.446116328239441, "learning_rate": 1.6178846838579044e-05, "loss": 0.3947, "step": 11314 }, { "epoch": 0.3094235397068475, "grad_norm": 1.417970061302185, "learning_rate": 1.6178150412858743e-05, "loss": 0.5353, "step": 11315 }, { "epoch": 0.3094508860205644, "grad_norm": 1.4333254098892212, "learning_rate": 1.617745393867259e-05, "loss": 0.4492, "step": 11316 }, { "epoch": 0.30947823233428134, "grad_norm": 1.2581101655960083, "learning_rate": 1.6176757416026046e-05, "loss": 0.5114, "step": 11317 }, { "epoch": 0.30950557864799827, "grad_norm": 1.4131360054016113, "learning_rate": 1.617606084492458e-05, "loss": 0.5308, "step": 11318 }, { "epoch": 0.30953292496171514, "grad_norm": 1.306031584739685, "learning_rate": 1.6175364225373657e-05, "loss": 0.5532, "step": 11319 }, { "epoch": 0.30956027127543206, "grad_norm": 1.3717483282089233, "learning_rate": 1.6174667557378738e-05, "loss": 0.913, "step": 11320 }, { "epoch": 0.309587617589149, "grad_norm": 1.2404820919036865, "learning_rate": 1.6173970840945287e-05, "loss": 0.5486, "step": 11321 }, { "epoch": 0.3096149639028659, "grad_norm": 1.193068504333496, "learning_rate": 1.6173274076078773e-05, "loss": 0.5478, "step": 11322 }, { "epoch": 0.3096423102165828, "grad_norm": 1.2239036560058594, "learning_rate": 1.6172577262784662e-05, "loss": 0.5287, "step": 11323 }, { "epoch": 0.3096696565302997, "grad_norm": 1.406724214553833, "learning_rate": 1.6171880401068422e-05, "loss": 0.5333, "step": 11324 }, { "epoch": 0.30969700284401663, "grad_norm": 1.52801513671875, "learning_rate": 1.6171183490935514e-05, "loss": 0.5659, "step": 11325 }, { "epoch": 0.30972434915773356, "grad_norm": 1.337816834449768, "learning_rate": 1.617048653239141e-05, "loss": 0.5495, "step": 11326 }, { "epoch": 0.30975169547145043, "grad_norm": 1.3136042356491089, "learning_rate": 1.616978952544158e-05, "loss": 0.5447, "step": 11327 }, { "epoch": 0.30977904178516735, "grad_norm": 1.2298052310943604, "learning_rate": 1.616909247009148e-05, "loss": 0.5442, "step": 11328 }, { "epoch": 0.3098063880988843, "grad_norm": 1.18004310131073, "learning_rate": 1.6168395366346587e-05, "loss": 0.5329, "step": 11329 }, { "epoch": 0.3098337344126012, "grad_norm": 1.3065866231918335, "learning_rate": 1.616769821421237e-05, "loss": 0.4875, "step": 11330 }, { "epoch": 0.30986108072631807, "grad_norm": 1.2560120820999146, "learning_rate": 1.61670010136943e-05, "loss": 0.5696, "step": 11331 }, { "epoch": 0.309888427040035, "grad_norm": 1.3823996782302856, "learning_rate": 1.616630376479784e-05, "loss": 0.5401, "step": 11332 }, { "epoch": 0.3099157733537519, "grad_norm": 1.2011643648147583, "learning_rate": 1.6165606467528458e-05, "loss": 0.5801, "step": 11333 }, { "epoch": 0.30994311966746885, "grad_norm": 1.4068821668624878, "learning_rate": 1.6164909121891632e-05, "loss": 0.5359, "step": 11334 }, { "epoch": 0.3099704659811857, "grad_norm": 1.0869673490524292, "learning_rate": 1.616421172789283e-05, "loss": 0.5397, "step": 11335 }, { "epoch": 0.30999781229490264, "grad_norm": 1.3166674375534058, "learning_rate": 1.616351428553752e-05, "loss": 0.4898, "step": 11336 }, { "epoch": 0.31002515860861957, "grad_norm": 1.4621953964233398, "learning_rate": 1.616281679483117e-05, "loss": 0.5201, "step": 11337 }, { "epoch": 0.3100525049223365, "grad_norm": 1.5404855012893677, "learning_rate": 1.616211925577927e-05, "loss": 0.5164, "step": 11338 }, { "epoch": 0.31007985123605336, "grad_norm": 1.3843529224395752, "learning_rate": 1.6161421668387266e-05, "loss": 0.5219, "step": 11339 }, { "epoch": 0.3101071975497703, "grad_norm": 1.2753608226776123, "learning_rate": 1.6160724032660647e-05, "loss": 0.5458, "step": 11340 }, { "epoch": 0.3101345438634872, "grad_norm": 1.4631284475326538, "learning_rate": 1.6160026348604882e-05, "loss": 0.8608, "step": 11341 }, { "epoch": 0.31016189017720414, "grad_norm": 1.7686437368392944, "learning_rate": 1.6159328616225444e-05, "loss": 0.5396, "step": 11342 }, { "epoch": 0.310189236490921, "grad_norm": 1.3805705308914185, "learning_rate": 1.6158630835527807e-05, "loss": 0.5432, "step": 11343 }, { "epoch": 0.31021658280463793, "grad_norm": 1.3911091089248657, "learning_rate": 1.6157933006517444e-05, "loss": 0.561, "step": 11344 }, { "epoch": 0.31024392911835486, "grad_norm": 1.4429923295974731, "learning_rate": 1.615723512919983e-05, "loss": 0.5412, "step": 11345 }, { "epoch": 0.3102712754320718, "grad_norm": 1.2688827514648438, "learning_rate": 1.6156537203580437e-05, "loss": 0.4301, "step": 11346 }, { "epoch": 0.31029862174578865, "grad_norm": 1.404331088066101, "learning_rate": 1.6155839229664746e-05, "loss": 0.5585, "step": 11347 }, { "epoch": 0.3103259680595056, "grad_norm": 1.3060415983200073, "learning_rate": 1.6155141207458226e-05, "loss": 0.4678, "step": 11348 }, { "epoch": 0.3103533143732225, "grad_norm": 1.3025858402252197, "learning_rate": 1.6154443136966356e-05, "loss": 0.5313, "step": 11349 }, { "epoch": 0.3103806606869394, "grad_norm": 1.4982175827026367, "learning_rate": 1.615374501819461e-05, "loss": 0.5417, "step": 11350 }, { "epoch": 0.3104080070006563, "grad_norm": 1.494834542274475, "learning_rate": 1.615304685114847e-05, "loss": 0.4855, "step": 11351 }, { "epoch": 0.3104353533143732, "grad_norm": 1.0891025066375732, "learning_rate": 1.6152348635833406e-05, "loss": 0.5158, "step": 11352 }, { "epoch": 0.31046269962809014, "grad_norm": 1.0290297269821167, "learning_rate": 1.6151650372254897e-05, "loss": 0.542, "step": 11353 }, { "epoch": 0.31049004594180707, "grad_norm": 1.1027089357376099, "learning_rate": 1.6150952060418427e-05, "loss": 0.546, "step": 11354 }, { "epoch": 0.31051739225552394, "grad_norm": 1.0313818454742432, "learning_rate": 1.6150253700329465e-05, "loss": 0.5232, "step": 11355 }, { "epoch": 0.31054473856924086, "grad_norm": 1.28953218460083, "learning_rate": 1.6149555291993496e-05, "loss": 0.5401, "step": 11356 }, { "epoch": 0.3105720848829578, "grad_norm": 1.273851990699768, "learning_rate": 1.6148856835415992e-05, "loss": 0.5569, "step": 11357 }, { "epoch": 0.3105994311966747, "grad_norm": 1.2976197004318237, "learning_rate": 1.6148158330602442e-05, "loss": 0.5272, "step": 11358 }, { "epoch": 0.3106267775103916, "grad_norm": 1.2689546346664429, "learning_rate": 1.6147459777558318e-05, "loss": 0.5343, "step": 11359 }, { "epoch": 0.3106541238241085, "grad_norm": 1.6292238235473633, "learning_rate": 1.61467611762891e-05, "loss": 0.5701, "step": 11360 }, { "epoch": 0.31068147013782543, "grad_norm": 1.2047125101089478, "learning_rate": 1.6146062526800273e-05, "loss": 0.5536, "step": 11361 }, { "epoch": 0.31070881645154236, "grad_norm": 1.8893845081329346, "learning_rate": 1.6145363829097314e-05, "loss": 0.8482, "step": 11362 }, { "epoch": 0.3107361627652592, "grad_norm": 1.6351280212402344, "learning_rate": 1.6144665083185705e-05, "loss": 0.4832, "step": 11363 }, { "epoch": 0.31076350907897615, "grad_norm": 1.4681907892227173, "learning_rate": 1.614396628907093e-05, "loss": 0.4866, "step": 11364 }, { "epoch": 0.3107908553926931, "grad_norm": 1.1154937744140625, "learning_rate": 1.6143267446758466e-05, "loss": 0.56, "step": 11365 }, { "epoch": 0.31081820170641, "grad_norm": 1.2481763362884521, "learning_rate": 1.6142568556253797e-05, "loss": 0.5403, "step": 11366 }, { "epoch": 0.31084554802012687, "grad_norm": 2.2765250205993652, "learning_rate": 1.614186961756241e-05, "loss": 0.5156, "step": 11367 }, { "epoch": 0.3108728943338438, "grad_norm": 1.281429409980774, "learning_rate": 1.614117063068978e-05, "loss": 0.5417, "step": 11368 }, { "epoch": 0.3109002406475607, "grad_norm": 1.383629560470581, "learning_rate": 1.61404715956414e-05, "loss": 0.5542, "step": 11369 }, { "epoch": 0.31092758696127765, "grad_norm": 1.2222542762756348, "learning_rate": 1.6139772512422743e-05, "loss": 0.8697, "step": 11370 }, { "epoch": 0.3109549332749945, "grad_norm": 1.1183795928955078, "learning_rate": 1.61390733810393e-05, "loss": 0.5239, "step": 11371 }, { "epoch": 0.31098227958871144, "grad_norm": 1.2936782836914062, "learning_rate": 1.6138374201496555e-05, "loss": 0.5581, "step": 11372 }, { "epoch": 0.31100962590242837, "grad_norm": 1.24542236328125, "learning_rate": 1.613767497379999e-05, "loss": 0.5489, "step": 11373 }, { "epoch": 0.3110369722161453, "grad_norm": 1.2756187915802002, "learning_rate": 1.6136975697955097e-05, "loss": 0.5842, "step": 11374 }, { "epoch": 0.31106431852986216, "grad_norm": 1.489960789680481, "learning_rate": 1.613627637396735e-05, "loss": 0.5716, "step": 11375 }, { "epoch": 0.3110916648435791, "grad_norm": 1.1358765363693237, "learning_rate": 1.6135577001842248e-05, "loss": 0.5301, "step": 11376 }, { "epoch": 0.311119011157296, "grad_norm": 1.2830473184585571, "learning_rate": 1.613487758158527e-05, "loss": 0.5119, "step": 11377 }, { "epoch": 0.3111463574710129, "grad_norm": 2.366611957550049, "learning_rate": 1.61341781132019e-05, "loss": 0.5543, "step": 11378 }, { "epoch": 0.3111737037847298, "grad_norm": 1.3785110712051392, "learning_rate": 1.6133478596697632e-05, "loss": 0.5593, "step": 11379 }, { "epoch": 0.31120105009844673, "grad_norm": 1.0763505697250366, "learning_rate": 1.613277903207795e-05, "loss": 0.5563, "step": 11380 }, { "epoch": 0.31122839641216365, "grad_norm": 1.2343077659606934, "learning_rate": 1.6132079419348343e-05, "loss": 0.5263, "step": 11381 }, { "epoch": 0.3112557427258805, "grad_norm": 1.643111228942871, "learning_rate": 1.6131379758514296e-05, "loss": 0.4193, "step": 11382 }, { "epoch": 0.31128308903959745, "grad_norm": 1.4130473136901855, "learning_rate": 1.6130680049581307e-05, "loss": 0.5323, "step": 11383 }, { "epoch": 0.3113104353533144, "grad_norm": 1.325348138809204, "learning_rate": 1.612998029255485e-05, "loss": 0.5463, "step": 11384 }, { "epoch": 0.3113377816670313, "grad_norm": 1.4735479354858398, "learning_rate": 1.6129280487440427e-05, "loss": 0.5261, "step": 11385 }, { "epoch": 0.31136512798074817, "grad_norm": 1.2962392568588257, "learning_rate": 1.612858063424352e-05, "loss": 0.5767, "step": 11386 }, { "epoch": 0.3113924742944651, "grad_norm": 1.2636420726776123, "learning_rate": 1.612788073296963e-05, "loss": 0.5235, "step": 11387 }, { "epoch": 0.311419820608182, "grad_norm": 1.6134113073349, "learning_rate": 1.6127180783624236e-05, "loss": 0.5547, "step": 11388 }, { "epoch": 0.31144716692189894, "grad_norm": 1.4947301149368286, "learning_rate": 1.612648078621283e-05, "loss": 0.59, "step": 11389 }, { "epoch": 0.3114745132356158, "grad_norm": 1.4230892658233643, "learning_rate": 1.612578074074091e-05, "loss": 0.5312, "step": 11390 }, { "epoch": 0.31150185954933274, "grad_norm": 1.4997549057006836, "learning_rate": 1.6125080647213963e-05, "loss": 0.5532, "step": 11391 }, { "epoch": 0.31152920586304966, "grad_norm": 1.7098500728607178, "learning_rate": 1.6124380505637483e-05, "loss": 0.4986, "step": 11392 }, { "epoch": 0.3115565521767666, "grad_norm": 1.1680145263671875, "learning_rate": 1.6123680316016962e-05, "loss": 0.549, "step": 11393 }, { "epoch": 0.31158389849048346, "grad_norm": 1.312964916229248, "learning_rate": 1.612298007835789e-05, "loss": 0.5912, "step": 11394 }, { "epoch": 0.3116112448042004, "grad_norm": 1.3831416368484497, "learning_rate": 1.6122279792665764e-05, "loss": 0.5628, "step": 11395 }, { "epoch": 0.3116385911179173, "grad_norm": 1.3032931089401245, "learning_rate": 1.6121579458946074e-05, "loss": 0.5259, "step": 11396 }, { "epoch": 0.31166593743163423, "grad_norm": 1.3503425121307373, "learning_rate": 1.6120879077204318e-05, "loss": 0.5721, "step": 11397 }, { "epoch": 0.3116932837453511, "grad_norm": 1.2279878854751587, "learning_rate": 1.6120178647445986e-05, "loss": 0.5174, "step": 11398 }, { "epoch": 0.311720630059068, "grad_norm": 1.1793055534362793, "learning_rate": 1.6119478169676576e-05, "loss": 0.4876, "step": 11399 }, { "epoch": 0.31174797637278495, "grad_norm": 1.483837604522705, "learning_rate": 1.611877764390158e-05, "loss": 0.4346, "step": 11400 }, { "epoch": 0.3117753226865019, "grad_norm": 1.5989800691604614, "learning_rate": 1.6118077070126503e-05, "loss": 0.5725, "step": 11401 }, { "epoch": 0.31180266900021875, "grad_norm": 1.3294360637664795, "learning_rate": 1.6117376448356826e-05, "loss": 0.5708, "step": 11402 }, { "epoch": 0.31183001531393567, "grad_norm": 1.4383584260940552, "learning_rate": 1.6116675778598058e-05, "loss": 0.5719, "step": 11403 }, { "epoch": 0.3118573616276526, "grad_norm": 1.3482486009597778, "learning_rate": 1.6115975060855683e-05, "loss": 0.5645, "step": 11404 }, { "epoch": 0.3118847079413695, "grad_norm": 1.0472041368484497, "learning_rate": 1.6115274295135207e-05, "loss": 0.5485, "step": 11405 }, { "epoch": 0.3119120542550864, "grad_norm": 1.2607260942459106, "learning_rate": 1.611457348144213e-05, "loss": 0.5332, "step": 11406 }, { "epoch": 0.3119394005688033, "grad_norm": 1.5245096683502197, "learning_rate": 1.6113872619781938e-05, "loss": 0.5694, "step": 11407 }, { "epoch": 0.31196674688252024, "grad_norm": 1.389601707458496, "learning_rate": 1.6113171710160142e-05, "loss": 0.5087, "step": 11408 }, { "epoch": 0.31199409319623717, "grad_norm": 1.3587772846221924, "learning_rate": 1.6112470752582228e-05, "loss": 0.5346, "step": 11409 }, { "epoch": 0.31202143950995403, "grad_norm": 1.7178804874420166, "learning_rate": 1.6111769747053704e-05, "loss": 0.5383, "step": 11410 }, { "epoch": 0.31204878582367096, "grad_norm": 1.3493636846542358, "learning_rate": 1.6111068693580066e-05, "loss": 0.3697, "step": 11411 }, { "epoch": 0.3120761321373879, "grad_norm": 1.9292967319488525, "learning_rate": 1.6110367592166816e-05, "loss": 0.8806, "step": 11412 }, { "epoch": 0.3121034784511048, "grad_norm": 1.4050776958465576, "learning_rate": 1.6109666442819446e-05, "loss": 0.577, "step": 11413 }, { "epoch": 0.3121308247648217, "grad_norm": 1.472471833229065, "learning_rate": 1.610896524554347e-05, "loss": 0.5704, "step": 11414 }, { "epoch": 0.3121581710785386, "grad_norm": 1.3117998838424683, "learning_rate": 1.6108264000344374e-05, "loss": 0.5724, "step": 11415 }, { "epoch": 0.31218551739225553, "grad_norm": 1.482240080833435, "learning_rate": 1.6107562707227673e-05, "loss": 0.5311, "step": 11416 }, { "epoch": 0.31221286370597245, "grad_norm": 1.211401104927063, "learning_rate": 1.6106861366198855e-05, "loss": 0.5517, "step": 11417 }, { "epoch": 0.3122402100196893, "grad_norm": 1.2369498014450073, "learning_rate": 1.6106159977263432e-05, "loss": 0.5607, "step": 11418 }, { "epoch": 0.31226755633340625, "grad_norm": 1.4968969821929932, "learning_rate": 1.61054585404269e-05, "loss": 0.5548, "step": 11419 }, { "epoch": 0.3122949026471232, "grad_norm": 2.616428852081299, "learning_rate": 1.610475705569477e-05, "loss": 0.5551, "step": 11420 }, { "epoch": 0.3123222489608401, "grad_norm": 1.683344841003418, "learning_rate": 1.6104055523072533e-05, "loss": 0.4722, "step": 11421 }, { "epoch": 0.31234959527455697, "grad_norm": 1.3967732191085815, "learning_rate": 1.6103353942565698e-05, "loss": 0.5228, "step": 11422 }, { "epoch": 0.3123769415882739, "grad_norm": 1.3919371366500854, "learning_rate": 1.610265231417977e-05, "loss": 0.518, "step": 11423 }, { "epoch": 0.3124042879019908, "grad_norm": 1.1828129291534424, "learning_rate": 1.610195063792025e-05, "loss": 0.5085, "step": 11424 }, { "epoch": 0.31243163421570774, "grad_norm": 1.2303663492202759, "learning_rate": 1.610124891379265e-05, "loss": 0.5403, "step": 11425 }, { "epoch": 0.3124589805294246, "grad_norm": 1.3104052543640137, "learning_rate": 1.6100547141802465e-05, "loss": 0.5732, "step": 11426 }, { "epoch": 0.31248632684314154, "grad_norm": 1.1597976684570312, "learning_rate": 1.609984532195521e-05, "loss": 0.4171, "step": 11427 }, { "epoch": 0.31251367315685846, "grad_norm": 1.1465418338775635, "learning_rate": 1.6099143454256377e-05, "loss": 0.5484, "step": 11428 }, { "epoch": 0.3125410194705754, "grad_norm": 1.4390733242034912, "learning_rate": 1.6098441538711484e-05, "loss": 0.4133, "step": 11429 }, { "epoch": 0.31256836578429226, "grad_norm": 1.2401448488235474, "learning_rate": 1.609773957532603e-05, "loss": 0.5378, "step": 11430 }, { "epoch": 0.3125957120980092, "grad_norm": 1.3929661512374878, "learning_rate": 1.609703756410553e-05, "loss": 0.562, "step": 11431 }, { "epoch": 0.3126230584117261, "grad_norm": 1.3237766027450562, "learning_rate": 1.6096335505055486e-05, "loss": 0.4566, "step": 11432 }, { "epoch": 0.31265040472544303, "grad_norm": 1.6407819986343384, "learning_rate": 1.6095633398181406e-05, "loss": 0.4733, "step": 11433 }, { "epoch": 0.3126777510391599, "grad_norm": 1.1335960626602173, "learning_rate": 1.6094931243488795e-05, "loss": 0.5277, "step": 11434 }, { "epoch": 0.3127050973528768, "grad_norm": 1.2434823513031006, "learning_rate": 1.6094229040983162e-05, "loss": 0.5676, "step": 11435 }, { "epoch": 0.31273244366659375, "grad_norm": 1.2164267301559448, "learning_rate": 1.6093526790670022e-05, "loss": 0.8062, "step": 11436 }, { "epoch": 0.3127597899803107, "grad_norm": 1.2898011207580566, "learning_rate": 1.6092824492554875e-05, "loss": 0.3924, "step": 11437 }, { "epoch": 0.31278713629402755, "grad_norm": 1.1703232526779175, "learning_rate": 1.6092122146643238e-05, "loss": 0.5801, "step": 11438 }, { "epoch": 0.31281448260774447, "grad_norm": 1.5451173782348633, "learning_rate": 1.6091419752940613e-05, "loss": 0.5424, "step": 11439 }, { "epoch": 0.3128418289214614, "grad_norm": 1.2345011234283447, "learning_rate": 1.609071731145252e-05, "loss": 0.5321, "step": 11440 }, { "epoch": 0.3128691752351783, "grad_norm": 1.3151298761367798, "learning_rate": 1.6090014822184458e-05, "loss": 0.5361, "step": 11441 }, { "epoch": 0.3128965215488952, "grad_norm": 1.1324462890625, "learning_rate": 1.6089312285141947e-05, "loss": 0.5236, "step": 11442 }, { "epoch": 0.3129238678626121, "grad_norm": 1.2766778469085693, "learning_rate": 1.608860970033049e-05, "loss": 0.5261, "step": 11443 }, { "epoch": 0.31295121417632904, "grad_norm": 1.7479723691940308, "learning_rate": 1.608790706775561e-05, "loss": 0.5776, "step": 11444 }, { "epoch": 0.31297856049004597, "grad_norm": 1.5379188060760498, "learning_rate": 1.608720438742281e-05, "loss": 0.5726, "step": 11445 }, { "epoch": 0.31300590680376283, "grad_norm": 1.4123769998550415, "learning_rate": 1.6086501659337604e-05, "loss": 0.5453, "step": 11446 }, { "epoch": 0.31303325311747976, "grad_norm": 2.5547308921813965, "learning_rate": 1.6085798883505507e-05, "loss": 0.3961, "step": 11447 }, { "epoch": 0.3130605994311967, "grad_norm": 1.338663935661316, "learning_rate": 1.6085096059932028e-05, "loss": 0.5462, "step": 11448 }, { "epoch": 0.3130879457449136, "grad_norm": 1.363420844078064, "learning_rate": 1.6084393188622686e-05, "loss": 0.5333, "step": 11449 }, { "epoch": 0.3131152920586305, "grad_norm": 1.265473484992981, "learning_rate": 1.6083690269582987e-05, "loss": 0.3944, "step": 11450 }, { "epoch": 0.3131426383723474, "grad_norm": 1.589410424232483, "learning_rate": 1.6082987302818452e-05, "loss": 0.5172, "step": 11451 }, { "epoch": 0.31316998468606433, "grad_norm": 1.5467404127120972, "learning_rate": 1.6082284288334596e-05, "loss": 0.4686, "step": 11452 }, { "epoch": 0.31319733099978125, "grad_norm": 1.4414321184158325, "learning_rate": 1.608158122613693e-05, "loss": 0.5473, "step": 11453 }, { "epoch": 0.3132246773134981, "grad_norm": 1.208916187286377, "learning_rate": 1.6080878116230973e-05, "loss": 0.5457, "step": 11454 }, { "epoch": 0.31325202362721505, "grad_norm": 1.4549860954284668, "learning_rate": 1.608017495862223e-05, "loss": 0.5213, "step": 11455 }, { "epoch": 0.313279369940932, "grad_norm": 1.2019562721252441, "learning_rate": 1.6079471753316234e-05, "loss": 0.5423, "step": 11456 }, { "epoch": 0.3133067162546489, "grad_norm": 1.2080072164535522, "learning_rate": 1.607876850031849e-05, "loss": 0.5596, "step": 11457 }, { "epoch": 0.31333406256836577, "grad_norm": 1.2106807231903076, "learning_rate": 1.607806519963452e-05, "loss": 0.5499, "step": 11458 }, { "epoch": 0.3133614088820827, "grad_norm": 1.3582895994186401, "learning_rate": 1.6077361851269836e-05, "loss": 0.5264, "step": 11459 }, { "epoch": 0.3133887551957996, "grad_norm": 1.0804977416992188, "learning_rate": 1.607665845522996e-05, "loss": 0.5289, "step": 11460 }, { "epoch": 0.31341610150951654, "grad_norm": 1.2201550006866455, "learning_rate": 1.607595501152041e-05, "loss": 0.5302, "step": 11461 }, { "epoch": 0.3134434478232334, "grad_norm": 1.3125628232955933, "learning_rate": 1.60752515201467e-05, "loss": 0.5584, "step": 11462 }, { "epoch": 0.31347079413695034, "grad_norm": 1.2151329517364502, "learning_rate": 1.6074547981114355e-05, "loss": 0.5504, "step": 11463 }, { "epoch": 0.31349814045066726, "grad_norm": 1.3450124263763428, "learning_rate": 1.6073844394428887e-05, "loss": 0.5355, "step": 11464 }, { "epoch": 0.3135254867643842, "grad_norm": 1.366828203201294, "learning_rate": 1.607314076009582e-05, "loss": 0.5513, "step": 11465 }, { "epoch": 0.31355283307810106, "grad_norm": 1.1282424926757812, "learning_rate": 1.6072437078120673e-05, "loss": 0.3933, "step": 11466 }, { "epoch": 0.313580179391818, "grad_norm": 2.8326759338378906, "learning_rate": 1.6071733348508966e-05, "loss": 0.8492, "step": 11467 }, { "epoch": 0.3136075257055349, "grad_norm": 1.4010412693023682, "learning_rate": 1.607102957126622e-05, "loss": 0.5564, "step": 11468 }, { "epoch": 0.31363487201925183, "grad_norm": 1.398358941078186, "learning_rate": 1.6070325746397958e-05, "loss": 0.5409, "step": 11469 }, { "epoch": 0.3136622183329687, "grad_norm": 1.0989633798599243, "learning_rate": 1.6069621873909696e-05, "loss": 0.5719, "step": 11470 }, { "epoch": 0.3136895646466856, "grad_norm": 1.384364128112793, "learning_rate": 1.606891795380696e-05, "loss": 0.3959, "step": 11471 }, { "epoch": 0.31371691096040255, "grad_norm": 1.3471628427505493, "learning_rate": 1.6068213986095268e-05, "loss": 0.5201, "step": 11472 }, { "epoch": 0.3137442572741195, "grad_norm": 1.2614909410476685, "learning_rate": 1.6067509970780147e-05, "loss": 0.5054, "step": 11473 }, { "epoch": 0.31377160358783635, "grad_norm": 1.3813711404800415, "learning_rate": 1.6066805907867118e-05, "loss": 0.4947, "step": 11474 }, { "epoch": 0.31379894990155327, "grad_norm": 1.7157829999923706, "learning_rate": 1.6066101797361703e-05, "loss": 0.579, "step": 11475 }, { "epoch": 0.3138262962152702, "grad_norm": 1.2664289474487305, "learning_rate": 1.606539763926943e-05, "loss": 0.5442, "step": 11476 }, { "epoch": 0.3138536425289871, "grad_norm": 1.2984778881072998, "learning_rate": 1.6064693433595816e-05, "loss": 0.3933, "step": 11477 }, { "epoch": 0.313880988842704, "grad_norm": 1.5267298221588135, "learning_rate": 1.6063989180346386e-05, "loss": 0.5492, "step": 11478 }, { "epoch": 0.3139083351564209, "grad_norm": 1.602657437324524, "learning_rate": 1.606328487952667e-05, "loss": 0.5745, "step": 11479 }, { "epoch": 0.31393568147013784, "grad_norm": 1.3971103429794312, "learning_rate": 1.6062580531142194e-05, "loss": 0.5267, "step": 11480 }, { "epoch": 0.3139630277838547, "grad_norm": 1.605104684829712, "learning_rate": 1.6061876135198477e-05, "loss": 0.4648, "step": 11481 }, { "epoch": 0.31399037409757163, "grad_norm": 1.0121427774429321, "learning_rate": 1.6061171691701046e-05, "loss": 0.5471, "step": 11482 }, { "epoch": 0.31401772041128856, "grad_norm": 1.2555065155029297, "learning_rate": 1.606046720065543e-05, "loss": 0.5487, "step": 11483 }, { "epoch": 0.3140450667250055, "grad_norm": 1.3678909540176392, "learning_rate": 1.6059762662067156e-05, "loss": 0.4978, "step": 11484 }, { "epoch": 0.31407241303872235, "grad_norm": 1.6399779319763184, "learning_rate": 1.6059058075941747e-05, "loss": 0.5496, "step": 11485 }, { "epoch": 0.3140997593524393, "grad_norm": 1.2457951307296753, "learning_rate": 1.605835344228473e-05, "loss": 0.547, "step": 11486 }, { "epoch": 0.3141271056661562, "grad_norm": 1.0272420644760132, "learning_rate": 1.6057648761101643e-05, "loss": 0.5313, "step": 11487 }, { "epoch": 0.31415445197987313, "grad_norm": 1.6846156120300293, "learning_rate": 1.6056944032398003e-05, "loss": 0.4789, "step": 11488 }, { "epoch": 0.31418179829359, "grad_norm": 1.4605646133422852, "learning_rate": 1.605623925617934e-05, "loss": 0.549, "step": 11489 }, { "epoch": 0.3142091446073069, "grad_norm": 1.4687743186950684, "learning_rate": 1.6055534432451186e-05, "loss": 0.48, "step": 11490 }, { "epoch": 0.31423649092102385, "grad_norm": 1.0325261354446411, "learning_rate": 1.605482956121907e-05, "loss": 0.5552, "step": 11491 }, { "epoch": 0.3142638372347408, "grad_norm": 1.0695003271102905, "learning_rate": 1.605412464248852e-05, "loss": 0.5001, "step": 11492 }, { "epoch": 0.31429118354845764, "grad_norm": 1.2646328210830688, "learning_rate": 1.6053419676265057e-05, "loss": 0.5438, "step": 11493 }, { "epoch": 0.31431852986217457, "grad_norm": 1.2504706382751465, "learning_rate": 1.605271466255423e-05, "loss": 0.5341, "step": 11494 }, { "epoch": 0.3143458761758915, "grad_norm": 1.198026180267334, "learning_rate": 1.6052009601361557e-05, "loss": 0.5216, "step": 11495 }, { "epoch": 0.3143732224896084, "grad_norm": 1.3760920763015747, "learning_rate": 1.605130449269257e-05, "loss": 0.5863, "step": 11496 }, { "epoch": 0.3144005688033253, "grad_norm": 1.2101293802261353, "learning_rate": 1.6050599336552806e-05, "loss": 0.5238, "step": 11497 }, { "epoch": 0.3144279151170422, "grad_norm": 1.264198899269104, "learning_rate": 1.604989413294779e-05, "loss": 0.5519, "step": 11498 }, { "epoch": 0.31445526143075914, "grad_norm": 1.8797545433044434, "learning_rate": 1.6049188881883057e-05, "loss": 0.8758, "step": 11499 }, { "epoch": 0.31448260774447606, "grad_norm": 1.2884955406188965, "learning_rate": 1.604848358336414e-05, "loss": 0.5356, "step": 11500 }, { "epoch": 0.31450995405819293, "grad_norm": 1.3046081066131592, "learning_rate": 1.6047778237396575e-05, "loss": 0.5373, "step": 11501 }, { "epoch": 0.31453730037190986, "grad_norm": 1.2849197387695312, "learning_rate": 1.6047072843985886e-05, "loss": 0.5223, "step": 11502 }, { "epoch": 0.3145646466856268, "grad_norm": 1.6403255462646484, "learning_rate": 1.6046367403137616e-05, "loss": 0.5237, "step": 11503 }, { "epoch": 0.3145919929993437, "grad_norm": 1.6329845190048218, "learning_rate": 1.60456619148573e-05, "loss": 0.859, "step": 11504 }, { "epoch": 0.3146193393130606, "grad_norm": 1.226516842842102, "learning_rate": 1.604495637915046e-05, "loss": 0.5357, "step": 11505 }, { "epoch": 0.3146466856267775, "grad_norm": 1.4911843538284302, "learning_rate": 1.604425079602264e-05, "loss": 0.496, "step": 11506 }, { "epoch": 0.3146740319404944, "grad_norm": 1.1103436946868896, "learning_rate": 1.6043545165479375e-05, "loss": 0.5458, "step": 11507 }, { "epoch": 0.31470137825421135, "grad_norm": 1.4257012605667114, "learning_rate": 1.60428394875262e-05, "loss": 0.8814, "step": 11508 }, { "epoch": 0.3147287245679282, "grad_norm": 1.2373762130737305, "learning_rate": 1.6042133762168646e-05, "loss": 0.5586, "step": 11509 }, { "epoch": 0.31475607088164514, "grad_norm": 1.4441289901733398, "learning_rate": 1.6041427989412256e-05, "loss": 0.5426, "step": 11510 }, { "epoch": 0.31478341719536207, "grad_norm": 1.7044110298156738, "learning_rate": 1.604072216926256e-05, "loss": 0.4032, "step": 11511 }, { "epoch": 0.314810763509079, "grad_norm": 1.1541210412979126, "learning_rate": 1.6040016301725102e-05, "loss": 0.5875, "step": 11512 }, { "epoch": 0.31483810982279586, "grad_norm": 1.434158444404602, "learning_rate": 1.6039310386805417e-05, "loss": 0.5597, "step": 11513 }, { "epoch": 0.3148654561365128, "grad_norm": 1.281045913696289, "learning_rate": 1.6038604424509037e-05, "loss": 0.847, "step": 11514 }, { "epoch": 0.3148928024502297, "grad_norm": 1.3855042457580566, "learning_rate": 1.603789841484151e-05, "loss": 0.5558, "step": 11515 }, { "epoch": 0.31492014876394664, "grad_norm": 1.4863333702087402, "learning_rate": 1.6037192357808368e-05, "loss": 0.4953, "step": 11516 }, { "epoch": 0.3149474950776635, "grad_norm": 1.2631914615631104, "learning_rate": 1.6036486253415147e-05, "loss": 0.4301, "step": 11517 }, { "epoch": 0.31497484139138043, "grad_norm": 1.7037657499313354, "learning_rate": 1.6035780101667395e-05, "loss": 0.5502, "step": 11518 }, { "epoch": 0.31500218770509736, "grad_norm": 1.2868138551712036, "learning_rate": 1.6035073902570642e-05, "loss": 0.8745, "step": 11519 }, { "epoch": 0.3150295340188143, "grad_norm": 1.405799150466919, "learning_rate": 1.6034367656130437e-05, "loss": 0.4955, "step": 11520 }, { "epoch": 0.31505688033253115, "grad_norm": 1.2889328002929688, "learning_rate": 1.603366136235231e-05, "loss": 0.5633, "step": 11521 }, { "epoch": 0.3150842266462481, "grad_norm": 1.1211048364639282, "learning_rate": 1.6032955021241815e-05, "loss": 0.5378, "step": 11522 }, { "epoch": 0.315111572959965, "grad_norm": 1.3764111995697021, "learning_rate": 1.603224863280448e-05, "loss": 0.563, "step": 11523 }, { "epoch": 0.31513891927368193, "grad_norm": 1.4195160865783691, "learning_rate": 1.6031542197045856e-05, "loss": 0.5422, "step": 11524 }, { "epoch": 0.3151662655873988, "grad_norm": 3.0895018577575684, "learning_rate": 1.603083571397148e-05, "loss": 0.4017, "step": 11525 }, { "epoch": 0.3151936119011157, "grad_norm": 1.9172042608261108, "learning_rate": 1.6030129183586894e-05, "loss": 0.525, "step": 11526 }, { "epoch": 0.31522095821483265, "grad_norm": 1.3056581020355225, "learning_rate": 1.6029422605897642e-05, "loss": 0.5306, "step": 11527 }, { "epoch": 0.31524830452854957, "grad_norm": 1.4534794092178345, "learning_rate": 1.6028715980909265e-05, "loss": 0.581, "step": 11528 }, { "epoch": 0.31527565084226644, "grad_norm": 1.4297198057174683, "learning_rate": 1.602800930862731e-05, "loss": 0.4911, "step": 11529 }, { "epoch": 0.31530299715598337, "grad_norm": 1.3587491512298584, "learning_rate": 1.602730258905732e-05, "loss": 0.5639, "step": 11530 }, { "epoch": 0.3153303434697003, "grad_norm": 1.1835418939590454, "learning_rate": 1.6026595822204832e-05, "loss": 0.5399, "step": 11531 }, { "epoch": 0.3153576897834172, "grad_norm": 1.2288541793823242, "learning_rate": 1.60258890080754e-05, "loss": 0.5386, "step": 11532 }, { "epoch": 0.3153850360971341, "grad_norm": 1.4925446510314941, "learning_rate": 1.6025182146674564e-05, "loss": 0.4834, "step": 11533 }, { "epoch": 0.315412382410851, "grad_norm": 1.3384891748428345, "learning_rate": 1.602447523800787e-05, "loss": 0.4537, "step": 11534 }, { "epoch": 0.31543972872456794, "grad_norm": 1.0658460855484009, "learning_rate": 1.6023768282080863e-05, "loss": 0.5536, "step": 11535 }, { "epoch": 0.31546707503828486, "grad_norm": 1.590510368347168, "learning_rate": 1.6023061278899085e-05, "loss": 0.5242, "step": 11536 }, { "epoch": 0.31549442135200173, "grad_norm": 1.406326174736023, "learning_rate": 1.602235422846809e-05, "loss": 0.8413, "step": 11537 }, { "epoch": 0.31552176766571866, "grad_norm": 0.9996873140335083, "learning_rate": 1.602164713079342e-05, "loss": 0.5502, "step": 11538 }, { "epoch": 0.3155491139794356, "grad_norm": 1.3240879774093628, "learning_rate": 1.6020939985880625e-05, "loss": 0.5536, "step": 11539 }, { "epoch": 0.3155764602931525, "grad_norm": 1.392379641532898, "learning_rate": 1.602023279373525e-05, "loss": 0.5364, "step": 11540 }, { "epoch": 0.3156038066068694, "grad_norm": 1.3787212371826172, "learning_rate": 1.601952555436284e-05, "loss": 0.5438, "step": 11541 }, { "epoch": 0.3156311529205863, "grad_norm": 1.3094642162322998, "learning_rate": 1.601881826776895e-05, "loss": 0.568, "step": 11542 }, { "epoch": 0.3156584992343032, "grad_norm": 1.7506043910980225, "learning_rate": 1.601811093395912e-05, "loss": 0.3804, "step": 11543 }, { "epoch": 0.31568584554802015, "grad_norm": 2.2112650871276855, "learning_rate": 1.601740355293891e-05, "loss": 0.5566, "step": 11544 }, { "epoch": 0.315713191861737, "grad_norm": 1.5203351974487305, "learning_rate": 1.6016696124713856e-05, "loss": 0.5271, "step": 11545 }, { "epoch": 0.31574053817545394, "grad_norm": 1.1065752506256104, "learning_rate": 1.6015988649289515e-05, "loss": 0.5219, "step": 11546 }, { "epoch": 0.31576788448917087, "grad_norm": 1.1993194818496704, "learning_rate": 1.601528112667144e-05, "loss": 0.5536, "step": 11547 }, { "epoch": 0.3157952308028878, "grad_norm": 1.2323248386383057, "learning_rate": 1.6014573556865172e-05, "loss": 0.5287, "step": 11548 }, { "epoch": 0.31582257711660466, "grad_norm": 1.2283917665481567, "learning_rate": 1.601386593987627e-05, "loss": 0.5371, "step": 11549 }, { "epoch": 0.3158499234303216, "grad_norm": 1.5275472402572632, "learning_rate": 1.6013158275710284e-05, "loss": 0.5641, "step": 11550 }, { "epoch": 0.3158772697440385, "grad_norm": 1.176647424697876, "learning_rate": 1.601245056437276e-05, "loss": 0.5357, "step": 11551 }, { "epoch": 0.31590461605775544, "grad_norm": 1.2506117820739746, "learning_rate": 1.6011742805869256e-05, "loss": 0.5432, "step": 11552 }, { "epoch": 0.3159319623714723, "grad_norm": 1.6261932849884033, "learning_rate": 1.601103500020532e-05, "loss": 0.5641, "step": 11553 }, { "epoch": 0.31595930868518923, "grad_norm": 1.1933469772338867, "learning_rate": 1.6010327147386506e-05, "loss": 0.4992, "step": 11554 }, { "epoch": 0.31598665499890616, "grad_norm": 1.1301305294036865, "learning_rate": 1.600961924741837e-05, "loss": 0.5411, "step": 11555 }, { "epoch": 0.3160140013126231, "grad_norm": 1.407248854637146, "learning_rate": 1.6008911300306457e-05, "loss": 0.5576, "step": 11556 }, { "epoch": 0.31604134762633995, "grad_norm": 1.1215616464614868, "learning_rate": 1.600820330605633e-05, "loss": 0.5375, "step": 11557 }, { "epoch": 0.3160686939400569, "grad_norm": 1.503612756729126, "learning_rate": 1.600749526467354e-05, "loss": 0.5586, "step": 11558 }, { "epoch": 0.3160960402537738, "grad_norm": 3.7633769512176514, "learning_rate": 1.6006787176163637e-05, "loss": 0.9014, "step": 11559 }, { "epoch": 0.3161233865674907, "grad_norm": 1.4885225296020508, "learning_rate": 1.6006079040532178e-05, "loss": 0.5396, "step": 11560 }, { "epoch": 0.3161507328812076, "grad_norm": 1.169682264328003, "learning_rate": 1.6005370857784725e-05, "loss": 0.5296, "step": 11561 }, { "epoch": 0.3161780791949245, "grad_norm": 1.3889349699020386, "learning_rate": 1.600466262792682e-05, "loss": 0.563, "step": 11562 }, { "epoch": 0.31620542550864145, "grad_norm": 1.2717275619506836, "learning_rate": 1.600395435096403e-05, "loss": 0.4964, "step": 11563 }, { "epoch": 0.31623277182235837, "grad_norm": 1.0821502208709717, "learning_rate": 1.600324602690191e-05, "loss": 0.5537, "step": 11564 }, { "epoch": 0.31626011813607524, "grad_norm": 1.3884332180023193, "learning_rate": 1.6002537655746012e-05, "loss": 0.573, "step": 11565 }, { "epoch": 0.31628746444979217, "grad_norm": 1.1310107707977295, "learning_rate": 1.6001829237501898e-05, "loss": 0.5093, "step": 11566 }, { "epoch": 0.3163148107635091, "grad_norm": 1.1909809112548828, "learning_rate": 1.600112077217512e-05, "loss": 0.5516, "step": 11567 }, { "epoch": 0.316342157077226, "grad_norm": 1.2196980714797974, "learning_rate": 1.600041225977124e-05, "loss": 0.5526, "step": 11568 }, { "epoch": 0.3163695033909429, "grad_norm": 1.2180531024932861, "learning_rate": 1.5999703700295812e-05, "loss": 0.5425, "step": 11569 }, { "epoch": 0.3163968497046598, "grad_norm": 1.4113171100616455, "learning_rate": 1.5998995093754398e-05, "loss": 0.5074, "step": 11570 }, { "epoch": 0.31642419601837674, "grad_norm": 1.1375443935394287, "learning_rate": 1.5998286440152557e-05, "loss": 0.5607, "step": 11571 }, { "epoch": 0.31645154233209366, "grad_norm": 1.0753889083862305, "learning_rate": 1.5997577739495848e-05, "loss": 0.5061, "step": 11572 }, { "epoch": 0.31647888864581053, "grad_norm": 1.4502761363983154, "learning_rate": 1.5996868991789826e-05, "loss": 0.505, "step": 11573 }, { "epoch": 0.31650623495952745, "grad_norm": 1.1221636533737183, "learning_rate": 1.599616019704006e-05, "loss": 0.5062, "step": 11574 }, { "epoch": 0.3165335812732444, "grad_norm": 1.380184292793274, "learning_rate": 1.59954513552521e-05, "loss": 0.5005, "step": 11575 }, { "epoch": 0.3165609275869613, "grad_norm": 1.222997784614563, "learning_rate": 1.599474246643151e-05, "loss": 0.5353, "step": 11576 }, { "epoch": 0.3165882739006782, "grad_norm": 1.1141736507415771, "learning_rate": 1.5994033530583854e-05, "loss": 0.4469, "step": 11577 }, { "epoch": 0.3166156202143951, "grad_norm": 1.4815654754638672, "learning_rate": 1.5993324547714696e-05, "loss": 0.4831, "step": 11578 }, { "epoch": 0.316642966528112, "grad_norm": 1.2000359296798706, "learning_rate": 1.599261551782959e-05, "loss": 0.5533, "step": 11579 }, { "epoch": 0.3166703128418289, "grad_norm": 1.1065540313720703, "learning_rate": 1.59919064409341e-05, "loss": 0.5596, "step": 11580 }, { "epoch": 0.3166976591555458, "grad_norm": 1.1279797554016113, "learning_rate": 1.5991197317033792e-05, "loss": 0.5537, "step": 11581 }, { "epoch": 0.31672500546926274, "grad_norm": 1.5563989877700806, "learning_rate": 1.5990488146134226e-05, "loss": 0.5488, "step": 11582 }, { "epoch": 0.31675235178297967, "grad_norm": 1.2183598279953003, "learning_rate": 1.5989778928240968e-05, "loss": 0.5651, "step": 11583 }, { "epoch": 0.31677969809669654, "grad_norm": 1.3417245149612427, "learning_rate": 1.598906966335958e-05, "loss": 0.5096, "step": 11584 }, { "epoch": 0.31680704441041346, "grad_norm": 1.061247706413269, "learning_rate": 1.5988360351495625e-05, "loss": 0.5201, "step": 11585 }, { "epoch": 0.3168343907241304, "grad_norm": 1.6988626718521118, "learning_rate": 1.5987650992654668e-05, "loss": 0.4107, "step": 11586 }, { "epoch": 0.3168617370378473, "grad_norm": 1.0752896070480347, "learning_rate": 1.5986941586842274e-05, "loss": 0.529, "step": 11587 }, { "epoch": 0.3168890833515642, "grad_norm": 1.1229625940322876, "learning_rate": 1.598623213406401e-05, "loss": 0.525, "step": 11588 }, { "epoch": 0.3169164296652811, "grad_norm": 1.6178447008132935, "learning_rate": 1.598552263432544e-05, "loss": 0.4375, "step": 11589 }, { "epoch": 0.31694377597899803, "grad_norm": 1.5365639925003052, "learning_rate": 1.5984813087632127e-05, "loss": 0.5478, "step": 11590 }, { "epoch": 0.31697112229271496, "grad_norm": 1.2342766523361206, "learning_rate": 1.598410349398964e-05, "loss": 0.5094, "step": 11591 }, { "epoch": 0.3169984686064318, "grad_norm": 1.2347660064697266, "learning_rate": 1.5983393853403547e-05, "loss": 0.5813, "step": 11592 }, { "epoch": 0.31702581492014875, "grad_norm": 1.1905076503753662, "learning_rate": 1.598268416587941e-05, "loss": 0.578, "step": 11593 }, { "epoch": 0.3170531612338657, "grad_norm": 1.2396153211593628, "learning_rate": 1.59819744314228e-05, "loss": 0.5297, "step": 11594 }, { "epoch": 0.3170805075475826, "grad_norm": 1.1882600784301758, "learning_rate": 1.598126465003929e-05, "loss": 0.5283, "step": 11595 }, { "epoch": 0.31710785386129947, "grad_norm": 1.0894421339035034, "learning_rate": 1.5980554821734436e-05, "loss": 0.5679, "step": 11596 }, { "epoch": 0.3171352001750164, "grad_norm": 1.110835075378418, "learning_rate": 1.5979844946513812e-05, "loss": 0.5514, "step": 11597 }, { "epoch": 0.3171625464887333, "grad_norm": 1.620339274406433, "learning_rate": 1.597913502438299e-05, "loss": 0.9107, "step": 11598 }, { "epoch": 0.31718989280245025, "grad_norm": 1.3807528018951416, "learning_rate": 1.5978425055347536e-05, "loss": 0.5131, "step": 11599 }, { "epoch": 0.3172172391161671, "grad_norm": 1.1272780895233154, "learning_rate": 1.597771503941302e-05, "loss": 0.5386, "step": 11600 }, { "epoch": 0.31724458542988404, "grad_norm": 1.6146225929260254, "learning_rate": 1.5977004976585007e-05, "loss": 0.5246, "step": 11601 }, { "epoch": 0.31727193174360097, "grad_norm": 1.3109451532363892, "learning_rate": 1.5976294866869078e-05, "loss": 0.527, "step": 11602 }, { "epoch": 0.3172992780573179, "grad_norm": 1.2076163291931152, "learning_rate": 1.5975584710270797e-05, "loss": 0.5346, "step": 11603 }, { "epoch": 0.31732662437103476, "grad_norm": 1.5849953889846802, "learning_rate": 1.5974874506795734e-05, "loss": 0.5401, "step": 11604 }, { "epoch": 0.3173539706847517, "grad_norm": 1.5518485307693481, "learning_rate": 1.5974164256449462e-05, "loss": 0.5173, "step": 11605 }, { "epoch": 0.3173813169984686, "grad_norm": 1.1633914709091187, "learning_rate": 1.5973453959237555e-05, "loss": 0.5462, "step": 11606 }, { "epoch": 0.31740866331218553, "grad_norm": 1.6431409120559692, "learning_rate": 1.597274361516558e-05, "loss": 0.5398, "step": 11607 }, { "epoch": 0.3174360096259024, "grad_norm": 1.1928216218948364, "learning_rate": 1.5972033224239114e-05, "loss": 0.5234, "step": 11608 }, { "epoch": 0.31746335593961933, "grad_norm": 1.7794033288955688, "learning_rate": 1.5971322786463726e-05, "loss": 0.4428, "step": 11609 }, { "epoch": 0.31749070225333625, "grad_norm": 1.3036154508590698, "learning_rate": 1.5970612301844994e-05, "loss": 0.5376, "step": 11610 }, { "epoch": 0.3175180485670532, "grad_norm": 1.1121686697006226, "learning_rate": 1.5969901770388484e-05, "loss": 0.5285, "step": 11611 }, { "epoch": 0.31754539488077005, "grad_norm": 1.2491801977157593, "learning_rate": 1.5969191192099783e-05, "loss": 0.3564, "step": 11612 }, { "epoch": 0.317572741194487, "grad_norm": 1.3534178733825684, "learning_rate": 1.596848056698445e-05, "loss": 0.4935, "step": 11613 }, { "epoch": 0.3176000875082039, "grad_norm": 1.1813695430755615, "learning_rate": 1.596776989504807e-05, "loss": 0.5455, "step": 11614 }, { "epoch": 0.3176274338219208, "grad_norm": 1.9389995336532593, "learning_rate": 1.5967059176296213e-05, "loss": 0.5203, "step": 11615 }, { "epoch": 0.3176547801356377, "grad_norm": 1.210020661354065, "learning_rate": 1.5966348410734453e-05, "loss": 0.5589, "step": 11616 }, { "epoch": 0.3176821264493546, "grad_norm": 1.2657339572906494, "learning_rate": 1.5965637598368373e-05, "loss": 0.5637, "step": 11617 }, { "epoch": 0.31770947276307154, "grad_norm": 1.7294553518295288, "learning_rate": 1.5964926739203544e-05, "loss": 0.4842, "step": 11618 }, { "epoch": 0.31773681907678847, "grad_norm": 1.3728821277618408, "learning_rate": 1.5964215833245544e-05, "loss": 0.5516, "step": 11619 }, { "epoch": 0.31776416539050534, "grad_norm": 1.3459019660949707, "learning_rate": 1.596350488049995e-05, "loss": 0.5635, "step": 11620 }, { "epoch": 0.31779151170422226, "grad_norm": 1.5196950435638428, "learning_rate": 1.5962793880972338e-05, "loss": 0.5618, "step": 11621 }, { "epoch": 0.3178188580179392, "grad_norm": 1.4319294691085815, "learning_rate": 1.5962082834668284e-05, "loss": 0.8443, "step": 11622 }, { "epoch": 0.3178462043316561, "grad_norm": 1.477567195892334, "learning_rate": 1.5961371741593364e-05, "loss": 0.8576, "step": 11623 }, { "epoch": 0.317873550645373, "grad_norm": 1.1385568380355835, "learning_rate": 1.596066060175317e-05, "loss": 0.5293, "step": 11624 }, { "epoch": 0.3179008969590899, "grad_norm": 1.4583460092544556, "learning_rate": 1.5959949415153263e-05, "loss": 0.5556, "step": 11625 }, { "epoch": 0.31792824327280683, "grad_norm": 1.2798867225646973, "learning_rate": 1.5959238181799236e-05, "loss": 0.5501, "step": 11626 }, { "epoch": 0.31795558958652376, "grad_norm": 1.3154828548431396, "learning_rate": 1.595852690169666e-05, "loss": 0.5384, "step": 11627 }, { "epoch": 0.3179829359002406, "grad_norm": 2.00026535987854, "learning_rate": 1.5957815574851116e-05, "loss": 0.5246, "step": 11628 }, { "epoch": 0.31801028221395755, "grad_norm": 1.1805366277694702, "learning_rate": 1.595710420126819e-05, "loss": 0.5608, "step": 11629 }, { "epoch": 0.3180376285276745, "grad_norm": 1.0862113237380981, "learning_rate": 1.5956392780953454e-05, "loss": 0.5281, "step": 11630 }, { "epoch": 0.3180649748413914, "grad_norm": 1.122862696647644, "learning_rate": 1.5955681313912496e-05, "loss": 0.5415, "step": 11631 }, { "epoch": 0.31809232115510827, "grad_norm": 1.0425944328308105, "learning_rate": 1.595496980015089e-05, "loss": 0.5381, "step": 11632 }, { "epoch": 0.3181196674688252, "grad_norm": 1.3768585920333862, "learning_rate": 1.5954258239674226e-05, "loss": 0.5347, "step": 11633 }, { "epoch": 0.3181470137825421, "grad_norm": 1.3503830432891846, "learning_rate": 1.595354663248808e-05, "loss": 0.5515, "step": 11634 }, { "epoch": 0.31817436009625905, "grad_norm": 1.313309669494629, "learning_rate": 1.5952834978598035e-05, "loss": 0.5424, "step": 11635 }, { "epoch": 0.3182017064099759, "grad_norm": 1.2183918952941895, "learning_rate": 1.5952123278009673e-05, "loss": 0.556, "step": 11636 }, { "epoch": 0.31822905272369284, "grad_norm": 1.2321298122406006, "learning_rate": 1.5951411530728583e-05, "loss": 0.5337, "step": 11637 }, { "epoch": 0.31825639903740977, "grad_norm": 1.956809401512146, "learning_rate": 1.5950699736760344e-05, "loss": 0.8463, "step": 11638 }, { "epoch": 0.3182837453511267, "grad_norm": 1.4152175188064575, "learning_rate": 1.594998789611054e-05, "loss": 0.5551, "step": 11639 }, { "epoch": 0.31831109166484356, "grad_norm": 1.7543199062347412, "learning_rate": 1.5949276008784752e-05, "loss": 0.5272, "step": 11640 }, { "epoch": 0.3183384379785605, "grad_norm": 1.461767315864563, "learning_rate": 1.594856407478857e-05, "loss": 0.5892, "step": 11641 }, { "epoch": 0.3183657842922774, "grad_norm": 1.2016466856002808, "learning_rate": 1.594785209412758e-05, "loss": 0.5469, "step": 11642 }, { "epoch": 0.31839313060599433, "grad_norm": 1.0155659914016724, "learning_rate": 1.594714006680736e-05, "loss": 0.5279, "step": 11643 }, { "epoch": 0.3184204769197112, "grad_norm": 1.2585746049880981, "learning_rate": 1.5946427992833502e-05, "loss": 0.8494, "step": 11644 }, { "epoch": 0.31844782323342813, "grad_norm": 1.5092682838439941, "learning_rate": 1.5945715872211588e-05, "loss": 0.5636, "step": 11645 }, { "epoch": 0.31847516954714505, "grad_norm": 1.1865348815917969, "learning_rate": 1.5945003704947208e-05, "loss": 0.5274, "step": 11646 }, { "epoch": 0.318502515860862, "grad_norm": 1.3929578065872192, "learning_rate": 1.594429149104595e-05, "loss": 0.5541, "step": 11647 }, { "epoch": 0.31852986217457885, "grad_norm": 1.2457482814788818, "learning_rate": 1.5943579230513396e-05, "loss": 0.5716, "step": 11648 }, { "epoch": 0.3185572084882958, "grad_norm": 1.3229881525039673, "learning_rate": 1.5942866923355132e-05, "loss": 0.5596, "step": 11649 }, { "epoch": 0.3185845548020127, "grad_norm": 1.4313889741897583, "learning_rate": 1.594215456957675e-05, "loss": 0.5749, "step": 11650 }, { "epoch": 0.3186119011157296, "grad_norm": 1.404099702835083, "learning_rate": 1.594144216918384e-05, "loss": 0.5155, "step": 11651 }, { "epoch": 0.3186392474294465, "grad_norm": 1.2280298471450806, "learning_rate": 1.594072972218199e-05, "loss": 0.5726, "step": 11652 }, { "epoch": 0.3186665937431634, "grad_norm": 1.2918912172317505, "learning_rate": 1.5940017228576784e-05, "loss": 0.5493, "step": 11653 }, { "epoch": 0.31869394005688034, "grad_norm": 1.260474443435669, "learning_rate": 1.593930468837382e-05, "loss": 0.4654, "step": 11654 }, { "epoch": 0.31872128637059727, "grad_norm": 1.1212825775146484, "learning_rate": 1.5938592101578673e-05, "loss": 0.5397, "step": 11655 }, { "epoch": 0.31874863268431414, "grad_norm": 1.3108662366867065, "learning_rate": 1.5937879468196948e-05, "loss": 0.5676, "step": 11656 }, { "epoch": 0.31877597899803106, "grad_norm": 2.1340172290802, "learning_rate": 1.5937166788234228e-05, "loss": 0.5553, "step": 11657 }, { "epoch": 0.318803325311748, "grad_norm": 1.3611423969268799, "learning_rate": 1.5936454061696104e-05, "loss": 0.5577, "step": 11658 }, { "epoch": 0.3188306716254649, "grad_norm": 1.2914979457855225, "learning_rate": 1.5935741288588173e-05, "loss": 0.5635, "step": 11659 }, { "epoch": 0.3188580179391818, "grad_norm": 1.8083250522613525, "learning_rate": 1.593502846891602e-05, "loss": 0.899, "step": 11660 }, { "epoch": 0.3188853642528987, "grad_norm": 1.830822229385376, "learning_rate": 1.593431560268524e-05, "loss": 0.5554, "step": 11661 }, { "epoch": 0.31891271056661563, "grad_norm": 1.2291316986083984, "learning_rate": 1.593360268990142e-05, "loss": 0.5414, "step": 11662 }, { "epoch": 0.31894005688033256, "grad_norm": 1.1696094274520874, "learning_rate": 1.593288973057016e-05, "loss": 0.5516, "step": 11663 }, { "epoch": 0.3189674031940494, "grad_norm": 2.7806646823883057, "learning_rate": 1.5932176724697047e-05, "loss": 0.4136, "step": 11664 }, { "epoch": 0.31899474950776635, "grad_norm": 1.7887582778930664, "learning_rate": 1.5931463672287682e-05, "loss": 0.4132, "step": 11665 }, { "epoch": 0.3190220958214833, "grad_norm": 1.256658673286438, "learning_rate": 1.593075057334765e-05, "loss": 0.4771, "step": 11666 }, { "epoch": 0.3190494421352002, "grad_norm": 1.2462568283081055, "learning_rate": 1.593003742788255e-05, "loss": 0.549, "step": 11667 }, { "epoch": 0.31907678844891707, "grad_norm": 1.4212007522583008, "learning_rate": 1.5929324235897977e-05, "loss": 0.8422, "step": 11668 }, { "epoch": 0.319104134762634, "grad_norm": 1.375747799873352, "learning_rate": 1.592861099739952e-05, "loss": 0.4501, "step": 11669 }, { "epoch": 0.3191314810763509, "grad_norm": 1.1999154090881348, "learning_rate": 1.592789771239278e-05, "loss": 0.5473, "step": 11670 }, { "epoch": 0.31915882739006785, "grad_norm": 1.4136173725128174, "learning_rate": 1.5927184380883352e-05, "loss": 0.8312, "step": 11671 }, { "epoch": 0.3191861737037847, "grad_norm": 1.1702959537506104, "learning_rate": 1.592647100287683e-05, "loss": 0.5237, "step": 11672 }, { "epoch": 0.31921352001750164, "grad_norm": 1.0763963460922241, "learning_rate": 1.592575757837881e-05, "loss": 0.5351, "step": 11673 }, { "epoch": 0.31924086633121856, "grad_norm": 1.1578141450881958, "learning_rate": 1.5925044107394892e-05, "loss": 0.5191, "step": 11674 }, { "epoch": 0.3192682126449355, "grad_norm": 1.4484235048294067, "learning_rate": 1.5924330589930666e-05, "loss": 0.5549, "step": 11675 }, { "epoch": 0.31929555895865236, "grad_norm": 1.5047924518585205, "learning_rate": 1.5923617025991736e-05, "loss": 0.842, "step": 11676 }, { "epoch": 0.3193229052723693, "grad_norm": 1.4408010244369507, "learning_rate": 1.5922903415583696e-05, "loss": 0.5415, "step": 11677 }, { "epoch": 0.3193502515860862, "grad_norm": 1.3460955619812012, "learning_rate": 1.592218975871215e-05, "loss": 0.5448, "step": 11678 }, { "epoch": 0.31937759789980313, "grad_norm": 1.610081672668457, "learning_rate": 1.5921476055382688e-05, "loss": 0.4175, "step": 11679 }, { "epoch": 0.31940494421352, "grad_norm": 1.5805187225341797, "learning_rate": 1.5920762305600913e-05, "loss": 0.5345, "step": 11680 }, { "epoch": 0.31943229052723693, "grad_norm": 1.2459639310836792, "learning_rate": 1.5920048509372422e-05, "loss": 0.5649, "step": 11681 }, { "epoch": 0.31945963684095385, "grad_norm": 1.3794922828674316, "learning_rate": 1.5919334666702823e-05, "loss": 0.5966, "step": 11682 }, { "epoch": 0.3194869831546707, "grad_norm": 1.326153039932251, "learning_rate": 1.5918620777597708e-05, "loss": 0.5414, "step": 11683 }, { "epoch": 0.31951432946838765, "grad_norm": 1.1197335720062256, "learning_rate": 1.5917906842062675e-05, "loss": 0.5739, "step": 11684 }, { "epoch": 0.3195416757821046, "grad_norm": 1.2287840843200684, "learning_rate": 1.591719286010333e-05, "loss": 0.545, "step": 11685 }, { "epoch": 0.3195690220958215, "grad_norm": 1.2933073043823242, "learning_rate": 1.591647883172527e-05, "loss": 0.4431, "step": 11686 }, { "epoch": 0.31959636840953837, "grad_norm": 1.5372586250305176, "learning_rate": 1.59157647569341e-05, "loss": 0.8645, "step": 11687 }, { "epoch": 0.3196237147232553, "grad_norm": 1.281227707862854, "learning_rate": 1.591505063573542e-05, "loss": 0.5393, "step": 11688 }, { "epoch": 0.3196510610369722, "grad_norm": 1.1779043674468994, "learning_rate": 1.5914336468134835e-05, "loss": 0.5492, "step": 11689 }, { "epoch": 0.31967840735068914, "grad_norm": 1.4468491077423096, "learning_rate": 1.591362225413794e-05, "loss": 0.8816, "step": 11690 }, { "epoch": 0.319705753664406, "grad_norm": 1.0502279996871948, "learning_rate": 1.5912907993750347e-05, "loss": 0.5421, "step": 11691 }, { "epoch": 0.31973309997812294, "grad_norm": 1.290254831314087, "learning_rate": 1.5912193686977656e-05, "loss": 0.4789, "step": 11692 }, { "epoch": 0.31976044629183986, "grad_norm": 1.531783938407898, "learning_rate": 1.5911479333825467e-05, "loss": 0.5733, "step": 11693 }, { "epoch": 0.3197877926055568, "grad_norm": 1.660831332206726, "learning_rate": 1.5910764934299386e-05, "loss": 0.4194, "step": 11694 }, { "epoch": 0.31981513891927366, "grad_norm": 1.2127189636230469, "learning_rate": 1.5910050488405017e-05, "loss": 0.5593, "step": 11695 }, { "epoch": 0.3198424852329906, "grad_norm": 1.1551026105880737, "learning_rate": 1.5909335996147964e-05, "loss": 0.5467, "step": 11696 }, { "epoch": 0.3198698315467075, "grad_norm": 1.2599432468414307, "learning_rate": 1.5908621457533838e-05, "loss": 0.545, "step": 11697 }, { "epoch": 0.31989717786042443, "grad_norm": 1.314205288887024, "learning_rate": 1.5907906872568233e-05, "loss": 0.5658, "step": 11698 }, { "epoch": 0.3199245241741413, "grad_norm": 1.818360447883606, "learning_rate": 1.5907192241256763e-05, "loss": 0.9062, "step": 11699 }, { "epoch": 0.3199518704878582, "grad_norm": 1.2526603937149048, "learning_rate": 1.5906477563605038e-05, "loss": 0.4983, "step": 11700 }, { "epoch": 0.31997921680157515, "grad_norm": 1.2186931371688843, "learning_rate": 1.5905762839618653e-05, "loss": 0.5333, "step": 11701 }, { "epoch": 0.3200065631152921, "grad_norm": 1.3892979621887207, "learning_rate": 1.5905048069303225e-05, "loss": 0.4836, "step": 11702 }, { "epoch": 0.32003390942900894, "grad_norm": 1.5207267999649048, "learning_rate": 1.590433325266435e-05, "loss": 0.8849, "step": 11703 }, { "epoch": 0.32006125574272587, "grad_norm": 1.6857082843780518, "learning_rate": 1.590361838970765e-05, "loss": 0.9004, "step": 11704 }, { "epoch": 0.3200886020564428, "grad_norm": 0.98640376329422, "learning_rate": 1.5902903480438722e-05, "loss": 0.5342, "step": 11705 }, { "epoch": 0.3201159483701597, "grad_norm": 1.6659846305847168, "learning_rate": 1.5902188524863175e-05, "loss": 0.5141, "step": 11706 }, { "epoch": 0.3201432946838766, "grad_norm": 1.2884392738342285, "learning_rate": 1.5901473522986622e-05, "loss": 0.4803, "step": 11707 }, { "epoch": 0.3201706409975935, "grad_norm": 1.1158806085586548, "learning_rate": 1.590075847481467e-05, "loss": 0.5636, "step": 11708 }, { "epoch": 0.32019798731131044, "grad_norm": 1.1716896295547485, "learning_rate": 1.5900043380352926e-05, "loss": 0.5431, "step": 11709 }, { "epoch": 0.32022533362502736, "grad_norm": 1.1310532093048096, "learning_rate": 1.5899328239607005e-05, "loss": 0.5337, "step": 11710 }, { "epoch": 0.32025267993874423, "grad_norm": 1.1600717306137085, "learning_rate": 1.589861305258251e-05, "loss": 0.5427, "step": 11711 }, { "epoch": 0.32028002625246116, "grad_norm": 1.2830030918121338, "learning_rate": 1.5897897819285064e-05, "loss": 0.5378, "step": 11712 }, { "epoch": 0.3203073725661781, "grad_norm": 1.3575208187103271, "learning_rate": 1.5897182539720265e-05, "loss": 0.5234, "step": 11713 }, { "epoch": 0.320334718879895, "grad_norm": 1.7124446630477905, "learning_rate": 1.5896467213893725e-05, "loss": 0.8922, "step": 11714 }, { "epoch": 0.3203620651936119, "grad_norm": 1.8898979425430298, "learning_rate": 1.5895751841811064e-05, "loss": 0.5529, "step": 11715 }, { "epoch": 0.3203894115073288, "grad_norm": 1.4921069145202637, "learning_rate": 1.5895036423477884e-05, "loss": 0.5596, "step": 11716 }, { "epoch": 0.32041675782104573, "grad_norm": 1.245752215385437, "learning_rate": 1.589432095889981e-05, "loss": 0.5407, "step": 11717 }, { "epoch": 0.32044410413476265, "grad_norm": 1.3580515384674072, "learning_rate": 1.5893605448082442e-05, "loss": 0.5486, "step": 11718 }, { "epoch": 0.3204714504484795, "grad_norm": 1.5763236284255981, "learning_rate": 1.5892889891031398e-05, "loss": 0.5023, "step": 11719 }, { "epoch": 0.32049879676219645, "grad_norm": 1.4531548023223877, "learning_rate": 1.589217428775229e-05, "loss": 0.5089, "step": 11720 }, { "epoch": 0.3205261430759134, "grad_norm": 1.3990042209625244, "learning_rate": 1.5891458638250737e-05, "loss": 0.5598, "step": 11721 }, { "epoch": 0.3205534893896303, "grad_norm": 1.7649214267730713, "learning_rate": 1.5890742942532346e-05, "loss": 0.5773, "step": 11722 }, { "epoch": 0.32058083570334717, "grad_norm": 1.510219693183899, "learning_rate": 1.5890027200602734e-05, "loss": 0.6, "step": 11723 }, { "epoch": 0.3206081820170641, "grad_norm": 1.3415392637252808, "learning_rate": 1.588931141246752e-05, "loss": 0.5053, "step": 11724 }, { "epoch": 0.320635528330781, "grad_norm": 1.1121881008148193, "learning_rate": 1.5888595578132308e-05, "loss": 0.5451, "step": 11725 }, { "epoch": 0.32066287464449794, "grad_norm": 1.0554941892623901, "learning_rate": 1.5887879697602725e-05, "loss": 0.5394, "step": 11726 }, { "epoch": 0.3206902209582148, "grad_norm": 1.036181092262268, "learning_rate": 1.5887163770884385e-05, "loss": 0.5556, "step": 11727 }, { "epoch": 0.32071756727193174, "grad_norm": 1.475154995918274, "learning_rate": 1.5886447797982902e-05, "loss": 0.4166, "step": 11728 }, { "epoch": 0.32074491358564866, "grad_norm": 1.6709150075912476, "learning_rate": 1.5885731778903886e-05, "loss": 0.5305, "step": 11729 }, { "epoch": 0.3207722598993656, "grad_norm": 1.1637545824050903, "learning_rate": 1.5885015713652965e-05, "loss": 0.506, "step": 11730 }, { "epoch": 0.32079960621308246, "grad_norm": 1.2478171586990356, "learning_rate": 1.5884299602235754e-05, "loss": 0.5738, "step": 11731 }, { "epoch": 0.3208269525267994, "grad_norm": 1.4423656463623047, "learning_rate": 1.5883583444657865e-05, "loss": 0.5643, "step": 11732 }, { "epoch": 0.3208542988405163, "grad_norm": 1.1510107517242432, "learning_rate": 1.588286724092492e-05, "loss": 0.5301, "step": 11733 }, { "epoch": 0.32088164515423323, "grad_norm": 1.0818150043487549, "learning_rate": 1.5882150991042543e-05, "loss": 0.543, "step": 11734 }, { "epoch": 0.3209089914679501, "grad_norm": 1.1112622022628784, "learning_rate": 1.5881434695016344e-05, "loss": 0.5574, "step": 11735 }, { "epoch": 0.320936337781667, "grad_norm": 1.1392205953598022, "learning_rate": 1.588071835285194e-05, "loss": 0.5555, "step": 11736 }, { "epoch": 0.32096368409538395, "grad_norm": 1.3033937215805054, "learning_rate": 1.588000196455496e-05, "loss": 0.5456, "step": 11737 }, { "epoch": 0.3209910304091009, "grad_norm": 1.2451529502868652, "learning_rate": 1.5879285530131018e-05, "loss": 0.5676, "step": 11738 }, { "epoch": 0.32101837672281774, "grad_norm": 1.2159473896026611, "learning_rate": 1.5878569049585735e-05, "loss": 0.4433, "step": 11739 }, { "epoch": 0.32104572303653467, "grad_norm": 1.2429606914520264, "learning_rate": 1.5877852522924733e-05, "loss": 0.5265, "step": 11740 }, { "epoch": 0.3210730693502516, "grad_norm": 1.3103872537612915, "learning_rate": 1.587713595015363e-05, "loss": 0.5402, "step": 11741 }, { "epoch": 0.3211004156639685, "grad_norm": 1.197869062423706, "learning_rate": 1.5876419331278053e-05, "loss": 0.54, "step": 11742 }, { "epoch": 0.3211277619776854, "grad_norm": 1.3653719425201416, "learning_rate": 1.5875702666303618e-05, "loss": 0.5372, "step": 11743 }, { "epoch": 0.3211551082914023, "grad_norm": 1.0959097146987915, "learning_rate": 1.587498595523595e-05, "loss": 0.5314, "step": 11744 }, { "epoch": 0.32118245460511924, "grad_norm": 1.2846254110336304, "learning_rate": 1.587426919808067e-05, "loss": 0.5487, "step": 11745 }, { "epoch": 0.32120980091883616, "grad_norm": 1.5117969512939453, "learning_rate": 1.58735523948434e-05, "loss": 0.5565, "step": 11746 }, { "epoch": 0.32123714723255303, "grad_norm": 1.7378027439117432, "learning_rate": 1.5872835545529768e-05, "loss": 0.4229, "step": 11747 }, { "epoch": 0.32126449354626996, "grad_norm": 1.122352123260498, "learning_rate": 1.5872118650145393e-05, "loss": 0.5244, "step": 11748 }, { "epoch": 0.3212918398599869, "grad_norm": 1.3535598516464233, "learning_rate": 1.58714017086959e-05, "loss": 0.5425, "step": 11749 }, { "epoch": 0.3213191861737038, "grad_norm": 1.7600330114364624, "learning_rate": 1.5870684721186907e-05, "loss": 0.8609, "step": 11750 }, { "epoch": 0.3213465324874207, "grad_norm": 1.329893708229065, "learning_rate": 1.586996768762405e-05, "loss": 0.5447, "step": 11751 }, { "epoch": 0.3213738788011376, "grad_norm": 1.5603951215744019, "learning_rate": 1.586925060801295e-05, "loss": 0.5682, "step": 11752 }, { "epoch": 0.3214012251148545, "grad_norm": 1.3845547437667847, "learning_rate": 1.5868533482359228e-05, "loss": 0.5932, "step": 11753 }, { "epoch": 0.32142857142857145, "grad_norm": 1.315838098526001, "learning_rate": 1.5867816310668515e-05, "loss": 0.5306, "step": 11754 }, { "epoch": 0.3214559177422883, "grad_norm": 1.1857430934906006, "learning_rate": 1.5867099092946433e-05, "loss": 0.5263, "step": 11755 }, { "epoch": 0.32148326405600525, "grad_norm": 1.8886501789093018, "learning_rate": 1.5866381829198603e-05, "loss": 0.4022, "step": 11756 }, { "epoch": 0.32151061036972217, "grad_norm": 1.5160338878631592, "learning_rate": 1.5865664519430667e-05, "loss": 0.5419, "step": 11757 }, { "epoch": 0.3215379566834391, "grad_norm": 1.5864790678024292, "learning_rate": 1.5864947163648242e-05, "loss": 0.5286, "step": 11758 }, { "epoch": 0.32156530299715597, "grad_norm": 1.3003666400909424, "learning_rate": 1.5864229761856956e-05, "loss": 0.5364, "step": 11759 }, { "epoch": 0.3215926493108729, "grad_norm": 1.3729088306427002, "learning_rate": 1.5863512314062437e-05, "loss": 0.5659, "step": 11760 }, { "epoch": 0.3216199956245898, "grad_norm": 1.158507227897644, "learning_rate": 1.5862794820270315e-05, "loss": 0.5465, "step": 11761 }, { "epoch": 0.32164734193830674, "grad_norm": 1.2610424757003784, "learning_rate": 1.5862077280486218e-05, "loss": 0.5539, "step": 11762 }, { "epoch": 0.3216746882520236, "grad_norm": 1.4195096492767334, "learning_rate": 1.5861359694715774e-05, "loss": 0.5773, "step": 11763 }, { "epoch": 0.32170203456574054, "grad_norm": 1.537337064743042, "learning_rate": 1.5860642062964614e-05, "loss": 0.4492, "step": 11764 }, { "epoch": 0.32172938087945746, "grad_norm": 1.444712519645691, "learning_rate": 1.5859924385238363e-05, "loss": 0.5179, "step": 11765 }, { "epoch": 0.3217567271931744, "grad_norm": 4.855686187744141, "learning_rate": 1.5859206661542658e-05, "loss": 0.8692, "step": 11766 }, { "epoch": 0.32178407350689125, "grad_norm": 1.6453391313552856, "learning_rate": 1.5858488891883123e-05, "loss": 0.3993, "step": 11767 }, { "epoch": 0.3218114198206082, "grad_norm": 1.380084753036499, "learning_rate": 1.5857771076265393e-05, "loss": 0.4842, "step": 11768 }, { "epoch": 0.3218387661343251, "grad_norm": 1.5285309553146362, "learning_rate": 1.5857053214695096e-05, "loss": 0.5413, "step": 11769 }, { "epoch": 0.32186611244804203, "grad_norm": 1.6763485670089722, "learning_rate": 1.5856335307177865e-05, "loss": 0.5583, "step": 11770 }, { "epoch": 0.3218934587617589, "grad_norm": 1.178325891494751, "learning_rate": 1.5855617353719333e-05, "loss": 0.5216, "step": 11771 }, { "epoch": 0.3219208050754758, "grad_norm": 1.7198207378387451, "learning_rate": 1.5854899354325127e-05, "loss": 0.5445, "step": 11772 }, { "epoch": 0.32194815138919275, "grad_norm": 1.9257017374038696, "learning_rate": 1.5854181309000888e-05, "loss": 0.918, "step": 11773 }, { "epoch": 0.3219754977029097, "grad_norm": 2.0268750190734863, "learning_rate": 1.5853463217752244e-05, "loss": 0.5034, "step": 11774 }, { "epoch": 0.32200284401662654, "grad_norm": 1.186023235321045, "learning_rate": 1.5852745080584825e-05, "loss": 0.5369, "step": 11775 }, { "epoch": 0.32203019033034347, "grad_norm": 1.6121363639831543, "learning_rate": 1.585202689750427e-05, "loss": 0.8486, "step": 11776 }, { "epoch": 0.3220575366440604, "grad_norm": 1.6516282558441162, "learning_rate": 1.585130866851621e-05, "loss": 0.8745, "step": 11777 }, { "epoch": 0.3220848829577773, "grad_norm": 1.2033534049987793, "learning_rate": 1.5850590393626283e-05, "loss": 0.4684, "step": 11778 }, { "epoch": 0.3221122292714942, "grad_norm": 1.5045496225357056, "learning_rate": 1.584987207284012e-05, "loss": 0.5134, "step": 11779 }, { "epoch": 0.3221395755852111, "grad_norm": 1.4198414087295532, "learning_rate": 1.584915370616335e-05, "loss": 0.5237, "step": 11780 }, { "epoch": 0.32216692189892804, "grad_norm": 1.313400149345398, "learning_rate": 1.5848435293601624e-05, "loss": 0.5272, "step": 11781 }, { "epoch": 0.32219426821264496, "grad_norm": 1.0702240467071533, "learning_rate": 1.5847716835160565e-05, "loss": 0.5447, "step": 11782 }, { "epoch": 0.32222161452636183, "grad_norm": 1.3134151697158813, "learning_rate": 1.5846998330845815e-05, "loss": 0.5481, "step": 11783 }, { "epoch": 0.32224896084007876, "grad_norm": 1.3561893701553345, "learning_rate": 1.5846279780663007e-05, "loss": 0.5247, "step": 11784 }, { "epoch": 0.3222763071537957, "grad_norm": 1.6274832487106323, "learning_rate": 1.584556118461778e-05, "loss": 0.5361, "step": 11785 }, { "epoch": 0.32230365346751255, "grad_norm": 3.25687313079834, "learning_rate": 1.584484254271577e-05, "loss": 0.4298, "step": 11786 }, { "epoch": 0.3223309997812295, "grad_norm": 1.197529911994934, "learning_rate": 1.5844123854962615e-05, "loss": 0.5261, "step": 11787 }, { "epoch": 0.3223583460949464, "grad_norm": 1.385099172592163, "learning_rate": 1.5843405121363957e-05, "loss": 0.5532, "step": 11788 }, { "epoch": 0.3223856924086633, "grad_norm": 1.3812743425369263, "learning_rate": 1.5842686341925427e-05, "loss": 0.533, "step": 11789 }, { "epoch": 0.3224130387223802, "grad_norm": 1.4847818613052368, "learning_rate": 1.5841967516652664e-05, "loss": 0.5569, "step": 11790 }, { "epoch": 0.3224403850360971, "grad_norm": 1.1348437070846558, "learning_rate": 1.5841248645551315e-05, "loss": 0.5448, "step": 11791 }, { "epoch": 0.32246773134981405, "grad_norm": 1.3466159105300903, "learning_rate": 1.5840529728627012e-05, "loss": 0.54, "step": 11792 }, { "epoch": 0.32249507766353097, "grad_norm": 1.4736003875732422, "learning_rate": 1.5839810765885396e-05, "loss": 0.4803, "step": 11793 }, { "epoch": 0.32252242397724784, "grad_norm": 1.5033953189849854, "learning_rate": 1.583909175733211e-05, "loss": 0.3919, "step": 11794 }, { "epoch": 0.32254977029096477, "grad_norm": 2.0425355434417725, "learning_rate": 1.583837270297279e-05, "loss": 0.376, "step": 11795 }, { "epoch": 0.3225771166046817, "grad_norm": 1.2906731367111206, "learning_rate": 1.583765360281308e-05, "loss": 0.5741, "step": 11796 }, { "epoch": 0.3226044629183986, "grad_norm": 1.5625840425491333, "learning_rate": 1.5836934456858625e-05, "loss": 0.4794, "step": 11797 }, { "epoch": 0.3226318092321155, "grad_norm": 1.7177258729934692, "learning_rate": 1.5836215265115055e-05, "loss": 0.5221, "step": 11798 }, { "epoch": 0.3226591555458324, "grad_norm": 1.5342333316802979, "learning_rate": 1.5835496027588022e-05, "loss": 0.5502, "step": 11799 }, { "epoch": 0.32268650185954934, "grad_norm": 1.2460330724716187, "learning_rate": 1.5834776744283166e-05, "loss": 0.5316, "step": 11800 }, { "epoch": 0.32271384817326626, "grad_norm": 1.1836516857147217, "learning_rate": 1.5834057415206125e-05, "loss": 0.5449, "step": 11801 }, { "epoch": 0.32274119448698313, "grad_norm": 1.2775764465332031, "learning_rate": 1.5833338040362547e-05, "loss": 0.5247, "step": 11802 }, { "epoch": 0.32276854080070005, "grad_norm": 1.2967476844787598, "learning_rate": 1.5832618619758077e-05, "loss": 0.547, "step": 11803 }, { "epoch": 0.322795887114417, "grad_norm": 1.2003291845321655, "learning_rate": 1.5831899153398352e-05, "loss": 0.5455, "step": 11804 }, { "epoch": 0.3228232334281339, "grad_norm": 1.397883415222168, "learning_rate": 1.5831179641289018e-05, "loss": 0.5625, "step": 11805 }, { "epoch": 0.3228505797418508, "grad_norm": 1.3150732517242432, "learning_rate": 1.5830460083435724e-05, "loss": 0.5353, "step": 11806 }, { "epoch": 0.3228779260555677, "grad_norm": 1.0853779315948486, "learning_rate": 1.582974047984411e-05, "loss": 0.5161, "step": 11807 }, { "epoch": 0.3229052723692846, "grad_norm": 2.8146417140960693, "learning_rate": 1.582902083051982e-05, "loss": 0.8689, "step": 11808 }, { "epoch": 0.32293261868300155, "grad_norm": 1.2873378992080688, "learning_rate": 1.582830113546851e-05, "loss": 0.5439, "step": 11809 }, { "epoch": 0.3229599649967184, "grad_norm": 1.1295530796051025, "learning_rate": 1.582758139469581e-05, "loss": 0.4924, "step": 11810 }, { "epoch": 0.32298731131043534, "grad_norm": 1.3353968858718872, "learning_rate": 1.5826861608207375e-05, "loss": 0.5743, "step": 11811 }, { "epoch": 0.32301465762415227, "grad_norm": 1.407808780670166, "learning_rate": 1.582614177600885e-05, "loss": 0.5464, "step": 11812 }, { "epoch": 0.3230420039378692, "grad_norm": 1.4073584079742432, "learning_rate": 1.5825421898105886e-05, "loss": 0.56, "step": 11813 }, { "epoch": 0.32306935025158606, "grad_norm": 1.3601089715957642, "learning_rate": 1.5824701974504125e-05, "loss": 0.5612, "step": 11814 }, { "epoch": 0.323096696565303, "grad_norm": 1.4521329402923584, "learning_rate": 1.582398200520922e-05, "loss": 0.5658, "step": 11815 }, { "epoch": 0.3231240428790199, "grad_norm": 1.4704593420028687, "learning_rate": 1.582326199022681e-05, "loss": 0.5882, "step": 11816 }, { "epoch": 0.32315138919273684, "grad_norm": 1.1856110095977783, "learning_rate": 1.5822541929562547e-05, "loss": 0.5419, "step": 11817 }, { "epoch": 0.3231787355064537, "grad_norm": 1.365825891494751, "learning_rate": 1.5821821823222084e-05, "loss": 0.5362, "step": 11818 }, { "epoch": 0.32320608182017063, "grad_norm": 2.3951494693756104, "learning_rate": 1.582110167121107e-05, "loss": 0.8626, "step": 11819 }, { "epoch": 0.32323342813388756, "grad_norm": 1.9567174911499023, "learning_rate": 1.5820381473535148e-05, "loss": 0.5571, "step": 11820 }, { "epoch": 0.3232607744476045, "grad_norm": 1.2820487022399902, "learning_rate": 1.581966123019997e-05, "loss": 0.4723, "step": 11821 }, { "epoch": 0.32328812076132135, "grad_norm": 1.5545823574066162, "learning_rate": 1.581894094121119e-05, "loss": 0.5496, "step": 11822 }, { "epoch": 0.3233154670750383, "grad_norm": 1.257317066192627, "learning_rate": 1.5818220606574458e-05, "loss": 0.5178, "step": 11823 }, { "epoch": 0.3233428133887552, "grad_norm": 1.5440322160720825, "learning_rate": 1.581750022629542e-05, "loss": 0.4773, "step": 11824 }, { "epoch": 0.3233701597024721, "grad_norm": 1.5900377035140991, "learning_rate": 1.5816779800379728e-05, "loss": 0.5534, "step": 11825 }, { "epoch": 0.323397506016189, "grad_norm": 4.005265712738037, "learning_rate": 1.5816059328833037e-05, "loss": 0.3796, "step": 11826 }, { "epoch": 0.3234248523299059, "grad_norm": 1.644178867340088, "learning_rate": 1.5815338811661002e-05, "loss": 0.4922, "step": 11827 }, { "epoch": 0.32345219864362285, "grad_norm": 1.5924136638641357, "learning_rate": 1.5814618248869266e-05, "loss": 0.5074, "step": 11828 }, { "epoch": 0.32347954495733977, "grad_norm": 1.1769102811813354, "learning_rate": 1.5813897640463485e-05, "loss": 0.5394, "step": 11829 }, { "epoch": 0.32350689127105664, "grad_norm": 1.2622172832489014, "learning_rate": 1.5813176986449316e-05, "loss": 0.5553, "step": 11830 }, { "epoch": 0.32353423758477357, "grad_norm": 1.271397352218628, "learning_rate": 1.5812456286832408e-05, "loss": 0.5462, "step": 11831 }, { "epoch": 0.3235615838984905, "grad_norm": 1.1927800178527832, "learning_rate": 1.5811735541618415e-05, "loss": 0.5442, "step": 11832 }, { "epoch": 0.3235889302122074, "grad_norm": 1.2896952629089355, "learning_rate": 1.5811014750812993e-05, "loss": 0.5675, "step": 11833 }, { "epoch": 0.3236162765259243, "grad_norm": 1.4033762216567993, "learning_rate": 1.58102939144218e-05, "loss": 0.8353, "step": 11834 }, { "epoch": 0.3236436228396412, "grad_norm": 1.417496681213379, "learning_rate": 1.580957303245048e-05, "loss": 0.5404, "step": 11835 }, { "epoch": 0.32367096915335813, "grad_norm": 1.5894922018051147, "learning_rate": 1.58088521049047e-05, "loss": 0.8764, "step": 11836 }, { "epoch": 0.32369831546707506, "grad_norm": 1.346504807472229, "learning_rate": 1.5808131131790104e-05, "loss": 0.5577, "step": 11837 }, { "epoch": 0.32372566178079193, "grad_norm": 1.3367398977279663, "learning_rate": 1.5807410113112357e-05, "loss": 0.5708, "step": 11838 }, { "epoch": 0.32375300809450885, "grad_norm": 1.8392245769500732, "learning_rate": 1.5806689048877113e-05, "loss": 0.428, "step": 11839 }, { "epoch": 0.3237803544082258, "grad_norm": 1.3923143148422241, "learning_rate": 1.5805967939090027e-05, "loss": 0.5278, "step": 11840 }, { "epoch": 0.3238077007219427, "grad_norm": 1.3181101083755493, "learning_rate": 1.580524678375676e-05, "loss": 0.5239, "step": 11841 }, { "epoch": 0.3238350470356596, "grad_norm": 2.5237348079681396, "learning_rate": 1.580452558288296e-05, "loss": 0.5564, "step": 11842 }, { "epoch": 0.3238623933493765, "grad_norm": 1.163845181465149, "learning_rate": 1.5803804336474294e-05, "loss": 0.4927, "step": 11843 }, { "epoch": 0.3238897396630934, "grad_norm": 1.5235236883163452, "learning_rate": 1.5803083044536414e-05, "loss": 0.535, "step": 11844 }, { "epoch": 0.32391708597681035, "grad_norm": 1.273353099822998, "learning_rate": 1.580236170707498e-05, "loss": 0.5202, "step": 11845 }, { "epoch": 0.3239444322905272, "grad_norm": 1.3461331129074097, "learning_rate": 1.5801640324095654e-05, "loss": 0.5547, "step": 11846 }, { "epoch": 0.32397177860424414, "grad_norm": 1.452055811882019, "learning_rate": 1.580091889560409e-05, "loss": 0.3723, "step": 11847 }, { "epoch": 0.32399912491796107, "grad_norm": 1.234849214553833, "learning_rate": 1.580019742160595e-05, "loss": 0.526, "step": 11848 }, { "epoch": 0.324026471231678, "grad_norm": 1.7528035640716553, "learning_rate": 1.5799475902106896e-05, "loss": 0.4912, "step": 11849 }, { "epoch": 0.32405381754539486, "grad_norm": 1.3497538566589355, "learning_rate": 1.5798754337112583e-05, "loss": 0.5136, "step": 11850 }, { "epoch": 0.3240811638591118, "grad_norm": 1.5503753423690796, "learning_rate": 1.5798032726628675e-05, "loss": 0.5532, "step": 11851 }, { "epoch": 0.3241085101728287, "grad_norm": 1.3517929315567017, "learning_rate": 1.5797311070660832e-05, "loss": 0.5315, "step": 11852 }, { "epoch": 0.32413585648654564, "grad_norm": 1.1807814836502075, "learning_rate": 1.5796589369214713e-05, "loss": 0.5325, "step": 11853 }, { "epoch": 0.3241632028002625, "grad_norm": 1.3508639335632324, "learning_rate": 1.5795867622295986e-05, "loss": 0.5175, "step": 11854 }, { "epoch": 0.32419054911397943, "grad_norm": 1.1688956022262573, "learning_rate": 1.579514582991031e-05, "loss": 0.5605, "step": 11855 }, { "epoch": 0.32421789542769636, "grad_norm": 1.157941460609436, "learning_rate": 1.579442399206334e-05, "loss": 0.5527, "step": 11856 }, { "epoch": 0.3242452417414133, "grad_norm": 1.1037434339523315, "learning_rate": 1.5793702108760745e-05, "loss": 0.5402, "step": 11857 }, { "epoch": 0.32427258805513015, "grad_norm": 1.289842128753662, "learning_rate": 1.5792980180008194e-05, "loss": 0.4109, "step": 11858 }, { "epoch": 0.3242999343688471, "grad_norm": 1.4681278467178345, "learning_rate": 1.5792258205811333e-05, "loss": 0.5519, "step": 11859 }, { "epoch": 0.324327280682564, "grad_norm": 1.54966402053833, "learning_rate": 1.5791536186175848e-05, "loss": 0.4758, "step": 11860 }, { "epoch": 0.3243546269962809, "grad_norm": 1.576621413230896, "learning_rate": 1.5790814121107385e-05, "loss": 0.5362, "step": 11861 }, { "epoch": 0.3243819733099978, "grad_norm": 2.984090805053711, "learning_rate": 1.5790092010611616e-05, "loss": 0.406, "step": 11862 }, { "epoch": 0.3244093196237147, "grad_norm": 1.4013895988464355, "learning_rate": 1.5789369854694205e-05, "loss": 0.5044, "step": 11863 }, { "epoch": 0.32443666593743165, "grad_norm": 0.9851533770561218, "learning_rate": 1.5788647653360817e-05, "loss": 0.4955, "step": 11864 }, { "epoch": 0.32446401225114857, "grad_norm": 1.0317798852920532, "learning_rate": 1.5787925406617116e-05, "loss": 0.5545, "step": 11865 }, { "epoch": 0.32449135856486544, "grad_norm": 1.1944652795791626, "learning_rate": 1.578720311446877e-05, "loss": 0.5034, "step": 11866 }, { "epoch": 0.32451870487858236, "grad_norm": 1.2946127653121948, "learning_rate": 1.5786480776921442e-05, "loss": 0.5482, "step": 11867 }, { "epoch": 0.3245460511922993, "grad_norm": 3.5380043983459473, "learning_rate": 1.5785758393980803e-05, "loss": 0.571, "step": 11868 }, { "epoch": 0.3245733975060162, "grad_norm": 1.3168662786483765, "learning_rate": 1.5785035965652516e-05, "loss": 0.5136, "step": 11869 }, { "epoch": 0.3246007438197331, "grad_norm": 1.568404197692871, "learning_rate": 1.578431349194225e-05, "loss": 0.5557, "step": 11870 }, { "epoch": 0.32462809013345, "grad_norm": 1.15840482711792, "learning_rate": 1.5783590972855675e-05, "loss": 0.5257, "step": 11871 }, { "epoch": 0.32465543644716693, "grad_norm": 1.2509760856628418, "learning_rate": 1.5782868408398453e-05, "loss": 0.5574, "step": 11872 }, { "epoch": 0.32468278276088386, "grad_norm": 1.0829960107803345, "learning_rate": 1.578214579857626e-05, "loss": 0.4798, "step": 11873 }, { "epoch": 0.32471012907460073, "grad_norm": 1.1979235410690308, "learning_rate": 1.5781423143394757e-05, "loss": 0.5033, "step": 11874 }, { "epoch": 0.32473747538831765, "grad_norm": 3.1945576667785645, "learning_rate": 1.578070044285962e-05, "loss": 0.8253, "step": 11875 }, { "epoch": 0.3247648217020346, "grad_norm": 1.5413249731063843, "learning_rate": 1.5779977696976516e-05, "loss": 0.5424, "step": 11876 }, { "epoch": 0.3247921680157515, "grad_norm": 5.746281147003174, "learning_rate": 1.5779254905751108e-05, "loss": 0.8738, "step": 11877 }, { "epoch": 0.3248195143294684, "grad_norm": 1.4438815116882324, "learning_rate": 1.5778532069189072e-05, "loss": 0.5337, "step": 11878 }, { "epoch": 0.3248468606431853, "grad_norm": 1.945131540298462, "learning_rate": 1.5777809187296082e-05, "loss": 0.4373, "step": 11879 }, { "epoch": 0.3248742069569022, "grad_norm": 1.3788741827011108, "learning_rate": 1.5777086260077804e-05, "loss": 0.8892, "step": 11880 }, { "epoch": 0.32490155327061915, "grad_norm": 1.433020830154419, "learning_rate": 1.577636328753991e-05, "loss": 0.5619, "step": 11881 }, { "epoch": 0.324928899584336, "grad_norm": 2.9296891689300537, "learning_rate": 1.5775640269688068e-05, "loss": 0.5333, "step": 11882 }, { "epoch": 0.32495624589805294, "grad_norm": 1.2940126657485962, "learning_rate": 1.5774917206527954e-05, "loss": 0.8679, "step": 11883 }, { "epoch": 0.32498359221176987, "grad_norm": 1.4540106058120728, "learning_rate": 1.5774194098065242e-05, "loss": 0.5529, "step": 11884 }, { "epoch": 0.32501093852548674, "grad_norm": 1.2442517280578613, "learning_rate": 1.5773470944305603e-05, "loss": 0.5574, "step": 11885 }, { "epoch": 0.32503828483920366, "grad_norm": 1.2427661418914795, "learning_rate": 1.577274774525471e-05, "loss": 0.5314, "step": 11886 }, { "epoch": 0.3250656311529206, "grad_norm": 1.3179866075515747, "learning_rate": 1.5772024500918232e-05, "loss": 0.5394, "step": 11887 }, { "epoch": 0.3250929774666375, "grad_norm": 1.2616037130355835, "learning_rate": 1.577130121130185e-05, "loss": 0.5457, "step": 11888 }, { "epoch": 0.3251203237803544, "grad_norm": 1.514319658279419, "learning_rate": 1.5770577876411233e-05, "loss": 0.5112, "step": 11889 }, { "epoch": 0.3251476700940713, "grad_norm": 1.3890037536621094, "learning_rate": 1.5769854496252057e-05, "loss": 0.5763, "step": 11890 }, { "epoch": 0.32517501640778823, "grad_norm": 1.2758585214614868, "learning_rate": 1.5769131070829992e-05, "loss": 0.5609, "step": 11891 }, { "epoch": 0.32520236272150516, "grad_norm": 1.23881196975708, "learning_rate": 1.5768407600150723e-05, "loss": 0.5409, "step": 11892 }, { "epoch": 0.325229709035222, "grad_norm": 1.0128140449523926, "learning_rate": 1.5767684084219918e-05, "loss": 0.5335, "step": 11893 }, { "epoch": 0.32525705534893895, "grad_norm": 1.0121729373931885, "learning_rate": 1.5766960523043252e-05, "loss": 0.5133, "step": 11894 }, { "epoch": 0.3252844016626559, "grad_norm": 1.2870126962661743, "learning_rate": 1.5766236916626408e-05, "loss": 0.5705, "step": 11895 }, { "epoch": 0.3253117479763728, "grad_norm": 1.3143731355667114, "learning_rate": 1.5765513264975053e-05, "loss": 0.8773, "step": 11896 }, { "epoch": 0.32533909429008967, "grad_norm": 1.2326916456222534, "learning_rate": 1.5764789568094872e-05, "loss": 0.5427, "step": 11897 }, { "epoch": 0.3253664406038066, "grad_norm": 1.3310052156448364, "learning_rate": 1.576406582599154e-05, "loss": 0.5469, "step": 11898 }, { "epoch": 0.3253937869175235, "grad_norm": 1.5983741283416748, "learning_rate": 1.5763342038670734e-05, "loss": 0.5358, "step": 11899 }, { "epoch": 0.32542113323124044, "grad_norm": 1.4732543230056763, "learning_rate": 1.576261820613813e-05, "loss": 0.5535, "step": 11900 }, { "epoch": 0.3254484795449573, "grad_norm": 1.4765574932098389, "learning_rate": 1.5761894328399406e-05, "loss": 0.488, "step": 11901 }, { "epoch": 0.32547582585867424, "grad_norm": 1.2244112491607666, "learning_rate": 1.5761170405460244e-05, "loss": 0.5502, "step": 11902 }, { "epoch": 0.32550317217239116, "grad_norm": 1.5710216760635376, "learning_rate": 1.576044643732632e-05, "loss": 0.4817, "step": 11903 }, { "epoch": 0.3255305184861081, "grad_norm": 1.2644233703613281, "learning_rate": 1.5759722424003322e-05, "loss": 0.8544, "step": 11904 }, { "epoch": 0.32555786479982496, "grad_norm": 1.0880284309387207, "learning_rate": 1.5758998365496916e-05, "loss": 0.5228, "step": 11905 }, { "epoch": 0.3255852111135419, "grad_norm": 1.2934778928756714, "learning_rate": 1.575827426181279e-05, "loss": 0.4677, "step": 11906 }, { "epoch": 0.3256125574272588, "grad_norm": 1.2381576299667358, "learning_rate": 1.5757550112956624e-05, "loss": 0.5444, "step": 11907 }, { "epoch": 0.32563990374097573, "grad_norm": 1.4443392753601074, "learning_rate": 1.5756825918934094e-05, "loss": 0.8703, "step": 11908 }, { "epoch": 0.3256672500546926, "grad_norm": 1.3993147611618042, "learning_rate": 1.575610167975089e-05, "loss": 0.8528, "step": 11909 }, { "epoch": 0.32569459636840953, "grad_norm": 1.520280361175537, "learning_rate": 1.5755377395412685e-05, "loss": 0.5361, "step": 11910 }, { "epoch": 0.32572194268212645, "grad_norm": 1.3675192594528198, "learning_rate": 1.5754653065925165e-05, "loss": 0.5392, "step": 11911 }, { "epoch": 0.3257492889958434, "grad_norm": 1.4895602464675903, "learning_rate": 1.5753928691294012e-05, "loss": 0.5465, "step": 11912 }, { "epoch": 0.32577663530956025, "grad_norm": 1.1972767114639282, "learning_rate": 1.5753204271524903e-05, "loss": 0.5386, "step": 11913 }, { "epoch": 0.3258039816232772, "grad_norm": 1.6495237350463867, "learning_rate": 1.575247980662353e-05, "loss": 0.5479, "step": 11914 }, { "epoch": 0.3258313279369941, "grad_norm": 1.88279390335083, "learning_rate": 1.5751755296595574e-05, "loss": 0.4265, "step": 11915 }, { "epoch": 0.325858674250711, "grad_norm": 1.3053253889083862, "learning_rate": 1.5751030741446713e-05, "loss": 0.5292, "step": 11916 }, { "epoch": 0.3258860205644279, "grad_norm": 1.1572282314300537, "learning_rate": 1.5750306141182635e-05, "loss": 0.5247, "step": 11917 }, { "epoch": 0.3259133668781448, "grad_norm": 1.406986117362976, "learning_rate": 1.5749581495809022e-05, "loss": 0.5332, "step": 11918 }, { "epoch": 0.32594071319186174, "grad_norm": 1.6311131715774536, "learning_rate": 1.574885680533156e-05, "loss": 0.4569, "step": 11919 }, { "epoch": 0.32596805950557867, "grad_norm": 2.180673360824585, "learning_rate": 1.5748132069755938e-05, "loss": 0.4784, "step": 11920 }, { "epoch": 0.32599540581929554, "grad_norm": 1.4673023223876953, "learning_rate": 1.5747407289087835e-05, "loss": 0.8602, "step": 11921 }, { "epoch": 0.32602275213301246, "grad_norm": 1.3330423831939697, "learning_rate": 1.5746682463332938e-05, "loss": 0.4882, "step": 11922 }, { "epoch": 0.3260500984467294, "grad_norm": 1.480379343032837, "learning_rate": 1.5745957592496938e-05, "loss": 0.4504, "step": 11923 }, { "epoch": 0.3260774447604463, "grad_norm": 1.166075587272644, "learning_rate": 1.5745232676585514e-05, "loss": 0.5155, "step": 11924 }, { "epoch": 0.3261047910741632, "grad_norm": 1.4248080253601074, "learning_rate": 1.5744507715604358e-05, "loss": 0.4817, "step": 11925 }, { "epoch": 0.3261321373878801, "grad_norm": 1.521295189857483, "learning_rate": 1.5743782709559155e-05, "loss": 0.5383, "step": 11926 }, { "epoch": 0.32615948370159703, "grad_norm": 1.089437484741211, "learning_rate": 1.5743057658455594e-05, "loss": 0.5099, "step": 11927 }, { "epoch": 0.32618683001531396, "grad_norm": 1.2817577123641968, "learning_rate": 1.574233256229936e-05, "loss": 0.515, "step": 11928 }, { "epoch": 0.3262141763290308, "grad_norm": 1.9984275102615356, "learning_rate": 1.574160742109615e-05, "loss": 0.539, "step": 11929 }, { "epoch": 0.32624152264274775, "grad_norm": 1.368387222290039, "learning_rate": 1.574088223485164e-05, "loss": 0.5408, "step": 11930 }, { "epoch": 0.3262688689564647, "grad_norm": 1.184997797012329, "learning_rate": 1.5740157003571522e-05, "loss": 0.5423, "step": 11931 }, { "epoch": 0.3262962152701816, "grad_norm": 1.1152931451797485, "learning_rate": 1.573943172726149e-05, "loss": 0.5041, "step": 11932 }, { "epoch": 0.32632356158389847, "grad_norm": 1.0412194728851318, "learning_rate": 1.5738706405927233e-05, "loss": 0.5319, "step": 11933 }, { "epoch": 0.3263509078976154, "grad_norm": 1.4259406328201294, "learning_rate": 1.573798103957444e-05, "loss": 0.5436, "step": 11934 }, { "epoch": 0.3263782542113323, "grad_norm": 1.1786620616912842, "learning_rate": 1.57372556282088e-05, "loss": 0.5273, "step": 11935 }, { "epoch": 0.32640560052504924, "grad_norm": 1.258321762084961, "learning_rate": 1.5736530171836e-05, "loss": 0.527, "step": 11936 }, { "epoch": 0.3264329468387661, "grad_norm": 1.6330883502960205, "learning_rate": 1.573580467046174e-05, "loss": 0.5387, "step": 11937 }, { "epoch": 0.32646029315248304, "grad_norm": 1.3713434934616089, "learning_rate": 1.5735079124091703e-05, "loss": 0.5372, "step": 11938 }, { "epoch": 0.32648763946619996, "grad_norm": 1.9879567623138428, "learning_rate": 1.573435353273159e-05, "loss": 0.8694, "step": 11939 }, { "epoch": 0.3265149857799169, "grad_norm": 1.2564979791641235, "learning_rate": 1.573362789638708e-05, "loss": 0.5578, "step": 11940 }, { "epoch": 0.32654233209363376, "grad_norm": 1.9230327606201172, "learning_rate": 1.5732902215063876e-05, "loss": 0.4069, "step": 11941 }, { "epoch": 0.3265696784073507, "grad_norm": 1.600446343421936, "learning_rate": 1.5732176488767666e-05, "loss": 0.4917, "step": 11942 }, { "epoch": 0.3265970247210676, "grad_norm": 1.3090577125549316, "learning_rate": 1.5731450717504147e-05, "loss": 0.8796, "step": 11943 }, { "epoch": 0.32662437103478453, "grad_norm": 4.919473648071289, "learning_rate": 1.5730724901279008e-05, "loss": 0.5526, "step": 11944 }, { "epoch": 0.3266517173485014, "grad_norm": 1.4155892133712769, "learning_rate": 1.5729999040097948e-05, "loss": 0.5168, "step": 11945 }, { "epoch": 0.3266790636622183, "grad_norm": 1.6505016088485718, "learning_rate": 1.5729273133966658e-05, "loss": 0.5362, "step": 11946 }, { "epoch": 0.32670640997593525, "grad_norm": 1.3275609016418457, "learning_rate": 1.572854718289083e-05, "loss": 0.5817, "step": 11947 }, { "epoch": 0.3267337562896522, "grad_norm": 1.2851368188858032, "learning_rate": 1.5727821186876162e-05, "loss": 0.5352, "step": 11948 }, { "epoch": 0.32676110260336905, "grad_norm": 1.5590431690216064, "learning_rate": 1.572709514592835e-05, "loss": 0.5079, "step": 11949 }, { "epoch": 0.32678844891708597, "grad_norm": 1.2055045366287231, "learning_rate": 1.572636906005309e-05, "loss": 0.5567, "step": 11950 }, { "epoch": 0.3268157952308029, "grad_norm": 1.362599492073059, "learning_rate": 1.5725642929256074e-05, "loss": 0.5572, "step": 11951 }, { "epoch": 0.3268431415445198, "grad_norm": 1.292136311531067, "learning_rate": 1.5724916753542996e-05, "loss": 0.4945, "step": 11952 }, { "epoch": 0.3268704878582367, "grad_norm": 2.3592312335968018, "learning_rate": 1.5724190532919564e-05, "loss": 0.5509, "step": 11953 }, { "epoch": 0.3268978341719536, "grad_norm": 1.5271193981170654, "learning_rate": 1.5723464267391463e-05, "loss": 0.5507, "step": 11954 }, { "epoch": 0.32692518048567054, "grad_norm": 1.2519100904464722, "learning_rate": 1.57227379569644e-05, "loss": 0.5318, "step": 11955 }, { "epoch": 0.32695252679938747, "grad_norm": 1.8355863094329834, "learning_rate": 1.5722011601644067e-05, "loss": 0.8842, "step": 11956 }, { "epoch": 0.32697987311310434, "grad_norm": 1.3376346826553345, "learning_rate": 1.572128520143616e-05, "loss": 0.4793, "step": 11957 }, { "epoch": 0.32700721942682126, "grad_norm": 1.4004806280136108, "learning_rate": 1.5720558756346386e-05, "loss": 0.5936, "step": 11958 }, { "epoch": 0.3270345657405382, "grad_norm": 1.310530662536621, "learning_rate": 1.5719832266380436e-05, "loss": 0.4801, "step": 11959 }, { "epoch": 0.3270619120542551, "grad_norm": 1.4384809732437134, "learning_rate": 1.571910573154401e-05, "loss": 0.4712, "step": 11960 }, { "epoch": 0.327089258367972, "grad_norm": 2.142982244491577, "learning_rate": 1.571837915184281e-05, "loss": 0.582, "step": 11961 }, { "epoch": 0.3271166046816889, "grad_norm": 1.1452577114105225, "learning_rate": 1.5717652527282535e-05, "loss": 0.5416, "step": 11962 }, { "epoch": 0.32714395099540583, "grad_norm": 1.3857825994491577, "learning_rate": 1.5716925857868884e-05, "loss": 0.5486, "step": 11963 }, { "epoch": 0.32717129730912275, "grad_norm": 1.2653273344039917, "learning_rate": 1.5716199143607563e-05, "loss": 0.51, "step": 11964 }, { "epoch": 0.3271986436228396, "grad_norm": 4.166703701019287, "learning_rate": 1.5715472384504266e-05, "loss": 0.5227, "step": 11965 }, { "epoch": 0.32722598993655655, "grad_norm": 1.4547134637832642, "learning_rate": 1.5714745580564697e-05, "loss": 0.4865, "step": 11966 }, { "epoch": 0.3272533362502735, "grad_norm": 1.2632966041564941, "learning_rate": 1.5714018731794553e-05, "loss": 0.5418, "step": 11967 }, { "epoch": 0.3272806825639904, "grad_norm": 1.1647793054580688, "learning_rate": 1.5713291838199545e-05, "loss": 0.5365, "step": 11968 }, { "epoch": 0.32730802887770727, "grad_norm": 1.40395188331604, "learning_rate": 1.571256489978537e-05, "loss": 0.5393, "step": 11969 }, { "epoch": 0.3273353751914242, "grad_norm": 1.395579218864441, "learning_rate": 1.571183791655773e-05, "loss": 0.5432, "step": 11970 }, { "epoch": 0.3273627215051411, "grad_norm": 1.5839804410934448, "learning_rate": 1.5711110888522325e-05, "loss": 0.5268, "step": 11971 }, { "epoch": 0.32739006781885804, "grad_norm": 4.340685844421387, "learning_rate": 1.571038381568487e-05, "loss": 0.5447, "step": 11972 }, { "epoch": 0.3274174141325749, "grad_norm": 1.5656764507293701, "learning_rate": 1.5709656698051056e-05, "loss": 0.496, "step": 11973 }, { "epoch": 0.32744476044629184, "grad_norm": 1.0915461778640747, "learning_rate": 1.5708929535626594e-05, "loss": 0.5582, "step": 11974 }, { "epoch": 0.32747210676000876, "grad_norm": 1.2312813997268677, "learning_rate": 1.5708202328417183e-05, "loss": 0.5795, "step": 11975 }, { "epoch": 0.3274994530737257, "grad_norm": 1.7079747915267944, "learning_rate": 1.5707475076428535e-05, "loss": 0.4381, "step": 11976 }, { "epoch": 0.32752679938744256, "grad_norm": 2.0124258995056152, "learning_rate": 1.570674777966635e-05, "loss": 0.8986, "step": 11977 }, { "epoch": 0.3275541457011595, "grad_norm": 1.8301078081130981, "learning_rate": 1.5706020438136333e-05, "loss": 0.4501, "step": 11978 }, { "epoch": 0.3275814920148764, "grad_norm": 1.6946667432785034, "learning_rate": 1.5705293051844194e-05, "loss": 0.5339, "step": 11979 }, { "epoch": 0.32760883832859333, "grad_norm": 1.4130688905715942, "learning_rate": 1.5704565620795634e-05, "loss": 0.497, "step": 11980 }, { "epoch": 0.3276361846423102, "grad_norm": 1.642348051071167, "learning_rate": 1.5703838144996365e-05, "loss": 0.5663, "step": 11981 }, { "epoch": 0.3276635309560271, "grad_norm": 1.2730907201766968, "learning_rate": 1.570311062445209e-05, "loss": 0.5235, "step": 11982 }, { "epoch": 0.32769087726974405, "grad_norm": 1.282513976097107, "learning_rate": 1.570238305916852e-05, "loss": 0.5482, "step": 11983 }, { "epoch": 0.327718223583461, "grad_norm": 1.1646851301193237, "learning_rate": 1.5701655449151353e-05, "loss": 0.55, "step": 11984 }, { "epoch": 0.32774556989717785, "grad_norm": 1.6747862100601196, "learning_rate": 1.5700927794406307e-05, "loss": 0.9059, "step": 11985 }, { "epoch": 0.32777291621089477, "grad_norm": 1.6425883769989014, "learning_rate": 1.5700200094939087e-05, "loss": 0.5508, "step": 11986 }, { "epoch": 0.3278002625246117, "grad_norm": 1.6404818296432495, "learning_rate": 1.56994723507554e-05, "loss": 0.4866, "step": 11987 }, { "epoch": 0.32782760883832857, "grad_norm": 1.2047427892684937, "learning_rate": 1.5698744561860962e-05, "loss": 0.4717, "step": 11988 }, { "epoch": 0.3278549551520455, "grad_norm": 1.4355051517486572, "learning_rate": 1.569801672826147e-05, "loss": 0.5615, "step": 11989 }, { "epoch": 0.3278823014657624, "grad_norm": 1.9708802700042725, "learning_rate": 1.5697288849962645e-05, "loss": 0.5241, "step": 11990 }, { "epoch": 0.32790964777947934, "grad_norm": 1.1360160112380981, "learning_rate": 1.569656092697019e-05, "loss": 0.5225, "step": 11991 }, { "epoch": 0.3279369940931962, "grad_norm": 1.161969542503357, "learning_rate": 1.5695832959289818e-05, "loss": 0.5314, "step": 11992 }, { "epoch": 0.32796434040691314, "grad_norm": 1.5112559795379639, "learning_rate": 1.5695104946927235e-05, "loss": 0.4577, "step": 11993 }, { "epoch": 0.32799168672063006, "grad_norm": 1.1559408903121948, "learning_rate": 1.569437688988816e-05, "loss": 0.5423, "step": 11994 }, { "epoch": 0.328019033034347, "grad_norm": 1.2304922342300415, "learning_rate": 1.5693648788178302e-05, "loss": 0.5442, "step": 11995 }, { "epoch": 0.32804637934806385, "grad_norm": 1.1291230916976929, "learning_rate": 1.569292064180337e-05, "loss": 0.51, "step": 11996 }, { "epoch": 0.3280737256617808, "grad_norm": 1.5845820903778076, "learning_rate": 1.569219245076908e-05, "loss": 0.5047, "step": 11997 }, { "epoch": 0.3281010719754977, "grad_norm": 1.578485369682312, "learning_rate": 1.5691464215081142e-05, "loss": 0.5485, "step": 11998 }, { "epoch": 0.32812841828921463, "grad_norm": 1.209585428237915, "learning_rate": 1.569073593474527e-05, "loss": 0.509, "step": 11999 }, { "epoch": 0.3281557646029315, "grad_norm": 1.6131354570388794, "learning_rate": 1.5690007609767174e-05, "loss": 0.6012, "step": 12000 }, { "epoch": 0.3281831109166484, "grad_norm": 1.75574791431427, "learning_rate": 1.568927924015257e-05, "loss": 0.5163, "step": 12001 }, { "epoch": 0.32821045723036535, "grad_norm": 1.4737346172332764, "learning_rate": 1.5688550825907167e-05, "loss": 0.5293, "step": 12002 }, { "epoch": 0.3282378035440823, "grad_norm": 1.2866235971450806, "learning_rate": 1.568782236703669e-05, "loss": 0.5841, "step": 12003 }, { "epoch": 0.32826514985779914, "grad_norm": 1.1993190050125122, "learning_rate": 1.5687093863546846e-05, "loss": 0.5633, "step": 12004 }, { "epoch": 0.32829249617151607, "grad_norm": 1.328145146369934, "learning_rate": 1.568636531544335e-05, "loss": 0.528, "step": 12005 }, { "epoch": 0.328319842485233, "grad_norm": 1.3804599046707153, "learning_rate": 1.5685636722731915e-05, "loss": 0.5252, "step": 12006 }, { "epoch": 0.3283471887989499, "grad_norm": 1.0472204685211182, "learning_rate": 1.5684908085418263e-05, "loss": 0.5599, "step": 12007 }, { "epoch": 0.3283745351126668, "grad_norm": 1.2535653114318848, "learning_rate": 1.568417940350811e-05, "loss": 0.5089, "step": 12008 }, { "epoch": 0.3284018814263837, "grad_norm": 1.4558900594711304, "learning_rate": 1.5683450677007166e-05, "loss": 0.5293, "step": 12009 }, { "epoch": 0.32842922774010064, "grad_norm": 1.2170438766479492, "learning_rate": 1.568272190592115e-05, "loss": 0.5347, "step": 12010 }, { "epoch": 0.32845657405381756, "grad_norm": 1.2716295719146729, "learning_rate": 1.5681993090255783e-05, "loss": 0.5311, "step": 12011 }, { "epoch": 0.32848392036753443, "grad_norm": 3.1013004779815674, "learning_rate": 1.5681264230016777e-05, "loss": 0.5572, "step": 12012 }, { "epoch": 0.32851126668125136, "grad_norm": 1.4030638933181763, "learning_rate": 1.5680535325209853e-05, "loss": 0.5326, "step": 12013 }, { "epoch": 0.3285386129949683, "grad_norm": 1.335461139678955, "learning_rate": 1.5679806375840728e-05, "loss": 0.5201, "step": 12014 }, { "epoch": 0.3285659593086852, "grad_norm": 1.1887565851211548, "learning_rate": 1.567907738191512e-05, "loss": 0.5662, "step": 12015 }, { "epoch": 0.3285933056224021, "grad_norm": 1.3849307298660278, "learning_rate": 1.5678348343438748e-05, "loss": 0.4129, "step": 12016 }, { "epoch": 0.328620651936119, "grad_norm": 1.0924315452575684, "learning_rate": 1.567761926041733e-05, "loss": 0.5451, "step": 12017 }, { "epoch": 0.3286479982498359, "grad_norm": 1.176853895187378, "learning_rate": 1.567689013285659e-05, "loss": 0.5765, "step": 12018 }, { "epoch": 0.32867534456355285, "grad_norm": 1.2675771713256836, "learning_rate": 1.567616096076224e-05, "loss": 0.8638, "step": 12019 }, { "epoch": 0.3287026908772697, "grad_norm": 1.6435996294021606, "learning_rate": 1.567543174414001e-05, "loss": 0.5419, "step": 12020 }, { "epoch": 0.32873003719098665, "grad_norm": 1.3633630275726318, "learning_rate": 1.567470248299561e-05, "loss": 0.5342, "step": 12021 }, { "epoch": 0.32875738350470357, "grad_norm": 1.568610668182373, "learning_rate": 1.5673973177334773e-05, "loss": 0.8613, "step": 12022 }, { "epoch": 0.3287847298184205, "grad_norm": 1.112684965133667, "learning_rate": 1.5673243827163206e-05, "loss": 0.5438, "step": 12023 }, { "epoch": 0.32881207613213737, "grad_norm": 1.6951552629470825, "learning_rate": 1.5672514432486642e-05, "loss": 0.8434, "step": 12024 }, { "epoch": 0.3288394224458543, "grad_norm": 1.173244833946228, "learning_rate": 1.5671784993310795e-05, "loss": 0.5395, "step": 12025 }, { "epoch": 0.3288667687595712, "grad_norm": 1.713777780532837, "learning_rate": 1.5671055509641395e-05, "loss": 0.4895, "step": 12026 }, { "epoch": 0.32889411507328814, "grad_norm": 1.2131562232971191, "learning_rate": 1.5670325981484157e-05, "loss": 0.5355, "step": 12027 }, { "epoch": 0.328921461387005, "grad_norm": 2.2288689613342285, "learning_rate": 1.5669596408844808e-05, "loss": 0.8484, "step": 12028 }, { "epoch": 0.32894880770072193, "grad_norm": 1.4306647777557373, "learning_rate": 1.5668866791729074e-05, "loss": 0.5702, "step": 12029 }, { "epoch": 0.32897615401443886, "grad_norm": 1.289685606956482, "learning_rate": 1.566813713014267e-05, "loss": 0.5776, "step": 12030 }, { "epoch": 0.3290035003281558, "grad_norm": 1.227000117301941, "learning_rate": 1.5667407424091332e-05, "loss": 0.4551, "step": 12031 }, { "epoch": 0.32903084664187265, "grad_norm": 1.325895071029663, "learning_rate": 1.5666677673580773e-05, "loss": 0.5842, "step": 12032 }, { "epoch": 0.3290581929555896, "grad_norm": 1.6666947603225708, "learning_rate": 1.5665947878616723e-05, "loss": 0.8201, "step": 12033 }, { "epoch": 0.3290855392693065, "grad_norm": 1.6821097135543823, "learning_rate": 1.566521803920491e-05, "loss": 0.856, "step": 12034 }, { "epoch": 0.32911288558302343, "grad_norm": 1.6905771493911743, "learning_rate": 1.5664488155351053e-05, "loss": 0.5787, "step": 12035 }, { "epoch": 0.3291402318967403, "grad_norm": 1.4116138219833374, "learning_rate": 1.566375822706088e-05, "loss": 0.5517, "step": 12036 }, { "epoch": 0.3291675782104572, "grad_norm": 1.4014077186584473, "learning_rate": 1.5663028254340117e-05, "loss": 0.536, "step": 12037 }, { "epoch": 0.32919492452417415, "grad_norm": 1.4514894485473633, "learning_rate": 1.5662298237194493e-05, "loss": 0.553, "step": 12038 }, { "epoch": 0.3292222708378911, "grad_norm": 1.1408936977386475, "learning_rate": 1.5661568175629732e-05, "loss": 0.5334, "step": 12039 }, { "epoch": 0.32924961715160794, "grad_norm": 1.3732044696807861, "learning_rate": 1.566083806965156e-05, "loss": 0.568, "step": 12040 }, { "epoch": 0.32927696346532487, "grad_norm": 1.4698185920715332, "learning_rate": 1.566010791926571e-05, "loss": 0.8471, "step": 12041 }, { "epoch": 0.3293043097790418, "grad_norm": 1.3199796676635742, "learning_rate": 1.5659377724477903e-05, "loss": 0.5145, "step": 12042 }, { "epoch": 0.3293316560927587, "grad_norm": 1.6552766561508179, "learning_rate": 1.5658647485293874e-05, "loss": 0.5673, "step": 12043 }, { "epoch": 0.3293590024064756, "grad_norm": 1.6350059509277344, "learning_rate": 1.5657917201719346e-05, "loss": 0.5773, "step": 12044 }, { "epoch": 0.3293863487201925, "grad_norm": 1.6549055576324463, "learning_rate": 1.565718687376005e-05, "loss": 0.4177, "step": 12045 }, { "epoch": 0.32941369503390944, "grad_norm": 1.42423677444458, "learning_rate": 1.5656456501421714e-05, "loss": 0.4743, "step": 12046 }, { "epoch": 0.32944104134762636, "grad_norm": 1.2491956949234009, "learning_rate": 1.565572608471007e-05, "loss": 0.5231, "step": 12047 }, { "epoch": 0.32946838766134323, "grad_norm": 1.54234778881073, "learning_rate": 1.5654995623630848e-05, "loss": 0.5753, "step": 12048 }, { "epoch": 0.32949573397506016, "grad_norm": 1.1316889524459839, "learning_rate": 1.565426511818977e-05, "loss": 0.5529, "step": 12049 }, { "epoch": 0.3295230802887771, "grad_norm": 1.5391926765441895, "learning_rate": 1.565353456839258e-05, "loss": 0.4972, "step": 12050 }, { "epoch": 0.329550426602494, "grad_norm": 1.6533029079437256, "learning_rate": 1.5652803974245002e-05, "loss": 0.5584, "step": 12051 }, { "epoch": 0.3295777729162109, "grad_norm": 1.8956855535507202, "learning_rate": 1.5652073335752766e-05, "loss": 0.8908, "step": 12052 }, { "epoch": 0.3296051192299278, "grad_norm": 1.424159288406372, "learning_rate": 1.5651342652921604e-05, "loss": 0.5565, "step": 12053 }, { "epoch": 0.3296324655436447, "grad_norm": 1.3192983865737915, "learning_rate": 1.5650611925757252e-05, "loss": 0.528, "step": 12054 }, { "epoch": 0.32965981185736165, "grad_norm": 1.2241510152816772, "learning_rate": 1.5649881154265437e-05, "loss": 0.5174, "step": 12055 }, { "epoch": 0.3296871581710785, "grad_norm": 1.3930214643478394, "learning_rate": 1.5649150338451895e-05, "loss": 0.5539, "step": 12056 }, { "epoch": 0.32971450448479545, "grad_norm": 1.5585360527038574, "learning_rate": 1.564841947832236e-05, "loss": 0.5115, "step": 12057 }, { "epoch": 0.32974185079851237, "grad_norm": 1.3736233711242676, "learning_rate": 1.5647688573882565e-05, "loss": 0.5133, "step": 12058 }, { "epoch": 0.3297691971122293, "grad_norm": 1.2065036296844482, "learning_rate": 1.564695762513824e-05, "loss": 0.5345, "step": 12059 }, { "epoch": 0.32979654342594616, "grad_norm": 1.607325792312622, "learning_rate": 1.5646226632095122e-05, "loss": 0.4968, "step": 12060 }, { "epoch": 0.3298238897396631, "grad_norm": 2.516031265258789, "learning_rate": 1.5645495594758947e-05, "loss": 0.5953, "step": 12061 }, { "epoch": 0.32985123605338, "grad_norm": 1.2722671031951904, "learning_rate": 1.5644764513135444e-05, "loss": 0.565, "step": 12062 }, { "epoch": 0.32987858236709694, "grad_norm": 1.4911761283874512, "learning_rate": 1.5644033387230356e-05, "loss": 0.5964, "step": 12063 }, { "epoch": 0.3299059286808138, "grad_norm": 2.417872190475464, "learning_rate": 1.564330221704941e-05, "loss": 0.5372, "step": 12064 }, { "epoch": 0.32993327499453073, "grad_norm": 1.5696691274642944, "learning_rate": 1.5642571002598347e-05, "loss": 0.5643, "step": 12065 }, { "epoch": 0.32996062130824766, "grad_norm": 1.2749654054641724, "learning_rate": 1.5641839743882905e-05, "loss": 0.5281, "step": 12066 }, { "epoch": 0.3299879676219646, "grad_norm": 2.157299518585205, "learning_rate": 1.5641108440908818e-05, "loss": 0.5698, "step": 12067 }, { "epoch": 0.33001531393568145, "grad_norm": 1.1872411966323853, "learning_rate": 1.564037709368182e-05, "loss": 0.5197, "step": 12068 }, { "epoch": 0.3300426602493984, "grad_norm": 1.3701380491256714, "learning_rate": 1.5639645702207655e-05, "loss": 0.5262, "step": 12069 }, { "epoch": 0.3300700065631153, "grad_norm": 2.44912052154541, "learning_rate": 1.5638914266492058e-05, "loss": 0.8429, "step": 12070 }, { "epoch": 0.33009735287683223, "grad_norm": 1.9770771265029907, "learning_rate": 1.5638182786540757e-05, "loss": 0.3981, "step": 12071 }, { "epoch": 0.3301246991905491, "grad_norm": 1.2841894626617432, "learning_rate": 1.5637451262359507e-05, "loss": 0.5045, "step": 12072 }, { "epoch": 0.330152045504266, "grad_norm": 1.3932594060897827, "learning_rate": 1.5636719693954033e-05, "loss": 0.5296, "step": 12073 }, { "epoch": 0.33017939181798295, "grad_norm": 1.5143201351165771, "learning_rate": 1.5635988081330086e-05, "loss": 0.4789, "step": 12074 }, { "epoch": 0.3302067381316999, "grad_norm": 5.085677623748779, "learning_rate": 1.563525642449339e-05, "loss": 0.8772, "step": 12075 }, { "epoch": 0.33023408444541674, "grad_norm": 1.6915377378463745, "learning_rate": 1.5634524723449698e-05, "loss": 0.542, "step": 12076 }, { "epoch": 0.33026143075913367, "grad_norm": 1.2035688161849976, "learning_rate": 1.5633792978204745e-05, "loss": 0.5818, "step": 12077 }, { "epoch": 0.3302887770728506, "grad_norm": 2.7274112701416016, "learning_rate": 1.5633061188764275e-05, "loss": 0.5617, "step": 12078 }, { "epoch": 0.3303161233865675, "grad_norm": 3.3241207599639893, "learning_rate": 1.563232935513402e-05, "loss": 0.4449, "step": 12079 }, { "epoch": 0.3303434697002844, "grad_norm": 1.2675219774246216, "learning_rate": 1.5631597477319725e-05, "loss": 0.5317, "step": 12080 }, { "epoch": 0.3303708160140013, "grad_norm": 1.671106219291687, "learning_rate": 1.5630865555327137e-05, "loss": 0.5943, "step": 12081 }, { "epoch": 0.33039816232771824, "grad_norm": 2.07303524017334, "learning_rate": 1.5630133589161994e-05, "loss": 0.3726, "step": 12082 }, { "epoch": 0.33042550864143516, "grad_norm": 1.6238913536071777, "learning_rate": 1.5629401578830033e-05, "loss": 0.5404, "step": 12083 }, { "epoch": 0.33045285495515203, "grad_norm": 1.4176952838897705, "learning_rate": 1.5628669524337e-05, "loss": 0.5384, "step": 12084 }, { "epoch": 0.33048020126886896, "grad_norm": 1.4663335084915161, "learning_rate": 1.5627937425688643e-05, "loss": 0.5532, "step": 12085 }, { "epoch": 0.3305075475825859, "grad_norm": 1.398943305015564, "learning_rate": 1.56272052828907e-05, "loss": 0.5292, "step": 12086 }, { "epoch": 0.3305348938963028, "grad_norm": 1.3915647268295288, "learning_rate": 1.5626473095948917e-05, "loss": 0.5154, "step": 12087 }, { "epoch": 0.3305622402100197, "grad_norm": 1.3278744220733643, "learning_rate": 1.5625740864869033e-05, "loss": 0.5689, "step": 12088 }, { "epoch": 0.3305895865237366, "grad_norm": 1.2982858419418335, "learning_rate": 1.5625008589656794e-05, "loss": 0.5052, "step": 12089 }, { "epoch": 0.3306169328374535, "grad_norm": 1.9087798595428467, "learning_rate": 1.562427627031795e-05, "loss": 0.5274, "step": 12090 }, { "epoch": 0.3306442791511704, "grad_norm": 1.41079580783844, "learning_rate": 1.562354390685824e-05, "loss": 0.5172, "step": 12091 }, { "epoch": 0.3306716254648873, "grad_norm": 1.5999970436096191, "learning_rate": 1.5622811499283407e-05, "loss": 0.386, "step": 12092 }, { "epoch": 0.33069897177860424, "grad_norm": 1.9405584335327148, "learning_rate": 1.5622079047599203e-05, "loss": 0.5266, "step": 12093 }, { "epoch": 0.33072631809232117, "grad_norm": 1.5720549821853638, "learning_rate": 1.5621346551811375e-05, "loss": 0.5495, "step": 12094 }, { "epoch": 0.33075366440603804, "grad_norm": 1.373637080192566, "learning_rate": 1.5620614011925657e-05, "loss": 0.5435, "step": 12095 }, { "epoch": 0.33078101071975496, "grad_norm": 1.2838884592056274, "learning_rate": 1.561988142794781e-05, "loss": 0.5737, "step": 12096 }, { "epoch": 0.3308083570334719, "grad_norm": 1.33375883102417, "learning_rate": 1.5619148799883572e-05, "loss": 0.4769, "step": 12097 }, { "epoch": 0.3308357033471888, "grad_norm": 1.3127124309539795, "learning_rate": 1.5618416127738694e-05, "loss": 0.5296, "step": 12098 }, { "epoch": 0.3308630496609057, "grad_norm": 1.4243063926696777, "learning_rate": 1.561768341151892e-05, "loss": 0.4444, "step": 12099 }, { "epoch": 0.3308903959746226, "grad_norm": 1.2431614398956299, "learning_rate": 1.5616950651230002e-05, "loss": 0.5349, "step": 12100 }, { "epoch": 0.33091774228833953, "grad_norm": 1.7314163446426392, "learning_rate": 1.561621784687769e-05, "loss": 0.8734, "step": 12101 }, { "epoch": 0.33094508860205646, "grad_norm": 1.5354878902435303, "learning_rate": 1.5615484998467726e-05, "loss": 0.5317, "step": 12102 }, { "epoch": 0.33097243491577333, "grad_norm": 3.5676450729370117, "learning_rate": 1.5614752106005865e-05, "loss": 0.538, "step": 12103 }, { "epoch": 0.33099978122949025, "grad_norm": 1.825523853302002, "learning_rate": 1.561401916949785e-05, "loss": 0.551, "step": 12104 }, { "epoch": 0.3310271275432072, "grad_norm": 1.5040537118911743, "learning_rate": 1.5613286188949442e-05, "loss": 0.5694, "step": 12105 }, { "epoch": 0.3310544738569241, "grad_norm": 1.5457041263580322, "learning_rate": 1.561255316436638e-05, "loss": 0.5495, "step": 12106 }, { "epoch": 0.331081820170641, "grad_norm": 1.2341984510421753, "learning_rate": 1.5611820095754415e-05, "loss": 0.542, "step": 12107 }, { "epoch": 0.3311091664843579, "grad_norm": 1.1576547622680664, "learning_rate": 1.5611086983119302e-05, "loss": 0.534, "step": 12108 }, { "epoch": 0.3311365127980748, "grad_norm": 1.5485594272613525, "learning_rate": 1.5610353826466794e-05, "loss": 0.537, "step": 12109 }, { "epoch": 0.33116385911179175, "grad_norm": 1.7038214206695557, "learning_rate": 1.5609620625802634e-05, "loss": 0.5169, "step": 12110 }, { "epoch": 0.3311912054255086, "grad_norm": 1.2037220001220703, "learning_rate": 1.560888738113258e-05, "loss": 0.561, "step": 12111 }, { "epoch": 0.33121855173922554, "grad_norm": 1.5702574253082275, "learning_rate": 1.5608154092462386e-05, "loss": 0.5472, "step": 12112 }, { "epoch": 0.33124589805294247, "grad_norm": 2.425171375274658, "learning_rate": 1.5607420759797797e-05, "loss": 0.389, "step": 12113 }, { "epoch": 0.3312732443666594, "grad_norm": 2.3312811851501465, "learning_rate": 1.5606687383144575e-05, "loss": 0.5374, "step": 12114 }, { "epoch": 0.33130059068037626, "grad_norm": 1.1696091890335083, "learning_rate": 1.5605953962508465e-05, "loss": 0.5635, "step": 12115 }, { "epoch": 0.3313279369940932, "grad_norm": 1.355048418045044, "learning_rate": 1.5605220497895225e-05, "loss": 0.5357, "step": 12116 }, { "epoch": 0.3313552833078101, "grad_norm": 1.0620510578155518, "learning_rate": 1.5604486989310607e-05, "loss": 0.5289, "step": 12117 }, { "epoch": 0.33138262962152704, "grad_norm": 1.6431288719177246, "learning_rate": 1.560375343676037e-05, "loss": 0.8721, "step": 12118 }, { "epoch": 0.3314099759352439, "grad_norm": 1.1794774532318115, "learning_rate": 1.5603019840250258e-05, "loss": 0.5291, "step": 12119 }, { "epoch": 0.33143732224896083, "grad_norm": 1.2298141717910767, "learning_rate": 1.5602286199786036e-05, "loss": 0.512, "step": 12120 }, { "epoch": 0.33146466856267776, "grad_norm": 1.3143857717514038, "learning_rate": 1.560155251537345e-05, "loss": 0.5414, "step": 12121 }, { "epoch": 0.3314920148763947, "grad_norm": 1.3297324180603027, "learning_rate": 1.5600818787018268e-05, "loss": 0.5045, "step": 12122 }, { "epoch": 0.33151936119011155, "grad_norm": 1.9296956062316895, "learning_rate": 1.560008501472624e-05, "loss": 0.5195, "step": 12123 }, { "epoch": 0.3315467075038285, "grad_norm": 1.6054319143295288, "learning_rate": 1.5599351198503114e-05, "loss": 0.5399, "step": 12124 }, { "epoch": 0.3315740538175454, "grad_norm": 1.5499768257141113, "learning_rate": 1.5598617338354656e-05, "loss": 0.551, "step": 12125 }, { "epoch": 0.3316014001312623, "grad_norm": 1.2825088500976562, "learning_rate": 1.559788343428662e-05, "loss": 0.5388, "step": 12126 }, { "epoch": 0.3316287464449792, "grad_norm": 1.274840235710144, "learning_rate": 1.5597149486304767e-05, "loss": 0.5537, "step": 12127 }, { "epoch": 0.3316560927586961, "grad_norm": 1.1889629364013672, "learning_rate": 1.559641549441485e-05, "loss": 0.5449, "step": 12128 }, { "epoch": 0.33168343907241304, "grad_norm": 1.5242054462432861, "learning_rate": 1.5595681458622627e-05, "loss": 0.4851, "step": 12129 }, { "epoch": 0.33171078538612997, "grad_norm": 1.207135796546936, "learning_rate": 1.559494737893386e-05, "loss": 0.4954, "step": 12130 }, { "epoch": 0.33173813169984684, "grad_norm": 1.3165076971054077, "learning_rate": 1.5594213255354304e-05, "loss": 0.5363, "step": 12131 }, { "epoch": 0.33176547801356376, "grad_norm": 1.4778008460998535, "learning_rate": 1.559347908788972e-05, "loss": 0.5697, "step": 12132 }, { "epoch": 0.3317928243272807, "grad_norm": 1.5139292478561401, "learning_rate": 1.5592744876545863e-05, "loss": 0.5437, "step": 12133 }, { "epoch": 0.3318201706409976, "grad_norm": 1.3047760725021362, "learning_rate": 1.55920106213285e-05, "loss": 0.5409, "step": 12134 }, { "epoch": 0.3318475169547145, "grad_norm": 1.3822492361068726, "learning_rate": 1.559127632224339e-05, "loss": 0.4658, "step": 12135 }, { "epoch": 0.3318748632684314, "grad_norm": 1.7386201620101929, "learning_rate": 1.5590541979296286e-05, "loss": 0.5487, "step": 12136 }, { "epoch": 0.33190220958214833, "grad_norm": 1.3515710830688477, "learning_rate": 1.5589807592492957e-05, "loss": 0.5508, "step": 12137 }, { "epoch": 0.33192955589586526, "grad_norm": 1.308322787284851, "learning_rate": 1.5589073161839155e-05, "loss": 0.4656, "step": 12138 }, { "epoch": 0.3319569022095821, "grad_norm": 1.0896567106246948, "learning_rate": 1.5588338687340652e-05, "loss": 0.5601, "step": 12139 }, { "epoch": 0.33198424852329905, "grad_norm": 1.3013567924499512, "learning_rate": 1.5587604169003206e-05, "loss": 0.5397, "step": 12140 }, { "epoch": 0.332011594837016, "grad_norm": 1.248868465423584, "learning_rate": 1.5586869606832574e-05, "loss": 0.5543, "step": 12141 }, { "epoch": 0.3320389411507329, "grad_norm": 2.1191482543945312, "learning_rate": 1.5586135000834523e-05, "loss": 0.4808, "step": 12142 }, { "epoch": 0.33206628746444977, "grad_norm": 1.4933280944824219, "learning_rate": 1.5585400351014815e-05, "loss": 0.5222, "step": 12143 }, { "epoch": 0.3320936337781667, "grad_norm": 1.5508784055709839, "learning_rate": 1.5584665657379212e-05, "loss": 0.5182, "step": 12144 }, { "epoch": 0.3321209800918836, "grad_norm": 1.2040945291519165, "learning_rate": 1.5583930919933482e-05, "loss": 0.527, "step": 12145 }, { "epoch": 0.33214832640560055, "grad_norm": 1.2227929830551147, "learning_rate": 1.5583196138683382e-05, "loss": 0.5541, "step": 12146 }, { "epoch": 0.3321756727193174, "grad_norm": 1.9220397472381592, "learning_rate": 1.5582461313634683e-05, "loss": 0.5634, "step": 12147 }, { "epoch": 0.33220301903303434, "grad_norm": 1.1644177436828613, "learning_rate": 1.5581726444793143e-05, "loss": 0.5398, "step": 12148 }, { "epoch": 0.33223036534675127, "grad_norm": 1.4857416152954102, "learning_rate": 1.558099153216453e-05, "loss": 0.5486, "step": 12149 }, { "epoch": 0.3322577116604682, "grad_norm": 1.1790742874145508, "learning_rate": 1.558025657575461e-05, "loss": 0.5326, "step": 12150 }, { "epoch": 0.33228505797418506, "grad_norm": 1.1436039209365845, "learning_rate": 1.5579521575569147e-05, "loss": 0.5384, "step": 12151 }, { "epoch": 0.332312404287902, "grad_norm": 1.1776596307754517, "learning_rate": 1.557878653161391e-05, "loss": 0.5701, "step": 12152 }, { "epoch": 0.3323397506016189, "grad_norm": 1.183323860168457, "learning_rate": 1.557805144389466e-05, "loss": 0.5498, "step": 12153 }, { "epoch": 0.33236709691533584, "grad_norm": 1.234316110610962, "learning_rate": 1.5577316312417167e-05, "loss": 0.549, "step": 12154 }, { "epoch": 0.3323944432290527, "grad_norm": 1.280428409576416, "learning_rate": 1.5576581137187198e-05, "loss": 0.5059, "step": 12155 }, { "epoch": 0.33242178954276963, "grad_norm": 1.3033688068389893, "learning_rate": 1.5575845918210516e-05, "loss": 0.5817, "step": 12156 }, { "epoch": 0.33244913585648656, "grad_norm": 1.5040342807769775, "learning_rate": 1.5575110655492896e-05, "loss": 0.5957, "step": 12157 }, { "epoch": 0.3324764821702035, "grad_norm": 1.5350964069366455, "learning_rate": 1.55743753490401e-05, "loss": 0.5553, "step": 12158 }, { "epoch": 0.33250382848392035, "grad_norm": 1.3263520002365112, "learning_rate": 1.55736399988579e-05, "loss": 0.4751, "step": 12159 }, { "epoch": 0.3325311747976373, "grad_norm": 1.2712534666061401, "learning_rate": 1.5572904604952058e-05, "loss": 0.5252, "step": 12160 }, { "epoch": 0.3325585211113542, "grad_norm": 1.104794979095459, "learning_rate": 1.5572169167328352e-05, "loss": 0.5482, "step": 12161 }, { "epoch": 0.3325858674250711, "grad_norm": 1.0881508588790894, "learning_rate": 1.5571433685992548e-05, "loss": 0.504, "step": 12162 }, { "epoch": 0.332613213738788, "grad_norm": 1.5972862243652344, "learning_rate": 1.557069816095041e-05, "loss": 0.5281, "step": 12163 }, { "epoch": 0.3326405600525049, "grad_norm": 1.4129689931869507, "learning_rate": 1.5569962592207714e-05, "loss": 0.5605, "step": 12164 }, { "epoch": 0.33266790636622184, "grad_norm": 1.1607050895690918, "learning_rate": 1.556922697977023e-05, "loss": 0.4877, "step": 12165 }, { "epoch": 0.33269525267993877, "grad_norm": 1.096691370010376, "learning_rate": 1.556849132364373e-05, "loss": 0.5239, "step": 12166 }, { "epoch": 0.33272259899365564, "grad_norm": 2.583587646484375, "learning_rate": 1.556775562383398e-05, "loss": 0.8783, "step": 12167 }, { "epoch": 0.33274994530737256, "grad_norm": 1.2886767387390137, "learning_rate": 1.5567019880346754e-05, "loss": 0.5361, "step": 12168 }, { "epoch": 0.3327772916210895, "grad_norm": 1.3648539781570435, "learning_rate": 1.5566284093187824e-05, "loss": 0.4387, "step": 12169 }, { "epoch": 0.3328046379348064, "grad_norm": 1.5486109256744385, "learning_rate": 1.5565548262362963e-05, "loss": 0.8558, "step": 12170 }, { "epoch": 0.3328319842485233, "grad_norm": 1.5839303731918335, "learning_rate": 1.5564812387877942e-05, "loss": 0.5972, "step": 12171 }, { "epoch": 0.3328593305622402, "grad_norm": 1.1123298406600952, "learning_rate": 1.5564076469738533e-05, "loss": 0.5685, "step": 12172 }, { "epoch": 0.33288667687595713, "grad_norm": 1.5258395671844482, "learning_rate": 1.556334050795051e-05, "loss": 0.4917, "step": 12173 }, { "epoch": 0.33291402318967406, "grad_norm": 1.0675976276397705, "learning_rate": 1.5562604502519643e-05, "loss": 0.5116, "step": 12174 }, { "epoch": 0.3329413695033909, "grad_norm": 1.125242829322815, "learning_rate": 1.5561868453451716e-05, "loss": 0.5515, "step": 12175 }, { "epoch": 0.33296871581710785, "grad_norm": 1.020198941230774, "learning_rate": 1.556113236075249e-05, "loss": 0.5345, "step": 12176 }, { "epoch": 0.3329960621308248, "grad_norm": 1.2510008811950684, "learning_rate": 1.556039622442775e-05, "loss": 0.523, "step": 12177 }, { "epoch": 0.3330234084445417, "grad_norm": 1.2386444807052612, "learning_rate": 1.5559660044483263e-05, "loss": 0.5111, "step": 12178 }, { "epoch": 0.33305075475825857, "grad_norm": 1.072197437286377, "learning_rate": 1.555892382092481e-05, "loss": 0.5396, "step": 12179 }, { "epoch": 0.3330781010719755, "grad_norm": 1.5163164138793945, "learning_rate": 1.5558187553758168e-05, "loss": 0.4962, "step": 12180 }, { "epoch": 0.3331054473856924, "grad_norm": 1.0474189519882202, "learning_rate": 1.5557451242989105e-05, "loss": 0.5548, "step": 12181 }, { "epoch": 0.33313279369940935, "grad_norm": 1.5012022256851196, "learning_rate": 1.5556714888623398e-05, "loss": 0.5809, "step": 12182 }, { "epoch": 0.3331601400131262, "grad_norm": 1.527517557144165, "learning_rate": 1.5555978490666828e-05, "loss": 0.531, "step": 12183 }, { "epoch": 0.33318748632684314, "grad_norm": 2.7229020595550537, "learning_rate": 1.555524204912517e-05, "loss": 0.8676, "step": 12184 }, { "epoch": 0.33321483264056007, "grad_norm": 1.113529920578003, "learning_rate": 1.5554505564004205e-05, "loss": 0.5864, "step": 12185 }, { "epoch": 0.333242178954277, "grad_norm": 1.1546108722686768, "learning_rate": 1.5553769035309705e-05, "loss": 0.4894, "step": 12186 }, { "epoch": 0.33326952526799386, "grad_norm": 1.1039471626281738, "learning_rate": 1.555303246304745e-05, "loss": 0.5647, "step": 12187 }, { "epoch": 0.3332968715817108, "grad_norm": 1.1544464826583862, "learning_rate": 1.5552295847223214e-05, "loss": 0.5185, "step": 12188 }, { "epoch": 0.3333242178954277, "grad_norm": 1.4904736280441284, "learning_rate": 1.5551559187842787e-05, "loss": 0.8697, "step": 12189 }, { "epoch": 0.3333515642091446, "grad_norm": 1.3763424158096313, "learning_rate": 1.5550822484911936e-05, "loss": 0.5219, "step": 12190 }, { "epoch": 0.3333789105228615, "grad_norm": 1.2670485973358154, "learning_rate": 1.5550085738436443e-05, "loss": 0.5348, "step": 12191 }, { "epoch": 0.33340625683657843, "grad_norm": 1.29674232006073, "learning_rate": 1.554934894842209e-05, "loss": 0.5444, "step": 12192 }, { "epoch": 0.33343360315029535, "grad_norm": 1.1450088024139404, "learning_rate": 1.554861211487466e-05, "loss": 0.5594, "step": 12193 }, { "epoch": 0.3334609494640122, "grad_norm": 1.1649961471557617, "learning_rate": 1.5547875237799928e-05, "loss": 0.4642, "step": 12194 }, { "epoch": 0.33348829577772915, "grad_norm": 1.7741351127624512, "learning_rate": 1.5547138317203677e-05, "loss": 0.5204, "step": 12195 }, { "epoch": 0.3335156420914461, "grad_norm": 2.012661933898926, "learning_rate": 1.5546401353091685e-05, "loss": 0.4787, "step": 12196 }, { "epoch": 0.333542988405163, "grad_norm": 1.3374724388122559, "learning_rate": 1.5545664345469737e-05, "loss": 0.5031, "step": 12197 }, { "epoch": 0.33357033471887987, "grad_norm": 1.1920280456542969, "learning_rate": 1.5544927294343612e-05, "loss": 0.5394, "step": 12198 }, { "epoch": 0.3335976810325968, "grad_norm": 1.261000633239746, "learning_rate": 1.5544190199719095e-05, "loss": 0.5348, "step": 12199 }, { "epoch": 0.3336250273463137, "grad_norm": 1.1136103868484497, "learning_rate": 1.554345306160196e-05, "loss": 0.5253, "step": 12200 }, { "epoch": 0.33365237366003064, "grad_norm": 1.2311887741088867, "learning_rate": 1.5542715879998e-05, "loss": 0.5164, "step": 12201 }, { "epoch": 0.3336797199737475, "grad_norm": 1.2415047883987427, "learning_rate": 1.5541978654912994e-05, "loss": 0.5588, "step": 12202 }, { "epoch": 0.33370706628746444, "grad_norm": 1.2281851768493652, "learning_rate": 1.5541241386352725e-05, "loss": 0.5302, "step": 12203 }, { "epoch": 0.33373441260118136, "grad_norm": 1.0954829454421997, "learning_rate": 1.5540504074322976e-05, "loss": 0.5041, "step": 12204 }, { "epoch": 0.3337617589148983, "grad_norm": 1.4219579696655273, "learning_rate": 1.5539766718829534e-05, "loss": 0.4132, "step": 12205 }, { "epoch": 0.33378910522861516, "grad_norm": 1.4334591627120972, "learning_rate": 1.5539029319878175e-05, "loss": 0.5467, "step": 12206 }, { "epoch": 0.3338164515423321, "grad_norm": 1.3004564046859741, "learning_rate": 1.5538291877474697e-05, "loss": 0.5211, "step": 12207 }, { "epoch": 0.333843797856049, "grad_norm": 1.4936461448669434, "learning_rate": 1.5537554391624873e-05, "loss": 0.5354, "step": 12208 }, { "epoch": 0.33387114416976593, "grad_norm": 4.070037364959717, "learning_rate": 1.5536816862334493e-05, "loss": 0.8578, "step": 12209 }, { "epoch": 0.3338984904834828, "grad_norm": 1.128287434577942, "learning_rate": 1.5536079289609348e-05, "loss": 0.5379, "step": 12210 }, { "epoch": 0.3339258367971997, "grad_norm": 1.2986198663711548, "learning_rate": 1.5535341673455214e-05, "loss": 0.5224, "step": 12211 }, { "epoch": 0.33395318311091665, "grad_norm": 1.186984658241272, "learning_rate": 1.553460401387789e-05, "loss": 0.5192, "step": 12212 }, { "epoch": 0.3339805294246336, "grad_norm": 1.5621000528335571, "learning_rate": 1.5533866310883146e-05, "loss": 0.872, "step": 12213 }, { "epoch": 0.33400787573835045, "grad_norm": 1.293950080871582, "learning_rate": 1.553312856447678e-05, "loss": 0.5515, "step": 12214 }, { "epoch": 0.33403522205206737, "grad_norm": 1.732582688331604, "learning_rate": 1.553239077466458e-05, "loss": 0.5966, "step": 12215 }, { "epoch": 0.3340625683657843, "grad_norm": 1.7054702043533325, "learning_rate": 1.553165294145233e-05, "loss": 0.8418, "step": 12216 }, { "epoch": 0.3340899146795012, "grad_norm": 1.284094214439392, "learning_rate": 1.5530915064845822e-05, "loss": 0.5669, "step": 12217 }, { "epoch": 0.3341172609932181, "grad_norm": 1.1876708269119263, "learning_rate": 1.5530177144850836e-05, "loss": 0.5644, "step": 12218 }, { "epoch": 0.334144607306935, "grad_norm": 1.6620396375656128, "learning_rate": 1.552943918147317e-05, "loss": 0.4207, "step": 12219 }, { "epoch": 0.33417195362065194, "grad_norm": 1.0842567682266235, "learning_rate": 1.5528701174718613e-05, "loss": 0.5354, "step": 12220 }, { "epoch": 0.33419929993436887, "grad_norm": 1.4542659521102905, "learning_rate": 1.552796312459295e-05, "loss": 0.4958, "step": 12221 }, { "epoch": 0.33422664624808573, "grad_norm": 1.178398609161377, "learning_rate": 1.5527225031101967e-05, "loss": 0.5231, "step": 12222 }, { "epoch": 0.33425399256180266, "grad_norm": 1.4735885858535767, "learning_rate": 1.552648689425146e-05, "loss": 0.8835, "step": 12223 }, { "epoch": 0.3342813388755196, "grad_norm": 1.2446060180664062, "learning_rate": 1.5525748714047224e-05, "loss": 0.5013, "step": 12224 }, { "epoch": 0.3343086851892365, "grad_norm": 1.1922897100448608, "learning_rate": 1.552501049049504e-05, "loss": 0.5341, "step": 12225 }, { "epoch": 0.3343360315029534, "grad_norm": 1.328777551651001, "learning_rate": 1.5524272223600703e-05, "loss": 0.5481, "step": 12226 }, { "epoch": 0.3343633778166703, "grad_norm": 1.340844988822937, "learning_rate": 1.5523533913370008e-05, "loss": 0.4356, "step": 12227 }, { "epoch": 0.33439072413038723, "grad_norm": 1.2109341621398926, "learning_rate": 1.5522795559808747e-05, "loss": 0.8772, "step": 12228 }, { "epoch": 0.33441807044410415, "grad_norm": 1.773282766342163, "learning_rate": 1.5522057162922702e-05, "loss": 0.4315, "step": 12229 }, { "epoch": 0.334445416757821, "grad_norm": 1.155156135559082, "learning_rate": 1.5521318722717675e-05, "loss": 0.4641, "step": 12230 }, { "epoch": 0.33447276307153795, "grad_norm": 1.2569512128829956, "learning_rate": 1.552058023919946e-05, "loss": 0.5405, "step": 12231 }, { "epoch": 0.3345001093852549, "grad_norm": 1.2927939891815186, "learning_rate": 1.5519841712373844e-05, "loss": 0.5463, "step": 12232 }, { "epoch": 0.3345274556989718, "grad_norm": 1.352707028388977, "learning_rate": 1.551910314224662e-05, "loss": 0.5134, "step": 12233 }, { "epoch": 0.33455480201268867, "grad_norm": 1.2252815961837769, "learning_rate": 1.551836452882359e-05, "loss": 0.5499, "step": 12234 }, { "epoch": 0.3345821483264056, "grad_norm": 1.1682345867156982, "learning_rate": 1.551762587211054e-05, "loss": 0.5533, "step": 12235 }, { "epoch": 0.3346094946401225, "grad_norm": 1.2617765665054321, "learning_rate": 1.5516887172113268e-05, "loss": 0.5447, "step": 12236 }, { "epoch": 0.33463684095383944, "grad_norm": 1.2135486602783203, "learning_rate": 1.551614842883757e-05, "loss": 0.5408, "step": 12237 }, { "epoch": 0.3346641872675563, "grad_norm": 1.3326166868209839, "learning_rate": 1.551540964228924e-05, "loss": 0.5632, "step": 12238 }, { "epoch": 0.33469153358127324, "grad_norm": 1.1556298732757568, "learning_rate": 1.5514670812474074e-05, "loss": 0.5207, "step": 12239 }, { "epoch": 0.33471887989499016, "grad_norm": 1.280572533607483, "learning_rate": 1.5513931939397867e-05, "loss": 0.4561, "step": 12240 }, { "epoch": 0.3347462262087071, "grad_norm": 1.1637122631072998, "learning_rate": 1.5513193023066415e-05, "loss": 0.5231, "step": 12241 }, { "epoch": 0.33477357252242396, "grad_norm": 1.3022031784057617, "learning_rate": 1.5512454063485514e-05, "loss": 0.5518, "step": 12242 }, { "epoch": 0.3348009188361409, "grad_norm": 1.1994032859802246, "learning_rate": 1.5511715060660963e-05, "loss": 0.5577, "step": 12243 }, { "epoch": 0.3348282651498578, "grad_norm": 1.1581499576568604, "learning_rate": 1.551097601459856e-05, "loss": 0.5215, "step": 12244 }, { "epoch": 0.33485561146357473, "grad_norm": 2.4609198570251465, "learning_rate": 1.5510236925304102e-05, "loss": 0.5395, "step": 12245 }, { "epoch": 0.3348829577772916, "grad_norm": 1.2451837062835693, "learning_rate": 1.5509497792783387e-05, "loss": 0.5217, "step": 12246 }, { "epoch": 0.3349103040910085, "grad_norm": 1.5371156930923462, "learning_rate": 1.5508758617042206e-05, "loss": 0.8574, "step": 12247 }, { "epoch": 0.33493765040472545, "grad_norm": 1.2079894542694092, "learning_rate": 1.5508019398086366e-05, "loss": 0.5226, "step": 12248 }, { "epoch": 0.3349649967184424, "grad_norm": 2.0910043716430664, "learning_rate": 1.5507280135921668e-05, "loss": 0.4002, "step": 12249 }, { "epoch": 0.33499234303215925, "grad_norm": 1.3580667972564697, "learning_rate": 1.5506540830553905e-05, "loss": 0.4612, "step": 12250 }, { "epoch": 0.33501968934587617, "grad_norm": 1.7124552726745605, "learning_rate": 1.550580148198888e-05, "loss": 0.5926, "step": 12251 }, { "epoch": 0.3350470356595931, "grad_norm": 1.1952507495880127, "learning_rate": 1.5505062090232388e-05, "loss": 0.5044, "step": 12252 }, { "epoch": 0.33507438197331, "grad_norm": 1.2713855504989624, "learning_rate": 1.5504322655290235e-05, "loss": 0.5569, "step": 12253 }, { "epoch": 0.3351017282870269, "grad_norm": 1.2524406909942627, "learning_rate": 1.5503583177168222e-05, "loss": 0.5357, "step": 12254 }, { "epoch": 0.3351290746007438, "grad_norm": 1.2795164585113525, "learning_rate": 1.5502843655872147e-05, "loss": 0.546, "step": 12255 }, { "epoch": 0.33515642091446074, "grad_norm": 1.2493747472763062, "learning_rate": 1.550210409140781e-05, "loss": 0.8726, "step": 12256 }, { "epoch": 0.33518376722817766, "grad_norm": 1.0442135334014893, "learning_rate": 1.5501364483781016e-05, "loss": 0.5015, "step": 12257 }, { "epoch": 0.33521111354189453, "grad_norm": 1.2456495761871338, "learning_rate": 1.5500624832997565e-05, "loss": 0.5212, "step": 12258 }, { "epoch": 0.33523845985561146, "grad_norm": 1.7521717548370361, "learning_rate": 1.5499885139063263e-05, "loss": 0.4059, "step": 12259 }, { "epoch": 0.3352658061693284, "grad_norm": 1.4670743942260742, "learning_rate": 1.549914540198391e-05, "loss": 0.5371, "step": 12260 }, { "epoch": 0.3352931524830453, "grad_norm": 1.2013380527496338, "learning_rate": 1.5498405621765305e-05, "loss": 0.5172, "step": 12261 }, { "epoch": 0.3353204987967622, "grad_norm": 1.174164891242981, "learning_rate": 1.5497665798413256e-05, "loss": 0.832, "step": 12262 }, { "epoch": 0.3353478451104791, "grad_norm": 1.4416394233703613, "learning_rate": 1.549692593193357e-05, "loss": 0.4498, "step": 12263 }, { "epoch": 0.33537519142419603, "grad_norm": 1.2475268840789795, "learning_rate": 1.5496186022332045e-05, "loss": 0.5375, "step": 12264 }, { "epoch": 0.33540253773791295, "grad_norm": 1.5115233659744263, "learning_rate": 1.5495446069614487e-05, "loss": 0.5723, "step": 12265 }, { "epoch": 0.3354298840516298, "grad_norm": 1.1939805746078491, "learning_rate": 1.54947060737867e-05, "loss": 0.5447, "step": 12266 }, { "epoch": 0.33545723036534675, "grad_norm": 1.22902250289917, "learning_rate": 1.549396603485449e-05, "loss": 0.5233, "step": 12267 }, { "epoch": 0.3354845766790637, "grad_norm": 1.3014674186706543, "learning_rate": 1.5493225952823664e-05, "loss": 0.4536, "step": 12268 }, { "epoch": 0.3355119229927806, "grad_norm": 1.416617512702942, "learning_rate": 1.5492485827700027e-05, "loss": 0.5911, "step": 12269 }, { "epoch": 0.33553926930649747, "grad_norm": 1.3790721893310547, "learning_rate": 1.5491745659489383e-05, "loss": 0.8963, "step": 12270 }, { "epoch": 0.3355666156202144, "grad_norm": 1.9788575172424316, "learning_rate": 1.549100544819754e-05, "loss": 0.5235, "step": 12271 }, { "epoch": 0.3355939619339313, "grad_norm": 1.338978886604309, "learning_rate": 1.5490265193830304e-05, "loss": 0.5381, "step": 12272 }, { "epoch": 0.33562130824764824, "grad_norm": 1.2745662927627563, "learning_rate": 1.5489524896393488e-05, "loss": 0.5758, "step": 12273 }, { "epoch": 0.3356486545613651, "grad_norm": 2.096663236618042, "learning_rate": 1.5488784555892886e-05, "loss": 0.3818, "step": 12274 }, { "epoch": 0.33567600087508204, "grad_norm": 1.3268218040466309, "learning_rate": 1.5488044172334317e-05, "loss": 0.5436, "step": 12275 }, { "epoch": 0.33570334718879896, "grad_norm": 1.5721408128738403, "learning_rate": 1.5487303745723587e-05, "loss": 0.5621, "step": 12276 }, { "epoch": 0.3357306935025159, "grad_norm": 1.1327311992645264, "learning_rate": 1.5486563276066503e-05, "loss": 0.5274, "step": 12277 }, { "epoch": 0.33575803981623276, "grad_norm": 1.1988905668258667, "learning_rate": 1.5485822763368874e-05, "loss": 0.5486, "step": 12278 }, { "epoch": 0.3357853861299497, "grad_norm": 1.2441991567611694, "learning_rate": 1.548508220763651e-05, "loss": 0.541, "step": 12279 }, { "epoch": 0.3358127324436666, "grad_norm": 1.0265041589736938, "learning_rate": 1.548434160887522e-05, "loss": 0.5527, "step": 12280 }, { "epoch": 0.33584007875738353, "grad_norm": 1.1640945672988892, "learning_rate": 1.548360096709081e-05, "loss": 0.541, "step": 12281 }, { "epoch": 0.3358674250711004, "grad_norm": 1.2379505634307861, "learning_rate": 1.54828602822891e-05, "loss": 0.543, "step": 12282 }, { "epoch": 0.3358947713848173, "grad_norm": 1.495469570159912, "learning_rate": 1.5482119554475886e-05, "loss": 0.5233, "step": 12283 }, { "epoch": 0.33592211769853425, "grad_norm": 1.4556931257247925, "learning_rate": 1.548137878365699e-05, "loss": 0.5378, "step": 12284 }, { "epoch": 0.3359494640122512, "grad_norm": 1.1898795366287231, "learning_rate": 1.5480637969838223e-05, "loss": 0.5411, "step": 12285 }, { "epoch": 0.33597681032596804, "grad_norm": 1.5861408710479736, "learning_rate": 1.5479897113025392e-05, "loss": 0.5479, "step": 12286 }, { "epoch": 0.33600415663968497, "grad_norm": 1.1640082597732544, "learning_rate": 1.5479156213224307e-05, "loss": 0.5437, "step": 12287 }, { "epoch": 0.3360315029534019, "grad_norm": 1.4421778917312622, "learning_rate": 1.5478415270440786e-05, "loss": 0.501, "step": 12288 }, { "epoch": 0.3360588492671188, "grad_norm": 1.1767303943634033, "learning_rate": 1.5477674284680643e-05, "loss": 0.5075, "step": 12289 }, { "epoch": 0.3360861955808357, "grad_norm": 1.436230182647705, "learning_rate": 1.547693325594968e-05, "loss": 0.4278, "step": 12290 }, { "epoch": 0.3361135418945526, "grad_norm": 1.1330264806747437, "learning_rate": 1.5476192184253723e-05, "loss": 0.532, "step": 12291 }, { "epoch": 0.33614088820826954, "grad_norm": 1.2802538871765137, "learning_rate": 1.5475451069598573e-05, "loss": 0.559, "step": 12292 }, { "epoch": 0.3361682345219864, "grad_norm": 1.3814153671264648, "learning_rate": 1.5474709911990058e-05, "loss": 0.5594, "step": 12293 }, { "epoch": 0.33619558083570333, "grad_norm": 1.2459523677825928, "learning_rate": 1.547396871143398e-05, "loss": 0.4719, "step": 12294 }, { "epoch": 0.33622292714942026, "grad_norm": 0.9905849695205688, "learning_rate": 1.547322746793616e-05, "loss": 0.5297, "step": 12295 }, { "epoch": 0.3362502734631372, "grad_norm": 1.2213841676712036, "learning_rate": 1.5472486181502406e-05, "loss": 0.5136, "step": 12296 }, { "epoch": 0.33627761977685405, "grad_norm": 1.7767733335494995, "learning_rate": 1.547174485213854e-05, "loss": 0.4921, "step": 12297 }, { "epoch": 0.336304966090571, "grad_norm": 1.3792604207992554, "learning_rate": 1.5471003479850378e-05, "loss": 0.5377, "step": 12298 }, { "epoch": 0.3363323124042879, "grad_norm": 1.2427325248718262, "learning_rate": 1.547026206464373e-05, "loss": 0.4455, "step": 12299 }, { "epoch": 0.33635965871800483, "grad_norm": 1.2920867204666138, "learning_rate": 1.546952060652442e-05, "loss": 0.5469, "step": 12300 }, { "epoch": 0.3363870050317217, "grad_norm": 1.8013434410095215, "learning_rate": 1.5468779105498258e-05, "loss": 0.8603, "step": 12301 }, { "epoch": 0.3364143513454386, "grad_norm": 1.3783138990402222, "learning_rate": 1.546803756157106e-05, "loss": 0.8652, "step": 12302 }, { "epoch": 0.33644169765915555, "grad_norm": 1.5208425521850586, "learning_rate": 1.546729597474865e-05, "loss": 0.4677, "step": 12303 }, { "epoch": 0.3364690439728725, "grad_norm": 2.117100477218628, "learning_rate": 1.546655434503684e-05, "loss": 0.4079, "step": 12304 }, { "epoch": 0.33649639028658934, "grad_norm": 1.5258415937423706, "learning_rate": 1.5465812672441452e-05, "loss": 0.4008, "step": 12305 }, { "epoch": 0.33652373660030627, "grad_norm": 1.5236843824386597, "learning_rate": 1.5465070956968296e-05, "loss": 0.4676, "step": 12306 }, { "epoch": 0.3365510829140232, "grad_norm": 1.4855138063430786, "learning_rate": 1.54643291986232e-05, "loss": 0.5729, "step": 12307 }, { "epoch": 0.3365784292277401, "grad_norm": 1.3006880283355713, "learning_rate": 1.5463587397411982e-05, "loss": 0.3817, "step": 12308 }, { "epoch": 0.336605775541457, "grad_norm": 1.4528533220291138, "learning_rate": 1.5462845553340455e-05, "loss": 0.5517, "step": 12309 }, { "epoch": 0.3366331218551739, "grad_norm": 1.5348557233810425, "learning_rate": 1.5462103666414444e-05, "loss": 0.8824, "step": 12310 }, { "epoch": 0.33666046816889084, "grad_norm": 1.4081319570541382, "learning_rate": 1.5461361736639764e-05, "loss": 0.5272, "step": 12311 }, { "epoch": 0.33668781448260776, "grad_norm": 1.3776514530181885, "learning_rate": 1.546061976402224e-05, "loss": 0.5342, "step": 12312 }, { "epoch": 0.33671516079632463, "grad_norm": 1.5902152061462402, "learning_rate": 1.5459877748567693e-05, "loss": 0.5509, "step": 12313 }, { "epoch": 0.33674250711004156, "grad_norm": 1.2089298963546753, "learning_rate": 1.5459135690281936e-05, "loss": 0.5285, "step": 12314 }, { "epoch": 0.3367698534237585, "grad_norm": 1.33259916305542, "learning_rate": 1.54583935891708e-05, "loss": 0.5417, "step": 12315 }, { "epoch": 0.3367971997374754, "grad_norm": 1.4611254930496216, "learning_rate": 1.5457651445240106e-05, "loss": 0.5124, "step": 12316 }, { "epoch": 0.3368245460511923, "grad_norm": 1.3446087837219238, "learning_rate": 1.5456909258495664e-05, "loss": 0.5479, "step": 12317 }, { "epoch": 0.3368518923649092, "grad_norm": 1.1698657274246216, "learning_rate": 1.5456167028943308e-05, "loss": 0.5268, "step": 12318 }, { "epoch": 0.3368792386786261, "grad_norm": 1.47588050365448, "learning_rate": 1.545542475658886e-05, "loss": 0.5663, "step": 12319 }, { "epoch": 0.33690658499234305, "grad_norm": 1.4227827787399292, "learning_rate": 1.545468244143814e-05, "loss": 0.5701, "step": 12320 }, { "epoch": 0.3369339313060599, "grad_norm": 1.4514884948730469, "learning_rate": 1.5453940083496965e-05, "loss": 0.5479, "step": 12321 }, { "epoch": 0.33696127761977684, "grad_norm": 1.6110048294067383, "learning_rate": 1.545319768277117e-05, "loss": 0.5551, "step": 12322 }, { "epoch": 0.33698862393349377, "grad_norm": 1.3615773916244507, "learning_rate": 1.5452455239266572e-05, "loss": 0.8176, "step": 12323 }, { "epoch": 0.3370159702472107, "grad_norm": 1.4451555013656616, "learning_rate": 1.5451712752988996e-05, "loss": 0.5603, "step": 12324 }, { "epoch": 0.33704331656092756, "grad_norm": 1.448783278465271, "learning_rate": 1.545097022394427e-05, "loss": 0.5346, "step": 12325 }, { "epoch": 0.3370706628746445, "grad_norm": 1.4225518703460693, "learning_rate": 1.5450227652138215e-05, "loss": 0.538, "step": 12326 }, { "epoch": 0.3370980091883614, "grad_norm": 1.4060182571411133, "learning_rate": 1.5449485037576662e-05, "loss": 0.4381, "step": 12327 }, { "epoch": 0.33712535550207834, "grad_norm": 1.8571399450302124, "learning_rate": 1.5448742380265428e-05, "loss": 0.5411, "step": 12328 }, { "epoch": 0.3371527018157952, "grad_norm": 1.2941268682479858, "learning_rate": 1.5447999680210346e-05, "loss": 0.5698, "step": 12329 }, { "epoch": 0.33718004812951213, "grad_norm": 1.3128002882003784, "learning_rate": 1.5447256937417238e-05, "loss": 0.5373, "step": 12330 }, { "epoch": 0.33720739444322906, "grad_norm": 1.3963595628738403, "learning_rate": 1.5446514151891934e-05, "loss": 0.5151, "step": 12331 }, { "epoch": 0.337234740756946, "grad_norm": 1.3443299531936646, "learning_rate": 1.544577132364026e-05, "loss": 0.5632, "step": 12332 }, { "epoch": 0.33726208707066285, "grad_norm": 1.8150471448898315, "learning_rate": 1.544502845266804e-05, "loss": 0.5037, "step": 12333 }, { "epoch": 0.3372894333843798, "grad_norm": 1.6017565727233887, "learning_rate": 1.5444285538981104e-05, "loss": 0.8928, "step": 12334 }, { "epoch": 0.3373167796980967, "grad_norm": 1.385098934173584, "learning_rate": 1.5443542582585284e-05, "loss": 0.567, "step": 12335 }, { "epoch": 0.3373441260118136, "grad_norm": 1.4424595832824707, "learning_rate": 1.54427995834864e-05, "loss": 0.5407, "step": 12336 }, { "epoch": 0.3373714723255305, "grad_norm": 1.6568506956100464, "learning_rate": 1.5442056541690284e-05, "loss": 0.5784, "step": 12337 }, { "epoch": 0.3373988186392474, "grad_norm": 1.8777233362197876, "learning_rate": 1.544131345720277e-05, "loss": 0.4204, "step": 12338 }, { "epoch": 0.33742616495296435, "grad_norm": 1.2325743436813354, "learning_rate": 1.544057033002968e-05, "loss": 0.5388, "step": 12339 }, { "epoch": 0.33745351126668127, "grad_norm": 1.1317341327667236, "learning_rate": 1.543982716017685e-05, "loss": 0.5097, "step": 12340 }, { "epoch": 0.33748085758039814, "grad_norm": 1.1972965002059937, "learning_rate": 1.5439083947650103e-05, "loss": 0.5305, "step": 12341 }, { "epoch": 0.33750820389411507, "grad_norm": 1.5998564958572388, "learning_rate": 1.5438340692455276e-05, "loss": 0.4671, "step": 12342 }, { "epoch": 0.337535550207832, "grad_norm": 1.3193047046661377, "learning_rate": 1.5437597394598197e-05, "loss": 0.5133, "step": 12343 }, { "epoch": 0.3375628965215489, "grad_norm": 1.3600746393203735, "learning_rate": 1.5436854054084696e-05, "loss": 0.5419, "step": 12344 }, { "epoch": 0.3375902428352658, "grad_norm": 1.4325904846191406, "learning_rate": 1.5436110670920605e-05, "loss": 0.5337, "step": 12345 }, { "epoch": 0.3376175891489827, "grad_norm": 1.405335545539856, "learning_rate": 1.5435367245111753e-05, "loss": 0.5507, "step": 12346 }, { "epoch": 0.33764493546269964, "grad_norm": 1.4387811422348022, "learning_rate": 1.5434623776663977e-05, "loss": 0.5331, "step": 12347 }, { "epoch": 0.33767228177641656, "grad_norm": 1.307795763015747, "learning_rate": 1.543388026558311e-05, "loss": 0.5619, "step": 12348 }, { "epoch": 0.33769962809013343, "grad_norm": 1.4243828058242798, "learning_rate": 1.5433136711874978e-05, "loss": 0.5425, "step": 12349 }, { "epoch": 0.33772697440385036, "grad_norm": 4.265448093414307, "learning_rate": 1.5432393115545417e-05, "loss": 0.3573, "step": 12350 }, { "epoch": 0.3377543207175673, "grad_norm": 2.657135486602783, "learning_rate": 1.5431649476600263e-05, "loss": 0.4589, "step": 12351 }, { "epoch": 0.3377816670312842, "grad_norm": 1.3725248575210571, "learning_rate": 1.5430905795045347e-05, "loss": 0.5194, "step": 12352 }, { "epoch": 0.3378090133450011, "grad_norm": 1.162634253501892, "learning_rate": 1.5430162070886503e-05, "loss": 0.5669, "step": 12353 }, { "epoch": 0.337836359658718, "grad_norm": 1.171583652496338, "learning_rate": 1.5429418304129566e-05, "loss": 0.5137, "step": 12354 }, { "epoch": 0.3378637059724349, "grad_norm": 2.0114474296569824, "learning_rate": 1.542867449478037e-05, "loss": 0.5723, "step": 12355 }, { "epoch": 0.33789105228615185, "grad_norm": 1.2695680856704712, "learning_rate": 1.542793064284475e-05, "loss": 0.5479, "step": 12356 }, { "epoch": 0.3379183985998687, "grad_norm": 1.1771601438522339, "learning_rate": 1.542718674832854e-05, "loss": 0.5144, "step": 12357 }, { "epoch": 0.33794574491358564, "grad_norm": 1.2304941415786743, "learning_rate": 1.542644281123758e-05, "loss": 0.5582, "step": 12358 }, { "epoch": 0.33797309122730257, "grad_norm": 1.3286705017089844, "learning_rate": 1.54256988315777e-05, "loss": 0.5584, "step": 12359 }, { "epoch": 0.3380004375410195, "grad_norm": 1.4221851825714111, "learning_rate": 1.5424954809354743e-05, "loss": 0.5602, "step": 12360 }, { "epoch": 0.33802778385473636, "grad_norm": 1.4717856645584106, "learning_rate": 1.5424210744574542e-05, "loss": 0.4038, "step": 12361 }, { "epoch": 0.3380551301684533, "grad_norm": 2.3565077781677246, "learning_rate": 1.5423466637242933e-05, "loss": 0.5269, "step": 12362 }, { "epoch": 0.3380824764821702, "grad_norm": 1.7395068407058716, "learning_rate": 1.5422722487365757e-05, "loss": 0.562, "step": 12363 }, { "epoch": 0.33810982279588714, "grad_norm": 1.2328861951828003, "learning_rate": 1.5421978294948848e-05, "loss": 0.5343, "step": 12364 }, { "epoch": 0.338137169109604, "grad_norm": 1.5222450494766235, "learning_rate": 1.542123405999804e-05, "loss": 0.5447, "step": 12365 }, { "epoch": 0.33816451542332093, "grad_norm": 1.2580962181091309, "learning_rate": 1.5420489782519183e-05, "loss": 0.5367, "step": 12366 }, { "epoch": 0.33819186173703786, "grad_norm": 1.3251841068267822, "learning_rate": 1.5419745462518106e-05, "loss": 0.4466, "step": 12367 }, { "epoch": 0.3382192080507548, "grad_norm": 1.4561777114868164, "learning_rate": 1.541900110000065e-05, "loss": 0.5649, "step": 12368 }, { "epoch": 0.33824655436447165, "grad_norm": 1.1435707807540894, "learning_rate": 1.541825669497266e-05, "loss": 0.5357, "step": 12369 }, { "epoch": 0.3382739006781886, "grad_norm": 1.3573720455169678, "learning_rate": 1.541751224743997e-05, "loss": 0.5181, "step": 12370 }, { "epoch": 0.3383012469919055, "grad_norm": 1.837292194366455, "learning_rate": 1.5416767757408423e-05, "loss": 0.6045, "step": 12371 }, { "epoch": 0.3383285933056224, "grad_norm": 1.4799890518188477, "learning_rate": 1.541602322488385e-05, "loss": 0.4885, "step": 12372 }, { "epoch": 0.3383559396193393, "grad_norm": 1.1953564882278442, "learning_rate": 1.5415278649872105e-05, "loss": 0.4834, "step": 12373 }, { "epoch": 0.3383832859330562, "grad_norm": 1.6945325136184692, "learning_rate": 1.541453403237902e-05, "loss": 0.5542, "step": 12374 }, { "epoch": 0.33841063224677315, "grad_norm": 1.1466071605682373, "learning_rate": 1.541378937241044e-05, "loss": 0.5186, "step": 12375 }, { "epoch": 0.33843797856049007, "grad_norm": 3.7788617610931396, "learning_rate": 1.5413044669972206e-05, "loss": 0.8684, "step": 12376 }, { "epoch": 0.33846532487420694, "grad_norm": 1.6141599416732788, "learning_rate": 1.541229992507016e-05, "loss": 0.5031, "step": 12377 }, { "epoch": 0.33849267118792387, "grad_norm": 1.340208649635315, "learning_rate": 1.5411555137710146e-05, "loss": 0.5273, "step": 12378 }, { "epoch": 0.3385200175016408, "grad_norm": 1.4608269929885864, "learning_rate": 1.5410810307898003e-05, "loss": 0.3858, "step": 12379 }, { "epoch": 0.3385473638153577, "grad_norm": 1.480778694152832, "learning_rate": 1.5410065435639573e-05, "loss": 0.5297, "step": 12380 }, { "epoch": 0.3385747101290746, "grad_norm": 1.1367799043655396, "learning_rate": 1.540932052094071e-05, "loss": 0.5503, "step": 12381 }, { "epoch": 0.3386020564427915, "grad_norm": 1.212130069732666, "learning_rate": 1.5408575563807242e-05, "loss": 0.533, "step": 12382 }, { "epoch": 0.33862940275650844, "grad_norm": 1.305762529373169, "learning_rate": 1.540783056424502e-05, "loss": 0.917, "step": 12383 }, { "epoch": 0.33865674907022536, "grad_norm": 1.1866058111190796, "learning_rate": 1.5407085522259894e-05, "loss": 0.5362, "step": 12384 }, { "epoch": 0.33868409538394223, "grad_norm": 1.32516610622406, "learning_rate": 1.5406340437857705e-05, "loss": 0.462, "step": 12385 }, { "epoch": 0.33871144169765915, "grad_norm": 2.075289249420166, "learning_rate": 1.5405595311044298e-05, "loss": 0.4919, "step": 12386 }, { "epoch": 0.3387387880113761, "grad_norm": 1.2975902557373047, "learning_rate": 1.540485014182551e-05, "loss": 0.548, "step": 12387 }, { "epoch": 0.338766134325093, "grad_norm": 1.3690547943115234, "learning_rate": 1.54041049302072e-05, "loss": 0.5261, "step": 12388 }, { "epoch": 0.3387934806388099, "grad_norm": 1.2855852842330933, "learning_rate": 1.5403359676195204e-05, "loss": 0.8643, "step": 12389 }, { "epoch": 0.3388208269525268, "grad_norm": 1.1511331796646118, "learning_rate": 1.5402614379795373e-05, "loss": 0.528, "step": 12390 }, { "epoch": 0.3388481732662437, "grad_norm": 1.162861943244934, "learning_rate": 1.540186904101355e-05, "loss": 0.5176, "step": 12391 }, { "epoch": 0.33887551957996065, "grad_norm": 1.1625336408615112, "learning_rate": 1.540112365985559e-05, "loss": 0.5241, "step": 12392 }, { "epoch": 0.3389028658936775, "grad_norm": 1.4919885396957397, "learning_rate": 1.540037823632733e-05, "loss": 0.4934, "step": 12393 }, { "epoch": 0.33893021220739444, "grad_norm": 1.3803707361221313, "learning_rate": 1.5399632770434624e-05, "loss": 0.4529, "step": 12394 }, { "epoch": 0.33895755852111137, "grad_norm": 1.2316826581954956, "learning_rate": 1.539888726218332e-05, "loss": 0.5417, "step": 12395 }, { "epoch": 0.33898490483482824, "grad_norm": 1.3111083507537842, "learning_rate": 1.5398141711579263e-05, "loss": 0.887, "step": 12396 }, { "epoch": 0.33901225114854516, "grad_norm": 1.1978996992111206, "learning_rate": 1.5397396118628308e-05, "loss": 0.5041, "step": 12397 }, { "epoch": 0.3390395974622621, "grad_norm": 1.3769549131393433, "learning_rate": 1.5396650483336296e-05, "loss": 0.5453, "step": 12398 }, { "epoch": 0.339066943775979, "grad_norm": 1.662989854812622, "learning_rate": 1.5395904805709076e-05, "loss": 0.5341, "step": 12399 }, { "epoch": 0.3390942900896959, "grad_norm": 1.0391123294830322, "learning_rate": 1.5395159085752503e-05, "loss": 0.5403, "step": 12400 }, { "epoch": 0.3391216364034128, "grad_norm": 1.1715302467346191, "learning_rate": 1.5394413323472428e-05, "loss": 0.5204, "step": 12401 }, { "epoch": 0.33914898271712973, "grad_norm": 1.386100172996521, "learning_rate": 1.53936675188747e-05, "loss": 0.5434, "step": 12402 }, { "epoch": 0.33917632903084666, "grad_norm": 2.7254247665405273, "learning_rate": 1.5392921671965163e-05, "loss": 0.4675, "step": 12403 }, { "epoch": 0.3392036753445635, "grad_norm": 1.1097875833511353, "learning_rate": 1.539217578274968e-05, "loss": 0.55, "step": 12404 }, { "epoch": 0.33923102165828045, "grad_norm": 1.8209284543991089, "learning_rate": 1.539142985123409e-05, "loss": 0.8247, "step": 12405 }, { "epoch": 0.3392583679719974, "grad_norm": 1.2974553108215332, "learning_rate": 1.5390683877424254e-05, "loss": 0.5443, "step": 12406 }, { "epoch": 0.3392857142857143, "grad_norm": 1.4800381660461426, "learning_rate": 1.5389937861326013e-05, "loss": 0.552, "step": 12407 }, { "epoch": 0.33931306059943117, "grad_norm": 1.501779317855835, "learning_rate": 1.5389191802945235e-05, "loss": 0.8695, "step": 12408 }, { "epoch": 0.3393404069131481, "grad_norm": 1.6916582584381104, "learning_rate": 1.538844570228776e-05, "loss": 0.5794, "step": 12409 }, { "epoch": 0.339367753226865, "grad_norm": 1.1328258514404297, "learning_rate": 1.5387699559359443e-05, "loss": 0.529, "step": 12410 }, { "epoch": 0.33939509954058195, "grad_norm": 1.178861379623413, "learning_rate": 1.5386953374166144e-05, "loss": 0.4785, "step": 12411 }, { "epoch": 0.3394224458542988, "grad_norm": 1.4152488708496094, "learning_rate": 1.5386207146713704e-05, "loss": 0.397, "step": 12412 }, { "epoch": 0.33944979216801574, "grad_norm": 1.2938045263290405, "learning_rate": 1.538546087700799e-05, "loss": 0.584, "step": 12413 }, { "epoch": 0.33947713848173267, "grad_norm": 1.544995665550232, "learning_rate": 1.5384714565054853e-05, "loss": 0.5544, "step": 12414 }, { "epoch": 0.3395044847954496, "grad_norm": 1.3382983207702637, "learning_rate": 1.5383968210860144e-05, "loss": 0.5363, "step": 12415 }, { "epoch": 0.33953183110916646, "grad_norm": 1.3428891897201538, "learning_rate": 1.5383221814429717e-05, "loss": 0.5111, "step": 12416 }, { "epoch": 0.3395591774228834, "grad_norm": 1.1984385251998901, "learning_rate": 1.5382475375769432e-05, "loss": 0.5119, "step": 12417 }, { "epoch": 0.3395865237366003, "grad_norm": 1.0404632091522217, "learning_rate": 1.538172889488514e-05, "loss": 0.5628, "step": 12418 }, { "epoch": 0.33961387005031723, "grad_norm": 2.117309808731079, "learning_rate": 1.5380982371782704e-05, "loss": 0.4624, "step": 12419 }, { "epoch": 0.3396412163640341, "grad_norm": 1.4132499694824219, "learning_rate": 1.5380235806467975e-05, "loss": 0.5711, "step": 12420 }, { "epoch": 0.33966856267775103, "grad_norm": 1.3864307403564453, "learning_rate": 1.5379489198946807e-05, "loss": 0.547, "step": 12421 }, { "epoch": 0.33969590899146795, "grad_norm": 1.385703206062317, "learning_rate": 1.5378742549225064e-05, "loss": 0.8814, "step": 12422 }, { "epoch": 0.3397232553051849, "grad_norm": 1.3690212965011597, "learning_rate": 1.5377995857308596e-05, "loss": 0.5328, "step": 12423 }, { "epoch": 0.33975060161890175, "grad_norm": 1.7133252620697021, "learning_rate": 1.5377249123203263e-05, "loss": 0.4086, "step": 12424 }, { "epoch": 0.3397779479326187, "grad_norm": 1.8757513761520386, "learning_rate": 1.5376502346914927e-05, "loss": 0.9069, "step": 12425 }, { "epoch": 0.3398052942463356, "grad_norm": 2.1944668292999268, "learning_rate": 1.5375755528449442e-05, "loss": 0.4045, "step": 12426 }, { "epoch": 0.3398326405600525, "grad_norm": 1.1907856464385986, "learning_rate": 1.5375008667812668e-05, "loss": 0.8549, "step": 12427 }, { "epoch": 0.3398599868737694, "grad_norm": 1.3843480348587036, "learning_rate": 1.5374261765010464e-05, "loss": 0.541, "step": 12428 }, { "epoch": 0.3398873331874863, "grad_norm": 1.479239821434021, "learning_rate": 1.5373514820048686e-05, "loss": 0.5756, "step": 12429 }, { "epoch": 0.33991467950120324, "grad_norm": 1.5535755157470703, "learning_rate": 1.53727678329332e-05, "loss": 0.8725, "step": 12430 }, { "epoch": 0.33994202581492017, "grad_norm": 1.2427394390106201, "learning_rate": 1.5372020803669857e-05, "loss": 0.5433, "step": 12431 }, { "epoch": 0.33996937212863704, "grad_norm": 1.8432468175888062, "learning_rate": 1.537127373226453e-05, "loss": 0.4184, "step": 12432 }, { "epoch": 0.33999671844235396, "grad_norm": 1.4870082139968872, "learning_rate": 1.537052661872307e-05, "loss": 0.5516, "step": 12433 }, { "epoch": 0.3400240647560709, "grad_norm": 1.4687460660934448, "learning_rate": 1.536977946305134e-05, "loss": 0.4343, "step": 12434 }, { "epoch": 0.3400514110697878, "grad_norm": 1.2944809198379517, "learning_rate": 1.53690322652552e-05, "loss": 0.4765, "step": 12435 }, { "epoch": 0.3400787573835047, "grad_norm": 1.345590353012085, "learning_rate": 1.536828502534051e-05, "loss": 0.5721, "step": 12436 }, { "epoch": 0.3401061036972216, "grad_norm": 1.5078692436218262, "learning_rate": 1.536753774331314e-05, "loss": 0.8304, "step": 12437 }, { "epoch": 0.34013345001093853, "grad_norm": 1.2813254594802856, "learning_rate": 1.5366790419178946e-05, "loss": 0.5451, "step": 12438 }, { "epoch": 0.34016079632465546, "grad_norm": 1.666688323020935, "learning_rate": 1.536604305294379e-05, "loss": 0.3961, "step": 12439 }, { "epoch": 0.3401881426383723, "grad_norm": 1.234997034072876, "learning_rate": 1.5365295644613536e-05, "loss": 0.5625, "step": 12440 }, { "epoch": 0.34021548895208925, "grad_norm": 1.3656281232833862, "learning_rate": 1.536454819419405e-05, "loss": 0.8417, "step": 12441 }, { "epoch": 0.3402428352658062, "grad_norm": 1.499067783355713, "learning_rate": 1.536380070169119e-05, "loss": 0.4944, "step": 12442 }, { "epoch": 0.3402701815795231, "grad_norm": 1.5013060569763184, "learning_rate": 1.5363053167110825e-05, "loss": 0.5827, "step": 12443 }, { "epoch": 0.34029752789323997, "grad_norm": 1.6452138423919678, "learning_rate": 1.5362305590458814e-05, "loss": 0.8703, "step": 12444 }, { "epoch": 0.3403248742069569, "grad_norm": 1.104133129119873, "learning_rate": 1.5361557971741026e-05, "loss": 0.5505, "step": 12445 }, { "epoch": 0.3403522205206738, "grad_norm": 6.478231430053711, "learning_rate": 1.536081031096333e-05, "loss": 0.4357, "step": 12446 }, { "epoch": 0.34037956683439075, "grad_norm": 1.700322151184082, "learning_rate": 1.5360062608131582e-05, "loss": 0.499, "step": 12447 }, { "epoch": 0.3404069131481076, "grad_norm": 1.6820569038391113, "learning_rate": 1.5359314863251648e-05, "loss": 0.5428, "step": 12448 }, { "epoch": 0.34043425946182454, "grad_norm": 1.3428547382354736, "learning_rate": 1.5358567076329404e-05, "loss": 0.5545, "step": 12449 }, { "epoch": 0.34046160577554146, "grad_norm": 1.172964096069336, "learning_rate": 1.5357819247370703e-05, "loss": 0.537, "step": 12450 }, { "epoch": 0.3404889520892584, "grad_norm": 1.1069257259368896, "learning_rate": 1.535707137638142e-05, "loss": 0.5539, "step": 12451 }, { "epoch": 0.34051629840297526, "grad_norm": 1.6830363273620605, "learning_rate": 1.5356323463367422e-05, "loss": 0.5312, "step": 12452 }, { "epoch": 0.3405436447166922, "grad_norm": 1.6932772397994995, "learning_rate": 1.535557550833457e-05, "loss": 0.4562, "step": 12453 }, { "epoch": 0.3405709910304091, "grad_norm": 2.467195510864258, "learning_rate": 1.535482751128874e-05, "loss": 0.4465, "step": 12454 }, { "epoch": 0.34059833734412603, "grad_norm": 1.1978349685668945, "learning_rate": 1.535407947223579e-05, "loss": 0.5699, "step": 12455 }, { "epoch": 0.3406256836578429, "grad_norm": 1.2252352237701416, "learning_rate": 1.5353331391181593e-05, "loss": 0.5445, "step": 12456 }, { "epoch": 0.34065302997155983, "grad_norm": 1.4902362823486328, "learning_rate": 1.5352583268132023e-05, "loss": 0.5526, "step": 12457 }, { "epoch": 0.34068037628527675, "grad_norm": 1.0680736303329468, "learning_rate": 1.535183510309294e-05, "loss": 0.5174, "step": 12458 }, { "epoch": 0.3407077225989937, "grad_norm": 1.7253684997558594, "learning_rate": 1.535108689607022e-05, "loss": 0.4669, "step": 12459 }, { "epoch": 0.34073506891271055, "grad_norm": 1.4370650053024292, "learning_rate": 1.5350338647069727e-05, "loss": 0.5304, "step": 12460 }, { "epoch": 0.3407624152264275, "grad_norm": 1.151947259902954, "learning_rate": 1.5349590356097332e-05, "loss": 0.5024, "step": 12461 }, { "epoch": 0.3407897615401444, "grad_norm": 1.7388157844543457, "learning_rate": 1.5348842023158907e-05, "loss": 0.8797, "step": 12462 }, { "epoch": 0.3408171078538613, "grad_norm": 1.5401537418365479, "learning_rate": 1.5348093648260322e-05, "loss": 0.5423, "step": 12463 }, { "epoch": 0.3408444541675782, "grad_norm": 1.2980858087539673, "learning_rate": 1.5347345231407447e-05, "loss": 0.5492, "step": 12464 }, { "epoch": 0.3408718004812951, "grad_norm": 1.4056103229522705, "learning_rate": 1.5346596772606153e-05, "loss": 0.5035, "step": 12465 }, { "epoch": 0.34089914679501204, "grad_norm": 1.4102425575256348, "learning_rate": 1.5345848271862315e-05, "loss": 0.5416, "step": 12466 }, { "epoch": 0.34092649310872897, "grad_norm": 1.8961580991744995, "learning_rate": 1.5345099729181798e-05, "loss": 0.52, "step": 12467 }, { "epoch": 0.34095383942244584, "grad_norm": 1.4538675546646118, "learning_rate": 1.534435114457048e-05, "loss": 0.5352, "step": 12468 }, { "epoch": 0.34098118573616276, "grad_norm": 1.226595163345337, "learning_rate": 1.534360251803423e-05, "loss": 0.5044, "step": 12469 }, { "epoch": 0.3410085320498797, "grad_norm": 1.4875720739364624, "learning_rate": 1.534285384957892e-05, "loss": 0.4855, "step": 12470 }, { "epoch": 0.3410358783635966, "grad_norm": 1.6733428239822388, "learning_rate": 1.534210513921043e-05, "loss": 0.5095, "step": 12471 }, { "epoch": 0.3410632246773135, "grad_norm": 1.282043695449829, "learning_rate": 1.5341356386934626e-05, "loss": 0.5552, "step": 12472 }, { "epoch": 0.3410905709910304, "grad_norm": 1.4429352283477783, "learning_rate": 1.5340607592757386e-05, "loss": 0.5056, "step": 12473 }, { "epoch": 0.34111791730474733, "grad_norm": 1.2318408489227295, "learning_rate": 1.533985875668458e-05, "loss": 0.504, "step": 12474 }, { "epoch": 0.34114526361846426, "grad_norm": 1.6384918689727783, "learning_rate": 1.5339109878722088e-05, "loss": 0.5194, "step": 12475 }, { "epoch": 0.3411726099321811, "grad_norm": 1.3157440423965454, "learning_rate": 1.533836095887578e-05, "loss": 0.845, "step": 12476 }, { "epoch": 0.34119995624589805, "grad_norm": 1.4104522466659546, "learning_rate": 1.5337611997151533e-05, "loss": 0.4869, "step": 12477 }, { "epoch": 0.341227302559615, "grad_norm": 1.2851113080978394, "learning_rate": 1.533686299355522e-05, "loss": 0.5528, "step": 12478 }, { "epoch": 0.3412546488733319, "grad_norm": 1.596569538116455, "learning_rate": 1.5336113948092722e-05, "loss": 0.5567, "step": 12479 }, { "epoch": 0.34128199518704877, "grad_norm": 1.2892357110977173, "learning_rate": 1.5335364860769904e-05, "loss": 0.5551, "step": 12480 }, { "epoch": 0.3413093415007657, "grad_norm": 1.3397722244262695, "learning_rate": 1.5334615731592658e-05, "loss": 0.5438, "step": 12481 }, { "epoch": 0.3413366878144826, "grad_norm": 1.4407401084899902, "learning_rate": 1.533386656056685e-05, "loss": 0.542, "step": 12482 }, { "epoch": 0.34136403412819954, "grad_norm": 1.3171069622039795, "learning_rate": 1.5333117347698362e-05, "loss": 0.5522, "step": 12483 }, { "epoch": 0.3413913804419164, "grad_norm": 1.2344684600830078, "learning_rate": 1.533236809299307e-05, "loss": 0.537, "step": 12484 }, { "epoch": 0.34141872675563334, "grad_norm": 1.5385724306106567, "learning_rate": 1.5331618796456847e-05, "loss": 0.4259, "step": 12485 }, { "epoch": 0.34144607306935026, "grad_norm": 1.073673129081726, "learning_rate": 1.5330869458095575e-05, "loss": 0.5379, "step": 12486 }, { "epoch": 0.3414734193830672, "grad_norm": 1.2083953619003296, "learning_rate": 1.5330120077915138e-05, "loss": 0.538, "step": 12487 }, { "epoch": 0.34150076569678406, "grad_norm": 1.3804490566253662, "learning_rate": 1.5329370655921403e-05, "loss": 0.4476, "step": 12488 }, { "epoch": 0.341528112010501, "grad_norm": 1.5728075504302979, "learning_rate": 1.5328621192120257e-05, "loss": 0.5243, "step": 12489 }, { "epoch": 0.3415554583242179, "grad_norm": 1.2526607513427734, "learning_rate": 1.5327871686517578e-05, "loss": 0.4834, "step": 12490 }, { "epoch": 0.34158280463793483, "grad_norm": 1.3846218585968018, "learning_rate": 1.5327122139119244e-05, "loss": 0.563, "step": 12491 }, { "epoch": 0.3416101509516517, "grad_norm": 1.591722011566162, "learning_rate": 1.532637254993114e-05, "loss": 0.5342, "step": 12492 }, { "epoch": 0.34163749726536863, "grad_norm": 3.395347833633423, "learning_rate": 1.5325622918959138e-05, "loss": 0.5736, "step": 12493 }, { "epoch": 0.34166484357908555, "grad_norm": 1.4199291467666626, "learning_rate": 1.5324873246209122e-05, "loss": 0.5417, "step": 12494 }, { "epoch": 0.3416921898928024, "grad_norm": 1.1934292316436768, "learning_rate": 1.532412353168698e-05, "loss": 0.5098, "step": 12495 }, { "epoch": 0.34171953620651935, "grad_norm": 1.2715890407562256, "learning_rate": 1.532337377539858e-05, "loss": 0.5641, "step": 12496 }, { "epoch": 0.3417468825202363, "grad_norm": 1.2034902572631836, "learning_rate": 1.5322623977349814e-05, "loss": 0.5094, "step": 12497 }, { "epoch": 0.3417742288339532, "grad_norm": 2.8422365188598633, "learning_rate": 1.5321874137546562e-05, "loss": 0.5812, "step": 12498 }, { "epoch": 0.34180157514767007, "grad_norm": 1.4324803352355957, "learning_rate": 1.5321124255994702e-05, "loss": 0.5284, "step": 12499 }, { "epoch": 0.341828921461387, "grad_norm": 5.0285139083862305, "learning_rate": 1.5320374332700124e-05, "loss": 0.42, "step": 12500 }, { "epoch": 0.3418562677751039, "grad_norm": 1.6688522100448608, "learning_rate": 1.5319624367668705e-05, "loss": 0.5104, "step": 12501 }, { "epoch": 0.34188361408882084, "grad_norm": 1.547556757926941, "learning_rate": 1.5318874360906325e-05, "loss": 0.4606, "step": 12502 }, { "epoch": 0.3419109604025377, "grad_norm": 1.3889892101287842, "learning_rate": 1.531812431241888e-05, "loss": 0.5348, "step": 12503 }, { "epoch": 0.34193830671625464, "grad_norm": 1.7581557035446167, "learning_rate": 1.531737422221224e-05, "loss": 0.8361, "step": 12504 }, { "epoch": 0.34196565302997156, "grad_norm": 1.3152934312820435, "learning_rate": 1.53166240902923e-05, "loss": 0.5496, "step": 12505 }, { "epoch": 0.3419929993436885, "grad_norm": 1.3902310132980347, "learning_rate": 1.531587391666494e-05, "loss": 0.5312, "step": 12506 }, { "epoch": 0.34202034565740536, "grad_norm": 1.267975926399231, "learning_rate": 1.531512370133604e-05, "loss": 0.5414, "step": 12507 }, { "epoch": 0.3420476919711223, "grad_norm": 1.5318189859390259, "learning_rate": 1.5314373444311498e-05, "loss": 0.523, "step": 12508 }, { "epoch": 0.3420750382848392, "grad_norm": 1.050729751586914, "learning_rate": 1.531362314559719e-05, "loss": 0.5466, "step": 12509 }, { "epoch": 0.34210238459855613, "grad_norm": 1.2473735809326172, "learning_rate": 1.5312872805199e-05, "loss": 0.5466, "step": 12510 }, { "epoch": 0.342129730912273, "grad_norm": 1.9944363832473755, "learning_rate": 1.5312122423122822e-05, "loss": 0.8313, "step": 12511 }, { "epoch": 0.3421570772259899, "grad_norm": 1.5420411825180054, "learning_rate": 1.5311371999374534e-05, "loss": 0.8457, "step": 12512 }, { "epoch": 0.34218442353970685, "grad_norm": 1.5987719297409058, "learning_rate": 1.5310621533960032e-05, "loss": 0.5597, "step": 12513 }, { "epoch": 0.3422117698534238, "grad_norm": 1.4134470224380493, "learning_rate": 1.5309871026885196e-05, "loss": 0.853, "step": 12514 }, { "epoch": 0.34223911616714064, "grad_norm": 1.2352179288864136, "learning_rate": 1.530912047815592e-05, "loss": 0.5538, "step": 12515 }, { "epoch": 0.34226646248085757, "grad_norm": 1.6087156534194946, "learning_rate": 1.5308369887778084e-05, "loss": 0.5408, "step": 12516 }, { "epoch": 0.3422938087945745, "grad_norm": 1.4609562158584595, "learning_rate": 1.530761925575758e-05, "loss": 0.4667, "step": 12517 }, { "epoch": 0.3423211551082914, "grad_norm": 1.3297432661056519, "learning_rate": 1.5306868582100296e-05, "loss": 0.8826, "step": 12518 }, { "epoch": 0.3423485014220083, "grad_norm": 1.5120052099227905, "learning_rate": 1.5306117866812124e-05, "loss": 0.5338, "step": 12519 }, { "epoch": 0.3423758477357252, "grad_norm": 1.4108110666275024, "learning_rate": 1.530536710989895e-05, "loss": 0.5539, "step": 12520 }, { "epoch": 0.34240319404944214, "grad_norm": 1.6624171733856201, "learning_rate": 1.5304616311366665e-05, "loss": 0.5374, "step": 12521 }, { "epoch": 0.34243054036315906, "grad_norm": 2.166156053543091, "learning_rate": 1.5303865471221154e-05, "loss": 0.5518, "step": 12522 }, { "epoch": 0.34245788667687593, "grad_norm": 1.824001669883728, "learning_rate": 1.5303114589468315e-05, "loss": 0.4967, "step": 12523 }, { "epoch": 0.34248523299059286, "grad_norm": 1.6844465732574463, "learning_rate": 1.5302363666114035e-05, "loss": 0.5351, "step": 12524 }, { "epoch": 0.3425125793043098, "grad_norm": 1.509047508239746, "learning_rate": 1.5301612701164203e-05, "loss": 0.4947, "step": 12525 }, { "epoch": 0.3425399256180267, "grad_norm": 1.2409332990646362, "learning_rate": 1.5300861694624708e-05, "loss": 0.5396, "step": 12526 }, { "epoch": 0.3425672719317436, "grad_norm": 2.6490747928619385, "learning_rate": 1.5300110646501448e-05, "loss": 0.3986, "step": 12527 }, { "epoch": 0.3425946182454605, "grad_norm": 1.890222430229187, "learning_rate": 1.5299359556800312e-05, "loss": 0.4308, "step": 12528 }, { "epoch": 0.3426219645591774, "grad_norm": 1.690466046333313, "learning_rate": 1.529860842552719e-05, "loss": 0.5718, "step": 12529 }, { "epoch": 0.34264931087289435, "grad_norm": 1.3656666278839111, "learning_rate": 1.529785725268798e-05, "loss": 0.5393, "step": 12530 }, { "epoch": 0.3426766571866112, "grad_norm": 1.2853429317474365, "learning_rate": 1.5297106038288565e-05, "loss": 0.5494, "step": 12531 }, { "epoch": 0.34270400350032815, "grad_norm": 1.2139707803726196, "learning_rate": 1.5296354782334845e-05, "loss": 0.5222, "step": 12532 }, { "epoch": 0.34273134981404507, "grad_norm": 1.8403772115707397, "learning_rate": 1.5295603484832716e-05, "loss": 0.5274, "step": 12533 }, { "epoch": 0.342758696127762, "grad_norm": 1.3025248050689697, "learning_rate": 1.5294852145788065e-05, "loss": 0.565, "step": 12534 }, { "epoch": 0.34278604244147887, "grad_norm": 1.4879848957061768, "learning_rate": 1.5294100765206793e-05, "loss": 0.4927, "step": 12535 }, { "epoch": 0.3428133887551958, "grad_norm": 1.3106744289398193, "learning_rate": 1.5293349343094784e-05, "loss": 0.5326, "step": 12536 }, { "epoch": 0.3428407350689127, "grad_norm": 1.2007620334625244, "learning_rate": 1.5292597879457943e-05, "loss": 0.5266, "step": 12537 }, { "epoch": 0.34286808138262964, "grad_norm": 1.6130492687225342, "learning_rate": 1.529184637430216e-05, "loss": 0.6055, "step": 12538 }, { "epoch": 0.3428954276963465, "grad_norm": 1.4305553436279297, "learning_rate": 1.529109482763333e-05, "loss": 0.56, "step": 12539 }, { "epoch": 0.34292277401006344, "grad_norm": 2.443087577819824, "learning_rate": 1.5290343239457352e-05, "loss": 0.5484, "step": 12540 }, { "epoch": 0.34295012032378036, "grad_norm": 1.2051169872283936, "learning_rate": 1.5289591609780122e-05, "loss": 0.5367, "step": 12541 }, { "epoch": 0.3429774666374973, "grad_norm": 1.342118501663208, "learning_rate": 1.528883993860753e-05, "loss": 0.5385, "step": 12542 }, { "epoch": 0.34300481295121416, "grad_norm": 1.493795394897461, "learning_rate": 1.528808822594548e-05, "loss": 0.5342, "step": 12543 }, { "epoch": 0.3430321592649311, "grad_norm": 1.1699280738830566, "learning_rate": 1.5287336471799866e-05, "loss": 0.5535, "step": 12544 }, { "epoch": 0.343059505578648, "grad_norm": 1.316988468170166, "learning_rate": 1.5286584676176586e-05, "loss": 0.4705, "step": 12545 }, { "epoch": 0.34308685189236493, "grad_norm": 1.3073549270629883, "learning_rate": 1.5285832839081537e-05, "loss": 0.5291, "step": 12546 }, { "epoch": 0.3431141982060818, "grad_norm": 1.5842901468276978, "learning_rate": 1.5285080960520616e-05, "loss": 0.5058, "step": 12547 }, { "epoch": 0.3431415445197987, "grad_norm": 1.6164659261703491, "learning_rate": 1.528432904049972e-05, "loss": 0.5301, "step": 12548 }, { "epoch": 0.34316889083351565, "grad_norm": 1.508388638496399, "learning_rate": 1.5283577079024753e-05, "loss": 0.507, "step": 12549 }, { "epoch": 0.3431962371472326, "grad_norm": 1.2866421937942505, "learning_rate": 1.528282507610161e-05, "loss": 0.5049, "step": 12550 }, { "epoch": 0.34322358346094944, "grad_norm": 1.5627268552780151, "learning_rate": 1.528207303173619e-05, "loss": 0.5133, "step": 12551 }, { "epoch": 0.34325092977466637, "grad_norm": 1.3521223068237305, "learning_rate": 1.5281320945934395e-05, "loss": 0.5324, "step": 12552 }, { "epoch": 0.3432782760883833, "grad_norm": 5.771712779998779, "learning_rate": 1.528056881870212e-05, "loss": 0.4105, "step": 12553 }, { "epoch": 0.3433056224021002, "grad_norm": 1.843809723854065, "learning_rate": 1.5279816650045274e-05, "loss": 0.9023, "step": 12554 }, { "epoch": 0.3433329687158171, "grad_norm": 1.4442105293273926, "learning_rate": 1.527906443996975e-05, "loss": 0.5534, "step": 12555 }, { "epoch": 0.343360315029534, "grad_norm": 1.5129276514053345, "learning_rate": 1.527831218848145e-05, "loss": 0.5406, "step": 12556 }, { "epoch": 0.34338766134325094, "grad_norm": 1.7051646709442139, "learning_rate": 1.5277559895586277e-05, "loss": 0.9028, "step": 12557 }, { "epoch": 0.34341500765696786, "grad_norm": 1.419176697731018, "learning_rate": 1.527680756129013e-05, "loss": 0.4713, "step": 12558 }, { "epoch": 0.34344235397068473, "grad_norm": 1.3668849468231201, "learning_rate": 1.5276055185598916e-05, "loss": 0.5717, "step": 12559 }, { "epoch": 0.34346970028440166, "grad_norm": 2.4799394607543945, "learning_rate": 1.527530276851853e-05, "loss": 0.5374, "step": 12560 }, { "epoch": 0.3434970465981186, "grad_norm": 1.4822461605072021, "learning_rate": 1.5274550310054883e-05, "loss": 0.5632, "step": 12561 }, { "epoch": 0.3435243929118355, "grad_norm": 1.5512423515319824, "learning_rate": 1.527379781021387e-05, "loss": 0.5458, "step": 12562 }, { "epoch": 0.3435517392255524, "grad_norm": 0.9804496169090271, "learning_rate": 1.5273045269001402e-05, "loss": 0.5338, "step": 12563 }, { "epoch": 0.3435790855392693, "grad_norm": 2.2219934463500977, "learning_rate": 1.5272292686423372e-05, "loss": 0.8319, "step": 12564 }, { "epoch": 0.3436064318529862, "grad_norm": 1.4658418893814087, "learning_rate": 1.5271540062485692e-05, "loss": 0.5252, "step": 12565 }, { "epoch": 0.34363377816670315, "grad_norm": 1.490525245666504, "learning_rate": 1.5270787397194265e-05, "loss": 0.5623, "step": 12566 }, { "epoch": 0.34366112448042, "grad_norm": 1.3915467262268066, "learning_rate": 1.5270034690554992e-05, "loss": 0.5622, "step": 12567 }, { "epoch": 0.34368847079413695, "grad_norm": 1.642237663269043, "learning_rate": 1.5269281942573782e-05, "loss": 0.4756, "step": 12568 }, { "epoch": 0.34371581710785387, "grad_norm": 1.0878623723983765, "learning_rate": 1.5268529153256535e-05, "loss": 0.4858, "step": 12569 }, { "epoch": 0.3437431634215708, "grad_norm": 1.3333097696304321, "learning_rate": 1.526777632260916e-05, "loss": 0.5571, "step": 12570 }, { "epoch": 0.34377050973528767, "grad_norm": 1.1587955951690674, "learning_rate": 1.5267023450637565e-05, "loss": 0.5669, "step": 12571 }, { "epoch": 0.3437978560490046, "grad_norm": 1.4508110284805298, "learning_rate": 1.526627053734765e-05, "loss": 0.3996, "step": 12572 }, { "epoch": 0.3438252023627215, "grad_norm": 1.374772310256958, "learning_rate": 1.526551758274533e-05, "loss": 0.8363, "step": 12573 }, { "epoch": 0.34385254867643844, "grad_norm": 1.137858271598816, "learning_rate": 1.5264764586836503e-05, "loss": 0.5288, "step": 12574 }, { "epoch": 0.3438798949901553, "grad_norm": 1.1065053939819336, "learning_rate": 1.5264011549627077e-05, "loss": 0.5023, "step": 12575 }, { "epoch": 0.34390724130387224, "grad_norm": 1.147585153579712, "learning_rate": 1.5263258471122966e-05, "loss": 0.5233, "step": 12576 }, { "epoch": 0.34393458761758916, "grad_norm": 1.1833875179290771, "learning_rate": 1.526250535133007e-05, "loss": 0.5602, "step": 12577 }, { "epoch": 0.3439619339313061, "grad_norm": 1.2931758165359497, "learning_rate": 1.5261752190254305e-05, "loss": 0.5719, "step": 12578 }, { "epoch": 0.34398928024502295, "grad_norm": 1.5402638912200928, "learning_rate": 1.526099898790157e-05, "loss": 0.5857, "step": 12579 }, { "epoch": 0.3440166265587399, "grad_norm": 1.7955825328826904, "learning_rate": 1.5260245744277784e-05, "loss": 0.5848, "step": 12580 }, { "epoch": 0.3440439728724568, "grad_norm": 1.7792481184005737, "learning_rate": 1.5259492459388847e-05, "loss": 0.4489, "step": 12581 }, { "epoch": 0.34407131918617373, "grad_norm": 1.6080297231674194, "learning_rate": 1.525873913324067e-05, "loss": 0.5841, "step": 12582 }, { "epoch": 0.3440986654998906, "grad_norm": 1.3137489557266235, "learning_rate": 1.5257985765839167e-05, "loss": 0.5198, "step": 12583 }, { "epoch": 0.3441260118136075, "grad_norm": 1.6387900114059448, "learning_rate": 1.5257232357190246e-05, "loss": 0.5676, "step": 12584 }, { "epoch": 0.34415335812732445, "grad_norm": 1.8623967170715332, "learning_rate": 1.5256478907299814e-05, "loss": 0.5976, "step": 12585 }, { "epoch": 0.3441807044410414, "grad_norm": 1.764025092124939, "learning_rate": 1.5255725416173786e-05, "loss": 0.4611, "step": 12586 }, { "epoch": 0.34420805075475824, "grad_norm": 1.3141826391220093, "learning_rate": 1.5254971883818073e-05, "loss": 0.5348, "step": 12587 }, { "epoch": 0.34423539706847517, "grad_norm": 1.1418402194976807, "learning_rate": 1.525421831023858e-05, "loss": 0.5368, "step": 12588 }, { "epoch": 0.3442627433821921, "grad_norm": 1.2259836196899414, "learning_rate": 1.5253464695441226e-05, "loss": 0.4532, "step": 12589 }, { "epoch": 0.344290089695909, "grad_norm": 1.4391295909881592, "learning_rate": 1.525271103943192e-05, "loss": 0.5436, "step": 12590 }, { "epoch": 0.3443174360096259, "grad_norm": 1.7106692790985107, "learning_rate": 1.5251957342216572e-05, "loss": 0.8867, "step": 12591 }, { "epoch": 0.3443447823233428, "grad_norm": 1.3707817792892456, "learning_rate": 1.52512036038011e-05, "loss": 0.3895, "step": 12592 }, { "epoch": 0.34437212863705974, "grad_norm": 1.4991817474365234, "learning_rate": 1.525044982419141e-05, "loss": 0.8671, "step": 12593 }, { "epoch": 0.34439947495077666, "grad_norm": 1.5070915222167969, "learning_rate": 1.5249696003393419e-05, "loss": 0.5657, "step": 12594 }, { "epoch": 0.34442682126449353, "grad_norm": 1.3733974695205688, "learning_rate": 1.5248942141413044e-05, "loss": 0.5635, "step": 12595 }, { "epoch": 0.34445416757821046, "grad_norm": 1.3610174655914307, "learning_rate": 1.5248188238256192e-05, "loss": 0.5444, "step": 12596 }, { "epoch": 0.3444815138919274, "grad_norm": 1.5725085735321045, "learning_rate": 1.5247434293928781e-05, "loss": 0.4847, "step": 12597 }, { "epoch": 0.34450886020564425, "grad_norm": 1.5109199285507202, "learning_rate": 1.5246680308436727e-05, "loss": 0.5319, "step": 12598 }, { "epoch": 0.3445362065193612, "grad_norm": 4.378344535827637, "learning_rate": 1.5245926281785939e-05, "loss": 0.4245, "step": 12599 }, { "epoch": 0.3445635528330781, "grad_norm": 1.2344367504119873, "learning_rate": 1.5245172213982338e-05, "loss": 0.5376, "step": 12600 }, { "epoch": 0.344590899146795, "grad_norm": 1.401064395904541, "learning_rate": 1.5244418105031835e-05, "loss": 0.5622, "step": 12601 }, { "epoch": 0.3446182454605119, "grad_norm": 1.523712158203125, "learning_rate": 1.5243663954940347e-05, "loss": 0.8484, "step": 12602 }, { "epoch": 0.3446455917742288, "grad_norm": 1.2771570682525635, "learning_rate": 1.5242909763713797e-05, "loss": 0.5517, "step": 12603 }, { "epoch": 0.34467293808794575, "grad_norm": 1.4014265537261963, "learning_rate": 1.5242155531358088e-05, "loss": 0.8566, "step": 12604 }, { "epoch": 0.34470028440166267, "grad_norm": 1.2639926671981812, "learning_rate": 1.5241401257879147e-05, "loss": 0.5511, "step": 12605 }, { "epoch": 0.34472763071537954, "grad_norm": 1.2126812934875488, "learning_rate": 1.5240646943282889e-05, "loss": 0.4611, "step": 12606 }, { "epoch": 0.34475497702909647, "grad_norm": 1.4813296794891357, "learning_rate": 1.5239892587575228e-05, "loss": 0.8016, "step": 12607 }, { "epoch": 0.3447823233428134, "grad_norm": 2.039274215698242, "learning_rate": 1.5239138190762091e-05, "loss": 0.551, "step": 12608 }, { "epoch": 0.3448096696565303, "grad_norm": 1.4068657159805298, "learning_rate": 1.5238383752849382e-05, "loss": 0.5453, "step": 12609 }, { "epoch": 0.3448370159702472, "grad_norm": 1.3079530000686646, "learning_rate": 1.5237629273843028e-05, "loss": 0.4778, "step": 12610 }, { "epoch": 0.3448643622839641, "grad_norm": 1.3460017442703247, "learning_rate": 1.523687475374895e-05, "loss": 0.5442, "step": 12611 }, { "epoch": 0.34489170859768103, "grad_norm": 1.3766578435897827, "learning_rate": 1.523612019257306e-05, "loss": 0.5592, "step": 12612 }, { "epoch": 0.34491905491139796, "grad_norm": 1.597751259803772, "learning_rate": 1.5235365590321282e-05, "loss": 0.5747, "step": 12613 }, { "epoch": 0.34494640122511483, "grad_norm": 1.5591386556625366, "learning_rate": 1.5234610946999536e-05, "loss": 0.427, "step": 12614 }, { "epoch": 0.34497374753883175, "grad_norm": 3.756404399871826, "learning_rate": 1.5233856262613737e-05, "loss": 0.5328, "step": 12615 }, { "epoch": 0.3450010938525487, "grad_norm": 1.21957528591156, "learning_rate": 1.523310153716981e-05, "loss": 0.5316, "step": 12616 }, { "epoch": 0.3450284401662656, "grad_norm": 1.3532333374023438, "learning_rate": 1.5232346770673673e-05, "loss": 0.5361, "step": 12617 }, { "epoch": 0.3450557864799825, "grad_norm": 1.445525884628296, "learning_rate": 1.523159196313125e-05, "loss": 0.3839, "step": 12618 }, { "epoch": 0.3450831327936994, "grad_norm": 1.5556411743164062, "learning_rate": 1.523083711454846e-05, "loss": 0.5901, "step": 12619 }, { "epoch": 0.3451104791074163, "grad_norm": 1.4908428192138672, "learning_rate": 1.5230082224931225e-05, "loss": 0.5674, "step": 12620 }, { "epoch": 0.34513782542113325, "grad_norm": 1.48995840549469, "learning_rate": 1.5229327294285465e-05, "loss": 0.536, "step": 12621 }, { "epoch": 0.3451651717348501, "grad_norm": 1.4679455757141113, "learning_rate": 1.5228572322617106e-05, "loss": 0.5806, "step": 12622 }, { "epoch": 0.34519251804856704, "grad_norm": 1.336397647857666, "learning_rate": 1.5227817309932068e-05, "loss": 0.5445, "step": 12623 }, { "epoch": 0.34521986436228397, "grad_norm": 1.3017915487289429, "learning_rate": 1.5227062256236274e-05, "loss": 0.5791, "step": 12624 }, { "epoch": 0.3452472106760009, "grad_norm": 1.4381438493728638, "learning_rate": 1.5226307161535647e-05, "loss": 0.5514, "step": 12625 }, { "epoch": 0.34527455698971776, "grad_norm": 1.2315014600753784, "learning_rate": 1.522555202583611e-05, "loss": 0.5361, "step": 12626 }, { "epoch": 0.3453019033034347, "grad_norm": 1.5481129884719849, "learning_rate": 1.5224796849143593e-05, "loss": 0.4184, "step": 12627 }, { "epoch": 0.3453292496171516, "grad_norm": 1.3580995798110962, "learning_rate": 1.522404163146401e-05, "loss": 0.8526, "step": 12628 }, { "epoch": 0.34535659593086854, "grad_norm": 1.5393264293670654, "learning_rate": 1.5223286372803292e-05, "loss": 0.5368, "step": 12629 }, { "epoch": 0.3453839422445854, "grad_norm": 1.3435304164886475, "learning_rate": 1.5222531073167365e-05, "loss": 0.8298, "step": 12630 }, { "epoch": 0.34541128855830233, "grad_norm": 1.0685678720474243, "learning_rate": 1.5221775732562147e-05, "loss": 0.5378, "step": 12631 }, { "epoch": 0.34543863487201926, "grad_norm": 1.0705987215042114, "learning_rate": 1.5221020350993572e-05, "loss": 0.524, "step": 12632 }, { "epoch": 0.3454659811857362, "grad_norm": 1.0717689990997314, "learning_rate": 1.5220264928467563e-05, "loss": 0.5104, "step": 12633 }, { "epoch": 0.34549332749945305, "grad_norm": 1.1734646558761597, "learning_rate": 1.5219509464990041e-05, "loss": 0.5445, "step": 12634 }, { "epoch": 0.34552067381317, "grad_norm": 1.1821918487548828, "learning_rate": 1.5218753960566937e-05, "loss": 0.5403, "step": 12635 }, { "epoch": 0.3455480201268869, "grad_norm": 1.6235811710357666, "learning_rate": 1.5217998415204175e-05, "loss": 0.4095, "step": 12636 }, { "epoch": 0.3455753664406038, "grad_norm": 1.4954041242599487, "learning_rate": 1.5217242828907686e-05, "loss": 0.5905, "step": 12637 }, { "epoch": 0.3456027127543207, "grad_norm": 1.2575863599777222, "learning_rate": 1.5216487201683397e-05, "loss": 0.3864, "step": 12638 }, { "epoch": 0.3456300590680376, "grad_norm": 1.5309096574783325, "learning_rate": 1.521573153353723e-05, "loss": 0.5601, "step": 12639 }, { "epoch": 0.34565740538175455, "grad_norm": 1.3601192235946655, "learning_rate": 1.5214975824475117e-05, "loss": 0.4806, "step": 12640 }, { "epoch": 0.34568475169547147, "grad_norm": 1.3128303289413452, "learning_rate": 1.521422007450299e-05, "loss": 0.5188, "step": 12641 }, { "epoch": 0.34571209800918834, "grad_norm": 1.219617247581482, "learning_rate": 1.521346428362677e-05, "loss": 0.514, "step": 12642 }, { "epoch": 0.34573944432290526, "grad_norm": 1.5994898080825806, "learning_rate": 1.5212708451852397e-05, "loss": 0.4871, "step": 12643 }, { "epoch": 0.3457667906366222, "grad_norm": 1.3240363597869873, "learning_rate": 1.5211952579185784e-05, "loss": 0.5579, "step": 12644 }, { "epoch": 0.3457941369503391, "grad_norm": 1.3407846689224243, "learning_rate": 1.5211196665632874e-05, "loss": 0.4327, "step": 12645 }, { "epoch": 0.345821483264056, "grad_norm": 1.1709080934524536, "learning_rate": 1.5210440711199592e-05, "loss": 0.3665, "step": 12646 }, { "epoch": 0.3458488295777729, "grad_norm": 1.517194390296936, "learning_rate": 1.520968471589187e-05, "loss": 0.5668, "step": 12647 }, { "epoch": 0.34587617589148983, "grad_norm": 1.2628551721572876, "learning_rate": 1.520892867971564e-05, "loss": 0.5486, "step": 12648 }, { "epoch": 0.34590352220520676, "grad_norm": 1.1584738492965698, "learning_rate": 1.5208172602676825e-05, "loss": 0.539, "step": 12649 }, { "epoch": 0.34593086851892363, "grad_norm": 1.1779301166534424, "learning_rate": 1.5207416484781365e-05, "loss": 0.5554, "step": 12650 }, { "epoch": 0.34595821483264055, "grad_norm": 1.481650948524475, "learning_rate": 1.5206660326035187e-05, "loss": 0.5371, "step": 12651 }, { "epoch": 0.3459855611463575, "grad_norm": 1.3653998374938965, "learning_rate": 1.5205904126444227e-05, "loss": 0.5183, "step": 12652 }, { "epoch": 0.3460129074600744, "grad_norm": 1.6420942544937134, "learning_rate": 1.5205147886014411e-05, "loss": 0.4505, "step": 12653 }, { "epoch": 0.3460402537737913, "grad_norm": 1.3103587627410889, "learning_rate": 1.5204391604751677e-05, "loss": 0.5656, "step": 12654 }, { "epoch": 0.3460676000875082, "grad_norm": 1.7837709188461304, "learning_rate": 1.520363528266195e-05, "loss": 0.5397, "step": 12655 }, { "epoch": 0.3460949464012251, "grad_norm": 1.4786312580108643, "learning_rate": 1.5202878919751171e-05, "loss": 0.5492, "step": 12656 }, { "epoch": 0.34612229271494205, "grad_norm": 1.1091156005859375, "learning_rate": 1.5202122516025275e-05, "loss": 0.3963, "step": 12657 }, { "epoch": 0.3461496390286589, "grad_norm": 1.5292800664901733, "learning_rate": 1.5201366071490189e-05, "loss": 0.4685, "step": 12658 }, { "epoch": 0.34617698534237584, "grad_norm": 1.4536317586898804, "learning_rate": 1.520060958615185e-05, "loss": 0.5572, "step": 12659 }, { "epoch": 0.34620433165609277, "grad_norm": 1.2569665908813477, "learning_rate": 1.5199853060016194e-05, "loss": 0.5365, "step": 12660 }, { "epoch": 0.3462316779698097, "grad_norm": 1.4568651914596558, "learning_rate": 1.5199096493089152e-05, "loss": 0.4176, "step": 12661 }, { "epoch": 0.34625902428352656, "grad_norm": 1.1172754764556885, "learning_rate": 1.5198339885376663e-05, "loss": 0.544, "step": 12662 }, { "epoch": 0.3462863705972435, "grad_norm": 1.1203367710113525, "learning_rate": 1.519758323688466e-05, "loss": 0.5181, "step": 12663 }, { "epoch": 0.3463137169109604, "grad_norm": 1.1163181066513062, "learning_rate": 1.5196826547619075e-05, "loss": 0.5371, "step": 12664 }, { "epoch": 0.34634106322467734, "grad_norm": 1.2046157121658325, "learning_rate": 1.5196069817585854e-05, "loss": 0.535, "step": 12665 }, { "epoch": 0.3463684095383942, "grad_norm": 1.3187087774276733, "learning_rate": 1.5195313046790926e-05, "loss": 0.4642, "step": 12666 }, { "epoch": 0.34639575585211113, "grad_norm": 1.3711881637573242, "learning_rate": 1.519455623524023e-05, "loss": 0.5066, "step": 12667 }, { "epoch": 0.34642310216582806, "grad_norm": 1.9336779117584229, "learning_rate": 1.51937993829397e-05, "loss": 0.5421, "step": 12668 }, { "epoch": 0.346450448479545, "grad_norm": 1.2466710805892944, "learning_rate": 1.5193042489895278e-05, "loss": 0.5382, "step": 12669 }, { "epoch": 0.34647779479326185, "grad_norm": 1.5886812210083008, "learning_rate": 1.5192285556112896e-05, "loss": 0.5289, "step": 12670 }, { "epoch": 0.3465051411069788, "grad_norm": 1.0407897233963013, "learning_rate": 1.5191528581598499e-05, "loss": 0.5088, "step": 12671 }, { "epoch": 0.3465324874206957, "grad_norm": 1.392040729522705, "learning_rate": 1.5190771566358015e-05, "loss": 0.5838, "step": 12672 }, { "epoch": 0.3465598337344126, "grad_norm": 2.2023377418518066, "learning_rate": 1.5190014510397397e-05, "loss": 0.439, "step": 12673 }, { "epoch": 0.3465871800481295, "grad_norm": 1.1302549839019775, "learning_rate": 1.5189257413722568e-05, "loss": 0.5469, "step": 12674 }, { "epoch": 0.3466145263618464, "grad_norm": 1.3774733543395996, "learning_rate": 1.518850027633948e-05, "loss": 0.5338, "step": 12675 }, { "epoch": 0.34664187267556335, "grad_norm": 1.2834246158599854, "learning_rate": 1.5187743098254066e-05, "loss": 0.5308, "step": 12676 }, { "epoch": 0.34666921898928027, "grad_norm": 1.2032288312911987, "learning_rate": 1.5186985879472267e-05, "loss": 0.5419, "step": 12677 }, { "epoch": 0.34669656530299714, "grad_norm": 1.095847487449646, "learning_rate": 1.5186228620000025e-05, "loss": 0.549, "step": 12678 }, { "epoch": 0.34672391161671406, "grad_norm": 1.2778606414794922, "learning_rate": 1.518547131984328e-05, "loss": 0.5485, "step": 12679 }, { "epoch": 0.346751257930431, "grad_norm": 1.3684102296829224, "learning_rate": 1.5184713979007968e-05, "loss": 0.4728, "step": 12680 }, { "epoch": 0.3467786042441479, "grad_norm": 1.5086874961853027, "learning_rate": 1.5183956597500036e-05, "loss": 0.5742, "step": 12681 }, { "epoch": 0.3468059505578648, "grad_norm": 1.6724272966384888, "learning_rate": 1.518319917532542e-05, "loss": 0.5264, "step": 12682 }, { "epoch": 0.3468332968715817, "grad_norm": 2.638364553451538, "learning_rate": 1.5182441712490067e-05, "loss": 0.5577, "step": 12683 }, { "epoch": 0.34686064318529863, "grad_norm": 1.3995070457458496, "learning_rate": 1.518168420899992e-05, "loss": 0.5642, "step": 12684 }, { "epoch": 0.34688798949901556, "grad_norm": 1.2747377157211304, "learning_rate": 1.5180926664860919e-05, "loss": 0.5516, "step": 12685 }, { "epoch": 0.34691533581273243, "grad_norm": 1.274521827697754, "learning_rate": 1.5180169080079004e-05, "loss": 0.548, "step": 12686 }, { "epoch": 0.34694268212644935, "grad_norm": 1.2462304830551147, "learning_rate": 1.5179411454660123e-05, "loss": 0.5313, "step": 12687 }, { "epoch": 0.3469700284401663, "grad_norm": 1.5113924741744995, "learning_rate": 1.5178653788610214e-05, "loss": 0.5231, "step": 12688 }, { "epoch": 0.3469973747538832, "grad_norm": 1.3225523233413696, "learning_rate": 1.5177896081935223e-05, "loss": 0.4608, "step": 12689 }, { "epoch": 0.3470247210676001, "grad_norm": 1.6097909212112427, "learning_rate": 1.5177138334641096e-05, "loss": 0.521, "step": 12690 }, { "epoch": 0.347052067381317, "grad_norm": 1.3418477773666382, "learning_rate": 1.5176380546733775e-05, "loss": 0.5197, "step": 12691 }, { "epoch": 0.3470794136950339, "grad_norm": 1.563108205795288, "learning_rate": 1.5175622718219205e-05, "loss": 0.5583, "step": 12692 }, { "epoch": 0.34710676000875085, "grad_norm": 1.390464186668396, "learning_rate": 1.5174864849103332e-05, "loss": 0.5223, "step": 12693 }, { "epoch": 0.3471341063224677, "grad_norm": 2.1660780906677246, "learning_rate": 1.5174106939392101e-05, "loss": 0.3898, "step": 12694 }, { "epoch": 0.34716145263618464, "grad_norm": 1.3300821781158447, "learning_rate": 1.5173348989091459e-05, "loss": 0.4243, "step": 12695 }, { "epoch": 0.34718879894990157, "grad_norm": 1.1967209577560425, "learning_rate": 1.5172590998207346e-05, "loss": 0.5195, "step": 12696 }, { "epoch": 0.3472161452636185, "grad_norm": 1.2266732454299927, "learning_rate": 1.5171832966745715e-05, "loss": 0.5409, "step": 12697 }, { "epoch": 0.34724349157733536, "grad_norm": 1.2316797971725464, "learning_rate": 1.5171074894712508e-05, "loss": 0.5523, "step": 12698 }, { "epoch": 0.3472708378910523, "grad_norm": 1.6572096347808838, "learning_rate": 1.5170316782113674e-05, "loss": 0.8666, "step": 12699 }, { "epoch": 0.3472981842047692, "grad_norm": 1.429794192314148, "learning_rate": 1.5169558628955162e-05, "loss": 0.569, "step": 12700 }, { "epoch": 0.3473255305184861, "grad_norm": 1.6121420860290527, "learning_rate": 1.5168800435242914e-05, "loss": 0.546, "step": 12701 }, { "epoch": 0.347352876832203, "grad_norm": 1.6531368494033813, "learning_rate": 1.5168042200982884e-05, "loss": 0.5968, "step": 12702 }, { "epoch": 0.34738022314591993, "grad_norm": 1.3527008295059204, "learning_rate": 1.5167283926181019e-05, "loss": 0.5597, "step": 12703 }, { "epoch": 0.34740756945963686, "grad_norm": 2.848564624786377, "learning_rate": 1.5166525610843261e-05, "loss": 0.5484, "step": 12704 }, { "epoch": 0.3474349157733537, "grad_norm": 1.4668320417404175, "learning_rate": 1.5165767254975565e-05, "loss": 0.5282, "step": 12705 }, { "epoch": 0.34746226208707065, "grad_norm": 1.268501877784729, "learning_rate": 1.5165008858583883e-05, "loss": 0.5397, "step": 12706 }, { "epoch": 0.3474896084007876, "grad_norm": 1.4743939638137817, "learning_rate": 1.5164250421674153e-05, "loss": 0.5294, "step": 12707 }, { "epoch": 0.3475169547145045, "grad_norm": 1.3928554058074951, "learning_rate": 1.5163491944252338e-05, "loss": 0.5423, "step": 12708 }, { "epoch": 0.34754430102822137, "grad_norm": 1.4059549570083618, "learning_rate": 1.516273342632438e-05, "loss": 0.5421, "step": 12709 }, { "epoch": 0.3475716473419383, "grad_norm": 1.4215266704559326, "learning_rate": 1.5161974867896228e-05, "loss": 0.5559, "step": 12710 }, { "epoch": 0.3475989936556552, "grad_norm": 1.6899220943450928, "learning_rate": 1.5161216268973839e-05, "loss": 0.5448, "step": 12711 }, { "epoch": 0.34762633996937214, "grad_norm": 1.563830018043518, "learning_rate": 1.5160457629563161e-05, "loss": 0.4681, "step": 12712 }, { "epoch": 0.347653686283089, "grad_norm": 4.395621299743652, "learning_rate": 1.5159698949670145e-05, "loss": 0.8433, "step": 12713 }, { "epoch": 0.34768103259680594, "grad_norm": 2.295992612838745, "learning_rate": 1.515894022930074e-05, "loss": 0.4364, "step": 12714 }, { "epoch": 0.34770837891052286, "grad_norm": 1.3783855438232422, "learning_rate": 1.5158181468460902e-05, "loss": 0.5373, "step": 12715 }, { "epoch": 0.3477357252242398, "grad_norm": 1.4240007400512695, "learning_rate": 1.5157422667156585e-05, "loss": 0.5096, "step": 12716 }, { "epoch": 0.34776307153795666, "grad_norm": 1.3174723386764526, "learning_rate": 1.5156663825393736e-05, "loss": 0.5721, "step": 12717 }, { "epoch": 0.3477904178516736, "grad_norm": 1.3217287063598633, "learning_rate": 1.5155904943178308e-05, "loss": 0.531, "step": 12718 }, { "epoch": 0.3478177641653905, "grad_norm": 1.6037496328353882, "learning_rate": 1.5155146020516262e-05, "loss": 0.5857, "step": 12719 }, { "epoch": 0.34784511047910743, "grad_norm": 1.483542799949646, "learning_rate": 1.5154387057413544e-05, "loss": 0.4757, "step": 12720 }, { "epoch": 0.3478724567928243, "grad_norm": 1.2236897945404053, "learning_rate": 1.515362805387611e-05, "loss": 0.5187, "step": 12721 }, { "epoch": 0.3478998031065412, "grad_norm": 1.636279582977295, "learning_rate": 1.5152869009909917e-05, "loss": 0.851, "step": 12722 }, { "epoch": 0.34792714942025815, "grad_norm": 1.4220021963119507, "learning_rate": 1.5152109925520912e-05, "loss": 0.5657, "step": 12723 }, { "epoch": 0.3479544957339751, "grad_norm": 1.8763930797576904, "learning_rate": 1.5151350800715058e-05, "loss": 0.5613, "step": 12724 }, { "epoch": 0.34798184204769195, "grad_norm": 1.1731241941452026, "learning_rate": 1.5150591635498304e-05, "loss": 0.5512, "step": 12725 }, { "epoch": 0.34800918836140887, "grad_norm": 1.5052087306976318, "learning_rate": 1.514983242987661e-05, "loss": 0.5261, "step": 12726 }, { "epoch": 0.3480365346751258, "grad_norm": 1.194219708442688, "learning_rate": 1.5149073183855931e-05, "loss": 0.521, "step": 12727 }, { "epoch": 0.3480638809888427, "grad_norm": 1.349724531173706, "learning_rate": 1.514831389744222e-05, "loss": 0.4073, "step": 12728 }, { "epoch": 0.3480912273025596, "grad_norm": 1.4053353071212769, "learning_rate": 1.5147554570641432e-05, "loss": 0.4482, "step": 12729 }, { "epoch": 0.3481185736162765, "grad_norm": 1.3081945180892944, "learning_rate": 1.5146795203459532e-05, "loss": 0.5243, "step": 12730 }, { "epoch": 0.34814591992999344, "grad_norm": 1.4263170957565308, "learning_rate": 1.5146035795902472e-05, "loss": 0.5448, "step": 12731 }, { "epoch": 0.34817326624371037, "grad_norm": 1.3867496252059937, "learning_rate": 1.5145276347976208e-05, "loss": 0.8505, "step": 12732 }, { "epoch": 0.34820061255742724, "grad_norm": 1.3469492197036743, "learning_rate": 1.5144516859686697e-05, "loss": 0.5567, "step": 12733 }, { "epoch": 0.34822795887114416, "grad_norm": 1.0949170589447021, "learning_rate": 1.5143757331039903e-05, "loss": 0.5604, "step": 12734 }, { "epoch": 0.3482553051848611, "grad_norm": 1.262188196182251, "learning_rate": 1.5142997762041777e-05, "loss": 0.5229, "step": 12735 }, { "epoch": 0.348282651498578, "grad_norm": 1.3113963603973389, "learning_rate": 1.5142238152698283e-05, "loss": 0.527, "step": 12736 }, { "epoch": 0.3483099978122949, "grad_norm": 1.34225332736969, "learning_rate": 1.5141478503015376e-05, "loss": 0.5216, "step": 12737 }, { "epoch": 0.3483373441260118, "grad_norm": 1.5870764255523682, "learning_rate": 1.5140718812999023e-05, "loss": 0.5322, "step": 12738 }, { "epoch": 0.34836469043972873, "grad_norm": 1.5538203716278076, "learning_rate": 1.5139959082655171e-05, "loss": 0.5458, "step": 12739 }, { "epoch": 0.34839203675344566, "grad_norm": 1.0729750394821167, "learning_rate": 1.5139199311989788e-05, "loss": 0.5116, "step": 12740 }, { "epoch": 0.3484193830671625, "grad_norm": 1.2544838190078735, "learning_rate": 1.5138439501008835e-05, "loss": 0.5463, "step": 12741 }, { "epoch": 0.34844672938087945, "grad_norm": 1.8602396249771118, "learning_rate": 1.5137679649718267e-05, "loss": 0.4074, "step": 12742 }, { "epoch": 0.3484740756945964, "grad_norm": 1.416141152381897, "learning_rate": 1.5136919758124049e-05, "loss": 0.4723, "step": 12743 }, { "epoch": 0.3485014220083133, "grad_norm": 1.5079048871994019, "learning_rate": 1.5136159826232143e-05, "loss": 0.5444, "step": 12744 }, { "epoch": 0.34852876832203017, "grad_norm": 1.8822799921035767, "learning_rate": 1.5135399854048504e-05, "loss": 0.5703, "step": 12745 }, { "epoch": 0.3485561146357471, "grad_norm": 1.5027310848236084, "learning_rate": 1.5134639841579104e-05, "loss": 0.5476, "step": 12746 }, { "epoch": 0.348583460949464, "grad_norm": 1.324903964996338, "learning_rate": 1.5133879788829897e-05, "loss": 0.5443, "step": 12747 }, { "epoch": 0.34861080726318094, "grad_norm": 1.4961950778961182, "learning_rate": 1.5133119695806845e-05, "loss": 0.5352, "step": 12748 }, { "epoch": 0.3486381535768978, "grad_norm": 1.480228066444397, "learning_rate": 1.5132359562515919e-05, "loss": 0.5097, "step": 12749 }, { "epoch": 0.34866549989061474, "grad_norm": 1.2003984451293945, "learning_rate": 1.5131599388963074e-05, "loss": 0.5374, "step": 12750 }, { "epoch": 0.34869284620433166, "grad_norm": 1.3930864334106445, "learning_rate": 1.5130839175154276e-05, "loss": 0.5322, "step": 12751 }, { "epoch": 0.3487201925180486, "grad_norm": 1.2000292539596558, "learning_rate": 1.5130078921095488e-05, "loss": 0.5427, "step": 12752 }, { "epoch": 0.34874753883176546, "grad_norm": 1.1391856670379639, "learning_rate": 1.5129318626792675e-05, "loss": 0.5252, "step": 12753 }, { "epoch": 0.3487748851454824, "grad_norm": 1.164442539215088, "learning_rate": 1.5128558292251803e-05, "loss": 0.5316, "step": 12754 }, { "epoch": 0.3488022314591993, "grad_norm": 1.186334252357483, "learning_rate": 1.512779791747883e-05, "loss": 0.4208, "step": 12755 }, { "epoch": 0.34882957777291623, "grad_norm": 1.2458769083023071, "learning_rate": 1.5127037502479728e-05, "loss": 0.5235, "step": 12756 }, { "epoch": 0.3488569240866331, "grad_norm": 1.4221128225326538, "learning_rate": 1.5126277047260462e-05, "loss": 0.4757, "step": 12757 }, { "epoch": 0.34888427040035, "grad_norm": 1.1672298908233643, "learning_rate": 1.5125516551826991e-05, "loss": 0.362, "step": 12758 }, { "epoch": 0.34891161671406695, "grad_norm": 1.2448346614837646, "learning_rate": 1.5124756016185286e-05, "loss": 0.5651, "step": 12759 }, { "epoch": 0.3489389630277839, "grad_norm": 1.830802083015442, "learning_rate": 1.5123995440341315e-05, "loss": 0.582, "step": 12760 }, { "epoch": 0.34896630934150075, "grad_norm": 1.1728194952011108, "learning_rate": 1.512323482430104e-05, "loss": 0.515, "step": 12761 }, { "epoch": 0.34899365565521767, "grad_norm": 1.4139996767044067, "learning_rate": 1.512247416807043e-05, "loss": 0.5354, "step": 12762 }, { "epoch": 0.3490210019689346, "grad_norm": 1.1304552555084229, "learning_rate": 1.5121713471655452e-05, "loss": 0.5354, "step": 12763 }, { "epoch": 0.3490483482826515, "grad_norm": 1.2591255903244019, "learning_rate": 1.5120952735062071e-05, "loss": 0.5595, "step": 12764 }, { "epoch": 0.3490756945963684, "grad_norm": 1.3991955518722534, "learning_rate": 1.512019195829626e-05, "loss": 0.5701, "step": 12765 }, { "epoch": 0.3491030409100853, "grad_norm": 1.5464115142822266, "learning_rate": 1.5119431141363983e-05, "loss": 0.5543, "step": 12766 }, { "epoch": 0.34913038722380224, "grad_norm": 1.140435814857483, "learning_rate": 1.5118670284271209e-05, "loss": 0.518, "step": 12767 }, { "epoch": 0.34915773353751917, "grad_norm": 1.3444993495941162, "learning_rate": 1.5117909387023905e-05, "loss": 0.6058, "step": 12768 }, { "epoch": 0.34918507985123604, "grad_norm": 1.0286340713500977, "learning_rate": 1.5117148449628046e-05, "loss": 0.4951, "step": 12769 }, { "epoch": 0.34921242616495296, "grad_norm": 1.2070401906967163, "learning_rate": 1.5116387472089598e-05, "loss": 0.5285, "step": 12770 }, { "epoch": 0.3492397724786699, "grad_norm": 1.3049252033233643, "learning_rate": 1.5115626454414525e-05, "loss": 0.5258, "step": 12771 }, { "epoch": 0.3492671187923868, "grad_norm": 1.4083173274993896, "learning_rate": 1.5114865396608804e-05, "loss": 0.4703, "step": 12772 }, { "epoch": 0.3492944651061037, "grad_norm": 1.3352164030075073, "learning_rate": 1.5114104298678407e-05, "loss": 0.8589, "step": 12773 }, { "epoch": 0.3493218114198206, "grad_norm": 1.4205361604690552, "learning_rate": 1.51133431606293e-05, "loss": 0.5612, "step": 12774 }, { "epoch": 0.34934915773353753, "grad_norm": 1.266778826713562, "learning_rate": 1.5112581982467452e-05, "loss": 0.537, "step": 12775 }, { "epoch": 0.34937650404725445, "grad_norm": 1.368504524230957, "learning_rate": 1.5111820764198839e-05, "loss": 0.5446, "step": 12776 }, { "epoch": 0.3494038503609713, "grad_norm": 1.22798752784729, "learning_rate": 1.5111059505829429e-05, "loss": 0.5526, "step": 12777 }, { "epoch": 0.34943119667468825, "grad_norm": 1.227861762046814, "learning_rate": 1.51102982073652e-05, "loss": 0.5154, "step": 12778 }, { "epoch": 0.3494585429884052, "grad_norm": 1.1828683614730835, "learning_rate": 1.5109536868812114e-05, "loss": 0.5165, "step": 12779 }, { "epoch": 0.3494858893021221, "grad_norm": 1.3093266487121582, "learning_rate": 1.5108775490176152e-05, "loss": 0.5268, "step": 12780 }, { "epoch": 0.34951323561583897, "grad_norm": 1.4516702890396118, "learning_rate": 1.5108014071463284e-05, "loss": 0.5332, "step": 12781 }, { "epoch": 0.3495405819295559, "grad_norm": 1.1185040473937988, "learning_rate": 1.5107252612679482e-05, "loss": 0.5245, "step": 12782 }, { "epoch": 0.3495679282432728, "grad_norm": 11.688499450683594, "learning_rate": 1.5106491113830722e-05, "loss": 0.5342, "step": 12783 }, { "epoch": 0.34959527455698974, "grad_norm": 1.3219852447509766, "learning_rate": 1.5105729574922975e-05, "loss": 0.5397, "step": 12784 }, { "epoch": 0.3496226208707066, "grad_norm": 1.3651471138000488, "learning_rate": 1.5104967995962217e-05, "loss": 0.4543, "step": 12785 }, { "epoch": 0.34964996718442354, "grad_norm": 1.1854755878448486, "learning_rate": 1.5104206376954422e-05, "loss": 0.5636, "step": 12786 }, { "epoch": 0.34967731349814046, "grad_norm": 2.191513776779175, "learning_rate": 1.5103444717905565e-05, "loss": 0.5157, "step": 12787 }, { "epoch": 0.3497046598118574, "grad_norm": 1.3381495475769043, "learning_rate": 1.510268301882162e-05, "loss": 0.5442, "step": 12788 }, { "epoch": 0.34973200612557426, "grad_norm": 1.08432137966156, "learning_rate": 1.5101921279708563e-05, "loss": 0.4241, "step": 12789 }, { "epoch": 0.3497593524392912, "grad_norm": 1.5940254926681519, "learning_rate": 1.5101159500572369e-05, "loss": 0.5421, "step": 12790 }, { "epoch": 0.3497866987530081, "grad_norm": 1.2492587566375732, "learning_rate": 1.5100397681419013e-05, "loss": 0.875, "step": 12791 }, { "epoch": 0.34981404506672503, "grad_norm": 1.113470435142517, "learning_rate": 1.5099635822254475e-05, "loss": 0.5192, "step": 12792 }, { "epoch": 0.3498413913804419, "grad_norm": 1.5830169916152954, "learning_rate": 1.5098873923084727e-05, "loss": 0.5532, "step": 12793 }, { "epoch": 0.3498687376941588, "grad_norm": 1.6068148612976074, "learning_rate": 1.5098111983915749e-05, "loss": 0.5383, "step": 12794 }, { "epoch": 0.34989608400787575, "grad_norm": 1.1998460292816162, "learning_rate": 1.5097350004753519e-05, "loss": 0.5524, "step": 12795 }, { "epoch": 0.3499234303215927, "grad_norm": 1.3432183265686035, "learning_rate": 1.509658798560401e-05, "loss": 0.5431, "step": 12796 }, { "epoch": 0.34995077663530955, "grad_norm": 1.2133934497833252, "learning_rate": 1.5095825926473202e-05, "loss": 0.5689, "step": 12797 }, { "epoch": 0.34997812294902647, "grad_norm": 1.125630259513855, "learning_rate": 1.5095063827367076e-05, "loss": 0.5471, "step": 12798 }, { "epoch": 0.3500054692627434, "grad_norm": 1.1542145013809204, "learning_rate": 1.5094301688291605e-05, "loss": 0.5175, "step": 12799 }, { "epoch": 0.35003281557646027, "grad_norm": 1.6387686729431152, "learning_rate": 1.5093539509252775e-05, "loss": 0.4686, "step": 12800 }, { "epoch": 0.3500601618901772, "grad_norm": 1.1947318315505981, "learning_rate": 1.5092777290256559e-05, "loss": 0.531, "step": 12801 }, { "epoch": 0.3500875082038941, "grad_norm": 1.2912336587905884, "learning_rate": 1.5092015031308938e-05, "loss": 0.4117, "step": 12802 }, { "epoch": 0.35011485451761104, "grad_norm": 1.2816107273101807, "learning_rate": 1.5091252732415892e-05, "loss": 0.4941, "step": 12803 }, { "epoch": 0.3501422008313279, "grad_norm": 1.2647995948791504, "learning_rate": 1.5090490393583403e-05, "loss": 0.5628, "step": 12804 }, { "epoch": 0.35016954714504483, "grad_norm": 1.2724987268447876, "learning_rate": 1.508972801481745e-05, "loss": 0.8593, "step": 12805 }, { "epoch": 0.35019689345876176, "grad_norm": 1.6213051080703735, "learning_rate": 1.508896559612401e-05, "loss": 0.5875, "step": 12806 }, { "epoch": 0.3502242397724787, "grad_norm": 1.525185227394104, "learning_rate": 1.5088203137509067e-05, "loss": 0.8382, "step": 12807 }, { "epoch": 0.35025158608619555, "grad_norm": 1.2832427024841309, "learning_rate": 1.5087440638978606e-05, "loss": 0.5491, "step": 12808 }, { "epoch": 0.3502789323999125, "grad_norm": 1.5141786336898804, "learning_rate": 1.5086678100538602e-05, "loss": 0.5467, "step": 12809 }, { "epoch": 0.3503062787136294, "grad_norm": 1.755128026008606, "learning_rate": 1.5085915522195041e-05, "loss": 0.4312, "step": 12810 }, { "epoch": 0.35033362502734633, "grad_norm": 1.514392375946045, "learning_rate": 1.5085152903953901e-05, "loss": 0.5259, "step": 12811 }, { "epoch": 0.3503609713410632, "grad_norm": 1.5020781755447388, "learning_rate": 1.508439024582117e-05, "loss": 0.873, "step": 12812 }, { "epoch": 0.3503883176547801, "grad_norm": 1.3707289695739746, "learning_rate": 1.5083627547802829e-05, "loss": 0.5422, "step": 12813 }, { "epoch": 0.35041566396849705, "grad_norm": 1.3986356258392334, "learning_rate": 1.508286480990486e-05, "loss": 0.5266, "step": 12814 }, { "epoch": 0.350443010282214, "grad_norm": 1.4398523569107056, "learning_rate": 1.5082102032133246e-05, "loss": 0.522, "step": 12815 }, { "epoch": 0.35047035659593084, "grad_norm": 1.543126106262207, "learning_rate": 1.5081339214493974e-05, "loss": 0.5466, "step": 12816 }, { "epoch": 0.35049770290964777, "grad_norm": 1.2252404689788818, "learning_rate": 1.5080576356993024e-05, "loss": 0.5485, "step": 12817 }, { "epoch": 0.3505250492233647, "grad_norm": 1.3328882455825806, "learning_rate": 1.5079813459636382e-05, "loss": 0.5412, "step": 12818 }, { "epoch": 0.3505523955370816, "grad_norm": 1.2493488788604736, "learning_rate": 1.5079050522430034e-05, "loss": 0.5274, "step": 12819 }, { "epoch": 0.3505797418507985, "grad_norm": 1.3504012823104858, "learning_rate": 1.5078287545379966e-05, "loss": 0.5438, "step": 12820 }, { "epoch": 0.3506070881645154, "grad_norm": 1.2489999532699585, "learning_rate": 1.507752452849216e-05, "loss": 0.5424, "step": 12821 }, { "epoch": 0.35063443447823234, "grad_norm": 1.5017606019973755, "learning_rate": 1.50767614717726e-05, "loss": 0.4702, "step": 12822 }, { "epoch": 0.35066178079194926, "grad_norm": 1.0590286254882812, "learning_rate": 1.5075998375227277e-05, "loss": 0.5254, "step": 12823 }, { "epoch": 0.35068912710566613, "grad_norm": 1.2118946313858032, "learning_rate": 1.5075235238862176e-05, "loss": 0.5477, "step": 12824 }, { "epoch": 0.35071647341938306, "grad_norm": 1.1216440200805664, "learning_rate": 1.5074472062683284e-05, "loss": 0.5202, "step": 12825 }, { "epoch": 0.3507438197331, "grad_norm": 1.3591622114181519, "learning_rate": 1.5073708846696585e-05, "loss": 0.5431, "step": 12826 }, { "epoch": 0.3507711660468169, "grad_norm": 1.238039255142212, "learning_rate": 1.5072945590908067e-05, "loss": 0.4291, "step": 12827 }, { "epoch": 0.3507985123605338, "grad_norm": 1.5600156784057617, "learning_rate": 1.507218229532372e-05, "loss": 0.5575, "step": 12828 }, { "epoch": 0.3508258586742507, "grad_norm": 1.1303151845932007, "learning_rate": 1.5071418959949529e-05, "loss": 0.5431, "step": 12829 }, { "epoch": 0.3508532049879676, "grad_norm": 1.2855443954467773, "learning_rate": 1.5070655584791487e-05, "loss": 0.5032, "step": 12830 }, { "epoch": 0.35088055130168455, "grad_norm": 1.2835230827331543, "learning_rate": 1.5069892169855575e-05, "loss": 0.5275, "step": 12831 }, { "epoch": 0.3509078976154014, "grad_norm": 1.0907188653945923, "learning_rate": 1.5069128715147787e-05, "loss": 0.546, "step": 12832 }, { "epoch": 0.35093524392911835, "grad_norm": 1.3609216213226318, "learning_rate": 1.5068365220674111e-05, "loss": 0.5387, "step": 12833 }, { "epoch": 0.35096259024283527, "grad_norm": 1.2195998430252075, "learning_rate": 1.5067601686440534e-05, "loss": 0.546, "step": 12834 }, { "epoch": 0.3509899365565522, "grad_norm": 1.565894603729248, "learning_rate": 1.5066838112453051e-05, "loss": 0.3994, "step": 12835 }, { "epoch": 0.35101728287026907, "grad_norm": 1.5034306049346924, "learning_rate": 1.5066074498717646e-05, "loss": 0.4806, "step": 12836 }, { "epoch": 0.351044629183986, "grad_norm": 1.3628093004226685, "learning_rate": 1.5065310845240316e-05, "loss": 0.5535, "step": 12837 }, { "epoch": 0.3510719754977029, "grad_norm": 1.1643564701080322, "learning_rate": 1.5064547152027045e-05, "loss": 0.5304, "step": 12838 }, { "epoch": 0.35109932181141984, "grad_norm": 1.125135898590088, "learning_rate": 1.5063783419083828e-05, "loss": 0.5534, "step": 12839 }, { "epoch": 0.3511266681251367, "grad_norm": 1.39322030544281, "learning_rate": 1.5063019646416656e-05, "loss": 0.5388, "step": 12840 }, { "epoch": 0.35115401443885363, "grad_norm": 1.3978923559188843, "learning_rate": 1.5062255834031518e-05, "loss": 0.558, "step": 12841 }, { "epoch": 0.35118136075257056, "grad_norm": 1.6095408201217651, "learning_rate": 1.5061491981934408e-05, "loss": 0.5187, "step": 12842 }, { "epoch": 0.3512087070662875, "grad_norm": 1.406186580657959, "learning_rate": 1.5060728090131319e-05, "loss": 0.5227, "step": 12843 }, { "epoch": 0.35123605338000435, "grad_norm": 1.4349653720855713, "learning_rate": 1.5059964158628244e-05, "loss": 0.5526, "step": 12844 }, { "epoch": 0.3512633996937213, "grad_norm": 1.4433645009994507, "learning_rate": 1.505920018743117e-05, "loss": 0.4681, "step": 12845 }, { "epoch": 0.3512907460074382, "grad_norm": 1.3498564958572388, "learning_rate": 1.5058436176546098e-05, "loss": 0.5387, "step": 12846 }, { "epoch": 0.35131809232115513, "grad_norm": 1.383240818977356, "learning_rate": 1.5057672125979015e-05, "loss": 0.5562, "step": 12847 }, { "epoch": 0.351345438634872, "grad_norm": 1.2040196657180786, "learning_rate": 1.5056908035735917e-05, "loss": 0.519, "step": 12848 }, { "epoch": 0.3513727849485889, "grad_norm": 1.513251781463623, "learning_rate": 1.50561439058228e-05, "loss": 0.869, "step": 12849 }, { "epoch": 0.35140013126230585, "grad_norm": 1.2131824493408203, "learning_rate": 1.5055379736245658e-05, "loss": 0.5578, "step": 12850 }, { "epoch": 0.3514274775760228, "grad_norm": 1.1310269832611084, "learning_rate": 1.5054615527010485e-05, "loss": 0.5541, "step": 12851 }, { "epoch": 0.35145482388973964, "grad_norm": 1.302082896232605, "learning_rate": 1.5053851278123273e-05, "loss": 0.5123, "step": 12852 }, { "epoch": 0.35148217020345657, "grad_norm": 1.1432629823684692, "learning_rate": 1.505308698959002e-05, "loss": 0.5375, "step": 12853 }, { "epoch": 0.3515095165171735, "grad_norm": 1.1855918169021606, "learning_rate": 1.5052322661416722e-05, "loss": 0.5651, "step": 12854 }, { "epoch": 0.3515368628308904, "grad_norm": 1.4205862283706665, "learning_rate": 1.5051558293609377e-05, "loss": 0.5635, "step": 12855 }, { "epoch": 0.3515642091446073, "grad_norm": 1.3203814029693604, "learning_rate": 1.5050793886173978e-05, "loss": 0.5507, "step": 12856 }, { "epoch": 0.3515915554583242, "grad_norm": 1.3305455446243286, "learning_rate": 1.5050029439116523e-05, "loss": 0.4174, "step": 12857 }, { "epoch": 0.35161890177204114, "grad_norm": 1.3613865375518799, "learning_rate": 1.5049264952443006e-05, "loss": 0.5391, "step": 12858 }, { "epoch": 0.35164624808575806, "grad_norm": 1.2704813480377197, "learning_rate": 1.5048500426159426e-05, "loss": 0.5298, "step": 12859 }, { "epoch": 0.35167359439947493, "grad_norm": 1.213823676109314, "learning_rate": 1.5047735860271782e-05, "loss": 0.5331, "step": 12860 }, { "epoch": 0.35170094071319186, "grad_norm": 1.3410086631774902, "learning_rate": 1.5046971254786073e-05, "loss": 0.5179, "step": 12861 }, { "epoch": 0.3517282870269088, "grad_norm": 1.106117844581604, "learning_rate": 1.5046206609708293e-05, "loss": 0.5577, "step": 12862 }, { "epoch": 0.3517556333406257, "grad_norm": 1.3316009044647217, "learning_rate": 1.5045441925044442e-05, "loss": 0.5428, "step": 12863 }, { "epoch": 0.3517829796543426, "grad_norm": 1.203681468963623, "learning_rate": 1.5044677200800518e-05, "loss": 0.5393, "step": 12864 }, { "epoch": 0.3518103259680595, "grad_norm": 1.4654335975646973, "learning_rate": 1.5043912436982523e-05, "loss": 0.8337, "step": 12865 }, { "epoch": 0.3518376722817764, "grad_norm": 1.3495562076568604, "learning_rate": 1.5043147633596454e-05, "loss": 0.5208, "step": 12866 }, { "epoch": 0.35186501859549335, "grad_norm": 1.5097217559814453, "learning_rate": 1.504238279064831e-05, "loss": 0.8304, "step": 12867 }, { "epoch": 0.3518923649092102, "grad_norm": 1.7357441186904907, "learning_rate": 1.5041617908144094e-05, "loss": 0.5314, "step": 12868 }, { "epoch": 0.35191971122292715, "grad_norm": 1.646597981452942, "learning_rate": 1.5040852986089803e-05, "loss": 0.5665, "step": 12869 }, { "epoch": 0.35194705753664407, "grad_norm": 1.474704623222351, "learning_rate": 1.504008802449144e-05, "loss": 0.5225, "step": 12870 }, { "epoch": 0.351974403850361, "grad_norm": 1.210537314414978, "learning_rate": 1.5039323023355002e-05, "loss": 0.5638, "step": 12871 }, { "epoch": 0.35200175016407786, "grad_norm": 1.168878197669983, "learning_rate": 1.5038557982686495e-05, "loss": 0.519, "step": 12872 }, { "epoch": 0.3520290964777948, "grad_norm": 1.4149178266525269, "learning_rate": 1.503779290249192e-05, "loss": 0.5654, "step": 12873 }, { "epoch": 0.3520564427915117, "grad_norm": 1.2824870347976685, "learning_rate": 1.5037027782777274e-05, "loss": 0.5609, "step": 12874 }, { "epoch": 0.35208378910522864, "grad_norm": 1.2736543416976929, "learning_rate": 1.5036262623548567e-05, "loss": 0.532, "step": 12875 }, { "epoch": 0.3521111354189455, "grad_norm": 2.133129358291626, "learning_rate": 1.5035497424811794e-05, "loss": 0.8775, "step": 12876 }, { "epoch": 0.35213848173266243, "grad_norm": 1.6919089555740356, "learning_rate": 1.503473218657296e-05, "loss": 0.883, "step": 12877 }, { "epoch": 0.35216582804637936, "grad_norm": 1.3274564743041992, "learning_rate": 1.503396690883807e-05, "loss": 0.5517, "step": 12878 }, { "epoch": 0.3521931743600963, "grad_norm": 1.346125841140747, "learning_rate": 1.503320159161313e-05, "loss": 0.5253, "step": 12879 }, { "epoch": 0.35222052067381315, "grad_norm": 1.4592244625091553, "learning_rate": 1.5032436234904136e-05, "loss": 0.5006, "step": 12880 }, { "epoch": 0.3522478669875301, "grad_norm": 1.5436097383499146, "learning_rate": 1.5031670838717096e-05, "loss": 0.5063, "step": 12881 }, { "epoch": 0.352275213301247, "grad_norm": 1.2927842140197754, "learning_rate": 1.5030905403058013e-05, "loss": 0.5392, "step": 12882 }, { "epoch": 0.35230255961496393, "grad_norm": 1.2715122699737549, "learning_rate": 1.5030139927932897e-05, "loss": 0.566, "step": 12883 }, { "epoch": 0.3523299059286808, "grad_norm": 1.2064918279647827, "learning_rate": 1.5029374413347748e-05, "loss": 0.5514, "step": 12884 }, { "epoch": 0.3523572522423977, "grad_norm": 1.583289623260498, "learning_rate": 1.502860885930857e-05, "loss": 0.4471, "step": 12885 }, { "epoch": 0.35238459855611465, "grad_norm": 1.3900794982910156, "learning_rate": 1.5027843265821371e-05, "loss": 0.5521, "step": 12886 }, { "epoch": 0.3524119448698316, "grad_norm": 1.2956883907318115, "learning_rate": 1.5027077632892155e-05, "loss": 0.5353, "step": 12887 }, { "epoch": 0.35243929118354844, "grad_norm": 1.4163650274276733, "learning_rate": 1.5026311960526933e-05, "loss": 0.5344, "step": 12888 }, { "epoch": 0.35246663749726537, "grad_norm": 1.3989320993423462, "learning_rate": 1.5025546248731709e-05, "loss": 0.5323, "step": 12889 }, { "epoch": 0.3524939838109823, "grad_norm": 1.2372902631759644, "learning_rate": 1.5024780497512485e-05, "loss": 0.5272, "step": 12890 }, { "epoch": 0.3525213301246992, "grad_norm": 1.3644070625305176, "learning_rate": 1.5024014706875272e-05, "loss": 0.4123, "step": 12891 }, { "epoch": 0.3525486764384161, "grad_norm": 1.2604399919509888, "learning_rate": 1.5023248876826083e-05, "loss": 0.5291, "step": 12892 }, { "epoch": 0.352576022752133, "grad_norm": 1.1088992357254028, "learning_rate": 1.5022483007370915e-05, "loss": 0.5544, "step": 12893 }, { "epoch": 0.35260336906584994, "grad_norm": 1.1943343877792358, "learning_rate": 1.5021717098515785e-05, "loss": 0.5446, "step": 12894 }, { "epoch": 0.35263071537956686, "grad_norm": 1.2987403869628906, "learning_rate": 1.5020951150266694e-05, "loss": 0.5714, "step": 12895 }, { "epoch": 0.35265806169328373, "grad_norm": 3.8585000038146973, "learning_rate": 1.5020185162629656e-05, "loss": 0.9241, "step": 12896 }, { "epoch": 0.35268540800700066, "grad_norm": 1.200020432472229, "learning_rate": 1.501941913561068e-05, "loss": 0.5604, "step": 12897 }, { "epoch": 0.3527127543207176, "grad_norm": 1.3430477380752563, "learning_rate": 1.5018653069215772e-05, "loss": 0.5024, "step": 12898 }, { "epoch": 0.3527401006344345, "grad_norm": 1.2780755758285522, "learning_rate": 1.501788696345094e-05, "loss": 0.5158, "step": 12899 }, { "epoch": 0.3527674469481514, "grad_norm": 1.0770152807235718, "learning_rate": 1.5017120818322202e-05, "loss": 0.564, "step": 12900 }, { "epoch": 0.3527947932618683, "grad_norm": 1.1074856519699097, "learning_rate": 1.5016354633835559e-05, "loss": 0.5464, "step": 12901 }, { "epoch": 0.3528221395755852, "grad_norm": 1.3496098518371582, "learning_rate": 1.5015588409997026e-05, "loss": 0.8404, "step": 12902 }, { "epoch": 0.3528494858893021, "grad_norm": 1.002358078956604, "learning_rate": 1.5014822146812615e-05, "loss": 0.5247, "step": 12903 }, { "epoch": 0.352876832203019, "grad_norm": 1.6218578815460205, "learning_rate": 1.5014055844288336e-05, "loss": 0.5738, "step": 12904 }, { "epoch": 0.35290417851673594, "grad_norm": 1.5664088726043701, "learning_rate": 1.5013289502430199e-05, "loss": 0.5584, "step": 12905 }, { "epoch": 0.35293152483045287, "grad_norm": 1.4839212894439697, "learning_rate": 1.5012523121244215e-05, "loss": 0.5729, "step": 12906 }, { "epoch": 0.35295887114416974, "grad_norm": 1.1354472637176514, "learning_rate": 1.5011756700736402e-05, "loss": 0.4982, "step": 12907 }, { "epoch": 0.35298621745788666, "grad_norm": 1.1315504312515259, "learning_rate": 1.5010990240912765e-05, "loss": 0.5347, "step": 12908 }, { "epoch": 0.3530135637716036, "grad_norm": 1.240902066230774, "learning_rate": 1.5010223741779322e-05, "loss": 0.5028, "step": 12909 }, { "epoch": 0.3530409100853205, "grad_norm": 1.3144011497497559, "learning_rate": 1.5009457203342082e-05, "loss": 0.5278, "step": 12910 }, { "epoch": 0.3530682563990374, "grad_norm": 1.3167496919631958, "learning_rate": 1.500869062560706e-05, "loss": 0.5425, "step": 12911 }, { "epoch": 0.3530956027127543, "grad_norm": 1.1877007484436035, "learning_rate": 1.500792400858027e-05, "loss": 0.5721, "step": 12912 }, { "epoch": 0.35312294902647123, "grad_norm": 1.4959665536880493, "learning_rate": 1.5007157352267725e-05, "loss": 0.5511, "step": 12913 }, { "epoch": 0.35315029534018816, "grad_norm": 1.4363536834716797, "learning_rate": 1.5006390656675442e-05, "loss": 0.5701, "step": 12914 }, { "epoch": 0.35317764165390503, "grad_norm": 1.4421274662017822, "learning_rate": 1.500562392180943e-05, "loss": 0.4445, "step": 12915 }, { "epoch": 0.35320498796762195, "grad_norm": 2.583150863647461, "learning_rate": 1.5004857147675708e-05, "loss": 0.9134, "step": 12916 }, { "epoch": 0.3532323342813389, "grad_norm": 2.2365622520446777, "learning_rate": 1.5004090334280292e-05, "loss": 0.8844, "step": 12917 }, { "epoch": 0.3532596805950558, "grad_norm": 1.1761716604232788, "learning_rate": 1.5003323481629192e-05, "loss": 0.5646, "step": 12918 }, { "epoch": 0.35328702690877267, "grad_norm": 1.2560850381851196, "learning_rate": 1.5002556589728432e-05, "loss": 0.5652, "step": 12919 }, { "epoch": 0.3533143732224896, "grad_norm": 1.4535189867019653, "learning_rate": 1.5001789658584021e-05, "loss": 0.5462, "step": 12920 }, { "epoch": 0.3533417195362065, "grad_norm": 1.3551621437072754, "learning_rate": 1.5001022688201977e-05, "loss": 0.3734, "step": 12921 }, { "epoch": 0.35336906584992345, "grad_norm": 1.3313093185424805, "learning_rate": 1.5000255678588317e-05, "loss": 0.5542, "step": 12922 }, { "epoch": 0.3533964121636403, "grad_norm": 1.368360161781311, "learning_rate": 1.4999488629749059e-05, "loss": 0.4778, "step": 12923 }, { "epoch": 0.35342375847735724, "grad_norm": 1.3258204460144043, "learning_rate": 1.4998721541690223e-05, "loss": 0.4748, "step": 12924 }, { "epoch": 0.35345110479107417, "grad_norm": 1.2744158506393433, "learning_rate": 1.4997954414417819e-05, "loss": 0.5695, "step": 12925 }, { "epoch": 0.3534784511047911, "grad_norm": 1.3878422975540161, "learning_rate": 1.499718724793787e-05, "loss": 0.4105, "step": 12926 }, { "epoch": 0.35350579741850796, "grad_norm": 1.3315746784210205, "learning_rate": 1.4996420042256394e-05, "loss": 0.551, "step": 12927 }, { "epoch": 0.3535331437322249, "grad_norm": 1.393923282623291, "learning_rate": 1.499565279737941e-05, "loss": 0.5844, "step": 12928 }, { "epoch": 0.3535604900459418, "grad_norm": 1.2194043397903442, "learning_rate": 1.4994885513312935e-05, "loss": 0.5187, "step": 12929 }, { "epoch": 0.35358783635965874, "grad_norm": 1.1297063827514648, "learning_rate": 1.4994118190062987e-05, "loss": 0.5334, "step": 12930 }, { "epoch": 0.3536151826733756, "grad_norm": 1.3926398754119873, "learning_rate": 1.4993350827635589e-05, "loss": 0.458, "step": 12931 }, { "epoch": 0.35364252898709253, "grad_norm": 1.3462549448013306, "learning_rate": 1.499258342603676e-05, "loss": 0.5118, "step": 12932 }, { "epoch": 0.35366987530080946, "grad_norm": 1.33156156539917, "learning_rate": 1.4991815985272518e-05, "loss": 0.5886, "step": 12933 }, { "epoch": 0.3536972216145264, "grad_norm": 1.3503011465072632, "learning_rate": 1.4991048505348885e-05, "loss": 0.5362, "step": 12934 }, { "epoch": 0.35372456792824325, "grad_norm": 1.569785237312317, "learning_rate": 1.4990280986271879e-05, "loss": 0.5232, "step": 12935 }, { "epoch": 0.3537519142419602, "grad_norm": 1.271284818649292, "learning_rate": 1.4989513428047524e-05, "loss": 0.5442, "step": 12936 }, { "epoch": 0.3537792605556771, "grad_norm": 1.5080221891403198, "learning_rate": 1.498874583068184e-05, "loss": 0.5073, "step": 12937 }, { "epoch": 0.353806606869394, "grad_norm": 1.4017258882522583, "learning_rate": 1.498797819418085e-05, "loss": 0.5324, "step": 12938 }, { "epoch": 0.3538339531831109, "grad_norm": 1.275806188583374, "learning_rate": 1.4987210518550575e-05, "loss": 0.5073, "step": 12939 }, { "epoch": 0.3538612994968278, "grad_norm": 1.8416255712509155, "learning_rate": 1.4986442803797037e-05, "loss": 0.4138, "step": 12940 }, { "epoch": 0.35388864581054474, "grad_norm": 1.2189416885375977, "learning_rate": 1.4985675049926256e-05, "loss": 0.5639, "step": 12941 }, { "epoch": 0.35391599212426167, "grad_norm": 1.4873950481414795, "learning_rate": 1.4984907256944258e-05, "loss": 0.5418, "step": 12942 }, { "epoch": 0.35394333843797854, "grad_norm": 1.3501307964324951, "learning_rate": 1.4984139424857067e-05, "loss": 0.5682, "step": 12943 }, { "epoch": 0.35397068475169546, "grad_norm": 1.2746893167495728, "learning_rate": 1.4983371553670704e-05, "loss": 0.5428, "step": 12944 }, { "epoch": 0.3539980310654124, "grad_norm": 1.227234959602356, "learning_rate": 1.4982603643391193e-05, "loss": 0.5624, "step": 12945 }, { "epoch": 0.3540253773791293, "grad_norm": 4.3917927742004395, "learning_rate": 1.4981835694024562e-05, "loss": 0.9462, "step": 12946 }, { "epoch": 0.3540527236928462, "grad_norm": 4.069392204284668, "learning_rate": 1.4981067705576828e-05, "loss": 0.8894, "step": 12947 }, { "epoch": 0.3540800700065631, "grad_norm": 1.5040311813354492, "learning_rate": 1.4980299678054025e-05, "loss": 0.4998, "step": 12948 }, { "epoch": 0.35410741632028003, "grad_norm": 1.6329879760742188, "learning_rate": 1.4979531611462167e-05, "loss": 0.5056, "step": 12949 }, { "epoch": 0.35413476263399696, "grad_norm": 1.531867265701294, "learning_rate": 1.4978763505807288e-05, "loss": 0.5371, "step": 12950 }, { "epoch": 0.3541621089477138, "grad_norm": 1.7673625946044922, "learning_rate": 1.497799536109541e-05, "loss": 0.5624, "step": 12951 }, { "epoch": 0.35418945526143075, "grad_norm": 1.4629778861999512, "learning_rate": 1.4977227177332558e-05, "loss": 0.4901, "step": 12952 }, { "epoch": 0.3542168015751477, "grad_norm": 1.134419560432434, "learning_rate": 1.497645895452476e-05, "loss": 0.4926, "step": 12953 }, { "epoch": 0.3542441478888646, "grad_norm": 1.2337682247161865, "learning_rate": 1.4975690692678042e-05, "loss": 0.5421, "step": 12954 }, { "epoch": 0.35427149420258147, "grad_norm": 1.3699567317962646, "learning_rate": 1.497492239179843e-05, "loss": 0.5463, "step": 12955 }, { "epoch": 0.3542988405162984, "grad_norm": 1.3633832931518555, "learning_rate": 1.4974154051891954e-05, "loss": 0.5487, "step": 12956 }, { "epoch": 0.3543261868300153, "grad_norm": 1.546159029006958, "learning_rate": 1.4973385672964638e-05, "loss": 0.5391, "step": 12957 }, { "epoch": 0.35435353314373225, "grad_norm": 1.2597970962524414, "learning_rate": 1.497261725502251e-05, "loss": 0.508, "step": 12958 }, { "epoch": 0.3543808794574491, "grad_norm": 2.029467821121216, "learning_rate": 1.4971848798071603e-05, "loss": 0.5508, "step": 12959 }, { "epoch": 0.35440822577116604, "grad_norm": 1.0744775533676147, "learning_rate": 1.4971080302117935e-05, "loss": 0.4628, "step": 12960 }, { "epoch": 0.35443557208488297, "grad_norm": 1.153814435005188, "learning_rate": 1.4970311767167545e-05, "loss": 0.5429, "step": 12961 }, { "epoch": 0.3544629183985999, "grad_norm": 1.2291133403778076, "learning_rate": 1.496954319322646e-05, "loss": 0.4449, "step": 12962 }, { "epoch": 0.35449026471231676, "grad_norm": 1.3197293281555176, "learning_rate": 1.4968774580300702e-05, "loss": 0.5525, "step": 12963 }, { "epoch": 0.3545176110260337, "grad_norm": 1.411461591720581, "learning_rate": 1.496800592839631e-05, "loss": 0.5221, "step": 12964 }, { "epoch": 0.3545449573397506, "grad_norm": 1.159354567527771, "learning_rate": 1.4967237237519308e-05, "loss": 0.5347, "step": 12965 }, { "epoch": 0.35457230365346754, "grad_norm": 1.236220121383667, "learning_rate": 1.4966468507675726e-05, "loss": 0.5678, "step": 12966 }, { "epoch": 0.3545996499671844, "grad_norm": 1.4141833782196045, "learning_rate": 1.4965699738871599e-05, "loss": 0.5119, "step": 12967 }, { "epoch": 0.35462699628090133, "grad_norm": 1.2152740955352783, "learning_rate": 1.4964930931112955e-05, "loss": 0.5648, "step": 12968 }, { "epoch": 0.35465434259461825, "grad_norm": 1.318919062614441, "learning_rate": 1.4964162084405824e-05, "loss": 0.5148, "step": 12969 }, { "epoch": 0.3546816889083352, "grad_norm": 1.3795719146728516, "learning_rate": 1.496339319875624e-05, "loss": 0.431, "step": 12970 }, { "epoch": 0.35470903522205205, "grad_norm": 1.5286080837249756, "learning_rate": 1.496262427417023e-05, "loss": 0.4929, "step": 12971 }, { "epoch": 0.354736381535769, "grad_norm": 1.4768966436386108, "learning_rate": 1.4961855310653833e-05, "loss": 0.5371, "step": 12972 }, { "epoch": 0.3547637278494859, "grad_norm": 1.1000336408615112, "learning_rate": 1.4961086308213075e-05, "loss": 0.5498, "step": 12973 }, { "epoch": 0.3547910741632028, "grad_norm": 1.2745885848999023, "learning_rate": 1.4960317266853989e-05, "loss": 0.5704, "step": 12974 }, { "epoch": 0.3548184204769197, "grad_norm": 1.1267520189285278, "learning_rate": 1.4959548186582616e-05, "loss": 0.5532, "step": 12975 }, { "epoch": 0.3548457667906366, "grad_norm": 1.2292227745056152, "learning_rate": 1.4958779067404979e-05, "loss": 0.5279, "step": 12976 }, { "epoch": 0.35487311310435354, "grad_norm": 1.2345263957977295, "learning_rate": 1.4958009909327116e-05, "loss": 0.5615, "step": 12977 }, { "epoch": 0.35490045941807047, "grad_norm": 1.2246123552322388, "learning_rate": 1.4957240712355062e-05, "loss": 0.5057, "step": 12978 }, { "epoch": 0.35492780573178734, "grad_norm": 1.542231559753418, "learning_rate": 1.4956471476494848e-05, "loss": 0.5235, "step": 12979 }, { "epoch": 0.35495515204550426, "grad_norm": 1.5940189361572266, "learning_rate": 1.495570220175251e-05, "loss": 0.5239, "step": 12980 }, { "epoch": 0.3549824983592212, "grad_norm": 1.339836835861206, "learning_rate": 1.4954932888134087e-05, "loss": 0.5451, "step": 12981 }, { "epoch": 0.3550098446729381, "grad_norm": 4.327398300170898, "learning_rate": 1.4954163535645605e-05, "loss": 1.1233, "step": 12982 }, { "epoch": 0.355037190986655, "grad_norm": 1.4812681674957275, "learning_rate": 1.4953394144293105e-05, "loss": 0.4494, "step": 12983 }, { "epoch": 0.3550645373003719, "grad_norm": 1.449120283126831, "learning_rate": 1.4952624714082626e-05, "loss": 0.5438, "step": 12984 }, { "epoch": 0.35509188361408883, "grad_norm": 1.2600369453430176, "learning_rate": 1.4951855245020194e-05, "loss": 0.5494, "step": 12985 }, { "epoch": 0.35511922992780576, "grad_norm": 1.3252376317977905, "learning_rate": 1.4951085737111854e-05, "loss": 0.4496, "step": 12986 }, { "epoch": 0.3551465762415226, "grad_norm": 1.2296980619430542, "learning_rate": 1.495031619036364e-05, "loss": 0.5451, "step": 12987 }, { "epoch": 0.35517392255523955, "grad_norm": 2.3671953678131104, "learning_rate": 1.494954660478159e-05, "loss": 0.5611, "step": 12988 }, { "epoch": 0.3552012688689565, "grad_norm": 1.1295225620269775, "learning_rate": 1.4948776980371738e-05, "loss": 0.5254, "step": 12989 }, { "epoch": 0.3552286151826734, "grad_norm": 1.184716820716858, "learning_rate": 1.4948007317140122e-05, "loss": 0.5321, "step": 12990 }, { "epoch": 0.35525596149639027, "grad_norm": 0.9931182861328125, "learning_rate": 1.4947237615092783e-05, "loss": 0.535, "step": 12991 }, { "epoch": 0.3552833078101072, "grad_norm": 1.3785631656646729, "learning_rate": 1.4946467874235757e-05, "loss": 0.5864, "step": 12992 }, { "epoch": 0.3553106541238241, "grad_norm": 1.0478843450546265, "learning_rate": 1.4945698094575084e-05, "loss": 0.5183, "step": 12993 }, { "epoch": 0.35533800043754105, "grad_norm": 1.0043479204177856, "learning_rate": 1.49449282761168e-05, "loss": 0.5187, "step": 12994 }, { "epoch": 0.3553653467512579, "grad_norm": 1.2323402166366577, "learning_rate": 1.4944158418866944e-05, "loss": 0.5459, "step": 12995 }, { "epoch": 0.35539269306497484, "grad_norm": 1.4800761938095093, "learning_rate": 1.4943388522831556e-05, "loss": 0.5416, "step": 12996 }, { "epoch": 0.35542003937869177, "grad_norm": 3.7504823207855225, "learning_rate": 1.494261858801668e-05, "loss": 0.5226, "step": 12997 }, { "epoch": 0.3554473856924087, "grad_norm": 1.3714810609817505, "learning_rate": 1.494184861442835e-05, "loss": 0.5841, "step": 12998 }, { "epoch": 0.35547473200612556, "grad_norm": 2.8807947635650635, "learning_rate": 1.4941078602072608e-05, "loss": 0.9401, "step": 12999 }, { "epoch": 0.3555020783198425, "grad_norm": 1.2642160654067993, "learning_rate": 1.4940308550955496e-05, "loss": 0.538, "step": 13000 }, { "epoch": 0.3555294246335594, "grad_norm": 1.304478406906128, "learning_rate": 1.4939538461083051e-05, "loss": 0.5256, "step": 13001 }, { "epoch": 0.3555567709472763, "grad_norm": 2.577972650527954, "learning_rate": 1.493876833246132e-05, "loss": 0.9019, "step": 13002 }, { "epoch": 0.3555841172609932, "grad_norm": 1.7339754104614258, "learning_rate": 1.4937998165096341e-05, "loss": 0.8872, "step": 13003 }, { "epoch": 0.35561146357471013, "grad_norm": 1.218036413192749, "learning_rate": 1.4937227958994154e-05, "loss": 0.5126, "step": 13004 }, { "epoch": 0.35563880988842705, "grad_norm": 1.4263490438461304, "learning_rate": 1.4936457714160806e-05, "loss": 0.5053, "step": 13005 }, { "epoch": 0.3556661562021439, "grad_norm": 1.2154524326324463, "learning_rate": 1.4935687430602334e-05, "loss": 0.519, "step": 13006 }, { "epoch": 0.35569350251586085, "grad_norm": 1.4496281147003174, "learning_rate": 1.4934917108324785e-05, "loss": 0.8664, "step": 13007 }, { "epoch": 0.3557208488295778, "grad_norm": 1.319332242012024, "learning_rate": 1.4934146747334199e-05, "loss": 0.5561, "step": 13008 }, { "epoch": 0.3557481951432947, "grad_norm": 1.5146429538726807, "learning_rate": 1.4933376347636619e-05, "loss": 0.4727, "step": 13009 }, { "epoch": 0.35577554145701157, "grad_norm": 1.3086402416229248, "learning_rate": 1.4932605909238092e-05, "loss": 0.5305, "step": 13010 }, { "epoch": 0.3558028877707285, "grad_norm": 1.1834418773651123, "learning_rate": 1.4931835432144659e-05, "loss": 0.538, "step": 13011 }, { "epoch": 0.3558302340844454, "grad_norm": 1.4363659620285034, "learning_rate": 1.4931064916362364e-05, "loss": 0.516, "step": 13012 }, { "epoch": 0.35585758039816234, "grad_norm": 1.369173526763916, "learning_rate": 1.4930294361897255e-05, "loss": 0.4733, "step": 13013 }, { "epoch": 0.3558849267118792, "grad_norm": 1.5525261163711548, "learning_rate": 1.4929523768755371e-05, "loss": 0.4752, "step": 13014 }, { "epoch": 0.35591227302559614, "grad_norm": 2.7306342124938965, "learning_rate": 1.4928753136942762e-05, "loss": 0.4119, "step": 13015 }, { "epoch": 0.35593961933931306, "grad_norm": 1.3478398323059082, "learning_rate": 1.4927982466465473e-05, "loss": 0.5524, "step": 13016 }, { "epoch": 0.35596696565303, "grad_norm": 1.151387333869934, "learning_rate": 1.4927211757329548e-05, "loss": 0.5213, "step": 13017 }, { "epoch": 0.35599431196674686, "grad_norm": 1.0627444982528687, "learning_rate": 1.4926441009541032e-05, "loss": 0.5272, "step": 13018 }, { "epoch": 0.3560216582804638, "grad_norm": 1.376518964767456, "learning_rate": 1.4925670223105972e-05, "loss": 0.4737, "step": 13019 }, { "epoch": 0.3560490045941807, "grad_norm": 1.0327743291854858, "learning_rate": 1.4924899398030418e-05, "loss": 0.5174, "step": 13020 }, { "epoch": 0.35607635090789763, "grad_norm": 1.4843095541000366, "learning_rate": 1.4924128534320414e-05, "loss": 0.5109, "step": 13021 }, { "epoch": 0.3561036972216145, "grad_norm": 2.6797046661376953, "learning_rate": 1.4923357631982008e-05, "loss": 0.9237, "step": 13022 }, { "epoch": 0.3561310435353314, "grad_norm": 2.6945912837982178, "learning_rate": 1.4922586691021244e-05, "loss": 0.9001, "step": 13023 }, { "epoch": 0.35615838984904835, "grad_norm": 1.1940529346466064, "learning_rate": 1.4921815711444175e-05, "loss": 0.5539, "step": 13024 }, { "epoch": 0.3561857361627653, "grad_norm": 1.4654788970947266, "learning_rate": 1.4921044693256845e-05, "loss": 0.5787, "step": 13025 }, { "epoch": 0.35621308247648215, "grad_norm": 1.3426918983459473, "learning_rate": 1.4920273636465303e-05, "loss": 0.5463, "step": 13026 }, { "epoch": 0.35624042879019907, "grad_norm": 1.1049203872680664, "learning_rate": 1.4919502541075603e-05, "loss": 0.5505, "step": 13027 }, { "epoch": 0.356267775103916, "grad_norm": 1.2196383476257324, "learning_rate": 1.4918731407093787e-05, "loss": 0.5342, "step": 13028 }, { "epoch": 0.3562951214176329, "grad_norm": 1.2327083349227905, "learning_rate": 1.491796023452591e-05, "loss": 0.5436, "step": 13029 }, { "epoch": 0.3563224677313498, "grad_norm": 1.6422785520553589, "learning_rate": 1.4917189023378016e-05, "loss": 0.5703, "step": 13030 }, { "epoch": 0.3563498140450667, "grad_norm": 1.1815122365951538, "learning_rate": 1.491641777365616e-05, "loss": 0.5629, "step": 13031 }, { "epoch": 0.35637716035878364, "grad_norm": 1.2559969425201416, "learning_rate": 1.491564648536639e-05, "loss": 0.5153, "step": 13032 }, { "epoch": 0.35640450667250057, "grad_norm": 1.3499821424484253, "learning_rate": 1.4914875158514757e-05, "loss": 0.5432, "step": 13033 }, { "epoch": 0.35643185298621743, "grad_norm": 1.0076875686645508, "learning_rate": 1.4914103793107308e-05, "loss": 0.5309, "step": 13034 }, { "epoch": 0.35645919929993436, "grad_norm": 1.3743127584457397, "learning_rate": 1.4913332389150102e-05, "loss": 0.5202, "step": 13035 }, { "epoch": 0.3564865456136513, "grad_norm": 1.3469465970993042, "learning_rate": 1.4912560946649184e-05, "loss": 0.5241, "step": 13036 }, { "epoch": 0.3565138919273682, "grad_norm": 1.3715918064117432, "learning_rate": 1.491178946561061e-05, "loss": 0.5621, "step": 13037 }, { "epoch": 0.3565412382410851, "grad_norm": 1.3514878749847412, "learning_rate": 1.491101794604043e-05, "loss": 0.6281, "step": 13038 }, { "epoch": 0.356568584554802, "grad_norm": 2.03999662399292, "learning_rate": 1.4910246387944694e-05, "loss": 0.5372, "step": 13039 }, { "epoch": 0.35659593086851893, "grad_norm": 1.045691967010498, "learning_rate": 1.490947479132946e-05, "loss": 0.5406, "step": 13040 }, { "epoch": 0.35662327718223585, "grad_norm": 1.4845401048660278, "learning_rate": 1.4908703156200775e-05, "loss": 0.4093, "step": 13041 }, { "epoch": 0.3566506234959527, "grad_norm": 1.3661963939666748, "learning_rate": 1.4907931482564692e-05, "loss": 0.596, "step": 13042 }, { "epoch": 0.35667796980966965, "grad_norm": 1.5463017225265503, "learning_rate": 1.4907159770427275e-05, "loss": 0.5638, "step": 13043 }, { "epoch": 0.3567053161233866, "grad_norm": 1.3540048599243164, "learning_rate": 1.4906388019794564e-05, "loss": 0.5354, "step": 13044 }, { "epoch": 0.3567326624371035, "grad_norm": 1.3558906316757202, "learning_rate": 1.4905616230672624e-05, "loss": 0.5435, "step": 13045 }, { "epoch": 0.35676000875082037, "grad_norm": 1.113480806350708, "learning_rate": 1.4904844403067505e-05, "loss": 0.514, "step": 13046 }, { "epoch": 0.3567873550645373, "grad_norm": 1.4974476099014282, "learning_rate": 1.4904072536985262e-05, "loss": 0.4737, "step": 13047 }, { "epoch": 0.3568147013782542, "grad_norm": 1.2689471244812012, "learning_rate": 1.490330063243195e-05, "loss": 0.5506, "step": 13048 }, { "epoch": 0.35684204769197114, "grad_norm": 2.1592938899993896, "learning_rate": 1.490252868941362e-05, "loss": 0.5428, "step": 13049 }, { "epoch": 0.356869394005688, "grad_norm": 1.2171014547348022, "learning_rate": 1.4901756707936337e-05, "loss": 0.5394, "step": 13050 }, { "epoch": 0.35689674031940494, "grad_norm": 1.2087759971618652, "learning_rate": 1.490098468800615e-05, "loss": 0.5559, "step": 13051 }, { "epoch": 0.35692408663312186, "grad_norm": 1.0360991954803467, "learning_rate": 1.4900212629629119e-05, "loss": 0.5689, "step": 13052 }, { "epoch": 0.3569514329468388, "grad_norm": 1.4843499660491943, "learning_rate": 1.4899440532811298e-05, "loss": 0.5505, "step": 13053 }, { "epoch": 0.35697877926055566, "grad_norm": 1.2115775346755981, "learning_rate": 1.4898668397558745e-05, "loss": 0.4728, "step": 13054 }, { "epoch": 0.3570061255742726, "grad_norm": 1.289301872253418, "learning_rate": 1.4897896223877519e-05, "loss": 0.559, "step": 13055 }, { "epoch": 0.3570334718879895, "grad_norm": 1.1009392738342285, "learning_rate": 1.4897124011773674e-05, "loss": 0.5333, "step": 13056 }, { "epoch": 0.35706081820170643, "grad_norm": 2.7851219177246094, "learning_rate": 1.489635176125327e-05, "loss": 0.9432, "step": 13057 }, { "epoch": 0.3570881645154233, "grad_norm": 1.220626711845398, "learning_rate": 1.489557947232236e-05, "loss": 0.5386, "step": 13058 }, { "epoch": 0.3571155108291402, "grad_norm": 1.4663817882537842, "learning_rate": 1.4894807144987012e-05, "loss": 0.5537, "step": 13059 }, { "epoch": 0.35714285714285715, "grad_norm": 1.4398995637893677, "learning_rate": 1.4894034779253277e-05, "loss": 0.5118, "step": 13060 }, { "epoch": 0.3571702034565741, "grad_norm": 1.1918765306472778, "learning_rate": 1.4893262375127216e-05, "loss": 0.5528, "step": 13061 }, { "epoch": 0.35719754977029095, "grad_norm": 1.2152678966522217, "learning_rate": 1.4892489932614891e-05, "loss": 0.5211, "step": 13062 }, { "epoch": 0.35722489608400787, "grad_norm": 1.3151119947433472, "learning_rate": 1.4891717451722359e-05, "loss": 0.5249, "step": 13063 }, { "epoch": 0.3572522423977248, "grad_norm": 1.2353872060775757, "learning_rate": 1.4890944932455678e-05, "loss": 0.5393, "step": 13064 }, { "epoch": 0.3572795887114417, "grad_norm": 1.1177897453308105, "learning_rate": 1.489017237482091e-05, "loss": 0.5236, "step": 13065 }, { "epoch": 0.3573069350251586, "grad_norm": 1.214269757270813, "learning_rate": 1.4889399778824119e-05, "loss": 0.5688, "step": 13066 }, { "epoch": 0.3573342813388755, "grad_norm": 1.3276803493499756, "learning_rate": 1.4888627144471361e-05, "loss": 0.4771, "step": 13067 }, { "epoch": 0.35736162765259244, "grad_norm": 1.3630234003067017, "learning_rate": 1.48878544717687e-05, "loss": 0.5483, "step": 13068 }, { "epoch": 0.35738897396630936, "grad_norm": 1.2551976442337036, "learning_rate": 1.4887081760722196e-05, "loss": 0.5482, "step": 13069 }, { "epoch": 0.35741632028002623, "grad_norm": 1.2621195316314697, "learning_rate": 1.488630901133791e-05, "loss": 0.5234, "step": 13070 }, { "epoch": 0.35744366659374316, "grad_norm": 1.4425616264343262, "learning_rate": 1.4885536223621904e-05, "loss": 0.5604, "step": 13071 }, { "epoch": 0.3574710129074601, "grad_norm": 1.386231541633606, "learning_rate": 1.4884763397580244e-05, "loss": 0.4708, "step": 13072 }, { "epoch": 0.357498359221177, "grad_norm": 2.091862440109253, "learning_rate": 1.488399053321899e-05, "loss": 0.5267, "step": 13073 }, { "epoch": 0.3575257055348939, "grad_norm": 2.0865721702575684, "learning_rate": 1.4883217630544202e-05, "loss": 0.8819, "step": 13074 }, { "epoch": 0.3575530518486108, "grad_norm": 1.0934138298034668, "learning_rate": 1.4882444689561945e-05, "loss": 0.5051, "step": 13075 }, { "epoch": 0.35758039816232773, "grad_norm": 1.45403254032135, "learning_rate": 1.4881671710278288e-05, "loss": 0.5449, "step": 13076 }, { "epoch": 0.35760774447604465, "grad_norm": 1.260999321937561, "learning_rate": 1.4880898692699287e-05, "loss": 0.5294, "step": 13077 }, { "epoch": 0.3576350907897615, "grad_norm": 1.2342451810836792, "learning_rate": 1.4880125636831012e-05, "loss": 0.5672, "step": 13078 }, { "epoch": 0.35766243710347845, "grad_norm": 1.059014916419983, "learning_rate": 1.4879352542679522e-05, "loss": 0.5513, "step": 13079 }, { "epoch": 0.3576897834171954, "grad_norm": 1.1929889917373657, "learning_rate": 1.4878579410250883e-05, "loss": 0.5305, "step": 13080 }, { "epoch": 0.3577171297309123, "grad_norm": 1.4405527114868164, "learning_rate": 1.4877806239551166e-05, "loss": 0.849, "step": 13081 }, { "epoch": 0.35774447604462917, "grad_norm": 1.301611304283142, "learning_rate": 1.487703303058643e-05, "loss": 0.5415, "step": 13082 }, { "epoch": 0.3577718223583461, "grad_norm": 1.233402132987976, "learning_rate": 1.4876259783362742e-05, "loss": 0.5012, "step": 13083 }, { "epoch": 0.357799168672063, "grad_norm": 4.003921985626221, "learning_rate": 1.4875486497886166e-05, "loss": 0.8443, "step": 13084 }, { "epoch": 0.35782651498577994, "grad_norm": 1.4184080362319946, "learning_rate": 1.4874713174162773e-05, "loss": 0.5519, "step": 13085 }, { "epoch": 0.3578538612994968, "grad_norm": 1.6225777864456177, "learning_rate": 1.4873939812198628e-05, "loss": 0.5413, "step": 13086 }, { "epoch": 0.35788120761321374, "grad_norm": 1.4723984003067017, "learning_rate": 1.4873166411999796e-05, "loss": 0.8393, "step": 13087 }, { "epoch": 0.35790855392693066, "grad_norm": 1.5465275049209595, "learning_rate": 1.4872392973572343e-05, "loss": 0.5465, "step": 13088 }, { "epoch": 0.3579359002406476, "grad_norm": 1.5174670219421387, "learning_rate": 1.4871619496922343e-05, "loss": 0.5781, "step": 13089 }, { "epoch": 0.35796324655436446, "grad_norm": 1.2212568521499634, "learning_rate": 1.4870845982055855e-05, "loss": 0.5362, "step": 13090 }, { "epoch": 0.3579905928680814, "grad_norm": 1.2739254236221313, "learning_rate": 1.4870072428978952e-05, "loss": 0.4593, "step": 13091 }, { "epoch": 0.3580179391817983, "grad_norm": 1.2747118473052979, "learning_rate": 1.4869298837697704e-05, "loss": 0.5288, "step": 13092 }, { "epoch": 0.35804528549551523, "grad_norm": 2.0984675884246826, "learning_rate": 1.4868525208218171e-05, "loss": 0.5345, "step": 13093 }, { "epoch": 0.3580726318092321, "grad_norm": 1.7148464918136597, "learning_rate": 1.4867751540546433e-05, "loss": 0.8335, "step": 13094 }, { "epoch": 0.358099978122949, "grad_norm": 1.22808039188385, "learning_rate": 1.4866977834688552e-05, "loss": 0.5288, "step": 13095 }, { "epoch": 0.35812732443666595, "grad_norm": 1.29315984249115, "learning_rate": 1.48662040906506e-05, "loss": 0.4188, "step": 13096 }, { "epoch": 0.3581546707503829, "grad_norm": 1.3685693740844727, "learning_rate": 1.4865430308438646e-05, "loss": 0.5466, "step": 13097 }, { "epoch": 0.35818201706409974, "grad_norm": 1.0831270217895508, "learning_rate": 1.486465648805876e-05, "loss": 0.5394, "step": 13098 }, { "epoch": 0.35820936337781667, "grad_norm": 1.3116456270217896, "learning_rate": 1.4863882629517012e-05, "loss": 0.518, "step": 13099 }, { "epoch": 0.3582367096915336, "grad_norm": 1.1860291957855225, "learning_rate": 1.4863108732819477e-05, "loss": 0.5515, "step": 13100 }, { "epoch": 0.3582640560052505, "grad_norm": 1.3049603700637817, "learning_rate": 1.486233479797222e-05, "loss": 0.5559, "step": 13101 }, { "epoch": 0.3582914023189674, "grad_norm": 1.3899271488189697, "learning_rate": 1.4861560824981318e-05, "loss": 0.5232, "step": 13102 }, { "epoch": 0.3583187486326843, "grad_norm": 1.043424367904663, "learning_rate": 1.4860786813852834e-05, "loss": 0.5511, "step": 13103 }, { "epoch": 0.35834609494640124, "grad_norm": 1.2504966259002686, "learning_rate": 1.4860012764592848e-05, "loss": 0.5256, "step": 13104 }, { "epoch": 0.3583734412601181, "grad_norm": 1.659993290901184, "learning_rate": 1.4859238677207433e-05, "loss": 0.5652, "step": 13105 }, { "epoch": 0.35840078757383503, "grad_norm": 1.3049869537353516, "learning_rate": 1.4858464551702654e-05, "loss": 0.534, "step": 13106 }, { "epoch": 0.35842813388755196, "grad_norm": 1.7826777696609497, "learning_rate": 1.4857690388084588e-05, "loss": 0.4035, "step": 13107 }, { "epoch": 0.3584554802012689, "grad_norm": 1.4174795150756836, "learning_rate": 1.485691618635931e-05, "loss": 0.6076, "step": 13108 }, { "epoch": 0.35848282651498575, "grad_norm": 1.5627566576004028, "learning_rate": 1.4856141946532892e-05, "loss": 0.5375, "step": 13109 }, { "epoch": 0.3585101728287027, "grad_norm": 1.1757651567459106, "learning_rate": 1.4855367668611405e-05, "loss": 0.5391, "step": 13110 }, { "epoch": 0.3585375191424196, "grad_norm": 1.6879547834396362, "learning_rate": 1.4854593352600928e-05, "loss": 0.5577, "step": 13111 }, { "epoch": 0.35856486545613653, "grad_norm": 1.3149163722991943, "learning_rate": 1.4853818998507528e-05, "loss": 0.5081, "step": 13112 }, { "epoch": 0.3585922117698534, "grad_norm": 1.3202637434005737, "learning_rate": 1.4853044606337285e-05, "loss": 0.581, "step": 13113 }, { "epoch": 0.3586195580835703, "grad_norm": 1.531954050064087, "learning_rate": 1.4852270176096276e-05, "loss": 0.5613, "step": 13114 }, { "epoch": 0.35864690439728725, "grad_norm": 1.3162415027618408, "learning_rate": 1.4851495707790568e-05, "loss": 0.5385, "step": 13115 }, { "epoch": 0.35867425071100417, "grad_norm": 1.3411234617233276, "learning_rate": 1.4850721201426247e-05, "loss": 0.5303, "step": 13116 }, { "epoch": 0.35870159702472104, "grad_norm": 1.4507800340652466, "learning_rate": 1.4849946657009377e-05, "loss": 0.8362, "step": 13117 }, { "epoch": 0.35872894333843797, "grad_norm": 2.2395718097686768, "learning_rate": 1.4849172074546045e-05, "loss": 0.514, "step": 13118 }, { "epoch": 0.3587562896521549, "grad_norm": 1.3811790943145752, "learning_rate": 1.4848397454042323e-05, "loss": 0.5507, "step": 13119 }, { "epoch": 0.3587836359658718, "grad_norm": 1.6789650917053223, "learning_rate": 1.4847622795504284e-05, "loss": 0.4409, "step": 13120 }, { "epoch": 0.3588109822795887, "grad_norm": 1.4643830060958862, "learning_rate": 1.4846848098938012e-05, "loss": 0.4644, "step": 13121 }, { "epoch": 0.3588383285933056, "grad_norm": 1.1987015008926392, "learning_rate": 1.4846073364349579e-05, "loss": 0.5336, "step": 13122 }, { "epoch": 0.35886567490702254, "grad_norm": 1.4347258806228638, "learning_rate": 1.4845298591745063e-05, "loss": 0.5353, "step": 13123 }, { "epoch": 0.35889302122073946, "grad_norm": 1.3006492853164673, "learning_rate": 1.4844523781130544e-05, "loss": 0.4377, "step": 13124 }, { "epoch": 0.35892036753445633, "grad_norm": 1.2676464319229126, "learning_rate": 1.4843748932512103e-05, "loss": 0.846, "step": 13125 }, { "epoch": 0.35894771384817326, "grad_norm": 1.4150984287261963, "learning_rate": 1.484297404589581e-05, "loss": 0.5286, "step": 13126 }, { "epoch": 0.3589750601618902, "grad_norm": 1.2993741035461426, "learning_rate": 1.4842199121287753e-05, "loss": 0.4362, "step": 13127 }, { "epoch": 0.3590024064756071, "grad_norm": 1.4574275016784668, "learning_rate": 1.4841424158694002e-05, "loss": 0.814, "step": 13128 }, { "epoch": 0.359029752789324, "grad_norm": 1.5147308111190796, "learning_rate": 1.4840649158120642e-05, "loss": 0.5562, "step": 13129 }, { "epoch": 0.3590570991030409, "grad_norm": 1.1749323606491089, "learning_rate": 1.4839874119573755e-05, "loss": 0.5462, "step": 13130 }, { "epoch": 0.3590844454167578, "grad_norm": 1.545689582824707, "learning_rate": 1.4839099043059413e-05, "loss": 0.5712, "step": 13131 }, { "epoch": 0.35911179173047475, "grad_norm": 1.232844591140747, "learning_rate": 1.4838323928583702e-05, "loss": 0.5505, "step": 13132 }, { "epoch": 0.3591391380441916, "grad_norm": 1.2046477794647217, "learning_rate": 1.4837548776152701e-05, "loss": 0.3826, "step": 13133 }, { "epoch": 0.35916648435790854, "grad_norm": 1.4260252714157104, "learning_rate": 1.4836773585772493e-05, "loss": 0.5406, "step": 13134 }, { "epoch": 0.35919383067162547, "grad_norm": 1.364312767982483, "learning_rate": 1.4835998357449157e-05, "loss": 0.5558, "step": 13135 }, { "epoch": 0.3592211769853424, "grad_norm": 1.5946943759918213, "learning_rate": 1.4835223091188775e-05, "loss": 0.5678, "step": 13136 }, { "epoch": 0.35924852329905926, "grad_norm": 2.520561456680298, "learning_rate": 1.483444778699743e-05, "loss": 0.3705, "step": 13137 }, { "epoch": 0.3592758696127762, "grad_norm": 1.4191094636917114, "learning_rate": 1.48336724448812e-05, "loss": 0.5491, "step": 13138 }, { "epoch": 0.3593032159264931, "grad_norm": 1.0574239492416382, "learning_rate": 1.4832897064846171e-05, "loss": 0.5513, "step": 13139 }, { "epoch": 0.35933056224021004, "grad_norm": 1.427682638168335, "learning_rate": 1.4832121646898422e-05, "loss": 0.5125, "step": 13140 }, { "epoch": 0.3593579085539269, "grad_norm": 1.2183133363723755, "learning_rate": 1.4831346191044043e-05, "loss": 0.5537, "step": 13141 }, { "epoch": 0.35938525486764383, "grad_norm": 1.2605174779891968, "learning_rate": 1.4830570697289109e-05, "loss": 0.5256, "step": 13142 }, { "epoch": 0.35941260118136076, "grad_norm": 1.4467334747314453, "learning_rate": 1.4829795165639713e-05, "loss": 0.5628, "step": 13143 }, { "epoch": 0.3594399474950777, "grad_norm": 1.100866675376892, "learning_rate": 1.4829019596101927e-05, "loss": 0.4985, "step": 13144 }, { "epoch": 0.35946729380879455, "grad_norm": 1.0364993810653687, "learning_rate": 1.4828243988681843e-05, "loss": 0.545, "step": 13145 }, { "epoch": 0.3594946401225115, "grad_norm": 1.83334481716156, "learning_rate": 1.4827468343385546e-05, "loss": 0.3716, "step": 13146 }, { "epoch": 0.3595219864362284, "grad_norm": 1.4103001356124878, "learning_rate": 1.4826692660219117e-05, "loss": 0.847, "step": 13147 }, { "epoch": 0.3595493327499453, "grad_norm": 1.2947072982788086, "learning_rate": 1.4825916939188643e-05, "loss": 0.5186, "step": 13148 }, { "epoch": 0.3595766790636622, "grad_norm": 1.2025972604751587, "learning_rate": 1.4825141180300206e-05, "loss": 0.5381, "step": 13149 }, { "epoch": 0.3596040253773791, "grad_norm": 1.2900718450546265, "learning_rate": 1.4824365383559896e-05, "loss": 0.5603, "step": 13150 }, { "epoch": 0.35963137169109605, "grad_norm": 1.5676548480987549, "learning_rate": 1.48235895489738e-05, "loss": 0.4679, "step": 13151 }, { "epoch": 0.35965871800481297, "grad_norm": 1.453698992729187, "learning_rate": 1.4822813676547999e-05, "loss": 0.8431, "step": 13152 }, { "epoch": 0.35968606431852984, "grad_norm": 1.1853121519088745, "learning_rate": 1.4822037766288579e-05, "loss": 0.8541, "step": 13153 }, { "epoch": 0.35971341063224677, "grad_norm": 1.2375975847244263, "learning_rate": 1.4821261818201636e-05, "loss": 0.5277, "step": 13154 }, { "epoch": 0.3597407569459637, "grad_norm": 1.4436789751052856, "learning_rate": 1.4820485832293246e-05, "loss": 0.5756, "step": 13155 }, { "epoch": 0.3597681032596806, "grad_norm": 1.140707015991211, "learning_rate": 1.4819709808569504e-05, "loss": 0.5194, "step": 13156 }, { "epoch": 0.3597954495733975, "grad_norm": 1.395621418952942, "learning_rate": 1.4818933747036493e-05, "loss": 0.549, "step": 13157 }, { "epoch": 0.3598227958871144, "grad_norm": 1.37472665309906, "learning_rate": 1.4818157647700301e-05, "loss": 0.543, "step": 13158 }, { "epoch": 0.35985014220083134, "grad_norm": 1.2401000261306763, "learning_rate": 1.4817381510567023e-05, "loss": 0.5164, "step": 13159 }, { "epoch": 0.35987748851454826, "grad_norm": 1.1495667695999146, "learning_rate": 1.4816605335642741e-05, "loss": 0.5402, "step": 13160 }, { "epoch": 0.35990483482826513, "grad_norm": 1.8053771257400513, "learning_rate": 1.4815829122933545e-05, "loss": 0.396, "step": 13161 }, { "epoch": 0.35993218114198205, "grad_norm": 1.274778127670288, "learning_rate": 1.4815052872445525e-05, "loss": 0.5225, "step": 13162 }, { "epoch": 0.359959527455699, "grad_norm": 1.9593535661697388, "learning_rate": 1.4814276584184772e-05, "loss": 0.8703, "step": 13163 }, { "epoch": 0.3599868737694159, "grad_norm": 1.189971685409546, "learning_rate": 1.4813500258157372e-05, "loss": 0.5268, "step": 13164 }, { "epoch": 0.3600142200831328, "grad_norm": 1.601233720779419, "learning_rate": 1.4812723894369418e-05, "loss": 0.5537, "step": 13165 }, { "epoch": 0.3600415663968497, "grad_norm": 1.4262611865997314, "learning_rate": 1.4811947492827e-05, "loss": 0.5298, "step": 13166 }, { "epoch": 0.3600689127105666, "grad_norm": 1.2643622159957886, "learning_rate": 1.4811171053536206e-05, "loss": 0.5314, "step": 13167 }, { "epoch": 0.36009625902428355, "grad_norm": 1.3377532958984375, "learning_rate": 1.481039457650313e-05, "loss": 0.4669, "step": 13168 }, { "epoch": 0.3601236053380004, "grad_norm": 1.5828298330307007, "learning_rate": 1.480961806173386e-05, "loss": 0.5411, "step": 13169 }, { "epoch": 0.36015095165171734, "grad_norm": 1.4115899801254272, "learning_rate": 1.4808841509234495e-05, "loss": 0.5462, "step": 13170 }, { "epoch": 0.36017829796543427, "grad_norm": 1.412190318107605, "learning_rate": 1.4808064919011118e-05, "loss": 0.5238, "step": 13171 }, { "epoch": 0.3602056442791512, "grad_norm": 1.3240249156951904, "learning_rate": 1.4807288291069827e-05, "loss": 0.5556, "step": 13172 }, { "epoch": 0.36023299059286806, "grad_norm": 1.279586672782898, "learning_rate": 1.480651162541671e-05, "loss": 0.5795, "step": 13173 }, { "epoch": 0.360260336906585, "grad_norm": 1.6037406921386719, "learning_rate": 1.4805734922057861e-05, "loss": 0.4897, "step": 13174 }, { "epoch": 0.3602876832203019, "grad_norm": 1.090964913368225, "learning_rate": 1.480495818099938e-05, "loss": 0.4954, "step": 13175 }, { "epoch": 0.36031502953401884, "grad_norm": 1.316269874572754, "learning_rate": 1.4804181402247349e-05, "loss": 0.8818, "step": 13176 }, { "epoch": 0.3603423758477357, "grad_norm": 1.3861340284347534, "learning_rate": 1.4803404585807868e-05, "loss": 0.5516, "step": 13177 }, { "epoch": 0.36036972216145263, "grad_norm": 1.2428982257843018, "learning_rate": 1.4802627731687031e-05, "loss": 0.5338, "step": 13178 }, { "epoch": 0.36039706847516956, "grad_norm": 1.408839464187622, "learning_rate": 1.480185083989093e-05, "loss": 0.5394, "step": 13179 }, { "epoch": 0.3604244147888865, "grad_norm": 1.1587700843811035, "learning_rate": 1.4801073910425661e-05, "loss": 0.4993, "step": 13180 }, { "epoch": 0.36045176110260335, "grad_norm": 1.456636905670166, "learning_rate": 1.4800296943297319e-05, "loss": 0.5124, "step": 13181 }, { "epoch": 0.3604791074163203, "grad_norm": 1.348618507385254, "learning_rate": 1.4799519938511997e-05, "loss": 0.5521, "step": 13182 }, { "epoch": 0.3605064537300372, "grad_norm": 1.3419160842895508, "learning_rate": 1.479874289607579e-05, "loss": 0.5063, "step": 13183 }, { "epoch": 0.3605338000437541, "grad_norm": 1.1334800720214844, "learning_rate": 1.47979658159948e-05, "loss": 0.5378, "step": 13184 }, { "epoch": 0.360561146357471, "grad_norm": 1.2365119457244873, "learning_rate": 1.4797188698275115e-05, "loss": 0.5485, "step": 13185 }, { "epoch": 0.3605884926711879, "grad_norm": 1.323581576347351, "learning_rate": 1.4796411542922837e-05, "loss": 0.547, "step": 13186 }, { "epoch": 0.36061583898490485, "grad_norm": 1.3228873014450073, "learning_rate": 1.4795634349944057e-05, "loss": 0.5734, "step": 13187 }, { "epoch": 0.36064318529862177, "grad_norm": 1.4109126329421997, "learning_rate": 1.4794857119344876e-05, "loss": 0.5181, "step": 13188 }, { "epoch": 0.36067053161233864, "grad_norm": 1.1861131191253662, "learning_rate": 1.4794079851131393e-05, "loss": 0.509, "step": 13189 }, { "epoch": 0.36069787792605557, "grad_norm": 1.8622242212295532, "learning_rate": 1.4793302545309703e-05, "loss": 0.5547, "step": 13190 }, { "epoch": 0.3607252242397725, "grad_norm": 1.34073007106781, "learning_rate": 1.4792525201885901e-05, "loss": 0.5751, "step": 13191 }, { "epoch": 0.3607525705534894, "grad_norm": 1.4495468139648438, "learning_rate": 1.4791747820866087e-05, "loss": 0.517, "step": 13192 }, { "epoch": 0.3607799168672063, "grad_norm": 1.358713984489441, "learning_rate": 1.479097040225636e-05, "loss": 0.523, "step": 13193 }, { "epoch": 0.3608072631809232, "grad_norm": 1.5593916177749634, "learning_rate": 1.4790192946062823e-05, "loss": 0.567, "step": 13194 }, { "epoch": 0.36083460949464014, "grad_norm": 1.2157632112503052, "learning_rate": 1.4789415452291564e-05, "loss": 0.528, "step": 13195 }, { "epoch": 0.36086195580835706, "grad_norm": 1.3330588340759277, "learning_rate": 1.4788637920948693e-05, "loss": 0.5472, "step": 13196 }, { "epoch": 0.36088930212207393, "grad_norm": 1.0484079122543335, "learning_rate": 1.4787860352040305e-05, "loss": 0.5304, "step": 13197 }, { "epoch": 0.36091664843579085, "grad_norm": 1.452234148979187, "learning_rate": 1.4787082745572498e-05, "loss": 0.5074, "step": 13198 }, { "epoch": 0.3609439947495078, "grad_norm": 1.3280744552612305, "learning_rate": 1.4786305101551374e-05, "loss": 0.5889, "step": 13199 }, { "epoch": 0.3609713410632247, "grad_norm": 1.734879970550537, "learning_rate": 1.4785527419983035e-05, "loss": 0.4163, "step": 13200 }, { "epoch": 0.3609986873769416, "grad_norm": 1.5715171098709106, "learning_rate": 1.478474970087358e-05, "loss": 0.4786, "step": 13201 }, { "epoch": 0.3610260336906585, "grad_norm": 1.8247740268707275, "learning_rate": 1.4783971944229109e-05, "loss": 0.542, "step": 13202 }, { "epoch": 0.3610533800043754, "grad_norm": 1.4829310178756714, "learning_rate": 1.4783194150055727e-05, "loss": 0.5137, "step": 13203 }, { "epoch": 0.36108072631809235, "grad_norm": 2.9725286960601807, "learning_rate": 1.4782416318359527e-05, "loss": 0.5627, "step": 13204 }, { "epoch": 0.3611080726318092, "grad_norm": 1.0703208446502686, "learning_rate": 1.4781638449146623e-05, "loss": 0.5243, "step": 13205 }, { "epoch": 0.36113541894552614, "grad_norm": 2.0536510944366455, "learning_rate": 1.4780860542423108e-05, "loss": 0.5611, "step": 13206 }, { "epoch": 0.36116276525924307, "grad_norm": 1.0377781391143799, "learning_rate": 1.4780082598195086e-05, "loss": 0.5378, "step": 13207 }, { "epoch": 0.36119011157295994, "grad_norm": 1.0686912536621094, "learning_rate": 1.4779304616468667e-05, "loss": 0.533, "step": 13208 }, { "epoch": 0.36121745788667686, "grad_norm": 1.1482582092285156, "learning_rate": 1.4778526597249943e-05, "loss": 0.5374, "step": 13209 }, { "epoch": 0.3612448042003938, "grad_norm": 1.3491873741149902, "learning_rate": 1.4777748540545028e-05, "loss": 0.4036, "step": 13210 }, { "epoch": 0.3612721505141107, "grad_norm": 1.3496088981628418, "learning_rate": 1.4776970446360014e-05, "loss": 0.4656, "step": 13211 }, { "epoch": 0.3612994968278276, "grad_norm": 1.6302461624145508, "learning_rate": 1.4776192314701015e-05, "loss": 0.5545, "step": 13212 }, { "epoch": 0.3613268431415445, "grad_norm": 1.258927345275879, "learning_rate": 1.4775414145574132e-05, "loss": 0.5507, "step": 13213 }, { "epoch": 0.36135418945526143, "grad_norm": 1.225807785987854, "learning_rate": 1.4774635938985467e-05, "loss": 0.5243, "step": 13214 }, { "epoch": 0.36138153576897836, "grad_norm": 1.2461578845977783, "learning_rate": 1.4773857694941126e-05, "loss": 0.8966, "step": 13215 }, { "epoch": 0.3614088820826952, "grad_norm": 1.2772701978683472, "learning_rate": 1.4773079413447217e-05, "loss": 0.4385, "step": 13216 }, { "epoch": 0.36143622839641215, "grad_norm": 1.4282280206680298, "learning_rate": 1.4772301094509843e-05, "loss": 0.5549, "step": 13217 }, { "epoch": 0.3614635747101291, "grad_norm": 1.2956901788711548, "learning_rate": 1.4771522738135106e-05, "loss": 0.5623, "step": 13218 }, { "epoch": 0.361490921023846, "grad_norm": 1.0675427913665771, "learning_rate": 1.4770744344329121e-05, "loss": 0.5066, "step": 13219 }, { "epoch": 0.36151826733756287, "grad_norm": 1.5197190046310425, "learning_rate": 1.4769965913097989e-05, "loss": 0.4345, "step": 13220 }, { "epoch": 0.3615456136512798, "grad_norm": 1.1137444972991943, "learning_rate": 1.4769187444447815e-05, "loss": 0.5571, "step": 13221 }, { "epoch": 0.3615729599649967, "grad_norm": 2.041841745376587, "learning_rate": 1.4768408938384706e-05, "loss": 0.5643, "step": 13222 }, { "epoch": 0.36160030627871365, "grad_norm": 1.1032557487487793, "learning_rate": 1.4767630394914774e-05, "loss": 0.5235, "step": 13223 }, { "epoch": 0.3616276525924305, "grad_norm": 1.171088457107544, "learning_rate": 1.4766851814044121e-05, "loss": 0.5415, "step": 13224 }, { "epoch": 0.36165499890614744, "grad_norm": 1.1886529922485352, "learning_rate": 1.4766073195778859e-05, "loss": 0.5317, "step": 13225 }, { "epoch": 0.36168234521986437, "grad_norm": 1.6918354034423828, "learning_rate": 1.476529454012509e-05, "loss": 0.4966, "step": 13226 }, { "epoch": 0.3617096915335813, "grad_norm": 1.1254104375839233, "learning_rate": 1.4764515847088931e-05, "loss": 0.5179, "step": 13227 }, { "epoch": 0.36173703784729816, "grad_norm": 1.4436943531036377, "learning_rate": 1.4763737116676485e-05, "loss": 0.5862, "step": 13228 }, { "epoch": 0.3617643841610151, "grad_norm": 1.2125084400177002, "learning_rate": 1.4762958348893861e-05, "loss": 0.4903, "step": 13229 }, { "epoch": 0.361791730474732, "grad_norm": 1.2395215034484863, "learning_rate": 1.4762179543747171e-05, "loss": 0.523, "step": 13230 }, { "epoch": 0.36181907678844893, "grad_norm": 1.2669661045074463, "learning_rate": 1.4761400701242521e-05, "loss": 0.4464, "step": 13231 }, { "epoch": 0.3618464231021658, "grad_norm": 1.267417073249817, "learning_rate": 1.4760621821386023e-05, "loss": 0.5492, "step": 13232 }, { "epoch": 0.36187376941588273, "grad_norm": 1.177142858505249, "learning_rate": 1.4759842904183787e-05, "loss": 0.5008, "step": 13233 }, { "epoch": 0.36190111572959965, "grad_norm": 1.2751948833465576, "learning_rate": 1.475906394964192e-05, "loss": 0.5239, "step": 13234 }, { "epoch": 0.3619284620433166, "grad_norm": 1.3652338981628418, "learning_rate": 1.475828495776654e-05, "loss": 0.5403, "step": 13235 }, { "epoch": 0.36195580835703345, "grad_norm": 1.3765054941177368, "learning_rate": 1.475750592856375e-05, "loss": 0.571, "step": 13236 }, { "epoch": 0.3619831546707504, "grad_norm": 1.1494587659835815, "learning_rate": 1.4756726862039666e-05, "loss": 0.537, "step": 13237 }, { "epoch": 0.3620105009844673, "grad_norm": 1.4641430377960205, "learning_rate": 1.47559477582004e-05, "loss": 0.5609, "step": 13238 }, { "epoch": 0.3620378472981842, "grad_norm": 1.5988519191741943, "learning_rate": 1.475516861705206e-05, "loss": 0.8283, "step": 13239 }, { "epoch": 0.3620651936119011, "grad_norm": 1.153756856918335, "learning_rate": 1.4754389438600765e-05, "loss": 0.5379, "step": 13240 }, { "epoch": 0.362092539925618, "grad_norm": 1.3529032468795776, "learning_rate": 1.4753610222852616e-05, "loss": 0.5858, "step": 13241 }, { "epoch": 0.36211988623933494, "grad_norm": 1.300844669342041, "learning_rate": 1.4752830969813737e-05, "loss": 0.5372, "step": 13242 }, { "epoch": 0.36214723255305187, "grad_norm": 1.4740455150604248, "learning_rate": 1.4752051679490235e-05, "loss": 0.4696, "step": 13243 }, { "epoch": 0.36217457886676874, "grad_norm": 1.6269954442977905, "learning_rate": 1.4751272351888226e-05, "loss": 0.4354, "step": 13244 }, { "epoch": 0.36220192518048566, "grad_norm": 1.2692590951919556, "learning_rate": 1.4750492987013822e-05, "loss": 0.5268, "step": 13245 }, { "epoch": 0.3622292714942026, "grad_norm": 1.2499792575836182, "learning_rate": 1.4749713584873137e-05, "loss": 0.5183, "step": 13246 }, { "epoch": 0.3622566178079195, "grad_norm": 1.469637393951416, "learning_rate": 1.4748934145472284e-05, "loss": 0.5546, "step": 13247 }, { "epoch": 0.3622839641216364, "grad_norm": 1.6334364414215088, "learning_rate": 1.4748154668817383e-05, "loss": 0.5608, "step": 13248 }, { "epoch": 0.3623113104353533, "grad_norm": 1.5969260931015015, "learning_rate": 1.4747375154914546e-05, "loss": 0.534, "step": 13249 }, { "epoch": 0.36233865674907023, "grad_norm": 1.2033236026763916, "learning_rate": 1.4746595603769882e-05, "loss": 0.4979, "step": 13250 }, { "epoch": 0.36236600306278716, "grad_norm": 1.19318687915802, "learning_rate": 1.4745816015389514e-05, "loss": 0.5096, "step": 13251 }, { "epoch": 0.362393349376504, "grad_norm": 1.2921172380447388, "learning_rate": 1.4745036389779555e-05, "loss": 0.4954, "step": 13252 }, { "epoch": 0.36242069569022095, "grad_norm": 1.1435648202896118, "learning_rate": 1.4744256726946118e-05, "loss": 0.4987, "step": 13253 }, { "epoch": 0.3624480420039379, "grad_norm": 1.23629629611969, "learning_rate": 1.4743477026895325e-05, "loss": 0.5502, "step": 13254 }, { "epoch": 0.3624753883176548, "grad_norm": 2.757986068725586, "learning_rate": 1.4742697289633288e-05, "loss": 0.4923, "step": 13255 }, { "epoch": 0.36250273463137167, "grad_norm": 1.4108527898788452, "learning_rate": 1.4741917515166128e-05, "loss": 0.571, "step": 13256 }, { "epoch": 0.3625300809450886, "grad_norm": 1.2872281074523926, "learning_rate": 1.4741137703499956e-05, "loss": 0.5519, "step": 13257 }, { "epoch": 0.3625574272588055, "grad_norm": 1.6354933977127075, "learning_rate": 1.4740357854640895e-05, "loss": 0.5635, "step": 13258 }, { "epoch": 0.36258477357252245, "grad_norm": 1.230944275856018, "learning_rate": 1.4739577968595061e-05, "loss": 0.5533, "step": 13259 }, { "epoch": 0.3626121198862393, "grad_norm": 2.5552499294281006, "learning_rate": 1.473879804536857e-05, "loss": 0.4001, "step": 13260 }, { "epoch": 0.36263946619995624, "grad_norm": 1.4117058515548706, "learning_rate": 1.4738018084967543e-05, "loss": 0.5258, "step": 13261 }, { "epoch": 0.36266681251367316, "grad_norm": 1.3742820024490356, "learning_rate": 1.47372380873981e-05, "loss": 0.5225, "step": 13262 }, { "epoch": 0.3626941588273901, "grad_norm": 1.3795099258422852, "learning_rate": 1.4736458052666354e-05, "loss": 0.5026, "step": 13263 }, { "epoch": 0.36272150514110696, "grad_norm": 1.2753195762634277, "learning_rate": 1.4735677980778429e-05, "loss": 0.5348, "step": 13264 }, { "epoch": 0.3627488514548239, "grad_norm": 1.474149227142334, "learning_rate": 1.4734897871740441e-05, "loss": 0.8179, "step": 13265 }, { "epoch": 0.3627761977685408, "grad_norm": 1.2689712047576904, "learning_rate": 1.4734117725558514e-05, "loss": 0.5304, "step": 13266 }, { "epoch": 0.36280354408225773, "grad_norm": 1.6073582172393799, "learning_rate": 1.4733337542238768e-05, "loss": 0.4736, "step": 13267 }, { "epoch": 0.3628308903959746, "grad_norm": 1.0040223598480225, "learning_rate": 1.4732557321787318e-05, "loss": 0.5788, "step": 13268 }, { "epoch": 0.36285823670969153, "grad_norm": 1.3061286211013794, "learning_rate": 1.4731777064210287e-05, "loss": 0.5117, "step": 13269 }, { "epoch": 0.36288558302340845, "grad_norm": 1.201388955116272, "learning_rate": 1.4730996769513797e-05, "loss": 0.5357, "step": 13270 }, { "epoch": 0.3629129293371254, "grad_norm": 1.3485277891159058, "learning_rate": 1.4730216437703968e-05, "loss": 0.5113, "step": 13271 }, { "epoch": 0.36294027565084225, "grad_norm": 1.2602571249008179, "learning_rate": 1.4729436068786922e-05, "loss": 0.5394, "step": 13272 }, { "epoch": 0.3629676219645592, "grad_norm": 1.273788571357727, "learning_rate": 1.4728655662768785e-05, "loss": 0.5329, "step": 13273 }, { "epoch": 0.3629949682782761, "grad_norm": 1.2878696918487549, "learning_rate": 1.4727875219655672e-05, "loss": 0.513, "step": 13274 }, { "epoch": 0.363022314591993, "grad_norm": 1.4971073865890503, "learning_rate": 1.4727094739453709e-05, "loss": 0.5462, "step": 13275 }, { "epoch": 0.3630496609057099, "grad_norm": 1.3760666847229004, "learning_rate": 1.4726314222169019e-05, "loss": 0.5432, "step": 13276 }, { "epoch": 0.3630770072194268, "grad_norm": 1.211092233657837, "learning_rate": 1.4725533667807721e-05, "loss": 0.5388, "step": 13277 }, { "epoch": 0.36310435353314374, "grad_norm": 1.1605674028396606, "learning_rate": 1.4724753076375944e-05, "loss": 0.5025, "step": 13278 }, { "epoch": 0.36313169984686067, "grad_norm": 1.3704378604888916, "learning_rate": 1.4723972447879806e-05, "loss": 0.5626, "step": 13279 }, { "epoch": 0.36315904616057754, "grad_norm": 1.1989076137542725, "learning_rate": 1.4723191782325436e-05, "loss": 0.521, "step": 13280 }, { "epoch": 0.36318639247429446, "grad_norm": 5.539313793182373, "learning_rate": 1.4722411079718956e-05, "loss": 0.3634, "step": 13281 }, { "epoch": 0.3632137387880114, "grad_norm": 1.2510795593261719, "learning_rate": 1.4721630340066488e-05, "loss": 0.5591, "step": 13282 }, { "epoch": 0.3632410851017283, "grad_norm": 1.228947401046753, "learning_rate": 1.4720849563374162e-05, "loss": 0.3806, "step": 13283 }, { "epoch": 0.3632684314154452, "grad_norm": 1.4843156337738037, "learning_rate": 1.47200687496481e-05, "loss": 0.5414, "step": 13284 }, { "epoch": 0.3632957777291621, "grad_norm": 1.529898762702942, "learning_rate": 1.4719287898894423e-05, "loss": 0.5685, "step": 13285 }, { "epoch": 0.36332312404287903, "grad_norm": 1.2506438493728638, "learning_rate": 1.4718507011119263e-05, "loss": 0.4918, "step": 13286 }, { "epoch": 0.36335047035659596, "grad_norm": 1.2479192018508911, "learning_rate": 1.4717726086328743e-05, "loss": 0.5466, "step": 13287 }, { "epoch": 0.3633778166703128, "grad_norm": 1.4533721208572388, "learning_rate": 1.4716945124528988e-05, "loss": 0.5627, "step": 13288 }, { "epoch": 0.36340516298402975, "grad_norm": 1.2618305683135986, "learning_rate": 1.4716164125726128e-05, "loss": 0.5535, "step": 13289 }, { "epoch": 0.3634325092977467, "grad_norm": 2.568725109100342, "learning_rate": 1.4715383089926288e-05, "loss": 0.5608, "step": 13290 }, { "epoch": 0.3634598556114636, "grad_norm": 1.4872390031814575, "learning_rate": 1.4714602017135594e-05, "loss": 0.5215, "step": 13291 }, { "epoch": 0.36348720192518047, "grad_norm": 1.222886323928833, "learning_rate": 1.4713820907360173e-05, "loss": 0.5244, "step": 13292 }, { "epoch": 0.3635145482388974, "grad_norm": 1.805993914604187, "learning_rate": 1.4713039760606154e-05, "loss": 0.5458, "step": 13293 }, { "epoch": 0.3635418945526143, "grad_norm": 1.1425020694732666, "learning_rate": 1.4712258576879666e-05, "loss": 0.5341, "step": 13294 }, { "epoch": 0.36356924086633124, "grad_norm": 1.053471326828003, "learning_rate": 1.4711477356186831e-05, "loss": 0.5097, "step": 13295 }, { "epoch": 0.3635965871800481, "grad_norm": 1.4257844686508179, "learning_rate": 1.4710696098533786e-05, "loss": 0.5133, "step": 13296 }, { "epoch": 0.36362393349376504, "grad_norm": 1.4312981367111206, "learning_rate": 1.4709914803926657e-05, "loss": 0.5429, "step": 13297 }, { "epoch": 0.36365127980748196, "grad_norm": 1.159329891204834, "learning_rate": 1.4709133472371569e-05, "loss": 0.5233, "step": 13298 }, { "epoch": 0.3636786261211989, "grad_norm": 1.1813546419143677, "learning_rate": 1.4708352103874657e-05, "loss": 0.5405, "step": 13299 }, { "epoch": 0.36370597243491576, "grad_norm": 1.2142293453216553, "learning_rate": 1.4707570698442044e-05, "loss": 0.5312, "step": 13300 }, { "epoch": 0.3637333187486327, "grad_norm": 1.121482253074646, "learning_rate": 1.4706789256079867e-05, "loss": 0.499, "step": 13301 }, { "epoch": 0.3637606650623496, "grad_norm": 1.5837348699569702, "learning_rate": 1.4706007776794252e-05, "loss": 0.439, "step": 13302 }, { "epoch": 0.36378801137606653, "grad_norm": 1.400369644165039, "learning_rate": 1.470522626059133e-05, "loss": 0.54, "step": 13303 }, { "epoch": 0.3638153576897834, "grad_norm": 1.3629919290542603, "learning_rate": 1.4704444707477232e-05, "loss": 0.5502, "step": 13304 }, { "epoch": 0.36384270400350033, "grad_norm": 1.4828543663024902, "learning_rate": 1.470366311745809e-05, "loss": 0.7983, "step": 13305 }, { "epoch": 0.36387005031721725, "grad_norm": 1.4358115196228027, "learning_rate": 1.4702881490540033e-05, "loss": 0.5315, "step": 13306 }, { "epoch": 0.3638973966309341, "grad_norm": 1.215984582901001, "learning_rate": 1.4702099826729195e-05, "loss": 0.5452, "step": 13307 }, { "epoch": 0.36392474294465105, "grad_norm": 1.3358862400054932, "learning_rate": 1.4701318126031708e-05, "loss": 0.3689, "step": 13308 }, { "epoch": 0.363952089258368, "grad_norm": 1.314008355140686, "learning_rate": 1.4700536388453702e-05, "loss": 0.5584, "step": 13309 }, { "epoch": 0.3639794355720849, "grad_norm": 1.1714280843734741, "learning_rate": 1.4699754614001312e-05, "loss": 0.5444, "step": 13310 }, { "epoch": 0.36400678188580177, "grad_norm": 1.5206252336502075, "learning_rate": 1.4698972802680668e-05, "loss": 0.5187, "step": 13311 }, { "epoch": 0.3640341281995187, "grad_norm": 1.3393449783325195, "learning_rate": 1.4698190954497906e-05, "loss": 0.5439, "step": 13312 }, { "epoch": 0.3640614745132356, "grad_norm": 1.5357877016067505, "learning_rate": 1.4697409069459159e-05, "loss": 0.5644, "step": 13313 }, { "epoch": 0.36408882082695254, "grad_norm": 1.0154896974563599, "learning_rate": 1.469662714757056e-05, "loss": 0.5419, "step": 13314 }, { "epoch": 0.3641161671406694, "grad_norm": 1.3475984334945679, "learning_rate": 1.4695845188838237e-05, "loss": 0.4812, "step": 13315 }, { "epoch": 0.36414351345438634, "grad_norm": 1.263919711112976, "learning_rate": 1.4695063193268337e-05, "loss": 0.8671, "step": 13316 }, { "epoch": 0.36417085976810326, "grad_norm": 1.4073636531829834, "learning_rate": 1.4694281160866985e-05, "loss": 0.5396, "step": 13317 }, { "epoch": 0.3641982060818202, "grad_norm": 1.4169330596923828, "learning_rate": 1.469349909164032e-05, "loss": 0.5477, "step": 13318 }, { "epoch": 0.36422555239553706, "grad_norm": 1.2414714097976685, "learning_rate": 1.4692716985594476e-05, "loss": 0.5462, "step": 13319 }, { "epoch": 0.364252898709254, "grad_norm": 1.0378776788711548, "learning_rate": 1.4691934842735585e-05, "loss": 0.5327, "step": 13320 }, { "epoch": 0.3642802450229709, "grad_norm": 1.344249963760376, "learning_rate": 1.4691152663069788e-05, "loss": 0.4133, "step": 13321 }, { "epoch": 0.36430759133668783, "grad_norm": 1.3141460418701172, "learning_rate": 1.4690370446603214e-05, "loss": 0.5255, "step": 13322 }, { "epoch": 0.3643349376504047, "grad_norm": 1.5857316255569458, "learning_rate": 1.4689588193342009e-05, "loss": 0.5343, "step": 13323 }, { "epoch": 0.3643622839641216, "grad_norm": 1.18534517288208, "learning_rate": 1.4688805903292302e-05, "loss": 0.4679, "step": 13324 }, { "epoch": 0.36438963027783855, "grad_norm": 1.2882181406021118, "learning_rate": 1.4688023576460233e-05, "loss": 0.8344, "step": 13325 }, { "epoch": 0.3644169765915555, "grad_norm": 1.3030184507369995, "learning_rate": 1.4687241212851935e-05, "loss": 0.5434, "step": 13326 }, { "epoch": 0.36444432290527234, "grad_norm": 1.1970964670181274, "learning_rate": 1.4686458812473553e-05, "loss": 0.3991, "step": 13327 }, { "epoch": 0.36447166921898927, "grad_norm": 1.2102551460266113, "learning_rate": 1.4685676375331219e-05, "loss": 0.545, "step": 13328 }, { "epoch": 0.3644990155327062, "grad_norm": 1.272351861000061, "learning_rate": 1.4684893901431075e-05, "loss": 0.4926, "step": 13329 }, { "epoch": 0.3645263618464231, "grad_norm": 1.315417766571045, "learning_rate": 1.4684111390779252e-05, "loss": 0.5295, "step": 13330 }, { "epoch": 0.36455370816014, "grad_norm": 1.7941762208938599, "learning_rate": 1.4683328843381897e-05, "loss": 0.5368, "step": 13331 }, { "epoch": 0.3645810544738569, "grad_norm": 1.4976987838745117, "learning_rate": 1.4682546259245145e-05, "loss": 0.5235, "step": 13332 }, { "epoch": 0.36460840078757384, "grad_norm": 1.2529587745666504, "learning_rate": 1.4681763638375133e-05, "loss": 0.4663, "step": 13333 }, { "epoch": 0.36463574710129076, "grad_norm": 1.1400790214538574, "learning_rate": 1.4680980980778005e-05, "loss": 0.5043, "step": 13334 }, { "epoch": 0.36466309341500763, "grad_norm": 1.4055449962615967, "learning_rate": 1.4680198286459897e-05, "loss": 0.5459, "step": 13335 }, { "epoch": 0.36469043972872456, "grad_norm": 1.2193986177444458, "learning_rate": 1.4679415555426952e-05, "loss": 0.3735, "step": 13336 }, { "epoch": 0.3647177860424415, "grad_norm": 1.438868522644043, "learning_rate": 1.4678632787685309e-05, "loss": 0.4445, "step": 13337 }, { "epoch": 0.3647451323561584, "grad_norm": 1.4624823331832886, "learning_rate": 1.4677849983241108e-05, "loss": 0.5353, "step": 13338 }, { "epoch": 0.3647724786698753, "grad_norm": 1.2335140705108643, "learning_rate": 1.467706714210049e-05, "loss": 0.5428, "step": 13339 }, { "epoch": 0.3647998249835922, "grad_norm": 1.5296220779418945, "learning_rate": 1.4676284264269597e-05, "loss": 0.4852, "step": 13340 }, { "epoch": 0.3648271712973091, "grad_norm": 1.4799916744232178, "learning_rate": 1.467550134975457e-05, "loss": 0.5608, "step": 13341 }, { "epoch": 0.36485451761102605, "grad_norm": 1.404212236404419, "learning_rate": 1.467471839856155e-05, "loss": 0.8636, "step": 13342 }, { "epoch": 0.3648818639247429, "grad_norm": 1.2428356409072876, "learning_rate": 1.467393541069668e-05, "loss": 0.5196, "step": 13343 }, { "epoch": 0.36490921023845985, "grad_norm": 1.285283088684082, "learning_rate": 1.4673152386166101e-05, "loss": 0.5323, "step": 13344 }, { "epoch": 0.36493655655217677, "grad_norm": 1.2445849180221558, "learning_rate": 1.467236932497596e-05, "loss": 0.5345, "step": 13345 }, { "epoch": 0.3649639028658937, "grad_norm": 1.7039670944213867, "learning_rate": 1.4671586227132393e-05, "loss": 0.5425, "step": 13346 }, { "epoch": 0.36499124917961057, "grad_norm": 1.472440481185913, "learning_rate": 1.4670803092641549e-05, "loss": 0.5218, "step": 13347 }, { "epoch": 0.3650185954933275, "grad_norm": 1.2337008714675903, "learning_rate": 1.4670019921509568e-05, "loss": 0.494, "step": 13348 }, { "epoch": 0.3650459418070444, "grad_norm": 1.549014687538147, "learning_rate": 1.4669236713742594e-05, "loss": 0.5345, "step": 13349 }, { "epoch": 0.36507328812076134, "grad_norm": 1.8301044702529907, "learning_rate": 1.4668453469346774e-05, "loss": 0.5609, "step": 13350 }, { "epoch": 0.3651006344344782, "grad_norm": 1.3042088747024536, "learning_rate": 1.466767018832825e-05, "loss": 0.5126, "step": 13351 }, { "epoch": 0.36512798074819514, "grad_norm": 1.4019347429275513, "learning_rate": 1.4666886870693164e-05, "loss": 0.5541, "step": 13352 }, { "epoch": 0.36515532706191206, "grad_norm": 2.1054446697235107, "learning_rate": 1.466610351644767e-05, "loss": 0.8213, "step": 13353 }, { "epoch": 0.365182673375629, "grad_norm": 1.1714732646942139, "learning_rate": 1.4665320125597905e-05, "loss": 0.8245, "step": 13354 }, { "epoch": 0.36521001968934586, "grad_norm": 1.3725820779800415, "learning_rate": 1.4664536698150014e-05, "loss": 0.4514, "step": 13355 }, { "epoch": 0.3652373660030628, "grad_norm": 1.1934095621109009, "learning_rate": 1.4663753234110147e-05, "loss": 0.3875, "step": 13356 }, { "epoch": 0.3652647123167797, "grad_norm": 1.324174404144287, "learning_rate": 1.4662969733484447e-05, "loss": 0.5483, "step": 13357 }, { "epoch": 0.36529205863049663, "grad_norm": 1.3579782247543335, "learning_rate": 1.466218619627906e-05, "loss": 0.5489, "step": 13358 }, { "epoch": 0.3653194049442135, "grad_norm": 1.389170527458191, "learning_rate": 1.4661402622500138e-05, "loss": 0.4712, "step": 13359 }, { "epoch": 0.3653467512579304, "grad_norm": 1.3852592706680298, "learning_rate": 1.4660619012153824e-05, "loss": 0.4559, "step": 13360 }, { "epoch": 0.36537409757164735, "grad_norm": 1.1932734251022339, "learning_rate": 1.4659835365246262e-05, "loss": 0.5327, "step": 13361 }, { "epoch": 0.3654014438853643, "grad_norm": 1.468281865119934, "learning_rate": 1.4659051681783607e-05, "loss": 0.4729, "step": 13362 }, { "epoch": 0.36542879019908114, "grad_norm": 1.151124119758606, "learning_rate": 1.4658267961772001e-05, "loss": 0.546, "step": 13363 }, { "epoch": 0.36545613651279807, "grad_norm": 1.2123769521713257, "learning_rate": 1.4657484205217595e-05, "loss": 0.5202, "step": 13364 }, { "epoch": 0.365483482826515, "grad_norm": 1.4704240560531616, "learning_rate": 1.4656700412126535e-05, "loss": 0.497, "step": 13365 }, { "epoch": 0.3655108291402319, "grad_norm": 1.3569536209106445, "learning_rate": 1.465591658250497e-05, "loss": 0.5038, "step": 13366 }, { "epoch": 0.3655381754539488, "grad_norm": 1.1691893339157104, "learning_rate": 1.465513271635905e-05, "loss": 0.5277, "step": 13367 }, { "epoch": 0.3655655217676657, "grad_norm": 1.471554160118103, "learning_rate": 1.4654348813694926e-05, "loss": 0.5414, "step": 13368 }, { "epoch": 0.36559286808138264, "grad_norm": 1.2945408821105957, "learning_rate": 1.4653564874518742e-05, "loss": 0.5361, "step": 13369 }, { "epoch": 0.36562021439509956, "grad_norm": 1.151403784751892, "learning_rate": 1.4652780898836656e-05, "loss": 0.3567, "step": 13370 }, { "epoch": 0.36564756070881643, "grad_norm": 1.1674302816390991, "learning_rate": 1.4651996886654811e-05, "loss": 0.5256, "step": 13371 }, { "epoch": 0.36567490702253336, "grad_norm": 1.663696050643921, "learning_rate": 1.4651212837979358e-05, "loss": 0.5194, "step": 13372 }, { "epoch": 0.3657022533362503, "grad_norm": 1.2623693943023682, "learning_rate": 1.465042875281645e-05, "loss": 0.5252, "step": 13373 }, { "epoch": 0.3657295996499672, "grad_norm": 1.4622279405593872, "learning_rate": 1.4649644631172237e-05, "loss": 0.5294, "step": 13374 }, { "epoch": 0.3657569459636841, "grad_norm": 1.4205001592636108, "learning_rate": 1.4648860473052873e-05, "loss": 0.8839, "step": 13375 }, { "epoch": 0.365784292277401, "grad_norm": 1.5746318101882935, "learning_rate": 1.4648076278464505e-05, "loss": 0.527, "step": 13376 }, { "epoch": 0.3658116385911179, "grad_norm": 1.805288553237915, "learning_rate": 1.4647292047413283e-05, "loss": 0.4948, "step": 13377 }, { "epoch": 0.36583898490483485, "grad_norm": 1.5044658184051514, "learning_rate": 1.4646507779905368e-05, "loss": 0.5245, "step": 13378 }, { "epoch": 0.3658663312185517, "grad_norm": 1.3463256359100342, "learning_rate": 1.4645723475946904e-05, "loss": 0.8579, "step": 13379 }, { "epoch": 0.36589367753226865, "grad_norm": 1.2478071451187134, "learning_rate": 1.4644939135544048e-05, "loss": 0.4199, "step": 13380 }, { "epoch": 0.36592102384598557, "grad_norm": 1.2630311250686646, "learning_rate": 1.4644154758702953e-05, "loss": 0.4851, "step": 13381 }, { "epoch": 0.3659483701597025, "grad_norm": 1.326133131980896, "learning_rate": 1.4643370345429769e-05, "loss": 0.5536, "step": 13382 }, { "epoch": 0.36597571647341937, "grad_norm": 1.3268649578094482, "learning_rate": 1.4642585895730652e-05, "loss": 0.5544, "step": 13383 }, { "epoch": 0.3660030627871363, "grad_norm": 1.4235665798187256, "learning_rate": 1.4641801409611756e-05, "loss": 0.5115, "step": 13384 }, { "epoch": 0.3660304091008532, "grad_norm": 1.5273768901824951, "learning_rate": 1.4641016887079232e-05, "loss": 0.8333, "step": 13385 }, { "epoch": 0.36605775541457014, "grad_norm": 1.1718512773513794, "learning_rate": 1.4640232328139239e-05, "loss": 0.5474, "step": 13386 }, { "epoch": 0.366085101728287, "grad_norm": 1.2984758615493774, "learning_rate": 1.4639447732797926e-05, "loss": 0.5258, "step": 13387 }, { "epoch": 0.36611244804200394, "grad_norm": 1.1539382934570312, "learning_rate": 1.4638663101061457e-05, "loss": 0.8141, "step": 13388 }, { "epoch": 0.36613979435572086, "grad_norm": 1.1891038417816162, "learning_rate": 1.4637878432935977e-05, "loss": 0.5345, "step": 13389 }, { "epoch": 0.3661671406694378, "grad_norm": 1.2479082345962524, "learning_rate": 1.4637093728427647e-05, "loss": 0.5555, "step": 13390 }, { "epoch": 0.36619448698315465, "grad_norm": 1.2525551319122314, "learning_rate": 1.463630898754262e-05, "loss": 0.5175, "step": 13391 }, { "epoch": 0.3662218332968716, "grad_norm": 1.185484766960144, "learning_rate": 1.4635524210287057e-05, "loss": 0.5408, "step": 13392 }, { "epoch": 0.3662491796105885, "grad_norm": 1.2894341945648193, "learning_rate": 1.4634739396667109e-05, "loss": 0.4652, "step": 13393 }, { "epoch": 0.36627652592430543, "grad_norm": 1.1957731246948242, "learning_rate": 1.4633954546688935e-05, "loss": 0.5361, "step": 13394 }, { "epoch": 0.3663038722380223, "grad_norm": 1.439510703086853, "learning_rate": 1.4633169660358693e-05, "loss": 0.5103, "step": 13395 }, { "epoch": 0.3663312185517392, "grad_norm": 1.0917854309082031, "learning_rate": 1.4632384737682537e-05, "loss": 0.524, "step": 13396 }, { "epoch": 0.36635856486545615, "grad_norm": 1.174544334411621, "learning_rate": 1.4631599778666627e-05, "loss": 0.548, "step": 13397 }, { "epoch": 0.3663859111791731, "grad_norm": 1.5622812509536743, "learning_rate": 1.463081478331712e-05, "loss": 0.5323, "step": 13398 }, { "epoch": 0.36641325749288994, "grad_norm": 1.2079373598098755, "learning_rate": 1.4630029751640176e-05, "loss": 0.5251, "step": 13399 }, { "epoch": 0.36644060380660687, "grad_norm": 1.272950530052185, "learning_rate": 1.4629244683641948e-05, "loss": 0.4994, "step": 13400 }, { "epoch": 0.3664679501203238, "grad_norm": 1.2319698333740234, "learning_rate": 1.46284595793286e-05, "loss": 0.5115, "step": 13401 }, { "epoch": 0.3664952964340407, "grad_norm": 1.2567765712738037, "learning_rate": 1.4627674438706291e-05, "loss": 0.5579, "step": 13402 }, { "epoch": 0.3665226427477576, "grad_norm": 1.8097960948944092, "learning_rate": 1.4626889261781176e-05, "loss": 0.4685, "step": 13403 }, { "epoch": 0.3665499890614745, "grad_norm": 1.1397675275802612, "learning_rate": 1.4626104048559418e-05, "loss": 0.5125, "step": 13404 }, { "epoch": 0.36657733537519144, "grad_norm": 1.2743340730667114, "learning_rate": 1.4625318799047175e-05, "loss": 0.448, "step": 13405 }, { "epoch": 0.36660468168890836, "grad_norm": 1.3622592687606812, "learning_rate": 1.4624533513250608e-05, "loss": 0.866, "step": 13406 }, { "epoch": 0.36663202800262523, "grad_norm": 1.3690006732940674, "learning_rate": 1.462374819117588e-05, "loss": 0.5255, "step": 13407 }, { "epoch": 0.36665937431634216, "grad_norm": 1.7598180770874023, "learning_rate": 1.4622962832829146e-05, "loss": 0.8324, "step": 13408 }, { "epoch": 0.3666867206300591, "grad_norm": 1.922206163406372, "learning_rate": 1.4622177438216567e-05, "loss": 0.4023, "step": 13409 }, { "epoch": 0.36671406694377595, "grad_norm": 1.3440556526184082, "learning_rate": 1.4621392007344311e-05, "loss": 0.5622, "step": 13410 }, { "epoch": 0.3667414132574929, "grad_norm": 1.313981533050537, "learning_rate": 1.4620606540218533e-05, "loss": 0.5406, "step": 13411 }, { "epoch": 0.3667687595712098, "grad_norm": 1.938601016998291, "learning_rate": 1.4619821036845397e-05, "loss": 0.5475, "step": 13412 }, { "epoch": 0.3667961058849267, "grad_norm": 1.1839699745178223, "learning_rate": 1.4619035497231067e-05, "loss": 0.5218, "step": 13413 }, { "epoch": 0.3668234521986436, "grad_norm": 1.3521836996078491, "learning_rate": 1.46182499213817e-05, "loss": 0.4797, "step": 13414 }, { "epoch": 0.3668507985123605, "grad_norm": 1.3702424764633179, "learning_rate": 1.4617464309303464e-05, "loss": 0.5432, "step": 13415 }, { "epoch": 0.36687814482607745, "grad_norm": 1.5167559385299683, "learning_rate": 1.4616678661002521e-05, "loss": 0.5308, "step": 13416 }, { "epoch": 0.36690549113979437, "grad_norm": 1.2817542552947998, "learning_rate": 1.4615892976485033e-05, "loss": 0.4606, "step": 13417 }, { "epoch": 0.36693283745351124, "grad_norm": 1.1744399070739746, "learning_rate": 1.4615107255757164e-05, "loss": 0.4328, "step": 13418 }, { "epoch": 0.36696018376722817, "grad_norm": 2.1423821449279785, "learning_rate": 1.4614321498825074e-05, "loss": 0.4747, "step": 13419 }, { "epoch": 0.3669875300809451, "grad_norm": 1.4210844039916992, "learning_rate": 1.4613535705694933e-05, "loss": 0.5393, "step": 13420 }, { "epoch": 0.367014876394662, "grad_norm": 1.4293216466903687, "learning_rate": 1.4612749876372902e-05, "loss": 0.5502, "step": 13421 }, { "epoch": 0.3670422227083789, "grad_norm": 1.9125125408172607, "learning_rate": 1.4611964010865147e-05, "loss": 0.5275, "step": 13422 }, { "epoch": 0.3670695690220958, "grad_norm": 1.614914894104004, "learning_rate": 1.461117810917783e-05, "loss": 0.5285, "step": 13423 }, { "epoch": 0.36709691533581273, "grad_norm": 1.2701666355133057, "learning_rate": 1.4610392171317124e-05, "loss": 0.5275, "step": 13424 }, { "epoch": 0.36712426164952966, "grad_norm": 1.2594921588897705, "learning_rate": 1.4609606197289186e-05, "loss": 0.5621, "step": 13425 }, { "epoch": 0.36715160796324653, "grad_norm": 5.876875877380371, "learning_rate": 1.4608820187100184e-05, "loss": 0.8738, "step": 13426 }, { "epoch": 0.36717895427696345, "grad_norm": 1.2807917594909668, "learning_rate": 1.4608034140756284e-05, "loss": 0.5576, "step": 13427 }, { "epoch": 0.3672063005906804, "grad_norm": 1.2892476320266724, "learning_rate": 1.4607248058263652e-05, "loss": 0.5597, "step": 13428 }, { "epoch": 0.3672336469043973, "grad_norm": 1.0180567502975464, "learning_rate": 1.460646193962846e-05, "loss": 0.3773, "step": 13429 }, { "epoch": 0.3672609932181142, "grad_norm": 1.3043243885040283, "learning_rate": 1.4605675784856868e-05, "loss": 0.5275, "step": 13430 }, { "epoch": 0.3672883395318311, "grad_norm": 1.3811845779418945, "learning_rate": 1.4604889593955044e-05, "loss": 0.5461, "step": 13431 }, { "epoch": 0.367315685845548, "grad_norm": 1.2726802825927734, "learning_rate": 1.4604103366929158e-05, "loss": 0.8249, "step": 13432 }, { "epoch": 0.36734303215926495, "grad_norm": 1.1783981323242188, "learning_rate": 1.4603317103785378e-05, "loss": 0.5518, "step": 13433 }, { "epoch": 0.3673703784729818, "grad_norm": 1.3557813167572021, "learning_rate": 1.4602530804529869e-05, "loss": 0.83, "step": 13434 }, { "epoch": 0.36739772478669874, "grad_norm": 1.6837413311004639, "learning_rate": 1.4601744469168802e-05, "loss": 0.5272, "step": 13435 }, { "epoch": 0.36742507110041567, "grad_norm": 1.1690382957458496, "learning_rate": 1.4600958097708344e-05, "loss": 0.5481, "step": 13436 }, { "epoch": 0.3674524174141326, "grad_norm": 1.1343415975570679, "learning_rate": 1.4600171690154668e-05, "loss": 0.5363, "step": 13437 }, { "epoch": 0.36747976372784946, "grad_norm": 1.3925858736038208, "learning_rate": 1.4599385246513935e-05, "loss": 0.5306, "step": 13438 }, { "epoch": 0.3675071100415664, "grad_norm": 1.2474099397659302, "learning_rate": 1.459859876679232e-05, "loss": 0.3936, "step": 13439 }, { "epoch": 0.3675344563552833, "grad_norm": 1.1178492307662964, "learning_rate": 1.4597812250995995e-05, "loss": 0.5439, "step": 13440 }, { "epoch": 0.36756180266900024, "grad_norm": 1.1676944494247437, "learning_rate": 1.4597025699131123e-05, "loss": 0.5594, "step": 13441 }, { "epoch": 0.3675891489827171, "grad_norm": 1.373402714729309, "learning_rate": 1.459623911120388e-05, "loss": 0.4784, "step": 13442 }, { "epoch": 0.36761649529643403, "grad_norm": 1.1768479347229004, "learning_rate": 1.4595452487220436e-05, "loss": 0.5186, "step": 13443 }, { "epoch": 0.36764384161015096, "grad_norm": 1.8013083934783936, "learning_rate": 1.459466582718696e-05, "loss": 0.5791, "step": 13444 }, { "epoch": 0.3676711879238679, "grad_norm": 1.427910327911377, "learning_rate": 1.4593879131109621e-05, "loss": 0.4971, "step": 13445 }, { "epoch": 0.36769853423758475, "grad_norm": 1.323934555053711, "learning_rate": 1.4593092398994594e-05, "loss": 0.521, "step": 13446 }, { "epoch": 0.3677258805513017, "grad_norm": 1.6847561597824097, "learning_rate": 1.4592305630848051e-05, "loss": 0.5443, "step": 13447 }, { "epoch": 0.3677532268650186, "grad_norm": 1.6201204061508179, "learning_rate": 1.4591518826676164e-05, "loss": 0.4898, "step": 13448 }, { "epoch": 0.3677805731787355, "grad_norm": 1.0969748497009277, "learning_rate": 1.45907319864851e-05, "loss": 0.5007, "step": 13449 }, { "epoch": 0.3678079194924524, "grad_norm": 1.349812626838684, "learning_rate": 1.4589945110281036e-05, "loss": 0.8277, "step": 13450 }, { "epoch": 0.3678352658061693, "grad_norm": 1.3482141494750977, "learning_rate": 1.4589158198070148e-05, "loss": 0.5514, "step": 13451 }, { "epoch": 0.36786261211988625, "grad_norm": 2.2808215618133545, "learning_rate": 1.4588371249858604e-05, "loss": 0.8589, "step": 13452 }, { "epoch": 0.36788995843360317, "grad_norm": 2.4891233444213867, "learning_rate": 1.4587584265652579e-05, "loss": 0.4833, "step": 13453 }, { "epoch": 0.36791730474732004, "grad_norm": 1.2726902961730957, "learning_rate": 1.4586797245458245e-05, "loss": 0.5305, "step": 13454 }, { "epoch": 0.36794465106103696, "grad_norm": 1.287203311920166, "learning_rate": 1.4586010189281777e-05, "loss": 0.5323, "step": 13455 }, { "epoch": 0.3679719973747539, "grad_norm": 1.6119747161865234, "learning_rate": 1.4585223097129353e-05, "loss": 0.5513, "step": 13456 }, { "epoch": 0.3679993436884708, "grad_norm": 2.364766836166382, "learning_rate": 1.458443596900714e-05, "loss": 0.8512, "step": 13457 }, { "epoch": 0.3680266900021877, "grad_norm": 1.6904629468917847, "learning_rate": 1.4583648804921317e-05, "loss": 0.8646, "step": 13458 }, { "epoch": 0.3680540363159046, "grad_norm": 1.3038482666015625, "learning_rate": 1.4582861604878065e-05, "loss": 0.5154, "step": 13459 }, { "epoch": 0.36808138262962153, "grad_norm": 5.400272846221924, "learning_rate": 1.458207436888355e-05, "loss": 0.4098, "step": 13460 }, { "epoch": 0.36810872894333846, "grad_norm": 1.2373616695404053, "learning_rate": 1.458128709694395e-05, "loss": 0.8546, "step": 13461 }, { "epoch": 0.36813607525705533, "grad_norm": 1.3357470035552979, "learning_rate": 1.4580499789065444e-05, "loss": 0.5332, "step": 13462 }, { "epoch": 0.36816342157077225, "grad_norm": 1.737245798110962, "learning_rate": 1.4579712445254204e-05, "loss": 0.5124, "step": 13463 }, { "epoch": 0.3681907678844892, "grad_norm": 1.6365269422531128, "learning_rate": 1.457892506551641e-05, "loss": 0.4788, "step": 13464 }, { "epoch": 0.3682181141982061, "grad_norm": 1.326378583908081, "learning_rate": 1.4578137649858236e-05, "loss": 0.5174, "step": 13465 }, { "epoch": 0.368245460511923, "grad_norm": 1.1400545835494995, "learning_rate": 1.457735019828586e-05, "loss": 0.5233, "step": 13466 }, { "epoch": 0.3682728068256399, "grad_norm": 1.5344557762145996, "learning_rate": 1.4576562710805465e-05, "loss": 0.5459, "step": 13467 }, { "epoch": 0.3683001531393568, "grad_norm": 1.1541173458099365, "learning_rate": 1.4575775187423218e-05, "loss": 0.4909, "step": 13468 }, { "epoch": 0.36832749945307375, "grad_norm": 1.1521819829940796, "learning_rate": 1.4574987628145303e-05, "loss": 0.5408, "step": 13469 }, { "epoch": 0.3683548457667906, "grad_norm": 1.3039833307266235, "learning_rate": 1.4574200032977902e-05, "loss": 0.5574, "step": 13470 }, { "epoch": 0.36838219208050754, "grad_norm": 1.529878854751587, "learning_rate": 1.4573412401927182e-05, "loss": 0.4404, "step": 13471 }, { "epoch": 0.36840953839422447, "grad_norm": 1.4839032888412476, "learning_rate": 1.4572624734999337e-05, "loss": 0.5413, "step": 13472 }, { "epoch": 0.3684368847079414, "grad_norm": 1.315271258354187, "learning_rate": 1.4571837032200531e-05, "loss": 0.4361, "step": 13473 }, { "epoch": 0.36846423102165826, "grad_norm": 1.3868975639343262, "learning_rate": 1.4571049293536951e-05, "loss": 0.5405, "step": 13474 }, { "epoch": 0.3684915773353752, "grad_norm": 1.883335828781128, "learning_rate": 1.4570261519014778e-05, "loss": 0.8694, "step": 13475 }, { "epoch": 0.3685189236490921, "grad_norm": 1.7845276594161987, "learning_rate": 1.4569473708640189e-05, "loss": 0.4753, "step": 13476 }, { "epoch": 0.36854626996280904, "grad_norm": 1.3044910430908203, "learning_rate": 1.4568685862419367e-05, "loss": 0.888, "step": 13477 }, { "epoch": 0.3685736162765259, "grad_norm": 1.3291946649551392, "learning_rate": 1.4567897980358488e-05, "loss": 0.5252, "step": 13478 }, { "epoch": 0.36860096259024283, "grad_norm": 1.2580981254577637, "learning_rate": 1.4567110062463734e-05, "loss": 0.5412, "step": 13479 }, { "epoch": 0.36862830890395976, "grad_norm": 1.4066327810287476, "learning_rate": 1.4566322108741287e-05, "loss": 0.5682, "step": 13480 }, { "epoch": 0.3686556552176767, "grad_norm": 1.2964318990707397, "learning_rate": 1.456553411919733e-05, "loss": 0.5587, "step": 13481 }, { "epoch": 0.36868300153139355, "grad_norm": 1.3769501447677612, "learning_rate": 1.456474609383804e-05, "loss": 0.5425, "step": 13482 }, { "epoch": 0.3687103478451105, "grad_norm": 1.471191644668579, "learning_rate": 1.4563958032669603e-05, "loss": 0.8495, "step": 13483 }, { "epoch": 0.3687376941588274, "grad_norm": 1.4753174781799316, "learning_rate": 1.45631699356982e-05, "loss": 0.5957, "step": 13484 }, { "epoch": 0.3687650404725443, "grad_norm": 1.2105015516281128, "learning_rate": 1.4562381802930013e-05, "loss": 0.5363, "step": 13485 }, { "epoch": 0.3687923867862612, "grad_norm": 1.189871907234192, "learning_rate": 1.4561593634371223e-05, "loss": 0.5073, "step": 13486 }, { "epoch": 0.3688197330999781, "grad_norm": 1.8198078870773315, "learning_rate": 1.4560805430028016e-05, "loss": 0.5087, "step": 13487 }, { "epoch": 0.36884707941369504, "grad_norm": 1.153203010559082, "learning_rate": 1.4560017189906572e-05, "loss": 0.4385, "step": 13488 }, { "epoch": 0.36887442572741197, "grad_norm": 1.358088493347168, "learning_rate": 1.4559228914013082e-05, "loss": 0.5329, "step": 13489 }, { "epoch": 0.36890177204112884, "grad_norm": 1.1084263324737549, "learning_rate": 1.4558440602353718e-05, "loss": 0.5328, "step": 13490 }, { "epoch": 0.36892911835484576, "grad_norm": 1.1167336702346802, "learning_rate": 1.4557652254934676e-05, "loss": 0.5062, "step": 13491 }, { "epoch": 0.3689564646685627, "grad_norm": 1.4583417177200317, "learning_rate": 1.455686387176213e-05, "loss": 0.5141, "step": 13492 }, { "epoch": 0.3689838109822796, "grad_norm": 1.1477749347686768, "learning_rate": 1.4556075452842268e-05, "loss": 0.528, "step": 13493 }, { "epoch": 0.3690111572959965, "grad_norm": 1.2151775360107422, "learning_rate": 1.4555286998181282e-05, "loss": 0.5476, "step": 13494 }, { "epoch": 0.3690385036097134, "grad_norm": 1.834763765335083, "learning_rate": 1.4554498507785348e-05, "loss": 0.8522, "step": 13495 }, { "epoch": 0.36906584992343033, "grad_norm": 1.2384800910949707, "learning_rate": 1.4553709981660656e-05, "loss": 0.5079, "step": 13496 }, { "epoch": 0.36909319623714726, "grad_norm": 1.2752596139907837, "learning_rate": 1.4552921419813393e-05, "loss": 0.8765, "step": 13497 }, { "epoch": 0.36912054255086413, "grad_norm": 1.1004387140274048, "learning_rate": 1.4552132822249739e-05, "loss": 0.5173, "step": 13498 }, { "epoch": 0.36914788886458105, "grad_norm": 1.186317801475525, "learning_rate": 1.4551344188975884e-05, "loss": 0.5156, "step": 13499 }, { "epoch": 0.369175235178298, "grad_norm": 1.8717185258865356, "learning_rate": 1.4550555519998015e-05, "loss": 0.5819, "step": 13500 }, { "epoch": 0.3692025814920149, "grad_norm": 1.4301481246948242, "learning_rate": 1.4549766815322319e-05, "loss": 0.4318, "step": 13501 }, { "epoch": 0.3692299278057318, "grad_norm": 1.7314776182174683, "learning_rate": 1.4548978074954985e-05, "loss": 0.5293, "step": 13502 }, { "epoch": 0.3692572741194487, "grad_norm": 1.3819787502288818, "learning_rate": 1.4548189298902195e-05, "loss": 0.8729, "step": 13503 }, { "epoch": 0.3692846204331656, "grad_norm": 1.3237961530685425, "learning_rate": 1.4547400487170143e-05, "loss": 0.5348, "step": 13504 }, { "epoch": 0.36931196674688255, "grad_norm": 1.357985019683838, "learning_rate": 1.454661163976501e-05, "loss": 0.5378, "step": 13505 }, { "epoch": 0.3693393130605994, "grad_norm": 1.5863901376724243, "learning_rate": 1.4545822756692991e-05, "loss": 0.8531, "step": 13506 }, { "epoch": 0.36936665937431634, "grad_norm": 1.162940502166748, "learning_rate": 1.4545033837960271e-05, "loss": 0.5376, "step": 13507 }, { "epoch": 0.36939400568803327, "grad_norm": 1.1499788761138916, "learning_rate": 1.454424488357304e-05, "loss": 0.5305, "step": 13508 }, { "epoch": 0.3694213520017502, "grad_norm": 1.032070279121399, "learning_rate": 1.4543455893537486e-05, "loss": 0.5312, "step": 13509 }, { "epoch": 0.36944869831546706, "grad_norm": 1.619960069656372, "learning_rate": 1.4542666867859803e-05, "loss": 0.5599, "step": 13510 }, { "epoch": 0.369476044629184, "grad_norm": 1.2461049556732178, "learning_rate": 1.4541877806546173e-05, "loss": 0.5151, "step": 13511 }, { "epoch": 0.3695033909429009, "grad_norm": 1.3098934888839722, "learning_rate": 1.454108870960279e-05, "loss": 0.4393, "step": 13512 }, { "epoch": 0.3695307372566178, "grad_norm": 1.4448922872543335, "learning_rate": 1.4540299577035846e-05, "loss": 0.5296, "step": 13513 }, { "epoch": 0.3695580835703347, "grad_norm": 1.0960395336151123, "learning_rate": 1.4539510408851527e-05, "loss": 0.5378, "step": 13514 }, { "epoch": 0.36958542988405163, "grad_norm": 1.1812511682510376, "learning_rate": 1.4538721205056025e-05, "loss": 0.5358, "step": 13515 }, { "epoch": 0.36961277619776856, "grad_norm": 1.416316032409668, "learning_rate": 1.4537931965655536e-05, "loss": 0.5347, "step": 13516 }, { "epoch": 0.3696401225114854, "grad_norm": 1.608964204788208, "learning_rate": 1.4537142690656247e-05, "loss": 0.4279, "step": 13517 }, { "epoch": 0.36966746882520235, "grad_norm": 1.2598344087600708, "learning_rate": 1.4536353380064353e-05, "loss": 0.4924, "step": 13518 }, { "epoch": 0.3696948151389193, "grad_norm": 1.1942285299301147, "learning_rate": 1.4535564033886038e-05, "loss": 0.5971, "step": 13519 }, { "epoch": 0.3697221614526362, "grad_norm": 1.3179432153701782, "learning_rate": 1.45347746521275e-05, "loss": 0.3763, "step": 13520 }, { "epoch": 0.36974950776635307, "grad_norm": 1.5953993797302246, "learning_rate": 1.4533985234794937e-05, "loss": 0.864, "step": 13521 }, { "epoch": 0.36977685408007, "grad_norm": 1.143813133239746, "learning_rate": 1.453319578189453e-05, "loss": 0.541, "step": 13522 }, { "epoch": 0.3698042003937869, "grad_norm": 1.4180516004562378, "learning_rate": 1.4532406293432479e-05, "loss": 0.5196, "step": 13523 }, { "epoch": 0.36983154670750384, "grad_norm": 1.2631447315216064, "learning_rate": 1.4531616769414978e-05, "loss": 0.5572, "step": 13524 }, { "epoch": 0.3698588930212207, "grad_norm": 1.2831909656524658, "learning_rate": 1.4530827209848215e-05, "loss": 0.5314, "step": 13525 }, { "epoch": 0.36988623933493764, "grad_norm": 1.665075421333313, "learning_rate": 1.4530037614738393e-05, "loss": 0.5788, "step": 13526 }, { "epoch": 0.36991358564865456, "grad_norm": 1.1700336933135986, "learning_rate": 1.4529247984091697e-05, "loss": 0.5097, "step": 13527 }, { "epoch": 0.3699409319623715, "grad_norm": 1.1255981922149658, "learning_rate": 1.4528458317914325e-05, "loss": 0.5257, "step": 13528 }, { "epoch": 0.36996827827608836, "grad_norm": 1.148158311843872, "learning_rate": 1.4527668616212473e-05, "loss": 0.5401, "step": 13529 }, { "epoch": 0.3699956245898053, "grad_norm": 5.876651763916016, "learning_rate": 1.4526878878992339e-05, "loss": 0.8616, "step": 13530 }, { "epoch": 0.3700229709035222, "grad_norm": 3.0297389030456543, "learning_rate": 1.4526089106260108e-05, "loss": 0.8597, "step": 13531 }, { "epoch": 0.37005031721723913, "grad_norm": 1.3304964303970337, "learning_rate": 1.4525299298021987e-05, "loss": 0.4529, "step": 13532 }, { "epoch": 0.370077663530956, "grad_norm": 2.3622360229492188, "learning_rate": 1.4524509454284162e-05, "loss": 0.4003, "step": 13533 }, { "epoch": 0.3701050098446729, "grad_norm": 1.421513319015503, "learning_rate": 1.4523719575052834e-05, "loss": 0.5566, "step": 13534 }, { "epoch": 0.37013235615838985, "grad_norm": 1.2981199026107788, "learning_rate": 1.45229296603342e-05, "loss": 0.5334, "step": 13535 }, { "epoch": 0.3701597024721068, "grad_norm": 1.3987009525299072, "learning_rate": 1.4522139710134455e-05, "loss": 0.4711, "step": 13536 }, { "epoch": 0.37018704878582365, "grad_norm": 1.5245661735534668, "learning_rate": 1.4521349724459799e-05, "loss": 0.5663, "step": 13537 }, { "epoch": 0.37021439509954057, "grad_norm": 1.1266456842422485, "learning_rate": 1.4520559703316424e-05, "loss": 0.5553, "step": 13538 }, { "epoch": 0.3702417414132575, "grad_norm": 1.7827380895614624, "learning_rate": 1.451976964671053e-05, "loss": 0.5202, "step": 13539 }, { "epoch": 0.3702690877269744, "grad_norm": 1.1854172945022583, "learning_rate": 1.4518979554648321e-05, "loss": 0.5296, "step": 13540 }, { "epoch": 0.3702964340406913, "grad_norm": 1.1811906099319458, "learning_rate": 1.4518189427135982e-05, "loss": 0.5628, "step": 13541 }, { "epoch": 0.3703237803544082, "grad_norm": 1.6506651639938354, "learning_rate": 1.4517399264179723e-05, "loss": 0.5524, "step": 13542 }, { "epoch": 0.37035112666812514, "grad_norm": 1.3716466426849365, "learning_rate": 1.4516609065785736e-05, "loss": 0.5701, "step": 13543 }, { "epoch": 0.37037847298184207, "grad_norm": 2.9778177738189697, "learning_rate": 1.4515818831960223e-05, "loss": 0.3916, "step": 13544 }, { "epoch": 0.37040581929555894, "grad_norm": 1.780512809753418, "learning_rate": 1.4515028562709382e-05, "loss": 0.4624, "step": 13545 }, { "epoch": 0.37043316560927586, "grad_norm": 1.384331226348877, "learning_rate": 1.451423825803941e-05, "loss": 0.5589, "step": 13546 }, { "epoch": 0.3704605119229928, "grad_norm": 1.2900669574737549, "learning_rate": 1.4513447917956511e-05, "loss": 0.5377, "step": 13547 }, { "epoch": 0.3704878582367097, "grad_norm": 1.4057592153549194, "learning_rate": 1.4512657542466886e-05, "loss": 0.501, "step": 13548 }, { "epoch": 0.3705152045504266, "grad_norm": 1.6676725149154663, "learning_rate": 1.451186713157673e-05, "loss": 0.555, "step": 13549 }, { "epoch": 0.3705425508641435, "grad_norm": 1.377366542816162, "learning_rate": 1.4511076685292245e-05, "loss": 0.5561, "step": 13550 }, { "epoch": 0.37056989717786043, "grad_norm": 1.4440048933029175, "learning_rate": 1.4510286203619635e-05, "loss": 0.5747, "step": 13551 }, { "epoch": 0.37059724349157736, "grad_norm": 1.0954484939575195, "learning_rate": 1.4509495686565098e-05, "loss": 0.5366, "step": 13552 }, { "epoch": 0.3706245898052942, "grad_norm": 1.42532217502594, "learning_rate": 1.4508705134134836e-05, "loss": 0.5549, "step": 13553 }, { "epoch": 0.37065193611901115, "grad_norm": 1.2747160196304321, "learning_rate": 1.4507914546335054e-05, "loss": 0.5051, "step": 13554 }, { "epoch": 0.3706792824327281, "grad_norm": 1.3135383129119873, "learning_rate": 1.4507123923171946e-05, "loss": 0.506, "step": 13555 }, { "epoch": 0.370706628746445, "grad_norm": 1.1399095058441162, "learning_rate": 1.4506333264651725e-05, "loss": 0.5268, "step": 13556 }, { "epoch": 0.37073397506016187, "grad_norm": 1.5930054187774658, "learning_rate": 1.4505542570780582e-05, "loss": 0.5351, "step": 13557 }, { "epoch": 0.3707613213738788, "grad_norm": 1.2273013591766357, "learning_rate": 1.4504751841564728e-05, "loss": 0.4309, "step": 13558 }, { "epoch": 0.3707886676875957, "grad_norm": 1.305930733680725, "learning_rate": 1.4503961077010364e-05, "loss": 0.571, "step": 13559 }, { "epoch": 0.37081601400131264, "grad_norm": 1.1713759899139404, "learning_rate": 1.4503170277123693e-05, "loss": 0.5358, "step": 13560 }, { "epoch": 0.3708433603150295, "grad_norm": 1.6565287113189697, "learning_rate": 1.4502379441910918e-05, "loss": 0.4108, "step": 13561 }, { "epoch": 0.37087070662874644, "grad_norm": 1.1363959312438965, "learning_rate": 1.4501588571378244e-05, "loss": 0.5489, "step": 13562 }, { "epoch": 0.37089805294246336, "grad_norm": 1.1937237977981567, "learning_rate": 1.4500797665531874e-05, "loss": 0.4172, "step": 13563 }, { "epoch": 0.3709253992561803, "grad_norm": 1.3071260452270508, "learning_rate": 1.4500006724378013e-05, "loss": 0.5476, "step": 13564 }, { "epoch": 0.37095274556989716, "grad_norm": 1.3243540525436401, "learning_rate": 1.4499215747922865e-05, "loss": 0.4632, "step": 13565 }, { "epoch": 0.3709800918836141, "grad_norm": 1.1518391370773315, "learning_rate": 1.4498424736172635e-05, "loss": 0.5375, "step": 13566 }, { "epoch": 0.371007438197331, "grad_norm": 1.6301648616790771, "learning_rate": 1.4497633689133533e-05, "loss": 0.536, "step": 13567 }, { "epoch": 0.37103478451104793, "grad_norm": 2.464472770690918, "learning_rate": 1.4496842606811757e-05, "loss": 0.5503, "step": 13568 }, { "epoch": 0.3710621308247648, "grad_norm": 1.7006852626800537, "learning_rate": 1.4496051489213516e-05, "loss": 0.5741, "step": 13569 }, { "epoch": 0.3710894771384817, "grad_norm": 1.1290656328201294, "learning_rate": 1.4495260336345019e-05, "loss": 0.5256, "step": 13570 }, { "epoch": 0.37111682345219865, "grad_norm": 1.3017557859420776, "learning_rate": 1.4494469148212467e-05, "loss": 0.5141, "step": 13571 }, { "epoch": 0.3711441697659156, "grad_norm": 1.1987550258636475, "learning_rate": 1.4493677924822072e-05, "loss": 0.5367, "step": 13572 }, { "epoch": 0.37117151607963245, "grad_norm": 1.1139552593231201, "learning_rate": 1.4492886666180035e-05, "loss": 0.509, "step": 13573 }, { "epoch": 0.37119886239334937, "grad_norm": 1.3584136962890625, "learning_rate": 1.4492095372292567e-05, "loss": 0.5232, "step": 13574 }, { "epoch": 0.3712262087070663, "grad_norm": 1.2043406963348389, "learning_rate": 1.4491304043165877e-05, "loss": 0.833, "step": 13575 }, { "epoch": 0.3712535550207832, "grad_norm": 1.4685790538787842, "learning_rate": 1.449051267880617e-05, "loss": 0.5254, "step": 13576 }, { "epoch": 0.3712809013345001, "grad_norm": 1.3892146348953247, "learning_rate": 1.4489721279219654e-05, "loss": 0.5447, "step": 13577 }, { "epoch": 0.371308247648217, "grad_norm": 1.0823825597763062, "learning_rate": 1.4488929844412541e-05, "loss": 0.5335, "step": 13578 }, { "epoch": 0.37133559396193394, "grad_norm": 1.4299951791763306, "learning_rate": 1.4488138374391033e-05, "loss": 0.5247, "step": 13579 }, { "epoch": 0.37136294027565087, "grad_norm": 1.6568114757537842, "learning_rate": 1.4487346869161344e-05, "loss": 0.4152, "step": 13580 }, { "epoch": 0.37139028658936774, "grad_norm": 1.644391655921936, "learning_rate": 1.4486555328729682e-05, "loss": 0.4317, "step": 13581 }, { "epoch": 0.37141763290308466, "grad_norm": 1.480204463005066, "learning_rate": 1.4485763753102253e-05, "loss": 0.5423, "step": 13582 }, { "epoch": 0.3714449792168016, "grad_norm": 1.2129498720169067, "learning_rate": 1.4484972142285277e-05, "loss": 0.5535, "step": 13583 }, { "epoch": 0.3714723255305185, "grad_norm": 1.6000927686691284, "learning_rate": 1.4484180496284952e-05, "loss": 0.5476, "step": 13584 }, { "epoch": 0.3714996718442354, "grad_norm": 1.6133708953857422, "learning_rate": 1.4483388815107491e-05, "loss": 0.5211, "step": 13585 }, { "epoch": 0.3715270181579523, "grad_norm": 1.4395118951797485, "learning_rate": 1.448259709875911e-05, "loss": 0.5367, "step": 13586 }, { "epoch": 0.37155436447166923, "grad_norm": 1.2232999801635742, "learning_rate": 1.4481805347246015e-05, "loss": 0.5217, "step": 13587 }, { "epoch": 0.37158171078538615, "grad_norm": 1.7014559507369995, "learning_rate": 1.4481013560574417e-05, "loss": 0.5237, "step": 13588 }, { "epoch": 0.371609057099103, "grad_norm": 1.3312946557998657, "learning_rate": 1.448022173875053e-05, "loss": 0.5409, "step": 13589 }, { "epoch": 0.37163640341281995, "grad_norm": 1.3988592624664307, "learning_rate": 1.4479429881780565e-05, "loss": 0.5359, "step": 13590 }, { "epoch": 0.3716637497265369, "grad_norm": 1.1551690101623535, "learning_rate": 1.4478637989670734e-05, "loss": 0.5142, "step": 13591 }, { "epoch": 0.3716910960402538, "grad_norm": 1.1647679805755615, "learning_rate": 1.4477846062427246e-05, "loss": 0.4935, "step": 13592 }, { "epoch": 0.37171844235397067, "grad_norm": 1.4042160511016846, "learning_rate": 1.4477054100056314e-05, "loss": 0.5379, "step": 13593 }, { "epoch": 0.3717457886676876, "grad_norm": 1.3592153787612915, "learning_rate": 1.4476262102564155e-05, "loss": 0.5434, "step": 13594 }, { "epoch": 0.3717731349814045, "grad_norm": 1.6672612428665161, "learning_rate": 1.447547006995698e-05, "loss": 0.8342, "step": 13595 }, { "epoch": 0.37180048129512144, "grad_norm": 1.5101932287216187, "learning_rate": 1.4474678002241002e-05, "loss": 0.5494, "step": 13596 }, { "epoch": 0.3718278276088383, "grad_norm": 1.4614313840866089, "learning_rate": 1.4473885899422436e-05, "loss": 0.4724, "step": 13597 }, { "epoch": 0.37185517392255524, "grad_norm": 1.4753453731536865, "learning_rate": 1.4473093761507491e-05, "loss": 0.5468, "step": 13598 }, { "epoch": 0.37188252023627216, "grad_norm": 1.7366957664489746, "learning_rate": 1.4472301588502384e-05, "loss": 0.5319, "step": 13599 }, { "epoch": 0.3719098665499891, "grad_norm": 1.2799288034439087, "learning_rate": 1.4471509380413333e-05, "loss": 0.5665, "step": 13600 }, { "epoch": 0.37193721286370596, "grad_norm": 1.2826204299926758, "learning_rate": 1.4470717137246546e-05, "loss": 0.4633, "step": 13601 }, { "epoch": 0.3719645591774229, "grad_norm": 1.4701416492462158, "learning_rate": 1.4469924859008244e-05, "loss": 0.832, "step": 13602 }, { "epoch": 0.3719919054911398, "grad_norm": 1.4037055969238281, "learning_rate": 1.4469132545704638e-05, "loss": 0.533, "step": 13603 }, { "epoch": 0.37201925180485673, "grad_norm": 1.163835048675537, "learning_rate": 1.4468340197341944e-05, "loss": 0.5332, "step": 13604 }, { "epoch": 0.3720465981185736, "grad_norm": 2.282686471939087, "learning_rate": 1.4467547813926378e-05, "loss": 0.8421, "step": 13605 }, { "epoch": 0.3720739444322905, "grad_norm": 1.3527286052703857, "learning_rate": 1.446675539546416e-05, "loss": 0.5524, "step": 13606 }, { "epoch": 0.37210129074600745, "grad_norm": 1.2458088397979736, "learning_rate": 1.4465962941961503e-05, "loss": 0.5143, "step": 13607 }, { "epoch": 0.3721286370597244, "grad_norm": 1.6132216453552246, "learning_rate": 1.4465170453424619e-05, "loss": 0.4643, "step": 13608 }, { "epoch": 0.37215598337344125, "grad_norm": 1.317244291305542, "learning_rate": 1.4464377929859732e-05, "loss": 0.5728, "step": 13609 }, { "epoch": 0.37218332968715817, "grad_norm": 1.2103285789489746, "learning_rate": 1.4463585371273057e-05, "loss": 0.5246, "step": 13610 }, { "epoch": 0.3722106760008751, "grad_norm": 1.8751862049102783, "learning_rate": 1.4462792777670811e-05, "loss": 0.4425, "step": 13611 }, { "epoch": 0.37223802231459197, "grad_norm": 1.9805370569229126, "learning_rate": 1.446200014905921e-05, "loss": 0.513, "step": 13612 }, { "epoch": 0.3722653686283089, "grad_norm": 1.3594045639038086, "learning_rate": 1.4461207485444474e-05, "loss": 0.52, "step": 13613 }, { "epoch": 0.3722927149420258, "grad_norm": 1.7674814462661743, "learning_rate": 1.4460414786832821e-05, "loss": 0.534, "step": 13614 }, { "epoch": 0.37232006125574274, "grad_norm": 1.226227045059204, "learning_rate": 1.445962205323047e-05, "loss": 0.5354, "step": 13615 }, { "epoch": 0.3723474075694596, "grad_norm": 1.2322373390197754, "learning_rate": 1.4458829284643639e-05, "loss": 0.538, "step": 13616 }, { "epoch": 0.37237475388317653, "grad_norm": 1.5935709476470947, "learning_rate": 1.4458036481078547e-05, "loss": 0.4988, "step": 13617 }, { "epoch": 0.37240210019689346, "grad_norm": 1.2075510025024414, "learning_rate": 1.4457243642541415e-05, "loss": 0.5545, "step": 13618 }, { "epoch": 0.3724294465106104, "grad_norm": 1.2888461351394653, "learning_rate": 1.445645076903846e-05, "loss": 0.512, "step": 13619 }, { "epoch": 0.37245679282432725, "grad_norm": 1.4684420824050903, "learning_rate": 1.44556578605759e-05, "loss": 0.5401, "step": 13620 }, { "epoch": 0.3724841391380442, "grad_norm": 1.6144495010375977, "learning_rate": 1.4454864917159963e-05, "loss": 0.5425, "step": 13621 }, { "epoch": 0.3725114854517611, "grad_norm": 1.5661900043487549, "learning_rate": 1.4454071938796861e-05, "loss": 0.4841, "step": 13622 }, { "epoch": 0.37253883176547803, "grad_norm": 1.9036937952041626, "learning_rate": 1.4453278925492817e-05, "loss": 0.5596, "step": 13623 }, { "epoch": 0.3725661780791949, "grad_norm": 1.2195199728012085, "learning_rate": 1.4452485877254056e-05, "loss": 0.5508, "step": 13624 }, { "epoch": 0.3725935243929118, "grad_norm": 1.295241355895996, "learning_rate": 1.4451692794086794e-05, "loss": 0.5586, "step": 13625 }, { "epoch": 0.37262087070662875, "grad_norm": 1.179043173789978, "learning_rate": 1.4450899675997256e-05, "loss": 0.5324, "step": 13626 }, { "epoch": 0.3726482170203457, "grad_norm": 1.4212478399276733, "learning_rate": 1.4450106522991663e-05, "loss": 0.4667, "step": 13627 }, { "epoch": 0.37267556333406254, "grad_norm": 1.2257304191589355, "learning_rate": 1.4449313335076233e-05, "loss": 0.4931, "step": 13628 }, { "epoch": 0.37270290964777947, "grad_norm": 2.430022716522217, "learning_rate": 1.4448520112257199e-05, "loss": 0.5411, "step": 13629 }, { "epoch": 0.3727302559614964, "grad_norm": 2.4391541481018066, "learning_rate": 1.4447726854540771e-05, "loss": 0.4134, "step": 13630 }, { "epoch": 0.3727576022752133, "grad_norm": 1.2174500226974487, "learning_rate": 1.444693356193318e-05, "loss": 0.5413, "step": 13631 }, { "epoch": 0.3727849485889302, "grad_norm": 1.087005615234375, "learning_rate": 1.4446140234440646e-05, "loss": 0.512, "step": 13632 }, { "epoch": 0.3728122949026471, "grad_norm": 1.4917315244674683, "learning_rate": 1.4445346872069393e-05, "loss": 0.5354, "step": 13633 }, { "epoch": 0.37283964121636404, "grad_norm": 2.600666046142578, "learning_rate": 1.4444553474825647e-05, "loss": 0.8344, "step": 13634 }, { "epoch": 0.37286698753008096, "grad_norm": 1.3638345003128052, "learning_rate": 1.4443760042715629e-05, "loss": 0.5461, "step": 13635 }, { "epoch": 0.37289433384379783, "grad_norm": 1.4386688470840454, "learning_rate": 1.4442966575745561e-05, "loss": 0.5546, "step": 13636 }, { "epoch": 0.37292168015751476, "grad_norm": 1.2722593545913696, "learning_rate": 1.4442173073921675e-05, "loss": 0.5232, "step": 13637 }, { "epoch": 0.3729490264712317, "grad_norm": 1.2102488279342651, "learning_rate": 1.4441379537250186e-05, "loss": 0.5251, "step": 13638 }, { "epoch": 0.3729763727849486, "grad_norm": 1.1190892457962036, "learning_rate": 1.4440585965737328e-05, "loss": 0.5201, "step": 13639 }, { "epoch": 0.3730037190986655, "grad_norm": 1.223758339881897, "learning_rate": 1.4439792359389322e-05, "loss": 0.5426, "step": 13640 }, { "epoch": 0.3730310654123824, "grad_norm": 1.4398764371871948, "learning_rate": 1.4438998718212393e-05, "loss": 0.5525, "step": 13641 }, { "epoch": 0.3730584117260993, "grad_norm": 1.1927404403686523, "learning_rate": 1.443820504221277e-05, "loss": 0.5299, "step": 13642 }, { "epoch": 0.37308575803981625, "grad_norm": 2.21527361869812, "learning_rate": 1.443741133139668e-05, "loss": 0.5402, "step": 13643 }, { "epoch": 0.3731131043535331, "grad_norm": 1.2814419269561768, "learning_rate": 1.4436617585770342e-05, "loss": 0.5152, "step": 13644 }, { "epoch": 0.37314045066725005, "grad_norm": 1.497071623802185, "learning_rate": 1.4435823805339993e-05, "loss": 0.5702, "step": 13645 }, { "epoch": 0.37316779698096697, "grad_norm": 1.263975739479065, "learning_rate": 1.4435029990111848e-05, "loss": 0.5191, "step": 13646 }, { "epoch": 0.3731951432946839, "grad_norm": 1.6621899604797363, "learning_rate": 1.4434236140092145e-05, "loss": 0.5381, "step": 13647 }, { "epoch": 0.37322248960840076, "grad_norm": 1.3027890920639038, "learning_rate": 1.4433442255287107e-05, "loss": 0.4887, "step": 13648 }, { "epoch": 0.3732498359221177, "grad_norm": 1.5675474405288696, "learning_rate": 1.4432648335702961e-05, "loss": 0.5304, "step": 13649 }, { "epoch": 0.3732771822358346, "grad_norm": 1.3493149280548096, "learning_rate": 1.4431854381345938e-05, "loss": 0.567, "step": 13650 }, { "epoch": 0.37330452854955154, "grad_norm": 1.2789644002914429, "learning_rate": 1.4431060392222262e-05, "loss": 0.5226, "step": 13651 }, { "epoch": 0.3733318748632684, "grad_norm": 1.4460123777389526, "learning_rate": 1.4430266368338166e-05, "loss": 0.5367, "step": 13652 }, { "epoch": 0.37335922117698533, "grad_norm": 1.7165124416351318, "learning_rate": 1.4429472309699879e-05, "loss": 0.4616, "step": 13653 }, { "epoch": 0.37338656749070226, "grad_norm": 2.712162733078003, "learning_rate": 1.4428678216313627e-05, "loss": 0.3924, "step": 13654 }, { "epoch": 0.3734139138044192, "grad_norm": 1.8576092720031738, "learning_rate": 1.442788408818564e-05, "loss": 0.8621, "step": 13655 }, { "epoch": 0.37344126011813605, "grad_norm": 1.4874826669692993, "learning_rate": 1.4427089925322149e-05, "loss": 0.4883, "step": 13656 }, { "epoch": 0.373468606431853, "grad_norm": 1.5360896587371826, "learning_rate": 1.4426295727729382e-05, "loss": 0.5283, "step": 13657 }, { "epoch": 0.3734959527455699, "grad_norm": 1.5481798648834229, "learning_rate": 1.4425501495413569e-05, "loss": 0.5008, "step": 13658 }, { "epoch": 0.37352329905928683, "grad_norm": 1.3730908632278442, "learning_rate": 1.4424707228380947e-05, "loss": 0.4565, "step": 13659 }, { "epoch": 0.3735506453730037, "grad_norm": 1.592684030532837, "learning_rate": 1.442391292663774e-05, "loss": 0.4805, "step": 13660 }, { "epoch": 0.3735779916867206, "grad_norm": 1.4462271928787231, "learning_rate": 1.442311859019018e-05, "loss": 0.5574, "step": 13661 }, { "epoch": 0.37360533800043755, "grad_norm": 1.476529836654663, "learning_rate": 1.44223242190445e-05, "loss": 0.542, "step": 13662 }, { "epoch": 0.3736326843141545, "grad_norm": 1.6685742139816284, "learning_rate": 1.4421529813206932e-05, "loss": 0.5191, "step": 13663 }, { "epoch": 0.37366003062787134, "grad_norm": 1.2581651210784912, "learning_rate": 1.4420735372683707e-05, "loss": 0.5099, "step": 13664 }, { "epoch": 0.37368737694158827, "grad_norm": 1.5457983016967773, "learning_rate": 1.4419940897481056e-05, "loss": 0.5409, "step": 13665 }, { "epoch": 0.3737147232553052, "grad_norm": 1.3096522092819214, "learning_rate": 1.4419146387605212e-05, "loss": 0.524, "step": 13666 }, { "epoch": 0.3737420695690221, "grad_norm": 1.350946068763733, "learning_rate": 1.4418351843062411e-05, "loss": 0.5078, "step": 13667 }, { "epoch": 0.373769415882739, "grad_norm": 1.3100920915603638, "learning_rate": 1.4417557263858879e-05, "loss": 0.5208, "step": 13668 }, { "epoch": 0.3737967621964559, "grad_norm": 1.176161289215088, "learning_rate": 1.4416762650000858e-05, "loss": 0.4906, "step": 13669 }, { "epoch": 0.37382410851017284, "grad_norm": 1.2885419130325317, "learning_rate": 1.4415968001494574e-05, "loss": 0.5203, "step": 13670 }, { "epoch": 0.37385145482388976, "grad_norm": 1.5676324367523193, "learning_rate": 1.4415173318346268e-05, "loss": 0.5493, "step": 13671 }, { "epoch": 0.37387880113760663, "grad_norm": 1.3873506784439087, "learning_rate": 1.4414378600562167e-05, "loss": 0.538, "step": 13672 }, { "epoch": 0.37390614745132356, "grad_norm": 1.3670600652694702, "learning_rate": 1.4413583848148505e-05, "loss": 0.4073, "step": 13673 }, { "epoch": 0.3739334937650405, "grad_norm": 1.2393125295639038, "learning_rate": 1.4412789061111523e-05, "loss": 0.5202, "step": 13674 }, { "epoch": 0.3739608400787574, "grad_norm": 1.3466885089874268, "learning_rate": 1.4411994239457451e-05, "loss": 0.5154, "step": 13675 }, { "epoch": 0.3739881863924743, "grad_norm": 1.6256331205368042, "learning_rate": 1.4411199383192527e-05, "loss": 0.5593, "step": 13676 }, { "epoch": 0.3740155327061912, "grad_norm": 1.0851471424102783, "learning_rate": 1.4410404492322985e-05, "loss": 0.5106, "step": 13677 }, { "epoch": 0.3740428790199081, "grad_norm": 1.2351163625717163, "learning_rate": 1.4409609566855063e-05, "loss": 0.5645, "step": 13678 }, { "epoch": 0.37407022533362505, "grad_norm": 1.648354172706604, "learning_rate": 1.4408814606794991e-05, "loss": 0.5218, "step": 13679 }, { "epoch": 0.3740975716473419, "grad_norm": 1.380826473236084, "learning_rate": 1.4408019612149014e-05, "loss": 0.4559, "step": 13680 }, { "epoch": 0.37412491796105884, "grad_norm": 1.349253535270691, "learning_rate": 1.440722458292336e-05, "loss": 0.5167, "step": 13681 }, { "epoch": 0.37415226427477577, "grad_norm": 1.3745088577270508, "learning_rate": 1.440642951912427e-05, "loss": 0.4451, "step": 13682 }, { "epoch": 0.3741796105884927, "grad_norm": 1.324486255645752, "learning_rate": 1.4405634420757982e-05, "loss": 0.4939, "step": 13683 }, { "epoch": 0.37420695690220956, "grad_norm": 1.2933247089385986, "learning_rate": 1.440483928783073e-05, "loss": 0.5407, "step": 13684 }, { "epoch": 0.3742343032159265, "grad_norm": 1.4670552015304565, "learning_rate": 1.4404044120348752e-05, "loss": 0.5238, "step": 13685 }, { "epoch": 0.3742616495296434, "grad_norm": 1.0873117446899414, "learning_rate": 1.440324891831829e-05, "loss": 0.5275, "step": 13686 }, { "epoch": 0.37428899584336034, "grad_norm": 1.440909743309021, "learning_rate": 1.440245368174558e-05, "loss": 0.5395, "step": 13687 }, { "epoch": 0.3743163421570772, "grad_norm": 1.3510870933532715, "learning_rate": 1.4401658410636858e-05, "loss": 0.5646, "step": 13688 }, { "epoch": 0.37434368847079413, "grad_norm": 1.470456600189209, "learning_rate": 1.4400863104998369e-05, "loss": 0.5488, "step": 13689 }, { "epoch": 0.37437103478451106, "grad_norm": 1.707204818725586, "learning_rate": 1.4400067764836342e-05, "loss": 0.5531, "step": 13690 }, { "epoch": 0.374398381098228, "grad_norm": 1.368924856185913, "learning_rate": 1.4399272390157024e-05, "loss": 0.4608, "step": 13691 }, { "epoch": 0.37442572741194485, "grad_norm": 1.1060304641723633, "learning_rate": 1.4398476980966651e-05, "loss": 0.5013, "step": 13692 }, { "epoch": 0.3744530737256618, "grad_norm": 1.11102294921875, "learning_rate": 1.4397681537271464e-05, "loss": 0.5479, "step": 13693 }, { "epoch": 0.3744804200393787, "grad_norm": 1.5449707508087158, "learning_rate": 1.4396886059077703e-05, "loss": 0.554, "step": 13694 }, { "epoch": 0.37450776635309563, "grad_norm": 1.6749035120010376, "learning_rate": 1.4396090546391608e-05, "loss": 0.8537, "step": 13695 }, { "epoch": 0.3745351126668125, "grad_norm": 1.116782784461975, "learning_rate": 1.439529499921942e-05, "loss": 0.536, "step": 13696 }, { "epoch": 0.3745624589805294, "grad_norm": 1.1269009113311768, "learning_rate": 1.4394499417567383e-05, "loss": 0.5354, "step": 13697 }, { "epoch": 0.37458980529424635, "grad_norm": 1.2646132707595825, "learning_rate": 1.439370380144173e-05, "loss": 0.3699, "step": 13698 }, { "epoch": 0.3746171516079633, "grad_norm": 1.2778136730194092, "learning_rate": 1.439290815084871e-05, "loss": 0.5293, "step": 13699 }, { "epoch": 0.37464449792168014, "grad_norm": 1.3560504913330078, "learning_rate": 1.4392112465794561e-05, "loss": 0.5582, "step": 13700 }, { "epoch": 0.37467184423539707, "grad_norm": 2.5866429805755615, "learning_rate": 1.4391316746285525e-05, "loss": 0.4218, "step": 13701 }, { "epoch": 0.374699190549114, "grad_norm": 1.2929846048355103, "learning_rate": 1.4390520992327846e-05, "loss": 0.5171, "step": 13702 }, { "epoch": 0.3747265368628309, "grad_norm": 1.3964293003082275, "learning_rate": 1.4389725203927765e-05, "loss": 0.5336, "step": 13703 }, { "epoch": 0.3747538831765478, "grad_norm": 1.193017601966858, "learning_rate": 1.4388929381091525e-05, "loss": 0.5248, "step": 13704 }, { "epoch": 0.3747812294902647, "grad_norm": 1.2168315649032593, "learning_rate": 1.438813352382537e-05, "loss": 0.8469, "step": 13705 }, { "epoch": 0.37480857580398164, "grad_norm": 1.2637218236923218, "learning_rate": 1.4387337632135543e-05, "loss": 0.3905, "step": 13706 }, { "epoch": 0.37483592211769856, "grad_norm": 1.225736141204834, "learning_rate": 1.4386541706028285e-05, "loss": 0.4536, "step": 13707 }, { "epoch": 0.37486326843141543, "grad_norm": 1.2825011014938354, "learning_rate": 1.4385745745509843e-05, "loss": 0.5223, "step": 13708 }, { "epoch": 0.37489061474513236, "grad_norm": 1.3652327060699463, "learning_rate": 1.4384949750586458e-05, "loss": 0.5146, "step": 13709 }, { "epoch": 0.3749179610588493, "grad_norm": 1.1899077892303467, "learning_rate": 1.438415372126438e-05, "loss": 0.5385, "step": 13710 }, { "epoch": 0.3749453073725662, "grad_norm": 1.2006691694259644, "learning_rate": 1.4383357657549845e-05, "loss": 0.5573, "step": 13711 }, { "epoch": 0.3749726536862831, "grad_norm": 1.1724547147750854, "learning_rate": 1.4382561559449107e-05, "loss": 0.5385, "step": 13712 }, { "epoch": 0.375, "grad_norm": 1.217079997062683, "learning_rate": 1.4381765426968407e-05, "loss": 0.5244, "step": 13713 }, { "epoch": 0.3750273463137169, "grad_norm": 2.060920476913452, "learning_rate": 1.4380969260113986e-05, "loss": 0.3955, "step": 13714 }, { "epoch": 0.3750546926274338, "grad_norm": 1.3247770071029663, "learning_rate": 1.4380173058892098e-05, "loss": 0.5112, "step": 13715 }, { "epoch": 0.3750820389411507, "grad_norm": 1.2231279611587524, "learning_rate": 1.4379376823308983e-05, "loss": 0.8472, "step": 13716 }, { "epoch": 0.37510938525486764, "grad_norm": 1.45282781124115, "learning_rate": 1.4378580553370889e-05, "loss": 0.5714, "step": 13717 }, { "epoch": 0.37513673156858457, "grad_norm": 1.2438945770263672, "learning_rate": 1.4377784249084066e-05, "loss": 0.5522, "step": 13718 }, { "epoch": 0.37516407788230144, "grad_norm": 1.114375114440918, "learning_rate": 1.4376987910454753e-05, "loss": 0.5373, "step": 13719 }, { "epoch": 0.37519142419601836, "grad_norm": 1.2600704431533813, "learning_rate": 1.4376191537489204e-05, "loss": 0.4706, "step": 13720 }, { "epoch": 0.3752187705097353, "grad_norm": 1.204361081123352, "learning_rate": 1.4375395130193663e-05, "loss": 0.4513, "step": 13721 }, { "epoch": 0.3752461168234522, "grad_norm": 1.3567675352096558, "learning_rate": 1.4374598688574381e-05, "loss": 0.5096, "step": 13722 }, { "epoch": 0.3752734631371691, "grad_norm": 1.451623558998108, "learning_rate": 1.4373802212637603e-05, "loss": 0.5475, "step": 13723 }, { "epoch": 0.375300809450886, "grad_norm": 1.3527487516403198, "learning_rate": 1.4373005702389578e-05, "loss": 0.54, "step": 13724 }, { "epoch": 0.37532815576460293, "grad_norm": 1.3838063478469849, "learning_rate": 1.4372209157836552e-05, "loss": 0.5819, "step": 13725 }, { "epoch": 0.37535550207831986, "grad_norm": 1.2857614755630493, "learning_rate": 1.4371412578984776e-05, "loss": 0.5422, "step": 13726 }, { "epoch": 0.3753828483920367, "grad_norm": 1.3018932342529297, "learning_rate": 1.4370615965840498e-05, "loss": 0.5275, "step": 13727 }, { "epoch": 0.37541019470575365, "grad_norm": 1.1118035316467285, "learning_rate": 1.4369819318409967e-05, "loss": 0.5303, "step": 13728 }, { "epoch": 0.3754375410194706, "grad_norm": 1.3395631313323975, "learning_rate": 1.4369022636699436e-05, "loss": 0.5277, "step": 13729 }, { "epoch": 0.3754648873331875, "grad_norm": 1.3733985424041748, "learning_rate": 1.4368225920715148e-05, "loss": 0.5487, "step": 13730 }, { "epoch": 0.37549223364690437, "grad_norm": 1.6241273880004883, "learning_rate": 1.4367429170463361e-05, "loss": 0.5648, "step": 13731 }, { "epoch": 0.3755195799606213, "grad_norm": 1.3575807809829712, "learning_rate": 1.4366632385950322e-05, "loss": 0.5243, "step": 13732 }, { "epoch": 0.3755469262743382, "grad_norm": 1.0583000183105469, "learning_rate": 1.4365835567182275e-05, "loss": 0.5109, "step": 13733 }, { "epoch": 0.37557427258805515, "grad_norm": 1.3248780965805054, "learning_rate": 1.4365038714165482e-05, "loss": 0.4545, "step": 13734 }, { "epoch": 0.375601618901772, "grad_norm": 1.3615301847457886, "learning_rate": 1.4364241826906185e-05, "loss": 0.5574, "step": 13735 }, { "epoch": 0.37562896521548894, "grad_norm": 1.3566815853118896, "learning_rate": 1.4363444905410642e-05, "loss": 0.8219, "step": 13736 }, { "epoch": 0.37565631152920587, "grad_norm": 1.2993284463882446, "learning_rate": 1.43626479496851e-05, "loss": 0.8434, "step": 13737 }, { "epoch": 0.3756836578429228, "grad_norm": 1.2540684938430786, "learning_rate": 1.4361850959735813e-05, "loss": 0.458, "step": 13738 }, { "epoch": 0.37571100415663966, "grad_norm": 1.3526828289031982, "learning_rate": 1.436105393556903e-05, "loss": 0.5484, "step": 13739 }, { "epoch": 0.3757383504703566, "grad_norm": 1.1186435222625732, "learning_rate": 1.4360256877191012e-05, "loss": 0.5416, "step": 13740 }, { "epoch": 0.3757656967840735, "grad_norm": 1.1169503927230835, "learning_rate": 1.4359459784608005e-05, "loss": 0.525, "step": 13741 }, { "epoch": 0.37579304309779044, "grad_norm": 1.1372714042663574, "learning_rate": 1.4358662657826261e-05, "loss": 0.4845, "step": 13742 }, { "epoch": 0.3758203894115073, "grad_norm": 1.1966884136199951, "learning_rate": 1.4357865496852035e-05, "loss": 0.5332, "step": 13743 }, { "epoch": 0.37584773572522423, "grad_norm": 1.1995900869369507, "learning_rate": 1.435706830169158e-05, "loss": 0.543, "step": 13744 }, { "epoch": 0.37587508203894116, "grad_norm": 1.3540120124816895, "learning_rate": 1.4356271072351151e-05, "loss": 0.5716, "step": 13745 }, { "epoch": 0.3759024283526581, "grad_norm": 1.2748496532440186, "learning_rate": 1.4355473808837002e-05, "loss": 0.5609, "step": 13746 }, { "epoch": 0.37592977466637495, "grad_norm": 2.5009994506835938, "learning_rate": 1.4354676511155386e-05, "loss": 0.5206, "step": 13747 }, { "epoch": 0.3759571209800919, "grad_norm": 1.4919582605361938, "learning_rate": 1.4353879179312558e-05, "loss": 0.8471, "step": 13748 }, { "epoch": 0.3759844672938088, "grad_norm": 1.1817216873168945, "learning_rate": 1.4353081813314773e-05, "loss": 0.529, "step": 13749 }, { "epoch": 0.3760118136075257, "grad_norm": 1.311794400215149, "learning_rate": 1.4352284413168285e-05, "loss": 0.5308, "step": 13750 }, { "epoch": 0.3760391599212426, "grad_norm": 1.1957077980041504, "learning_rate": 1.4351486978879355e-05, "loss": 0.5123, "step": 13751 }, { "epoch": 0.3760665062349595, "grad_norm": 1.329856038093567, "learning_rate": 1.435068951045423e-05, "loss": 0.557, "step": 13752 }, { "epoch": 0.37609385254867644, "grad_norm": 1.5198686122894287, "learning_rate": 1.4349892007899171e-05, "loss": 0.3773, "step": 13753 }, { "epoch": 0.37612119886239337, "grad_norm": 1.13479745388031, "learning_rate": 1.4349094471220436e-05, "loss": 0.5117, "step": 13754 }, { "epoch": 0.37614854517611024, "grad_norm": 1.562219500541687, "learning_rate": 1.4348296900424275e-05, "loss": 0.4859, "step": 13755 }, { "epoch": 0.37617589148982716, "grad_norm": 1.3639631271362305, "learning_rate": 1.4347499295516951e-05, "loss": 0.8609, "step": 13756 }, { "epoch": 0.3762032378035441, "grad_norm": 2.0078487396240234, "learning_rate": 1.4346701656504718e-05, "loss": 0.5149, "step": 13757 }, { "epoch": 0.376230584117261, "grad_norm": 1.2972486019134521, "learning_rate": 1.4345903983393835e-05, "loss": 0.5643, "step": 13758 }, { "epoch": 0.3762579304309779, "grad_norm": 1.306951880455017, "learning_rate": 1.4345106276190557e-05, "loss": 0.5448, "step": 13759 }, { "epoch": 0.3762852767446948, "grad_norm": 1.2629231214523315, "learning_rate": 1.4344308534901141e-05, "loss": 0.4956, "step": 13760 }, { "epoch": 0.37631262305841173, "grad_norm": 1.3139315843582153, "learning_rate": 1.4343510759531852e-05, "loss": 0.8629, "step": 13761 }, { "epoch": 0.37633996937212866, "grad_norm": 1.7402105331420898, "learning_rate": 1.4342712950088939e-05, "loss": 0.5535, "step": 13762 }, { "epoch": 0.3763673156858455, "grad_norm": 1.0837818384170532, "learning_rate": 1.4341915106578666e-05, "loss": 0.5245, "step": 13763 }, { "epoch": 0.37639466199956245, "grad_norm": 1.1287537813186646, "learning_rate": 1.4341117229007292e-05, "loss": 0.509, "step": 13764 }, { "epoch": 0.3764220083132794, "grad_norm": 1.9273288249969482, "learning_rate": 1.4340319317381074e-05, "loss": 0.5544, "step": 13765 }, { "epoch": 0.3764493546269963, "grad_norm": 1.5546213388442993, "learning_rate": 1.4339521371706273e-05, "loss": 0.4665, "step": 13766 }, { "epoch": 0.37647670094071317, "grad_norm": 1.237889289855957, "learning_rate": 1.4338723391989148e-05, "loss": 0.5373, "step": 13767 }, { "epoch": 0.3765040472544301, "grad_norm": 1.253725290298462, "learning_rate": 1.433792537823596e-05, "loss": 0.5101, "step": 13768 }, { "epoch": 0.376531393568147, "grad_norm": 1.3481429815292358, "learning_rate": 1.433712733045297e-05, "loss": 0.5602, "step": 13769 }, { "epoch": 0.37655873988186395, "grad_norm": 1.5339092016220093, "learning_rate": 1.4336329248646431e-05, "loss": 0.4245, "step": 13770 }, { "epoch": 0.3765860861955808, "grad_norm": 1.391948938369751, "learning_rate": 1.4335531132822613e-05, "loss": 0.5153, "step": 13771 }, { "epoch": 0.37661343250929774, "grad_norm": 1.2236510515213013, "learning_rate": 1.4334732982987774e-05, "loss": 0.5609, "step": 13772 }, { "epoch": 0.37664077882301467, "grad_norm": 1.3839730024337769, "learning_rate": 1.4333934799148172e-05, "loss": 0.5547, "step": 13773 }, { "epoch": 0.3766681251367316, "grad_norm": 1.2053200006484985, "learning_rate": 1.4333136581310072e-05, "loss": 0.5837, "step": 13774 }, { "epoch": 0.37669547145044846, "grad_norm": 1.2619178295135498, "learning_rate": 1.4332338329479739e-05, "loss": 0.5102, "step": 13775 }, { "epoch": 0.3767228177641654, "grad_norm": 1.4225139617919922, "learning_rate": 1.4331540043663428e-05, "loss": 0.5233, "step": 13776 }, { "epoch": 0.3767501640778823, "grad_norm": 1.1159462928771973, "learning_rate": 1.4330741723867403e-05, "loss": 0.5458, "step": 13777 }, { "epoch": 0.37677751039159924, "grad_norm": 1.5476868152618408, "learning_rate": 1.432994337009793e-05, "loss": 0.562, "step": 13778 }, { "epoch": 0.3768048567053161, "grad_norm": 1.3371281623840332, "learning_rate": 1.4329144982361267e-05, "loss": 0.5159, "step": 13779 }, { "epoch": 0.37683220301903303, "grad_norm": 1.1153720617294312, "learning_rate": 1.4328346560663683e-05, "loss": 0.5602, "step": 13780 }, { "epoch": 0.37685954933274995, "grad_norm": 1.3642679452896118, "learning_rate": 1.4327548105011437e-05, "loss": 0.4957, "step": 13781 }, { "epoch": 0.3768868956464669, "grad_norm": 2.2442541122436523, "learning_rate": 1.4326749615410794e-05, "loss": 0.4683, "step": 13782 }, { "epoch": 0.37691424196018375, "grad_norm": 1.3775073289871216, "learning_rate": 1.4325951091868018e-05, "loss": 0.895, "step": 13783 }, { "epoch": 0.3769415882739007, "grad_norm": 1.5352822542190552, "learning_rate": 1.4325152534389373e-05, "loss": 0.5098, "step": 13784 }, { "epoch": 0.3769689345876176, "grad_norm": 1.2020180225372314, "learning_rate": 1.4324353942981122e-05, "loss": 0.4478, "step": 13785 }, { "epoch": 0.3769962809013345, "grad_norm": 1.1551824808120728, "learning_rate": 1.4323555317649534e-05, "loss": 0.5682, "step": 13786 }, { "epoch": 0.3770236272150514, "grad_norm": 1.2095249891281128, "learning_rate": 1.4322756658400869e-05, "loss": 0.5336, "step": 13787 }, { "epoch": 0.3770509735287683, "grad_norm": 1.455382227897644, "learning_rate": 1.4321957965241398e-05, "loss": 0.5504, "step": 13788 }, { "epoch": 0.37707831984248524, "grad_norm": 1.2468942403793335, "learning_rate": 1.432115923817738e-05, "loss": 0.4107, "step": 13789 }, { "epoch": 0.37710566615620217, "grad_norm": 5.245855808258057, "learning_rate": 1.4320360477215083e-05, "loss": 0.8847, "step": 13790 }, { "epoch": 0.37713301246991904, "grad_norm": 1.3776352405548096, "learning_rate": 1.4319561682360778e-05, "loss": 0.4421, "step": 13791 }, { "epoch": 0.37716035878363596, "grad_norm": 1.446945309638977, "learning_rate": 1.4318762853620724e-05, "loss": 0.5249, "step": 13792 }, { "epoch": 0.3771877050973529, "grad_norm": 1.1061431169509888, "learning_rate": 1.4317963991001192e-05, "loss": 0.5154, "step": 13793 }, { "epoch": 0.3772150514110698, "grad_norm": 1.126989722251892, "learning_rate": 1.431716509450845e-05, "loss": 0.5503, "step": 13794 }, { "epoch": 0.3772423977247867, "grad_norm": 1.5195196866989136, "learning_rate": 1.4316366164148759e-05, "loss": 0.5312, "step": 13795 }, { "epoch": 0.3772697440385036, "grad_norm": 1.136070728302002, "learning_rate": 1.431556719992839e-05, "loss": 0.5096, "step": 13796 }, { "epoch": 0.37729709035222053, "grad_norm": 1.1521451473236084, "learning_rate": 1.4314768201853613e-05, "loss": 0.5129, "step": 13797 }, { "epoch": 0.37732443666593746, "grad_norm": 1.3662444353103638, "learning_rate": 1.4313969169930692e-05, "loss": 0.5501, "step": 13798 }, { "epoch": 0.3773517829796543, "grad_norm": 1.6671432256698608, "learning_rate": 1.43131701041659e-05, "loss": 0.532, "step": 13799 }, { "epoch": 0.37737912929337125, "grad_norm": 1.5856274366378784, "learning_rate": 1.43123710045655e-05, "loss": 0.8654, "step": 13800 }, { "epoch": 0.3774064756070882, "grad_norm": 1.4135931730270386, "learning_rate": 1.431157187113576e-05, "loss": 0.8451, "step": 13801 }, { "epoch": 0.3774338219208051, "grad_norm": 1.5935776233673096, "learning_rate": 1.4310772703882958e-05, "loss": 0.4554, "step": 13802 }, { "epoch": 0.37746116823452197, "grad_norm": 1.3703047037124634, "learning_rate": 1.4309973502813353e-05, "loss": 0.54, "step": 13803 }, { "epoch": 0.3774885145482389, "grad_norm": 1.6338764429092407, "learning_rate": 1.4309174267933222e-05, "loss": 0.3999, "step": 13804 }, { "epoch": 0.3775158608619558, "grad_norm": 1.7149120569229126, "learning_rate": 1.430837499924883e-05, "loss": 0.5377, "step": 13805 }, { "epoch": 0.37754320717567275, "grad_norm": 1.370818018913269, "learning_rate": 1.4307575696766446e-05, "loss": 0.5329, "step": 13806 }, { "epoch": 0.3775705534893896, "grad_norm": 1.2232404947280884, "learning_rate": 1.4306776360492348e-05, "loss": 0.5124, "step": 13807 }, { "epoch": 0.37759789980310654, "grad_norm": 1.492426872253418, "learning_rate": 1.4305976990432797e-05, "loss": 0.5375, "step": 13808 }, { "epoch": 0.37762524611682347, "grad_norm": 1.0405762195587158, "learning_rate": 1.430517758659407e-05, "loss": 0.5352, "step": 13809 }, { "epoch": 0.3776525924305404, "grad_norm": 1.0921474695205688, "learning_rate": 1.4304378148982438e-05, "loss": 0.5121, "step": 13810 }, { "epoch": 0.37767993874425726, "grad_norm": 1.578969120979309, "learning_rate": 1.430357867760417e-05, "loss": 0.5313, "step": 13811 }, { "epoch": 0.3777072850579742, "grad_norm": 1.381516933441162, "learning_rate": 1.4302779172465538e-05, "loss": 0.53, "step": 13812 }, { "epoch": 0.3777346313716911, "grad_norm": 1.4798905849456787, "learning_rate": 1.4301979633572815e-05, "loss": 0.5459, "step": 13813 }, { "epoch": 0.37776197768540803, "grad_norm": 1.5699745416641235, "learning_rate": 1.4301180060932272e-05, "loss": 0.5244, "step": 13814 }, { "epoch": 0.3777893239991249, "grad_norm": 1.2961127758026123, "learning_rate": 1.430038045455018e-05, "loss": 0.5329, "step": 13815 }, { "epoch": 0.37781667031284183, "grad_norm": 1.527000069618225, "learning_rate": 1.4299580814432816e-05, "loss": 0.4529, "step": 13816 }, { "epoch": 0.37784401662655875, "grad_norm": 1.2412707805633545, "learning_rate": 1.4298781140586448e-05, "loss": 0.5138, "step": 13817 }, { "epoch": 0.3778713629402756, "grad_norm": 3.427136182785034, "learning_rate": 1.4297981433017353e-05, "loss": 0.3953, "step": 13818 }, { "epoch": 0.37789870925399255, "grad_norm": 1.4222452640533447, "learning_rate": 1.4297181691731805e-05, "loss": 0.9008, "step": 13819 }, { "epoch": 0.3779260555677095, "grad_norm": 1.7007817029953003, "learning_rate": 1.4296381916736073e-05, "loss": 0.5538, "step": 13820 }, { "epoch": 0.3779534018814264, "grad_norm": 1.6339480876922607, "learning_rate": 1.4295582108036437e-05, "loss": 0.5298, "step": 13821 }, { "epoch": 0.37798074819514327, "grad_norm": 1.3716994524002075, "learning_rate": 1.4294782265639166e-05, "loss": 0.5445, "step": 13822 }, { "epoch": 0.3780080945088602, "grad_norm": 1.4205228090286255, "learning_rate": 1.4293982389550539e-05, "loss": 0.5111, "step": 13823 }, { "epoch": 0.3780354408225771, "grad_norm": 8.374661445617676, "learning_rate": 1.4293182479776825e-05, "loss": 0.3986, "step": 13824 }, { "epoch": 0.37806278713629404, "grad_norm": 1.420140027999878, "learning_rate": 1.4292382536324306e-05, "loss": 0.5313, "step": 13825 }, { "epoch": 0.3780901334500109, "grad_norm": 1.4625903367996216, "learning_rate": 1.4291582559199252e-05, "loss": 0.4247, "step": 13826 }, { "epoch": 0.37811747976372784, "grad_norm": 1.3068408966064453, "learning_rate": 1.4290782548407942e-05, "loss": 0.5516, "step": 13827 }, { "epoch": 0.37814482607744476, "grad_norm": 1.8112701177597046, "learning_rate": 1.4289982503956648e-05, "loss": 0.5289, "step": 13828 }, { "epoch": 0.3781721723911617, "grad_norm": 1.3790968656539917, "learning_rate": 1.4289182425851653e-05, "loss": 0.468, "step": 13829 }, { "epoch": 0.37819951870487856, "grad_norm": 1.6686989068984985, "learning_rate": 1.4288382314099227e-05, "loss": 0.5345, "step": 13830 }, { "epoch": 0.3782268650185955, "grad_norm": 1.2990361452102661, "learning_rate": 1.4287582168705647e-05, "loss": 0.5394, "step": 13831 }, { "epoch": 0.3782542113323124, "grad_norm": 1.6866058111190796, "learning_rate": 1.4286781989677193e-05, "loss": 0.5028, "step": 13832 }, { "epoch": 0.37828155764602933, "grad_norm": 1.3555058240890503, "learning_rate": 1.428598177702014e-05, "loss": 0.5487, "step": 13833 }, { "epoch": 0.3783089039597462, "grad_norm": 1.3456730842590332, "learning_rate": 1.4285181530740766e-05, "loss": 0.5603, "step": 13834 }, { "epoch": 0.3783362502734631, "grad_norm": 1.233446478843689, "learning_rate": 1.428438125084535e-05, "loss": 0.4929, "step": 13835 }, { "epoch": 0.37836359658718005, "grad_norm": 1.256472110748291, "learning_rate": 1.4283580937340165e-05, "loss": 0.5047, "step": 13836 }, { "epoch": 0.378390942900897, "grad_norm": 1.6088470220565796, "learning_rate": 1.4282780590231497e-05, "loss": 0.5453, "step": 13837 }, { "epoch": 0.37841828921461385, "grad_norm": 1.5095396041870117, "learning_rate": 1.4281980209525619e-05, "loss": 0.575, "step": 13838 }, { "epoch": 0.37844563552833077, "grad_norm": 1.2689573764801025, "learning_rate": 1.4281179795228813e-05, "loss": 0.5265, "step": 13839 }, { "epoch": 0.3784729818420477, "grad_norm": 1.1047940254211426, "learning_rate": 1.4280379347347355e-05, "loss": 0.5352, "step": 13840 }, { "epoch": 0.3785003281557646, "grad_norm": 1.087439775466919, "learning_rate": 1.4279578865887524e-05, "loss": 0.5315, "step": 13841 }, { "epoch": 0.3785276744694815, "grad_norm": 2.5430049896240234, "learning_rate": 1.4278778350855605e-05, "loss": 0.8406, "step": 13842 }, { "epoch": 0.3785550207831984, "grad_norm": 1.3634198904037476, "learning_rate": 1.4277977802257872e-05, "loss": 0.5475, "step": 13843 }, { "epoch": 0.37858236709691534, "grad_norm": 1.650993824005127, "learning_rate": 1.4277177220100605e-05, "loss": 0.546, "step": 13844 }, { "epoch": 0.37860971341063226, "grad_norm": 1.3762627840042114, "learning_rate": 1.427637660439009e-05, "loss": 0.5384, "step": 13845 }, { "epoch": 0.37863705972434913, "grad_norm": 1.523701786994934, "learning_rate": 1.42755759551326e-05, "loss": 0.5461, "step": 13846 }, { "epoch": 0.37866440603806606, "grad_norm": 1.2987664937973022, "learning_rate": 1.4274775272334424e-05, "loss": 0.4508, "step": 13847 }, { "epoch": 0.378691752351783, "grad_norm": 1.3294466733932495, "learning_rate": 1.427397455600184e-05, "loss": 0.4735, "step": 13848 }, { "epoch": 0.3787190986654999, "grad_norm": 1.3158867359161377, "learning_rate": 1.4273173806141123e-05, "loss": 0.4682, "step": 13849 }, { "epoch": 0.3787464449792168, "grad_norm": 1.3146852254867554, "learning_rate": 1.4272373022758563e-05, "loss": 0.5437, "step": 13850 }, { "epoch": 0.3787737912929337, "grad_norm": 1.4815560579299927, "learning_rate": 1.427157220586044e-05, "loss": 0.4451, "step": 13851 }, { "epoch": 0.37880113760665063, "grad_norm": 1.1965855360031128, "learning_rate": 1.4270771355453032e-05, "loss": 0.4711, "step": 13852 }, { "epoch": 0.37882848392036755, "grad_norm": 1.5372624397277832, "learning_rate": 1.4269970471542629e-05, "loss": 0.5011, "step": 13853 }, { "epoch": 0.3788558302340844, "grad_norm": 1.1561318635940552, "learning_rate": 1.4269169554135504e-05, "loss": 0.561, "step": 13854 }, { "epoch": 0.37888317654780135, "grad_norm": 1.2906869649887085, "learning_rate": 1.426836860323795e-05, "loss": 0.5231, "step": 13855 }, { "epoch": 0.3789105228615183, "grad_norm": 1.1792329549789429, "learning_rate": 1.4267567618856244e-05, "loss": 0.5327, "step": 13856 }, { "epoch": 0.3789378691752352, "grad_norm": 1.0637680292129517, "learning_rate": 1.4266766600996672e-05, "loss": 0.502, "step": 13857 }, { "epoch": 0.37896521548895207, "grad_norm": 1.2729607820510864, "learning_rate": 1.4265965549665515e-05, "loss": 0.5309, "step": 13858 }, { "epoch": 0.378992561802669, "grad_norm": 2.305980682373047, "learning_rate": 1.4265164464869058e-05, "loss": 0.8124, "step": 13859 }, { "epoch": 0.3790199081163859, "grad_norm": 3.169552803039551, "learning_rate": 1.4264363346613586e-05, "loss": 0.8362, "step": 13860 }, { "epoch": 0.37904725443010284, "grad_norm": 1.305953860282898, "learning_rate": 1.4263562194905388e-05, "loss": 0.4704, "step": 13861 }, { "epoch": 0.3790746007438197, "grad_norm": 1.343388319015503, "learning_rate": 1.4262761009750738e-05, "loss": 0.4599, "step": 13862 }, { "epoch": 0.37910194705753664, "grad_norm": 1.277827262878418, "learning_rate": 1.426195979115593e-05, "loss": 0.4642, "step": 13863 }, { "epoch": 0.37912929337125356, "grad_norm": 1.106518030166626, "learning_rate": 1.4261158539127254e-05, "loss": 0.5322, "step": 13864 }, { "epoch": 0.3791566396849705, "grad_norm": 1.2644851207733154, "learning_rate": 1.426035725367098e-05, "loss": 0.5108, "step": 13865 }, { "epoch": 0.37918398599868736, "grad_norm": 1.5358846187591553, "learning_rate": 1.4259555934793406e-05, "loss": 0.3592, "step": 13866 }, { "epoch": 0.3792113323124043, "grad_norm": 1.149737000465393, "learning_rate": 1.4258754582500811e-05, "loss": 0.5258, "step": 13867 }, { "epoch": 0.3792386786261212, "grad_norm": 1.1398496627807617, "learning_rate": 1.4257953196799487e-05, "loss": 0.5366, "step": 13868 }, { "epoch": 0.37926602493983813, "grad_norm": 1.9490423202514648, "learning_rate": 1.4257151777695718e-05, "loss": 0.396, "step": 13869 }, { "epoch": 0.379293371253555, "grad_norm": 1.3428186178207397, "learning_rate": 1.4256350325195794e-05, "loss": 0.4882, "step": 13870 }, { "epoch": 0.3793207175672719, "grad_norm": 1.2514768838882446, "learning_rate": 1.4255548839305995e-05, "loss": 0.5277, "step": 13871 }, { "epoch": 0.37934806388098885, "grad_norm": 1.380266785621643, "learning_rate": 1.4254747320032616e-05, "loss": 0.4836, "step": 13872 }, { "epoch": 0.3793754101947058, "grad_norm": 2.3954505920410156, "learning_rate": 1.425394576738194e-05, "loss": 0.8401, "step": 13873 }, { "epoch": 0.37940275650842264, "grad_norm": 1.2976243495941162, "learning_rate": 1.4253144181360257e-05, "loss": 0.4389, "step": 13874 }, { "epoch": 0.37943010282213957, "grad_norm": 1.3461992740631104, "learning_rate": 1.4252342561973857e-05, "loss": 0.5433, "step": 13875 }, { "epoch": 0.3794574491358565, "grad_norm": 1.2433621883392334, "learning_rate": 1.4251540909229025e-05, "loss": 0.5655, "step": 13876 }, { "epoch": 0.3794847954495734, "grad_norm": 1.635888934135437, "learning_rate": 1.425073922313205e-05, "loss": 0.5453, "step": 13877 }, { "epoch": 0.3795121417632903, "grad_norm": 1.494493842124939, "learning_rate": 1.4249937503689225e-05, "loss": 0.5415, "step": 13878 }, { "epoch": 0.3795394880770072, "grad_norm": 1.2605036497116089, "learning_rate": 1.424913575090683e-05, "loss": 0.5232, "step": 13879 }, { "epoch": 0.37956683439072414, "grad_norm": 1.2831844091415405, "learning_rate": 1.4248333964791168e-05, "loss": 0.5541, "step": 13880 }, { "epoch": 0.37959418070444106, "grad_norm": 1.1481807231903076, "learning_rate": 1.4247532145348517e-05, "loss": 0.5361, "step": 13881 }, { "epoch": 0.37962152701815793, "grad_norm": 1.3316715955734253, "learning_rate": 1.4246730292585174e-05, "loss": 0.5512, "step": 13882 }, { "epoch": 0.37964887333187486, "grad_norm": 1.5190367698669434, "learning_rate": 1.4245928406507427e-05, "loss": 0.8451, "step": 13883 }, { "epoch": 0.3796762196455918, "grad_norm": 1.1569150686264038, "learning_rate": 1.4245126487121565e-05, "loss": 0.5201, "step": 13884 }, { "epoch": 0.3797035659593087, "grad_norm": 1.4083765745162964, "learning_rate": 1.4244324534433882e-05, "loss": 0.5261, "step": 13885 }, { "epoch": 0.3797309122730256, "grad_norm": 1.073881983757019, "learning_rate": 1.4243522548450667e-05, "loss": 0.5486, "step": 13886 }, { "epoch": 0.3797582585867425, "grad_norm": 1.0929685831069946, "learning_rate": 1.4242720529178211e-05, "loss": 0.556, "step": 13887 }, { "epoch": 0.37978560490045943, "grad_norm": 1.2352153062820435, "learning_rate": 1.424191847662281e-05, "loss": 0.5361, "step": 13888 }, { "epoch": 0.37981295121417635, "grad_norm": 1.3095061779022217, "learning_rate": 1.4241116390790746e-05, "loss": 0.5519, "step": 13889 }, { "epoch": 0.3798402975278932, "grad_norm": 2.130258321762085, "learning_rate": 1.424031427168832e-05, "loss": 0.5485, "step": 13890 }, { "epoch": 0.37986764384161015, "grad_norm": 1.139630675315857, "learning_rate": 1.4239512119321824e-05, "loss": 0.5448, "step": 13891 }, { "epoch": 0.3798949901553271, "grad_norm": 1.281328558921814, "learning_rate": 1.4238709933697546e-05, "loss": 0.5333, "step": 13892 }, { "epoch": 0.379922336469044, "grad_norm": 1.048677921295166, "learning_rate": 1.423790771482178e-05, "loss": 0.553, "step": 13893 }, { "epoch": 0.37994968278276087, "grad_norm": 1.2115118503570557, "learning_rate": 1.4237105462700826e-05, "loss": 0.5384, "step": 13894 }, { "epoch": 0.3799770290964778, "grad_norm": 1.1529074907302856, "learning_rate": 1.4236303177340968e-05, "loss": 0.5382, "step": 13895 }, { "epoch": 0.3800043754101947, "grad_norm": 1.3798354864120483, "learning_rate": 1.4235500858748508e-05, "loss": 0.4601, "step": 13896 }, { "epoch": 0.38003172172391164, "grad_norm": 1.2751262187957764, "learning_rate": 1.4234698506929731e-05, "loss": 0.538, "step": 13897 }, { "epoch": 0.3800590680376285, "grad_norm": 1.325584888458252, "learning_rate": 1.4233896121890938e-05, "loss": 0.8851, "step": 13898 }, { "epoch": 0.38008641435134544, "grad_norm": 1.0758240222930908, "learning_rate": 1.4233093703638425e-05, "loss": 0.5191, "step": 13899 }, { "epoch": 0.38011376066506236, "grad_norm": 1.1477445363998413, "learning_rate": 1.4232291252178478e-05, "loss": 0.5036, "step": 13900 }, { "epoch": 0.3801411069787793, "grad_norm": 1.4377644062042236, "learning_rate": 1.4231488767517397e-05, "loss": 0.5625, "step": 13901 }, { "epoch": 0.38016845329249616, "grad_norm": 1.2162071466445923, "learning_rate": 1.4230686249661481e-05, "loss": 0.4605, "step": 13902 }, { "epoch": 0.3801957996062131, "grad_norm": 1.205710768699646, "learning_rate": 1.422988369861702e-05, "loss": 0.5267, "step": 13903 }, { "epoch": 0.38022314591993, "grad_norm": 1.1490581035614014, "learning_rate": 1.422908111439031e-05, "loss": 0.5199, "step": 13904 }, { "epoch": 0.38025049223364693, "grad_norm": 1.3968836069107056, "learning_rate": 1.4228278496987651e-05, "loss": 0.5147, "step": 13905 }, { "epoch": 0.3802778385473638, "grad_norm": 1.275598168373108, "learning_rate": 1.4227475846415337e-05, "loss": 0.5184, "step": 13906 }, { "epoch": 0.3803051848610807, "grad_norm": 1.417568564414978, "learning_rate": 1.4226673162679666e-05, "loss": 0.4725, "step": 13907 }, { "epoch": 0.38033253117479765, "grad_norm": 1.1973168849945068, "learning_rate": 1.4225870445786932e-05, "loss": 0.5806, "step": 13908 }, { "epoch": 0.3803598774885146, "grad_norm": 1.2853230237960815, "learning_rate": 1.422506769574343e-05, "loss": 0.5336, "step": 13909 }, { "epoch": 0.38038722380223144, "grad_norm": 1.225257158279419, "learning_rate": 1.4224264912555467e-05, "loss": 0.5526, "step": 13910 }, { "epoch": 0.38041457011594837, "grad_norm": 1.2337483167648315, "learning_rate": 1.4223462096229332e-05, "loss": 0.5679, "step": 13911 }, { "epoch": 0.3804419164296653, "grad_norm": 1.1759023666381836, "learning_rate": 1.4222659246771326e-05, "loss": 0.5651, "step": 13912 }, { "epoch": 0.3804692627433822, "grad_norm": 1.144965648651123, "learning_rate": 1.4221856364187745e-05, "loss": 0.5058, "step": 13913 }, { "epoch": 0.3804966090570991, "grad_norm": 1.6092407703399658, "learning_rate": 1.4221053448484889e-05, "loss": 0.5131, "step": 13914 }, { "epoch": 0.380523955370816, "grad_norm": 1.303573727607727, "learning_rate": 1.4220250499669059e-05, "loss": 0.5386, "step": 13915 }, { "epoch": 0.38055130168453294, "grad_norm": 1.1965813636779785, "learning_rate": 1.421944751774655e-05, "loss": 0.5452, "step": 13916 }, { "epoch": 0.3805786479982498, "grad_norm": 0.9816043376922607, "learning_rate": 1.4218644502723665e-05, "loss": 0.5227, "step": 13917 }, { "epoch": 0.38060599431196673, "grad_norm": 1.2621594667434692, "learning_rate": 1.42178414546067e-05, "loss": 0.5448, "step": 13918 }, { "epoch": 0.38063334062568366, "grad_norm": 1.135246992111206, "learning_rate": 1.4217038373401954e-05, "loss": 0.5368, "step": 13919 }, { "epoch": 0.3806606869394006, "grad_norm": 1.0557260513305664, "learning_rate": 1.4216235259115729e-05, "loss": 0.5131, "step": 13920 }, { "epoch": 0.38068803325311745, "grad_norm": 1.2717275619506836, "learning_rate": 1.4215432111754326e-05, "loss": 0.5286, "step": 13921 }, { "epoch": 0.3807153795668344, "grad_norm": 1.4780164957046509, "learning_rate": 1.4214628931324045e-05, "loss": 0.4555, "step": 13922 }, { "epoch": 0.3807427258805513, "grad_norm": 1.472059965133667, "learning_rate": 1.4213825717831184e-05, "loss": 0.5203, "step": 13923 }, { "epoch": 0.3807700721942682, "grad_norm": 1.3902679681777954, "learning_rate": 1.4213022471282047e-05, "loss": 0.5322, "step": 13924 }, { "epoch": 0.3807974185079851, "grad_norm": 1.1136473417282104, "learning_rate": 1.4212219191682934e-05, "loss": 0.5474, "step": 13925 }, { "epoch": 0.380824764821702, "grad_norm": 1.7362792491912842, "learning_rate": 1.4211415879040149e-05, "loss": 0.5392, "step": 13926 }, { "epoch": 0.38085211113541895, "grad_norm": 1.433797001838684, "learning_rate": 1.421061253335999e-05, "loss": 0.8496, "step": 13927 }, { "epoch": 0.38087945744913587, "grad_norm": 1.3803091049194336, "learning_rate": 1.4209809154648757e-05, "loss": 0.528, "step": 13928 }, { "epoch": 0.38090680376285274, "grad_norm": 1.2727912664413452, "learning_rate": 1.4209005742912763e-05, "loss": 0.5385, "step": 13929 }, { "epoch": 0.38093415007656967, "grad_norm": 1.27342689037323, "learning_rate": 1.4208202298158297e-05, "loss": 0.4343, "step": 13930 }, { "epoch": 0.3809614963902866, "grad_norm": 1.122266411781311, "learning_rate": 1.4207398820391674e-05, "loss": 0.5212, "step": 13931 }, { "epoch": 0.3809888427040035, "grad_norm": 1.0853056907653809, "learning_rate": 1.4206595309619187e-05, "loss": 0.8378, "step": 13932 }, { "epoch": 0.3810161890177204, "grad_norm": 1.6552802324295044, "learning_rate": 1.4205791765847146e-05, "loss": 0.4502, "step": 13933 }, { "epoch": 0.3810435353314373, "grad_norm": 1.2489163875579834, "learning_rate": 1.4204988189081853e-05, "loss": 0.4396, "step": 13934 }, { "epoch": 0.38107088164515424, "grad_norm": 1.1625248193740845, "learning_rate": 1.4204184579329609e-05, "loss": 0.5451, "step": 13935 }, { "epoch": 0.38109822795887116, "grad_norm": 1.2519772052764893, "learning_rate": 1.4203380936596721e-05, "loss": 0.4956, "step": 13936 }, { "epoch": 0.38112557427258803, "grad_norm": 1.2792257070541382, "learning_rate": 1.4202577260889492e-05, "loss": 0.4528, "step": 13937 }, { "epoch": 0.38115292058630496, "grad_norm": 1.300628900527954, "learning_rate": 1.4201773552214226e-05, "loss": 0.5291, "step": 13938 }, { "epoch": 0.3811802669000219, "grad_norm": 1.0512056350708008, "learning_rate": 1.420096981057723e-05, "loss": 0.5307, "step": 13939 }, { "epoch": 0.3812076132137388, "grad_norm": 1.6054365634918213, "learning_rate": 1.4200166035984811e-05, "loss": 0.8503, "step": 13940 }, { "epoch": 0.3812349595274557, "grad_norm": 1.2191530466079712, "learning_rate": 1.4199362228443266e-05, "loss": 0.5392, "step": 13941 }, { "epoch": 0.3812623058411726, "grad_norm": 1.124990463256836, "learning_rate": 1.4198558387958911e-05, "loss": 0.505, "step": 13942 }, { "epoch": 0.3812896521548895, "grad_norm": 1.0215184688568115, "learning_rate": 1.4197754514538042e-05, "loss": 0.5176, "step": 13943 }, { "epoch": 0.38131699846860645, "grad_norm": 1.203171968460083, "learning_rate": 1.4196950608186974e-05, "loss": 0.5283, "step": 13944 }, { "epoch": 0.3813443447823233, "grad_norm": 1.2936537265777588, "learning_rate": 1.419614666891201e-05, "loss": 0.5062, "step": 13945 }, { "epoch": 0.38137169109604024, "grad_norm": 1.156051754951477, "learning_rate": 1.4195342696719454e-05, "loss": 0.494, "step": 13946 }, { "epoch": 0.38139903740975717, "grad_norm": 1.1554875373840332, "learning_rate": 1.4194538691615615e-05, "loss": 0.4953, "step": 13947 }, { "epoch": 0.3814263837234741, "grad_norm": 1.4040225744247437, "learning_rate": 1.4193734653606803e-05, "loss": 0.4589, "step": 13948 }, { "epoch": 0.38145373003719096, "grad_norm": 1.1181375980377197, "learning_rate": 1.419293058269932e-05, "loss": 0.5355, "step": 13949 }, { "epoch": 0.3814810763509079, "grad_norm": 1.3710079193115234, "learning_rate": 1.419212647889948e-05, "loss": 0.5759, "step": 13950 }, { "epoch": 0.3815084226646248, "grad_norm": 1.2648204565048218, "learning_rate": 1.4191322342213585e-05, "loss": 0.5614, "step": 13951 }, { "epoch": 0.38153576897834174, "grad_norm": 1.3789663314819336, "learning_rate": 1.4190518172647947e-05, "loss": 0.3686, "step": 13952 }, { "epoch": 0.3815631152920586, "grad_norm": 1.1754117012023926, "learning_rate": 1.4189713970208874e-05, "loss": 0.5063, "step": 13953 }, { "epoch": 0.38159046160577553, "grad_norm": 0.9944990277290344, "learning_rate": 1.4188909734902672e-05, "loss": 0.5527, "step": 13954 }, { "epoch": 0.38161780791949246, "grad_norm": 1.3996164798736572, "learning_rate": 1.4188105466735651e-05, "loss": 0.5227, "step": 13955 }, { "epoch": 0.3816451542332094, "grad_norm": 1.1902599334716797, "learning_rate": 1.4187301165714125e-05, "loss": 0.8465, "step": 13956 }, { "epoch": 0.38167250054692625, "grad_norm": 1.3661669492721558, "learning_rate": 1.4186496831844396e-05, "loss": 0.5263, "step": 13957 }, { "epoch": 0.3816998468606432, "grad_norm": 1.286039113998413, "learning_rate": 1.4185692465132778e-05, "loss": 0.5435, "step": 13958 }, { "epoch": 0.3817271931743601, "grad_norm": 1.1546273231506348, "learning_rate": 1.4184888065585582e-05, "loss": 0.5054, "step": 13959 }, { "epoch": 0.381754539488077, "grad_norm": 1.1200623512268066, "learning_rate": 1.4184083633209118e-05, "loss": 0.5308, "step": 13960 }, { "epoch": 0.3817818858017939, "grad_norm": 1.2654770612716675, "learning_rate": 1.4183279168009694e-05, "loss": 0.5567, "step": 13961 }, { "epoch": 0.3818092321155108, "grad_norm": 1.3994287252426147, "learning_rate": 1.4182474669993623e-05, "loss": 0.5287, "step": 13962 }, { "epoch": 0.38183657842922775, "grad_norm": 1.2486480474472046, "learning_rate": 1.4181670139167211e-05, "loss": 0.5184, "step": 13963 }, { "epoch": 0.38186392474294467, "grad_norm": 1.2005528211593628, "learning_rate": 1.418086557553678e-05, "loss": 0.5137, "step": 13964 }, { "epoch": 0.38189127105666154, "grad_norm": 1.3837969303131104, "learning_rate": 1.4180060979108632e-05, "loss": 0.4411, "step": 13965 }, { "epoch": 0.38191861737037847, "grad_norm": 1.1605814695358276, "learning_rate": 1.4179256349889081e-05, "loss": 0.5425, "step": 13966 }, { "epoch": 0.3819459636840954, "grad_norm": 1.1643633842468262, "learning_rate": 1.4178451687884441e-05, "loss": 0.5042, "step": 13967 }, { "epoch": 0.3819733099978123, "grad_norm": 1.2880927324295044, "learning_rate": 1.4177646993101025e-05, "loss": 0.5489, "step": 13968 }, { "epoch": 0.3820006563115292, "grad_norm": 1.0588595867156982, "learning_rate": 1.4176842265545143e-05, "loss": 0.5334, "step": 13969 }, { "epoch": 0.3820280026252461, "grad_norm": 1.2905898094177246, "learning_rate": 1.4176037505223109e-05, "loss": 0.5608, "step": 13970 }, { "epoch": 0.38205534893896304, "grad_norm": 1.394178032875061, "learning_rate": 1.4175232712141233e-05, "loss": 0.5415, "step": 13971 }, { "epoch": 0.38208269525267996, "grad_norm": 1.4364123344421387, "learning_rate": 1.4174427886305836e-05, "loss": 0.3951, "step": 13972 }, { "epoch": 0.38211004156639683, "grad_norm": 1.0938204526901245, "learning_rate": 1.4173623027723225e-05, "loss": 0.5362, "step": 13973 }, { "epoch": 0.38213738788011375, "grad_norm": 1.3583958148956299, "learning_rate": 1.4172818136399715e-05, "loss": 0.4002, "step": 13974 }, { "epoch": 0.3821647341938307, "grad_norm": 1.367697834968567, "learning_rate": 1.4172013212341624e-05, "loss": 0.5536, "step": 13975 }, { "epoch": 0.3821920805075476, "grad_norm": 1.2149248123168945, "learning_rate": 1.417120825555526e-05, "loss": 0.5505, "step": 13976 }, { "epoch": 0.3822194268212645, "grad_norm": 1.3999029397964478, "learning_rate": 1.4170403266046944e-05, "loss": 0.5463, "step": 13977 }, { "epoch": 0.3822467731349814, "grad_norm": 1.3446714878082275, "learning_rate": 1.4169598243822984e-05, "loss": 0.524, "step": 13978 }, { "epoch": 0.3822741194486983, "grad_norm": 1.184239387512207, "learning_rate": 1.4168793188889702e-05, "loss": 0.5267, "step": 13979 }, { "epoch": 0.38230146576241525, "grad_norm": 1.144021987915039, "learning_rate": 1.4167988101253412e-05, "loss": 0.8465, "step": 13980 }, { "epoch": 0.3823288120761321, "grad_norm": 1.1521841287612915, "learning_rate": 1.4167182980920426e-05, "loss": 0.5245, "step": 13981 }, { "epoch": 0.38235615838984904, "grad_norm": 1.1718392372131348, "learning_rate": 1.4166377827897063e-05, "loss": 0.5449, "step": 13982 }, { "epoch": 0.38238350470356597, "grad_norm": 1.1895991563796997, "learning_rate": 1.4165572642189637e-05, "loss": 0.5443, "step": 13983 }, { "epoch": 0.3824108510172829, "grad_norm": 1.1275415420532227, "learning_rate": 1.4164767423804468e-05, "loss": 0.5257, "step": 13984 }, { "epoch": 0.38243819733099976, "grad_norm": 1.0176972150802612, "learning_rate": 1.4163962172747871e-05, "loss": 0.5344, "step": 13985 }, { "epoch": 0.3824655436447167, "grad_norm": 1.3978211879730225, "learning_rate": 1.4163156889026159e-05, "loss": 0.4799, "step": 13986 }, { "epoch": 0.3824928899584336, "grad_norm": 1.3123465776443481, "learning_rate": 1.4162351572645658e-05, "loss": 0.4524, "step": 13987 }, { "epoch": 0.38252023627215054, "grad_norm": 1.1455808877944946, "learning_rate": 1.4161546223612679e-05, "loss": 0.5505, "step": 13988 }, { "epoch": 0.3825475825858674, "grad_norm": 1.5528969764709473, "learning_rate": 1.4160740841933539e-05, "loss": 0.5466, "step": 13989 }, { "epoch": 0.38257492889958433, "grad_norm": 1.2188706398010254, "learning_rate": 1.4159935427614557e-05, "loss": 0.4136, "step": 13990 }, { "epoch": 0.38260227521330126, "grad_norm": 1.6166740655899048, "learning_rate": 1.4159129980662057e-05, "loss": 0.5276, "step": 13991 }, { "epoch": 0.3826296215270182, "grad_norm": 1.2994446754455566, "learning_rate": 1.4158324501082347e-05, "loss": 0.5621, "step": 13992 }, { "epoch": 0.38265696784073505, "grad_norm": 1.2694954872131348, "learning_rate": 1.4157518988881754e-05, "loss": 0.5196, "step": 13993 }, { "epoch": 0.382684314154452, "grad_norm": 1.0017422437667847, "learning_rate": 1.4156713444066596e-05, "loss": 0.5227, "step": 13994 }, { "epoch": 0.3827116604681689, "grad_norm": 1.117702841758728, "learning_rate": 1.415590786664319e-05, "loss": 0.5195, "step": 13995 }, { "epoch": 0.3827390067818858, "grad_norm": 1.2958858013153076, "learning_rate": 1.4155102256617857e-05, "loss": 0.544, "step": 13996 }, { "epoch": 0.3827663530956027, "grad_norm": 1.562909722328186, "learning_rate": 1.4154296613996914e-05, "loss": 0.5756, "step": 13997 }, { "epoch": 0.3827936994093196, "grad_norm": 1.1688153743743896, "learning_rate": 1.4153490938786685e-05, "loss": 0.5055, "step": 13998 }, { "epoch": 0.38282104572303655, "grad_norm": 15.879273414611816, "learning_rate": 1.4152685230993488e-05, "loss": 0.4532, "step": 13999 }, { "epoch": 0.38284839203675347, "grad_norm": 1.6836620569229126, "learning_rate": 1.4151879490623643e-05, "loss": 0.4241, "step": 14000 }, { "epoch": 0.38287573835047034, "grad_norm": 1.269964575767517, "learning_rate": 1.4151073717683473e-05, "loss": 0.5049, "step": 14001 }, { "epoch": 0.38290308466418727, "grad_norm": 1.174017310142517, "learning_rate": 1.4150267912179299e-05, "loss": 0.5244, "step": 14002 }, { "epoch": 0.3829304309779042, "grad_norm": 1.5438156127929688, "learning_rate": 1.4149462074117438e-05, "loss": 0.8148, "step": 14003 }, { "epoch": 0.3829577772916211, "grad_norm": 1.257744550704956, "learning_rate": 1.4148656203504218e-05, "loss": 0.4976, "step": 14004 }, { "epoch": 0.382985123605338, "grad_norm": 1.3123528957366943, "learning_rate": 1.4147850300345957e-05, "loss": 0.5228, "step": 14005 }, { "epoch": 0.3830124699190549, "grad_norm": 1.4729140996932983, "learning_rate": 1.4147044364648977e-05, "loss": 0.5669, "step": 14006 }, { "epoch": 0.38303981623277183, "grad_norm": 1.4014594554901123, "learning_rate": 1.4146238396419602e-05, "loss": 0.3713, "step": 14007 }, { "epoch": 0.38306716254648876, "grad_norm": 1.144265055656433, "learning_rate": 1.4145432395664151e-05, "loss": 0.5252, "step": 14008 }, { "epoch": 0.38309450886020563, "grad_norm": 1.1201239824295044, "learning_rate": 1.4144626362388951e-05, "loss": 0.5409, "step": 14009 }, { "epoch": 0.38312185517392255, "grad_norm": 1.741748332977295, "learning_rate": 1.4143820296600326e-05, "loss": 0.5201, "step": 14010 }, { "epoch": 0.3831492014876395, "grad_norm": 1.1733556985855103, "learning_rate": 1.4143014198304592e-05, "loss": 0.5495, "step": 14011 }, { "epoch": 0.3831765478013564, "grad_norm": 1.5017774105072021, "learning_rate": 1.4142208067508079e-05, "loss": 0.8448, "step": 14012 }, { "epoch": 0.3832038941150733, "grad_norm": 1.2917660474777222, "learning_rate": 1.4141401904217113e-05, "loss": 0.5064, "step": 14013 }, { "epoch": 0.3832312404287902, "grad_norm": 1.552585244178772, "learning_rate": 1.4140595708438012e-05, "loss": 0.4741, "step": 14014 }, { "epoch": 0.3832585867425071, "grad_norm": 1.2350263595581055, "learning_rate": 1.4139789480177104e-05, "loss": 0.5544, "step": 14015 }, { "epoch": 0.38328593305622405, "grad_norm": 1.628563404083252, "learning_rate": 1.413898321944071e-05, "loss": 0.3807, "step": 14016 }, { "epoch": 0.3833132793699409, "grad_norm": 1.536636233329773, "learning_rate": 1.4138176926235158e-05, "loss": 0.5088, "step": 14017 }, { "epoch": 0.38334062568365784, "grad_norm": 1.0987130403518677, "learning_rate": 1.4137370600566776e-05, "loss": 0.5319, "step": 14018 }, { "epoch": 0.38336797199737477, "grad_norm": 1.398847222328186, "learning_rate": 1.4136564242441884e-05, "loss": 0.5289, "step": 14019 }, { "epoch": 0.38339531831109164, "grad_norm": 1.8318713903427124, "learning_rate": 1.4135757851866809e-05, "loss": 0.8449, "step": 14020 }, { "epoch": 0.38342266462480856, "grad_norm": 1.400296688079834, "learning_rate": 1.4134951428847879e-05, "loss": 0.4298, "step": 14021 }, { "epoch": 0.3834500109385255, "grad_norm": 1.1874463558197021, "learning_rate": 1.4134144973391418e-05, "loss": 0.5345, "step": 14022 }, { "epoch": 0.3834773572522424, "grad_norm": 1.309199333190918, "learning_rate": 1.4133338485503752e-05, "loss": 0.501, "step": 14023 }, { "epoch": 0.3835047035659593, "grad_norm": 1.2382298707962036, "learning_rate": 1.413253196519121e-05, "loss": 0.5202, "step": 14024 }, { "epoch": 0.3835320498796762, "grad_norm": 2.3024187088012695, "learning_rate": 1.4131725412460116e-05, "loss": 0.8617, "step": 14025 }, { "epoch": 0.38355939619339313, "grad_norm": 1.4259659051895142, "learning_rate": 1.4130918827316802e-05, "loss": 0.5433, "step": 14026 }, { "epoch": 0.38358674250711006, "grad_norm": 1.3643968105316162, "learning_rate": 1.413011220976759e-05, "loss": 0.8399, "step": 14027 }, { "epoch": 0.3836140888208269, "grad_norm": 1.4585360288619995, "learning_rate": 1.4129305559818809e-05, "loss": 0.4365, "step": 14028 }, { "epoch": 0.38364143513454385, "grad_norm": 1.514907717704773, "learning_rate": 1.412849887747679e-05, "loss": 0.8435, "step": 14029 }, { "epoch": 0.3836687814482608, "grad_norm": 1.4116029739379883, "learning_rate": 1.4127692162747859e-05, "loss": 0.4908, "step": 14030 }, { "epoch": 0.3836961277619777, "grad_norm": 1.4380488395690918, "learning_rate": 1.4126885415638345e-05, "loss": 0.5289, "step": 14031 }, { "epoch": 0.38372347407569457, "grad_norm": 1.945195198059082, "learning_rate": 1.4126078636154574e-05, "loss": 0.7985, "step": 14032 }, { "epoch": 0.3837508203894115, "grad_norm": 1.2864683866500854, "learning_rate": 1.412527182430288e-05, "loss": 0.5502, "step": 14033 }, { "epoch": 0.3837781667031284, "grad_norm": 1.3562902212142944, "learning_rate": 1.4124464980089589e-05, "loss": 0.4205, "step": 14034 }, { "epoch": 0.38380551301684535, "grad_norm": 1.2627266645431519, "learning_rate": 1.412365810352103e-05, "loss": 0.5142, "step": 14035 }, { "epoch": 0.3838328593305622, "grad_norm": 1.1430021524429321, "learning_rate": 1.4122851194603534e-05, "loss": 0.522, "step": 14036 }, { "epoch": 0.38386020564427914, "grad_norm": 1.3762634992599487, "learning_rate": 1.412204425334343e-05, "loss": 0.5844, "step": 14037 }, { "epoch": 0.38388755195799606, "grad_norm": 1.3363035917282104, "learning_rate": 1.4121237279747048e-05, "loss": 0.5364, "step": 14038 }, { "epoch": 0.383914898271713, "grad_norm": 1.631271243095398, "learning_rate": 1.4120430273820724e-05, "loss": 0.5388, "step": 14039 }, { "epoch": 0.38394224458542986, "grad_norm": 1.3428550958633423, "learning_rate": 1.4119623235570783e-05, "loss": 0.5269, "step": 14040 }, { "epoch": 0.3839695908991468, "grad_norm": 1.4890573024749756, "learning_rate": 1.4118816165003554e-05, "loss": 0.5366, "step": 14041 }, { "epoch": 0.3839969372128637, "grad_norm": 1.2357126474380493, "learning_rate": 1.4118009062125373e-05, "loss": 0.5239, "step": 14042 }, { "epoch": 0.38402428352658063, "grad_norm": 1.6569597721099854, "learning_rate": 1.411720192694257e-05, "loss": 0.5226, "step": 14043 }, { "epoch": 0.3840516298402975, "grad_norm": 1.3705424070358276, "learning_rate": 1.4116394759461473e-05, "loss": 0.4894, "step": 14044 }, { "epoch": 0.38407897615401443, "grad_norm": 1.2035596370697021, "learning_rate": 1.4115587559688421e-05, "loss": 0.5304, "step": 14045 }, { "epoch": 0.38410632246773135, "grad_norm": 1.2857513427734375, "learning_rate": 1.4114780327629743e-05, "loss": 0.4822, "step": 14046 }, { "epoch": 0.3841336687814483, "grad_norm": 1.2218645811080933, "learning_rate": 1.4113973063291767e-05, "loss": 0.5439, "step": 14047 }, { "epoch": 0.38416101509516515, "grad_norm": 2.674842119216919, "learning_rate": 1.4113165766680837e-05, "loss": 0.3854, "step": 14048 }, { "epoch": 0.3841883614088821, "grad_norm": 1.3710566759109497, "learning_rate": 1.4112358437803276e-05, "loss": 0.8101, "step": 14049 }, { "epoch": 0.384215707722599, "grad_norm": 1.263349175453186, "learning_rate": 1.411155107666542e-05, "loss": 0.5238, "step": 14050 }, { "epoch": 0.3842430540363159, "grad_norm": 2.353101968765259, "learning_rate": 1.4110743683273602e-05, "loss": 0.4175, "step": 14051 }, { "epoch": 0.3842704003500328, "grad_norm": 1.2247663736343384, "learning_rate": 1.4109936257634158e-05, "loss": 0.5455, "step": 14052 }, { "epoch": 0.3842977466637497, "grad_norm": 1.319188117980957, "learning_rate": 1.4109128799753422e-05, "loss": 0.5498, "step": 14053 }, { "epoch": 0.38432509297746664, "grad_norm": 1.5761252641677856, "learning_rate": 1.4108321309637725e-05, "loss": 0.4154, "step": 14054 }, { "epoch": 0.38435243929118357, "grad_norm": 1.6266313791275024, "learning_rate": 1.4107513787293401e-05, "loss": 0.5602, "step": 14055 }, { "epoch": 0.38437978560490044, "grad_norm": 1.382990837097168, "learning_rate": 1.4106706232726793e-05, "loss": 0.5162, "step": 14056 }, { "epoch": 0.38440713191861736, "grad_norm": 1.460411548614502, "learning_rate": 1.4105898645944225e-05, "loss": 0.4301, "step": 14057 }, { "epoch": 0.3844344782323343, "grad_norm": 1.4288686513900757, "learning_rate": 1.4105091026952043e-05, "loss": 0.4615, "step": 14058 }, { "epoch": 0.3844618245460512, "grad_norm": 2.4031453132629395, "learning_rate": 1.4104283375756573e-05, "loss": 0.4299, "step": 14059 }, { "epoch": 0.3844891708597681, "grad_norm": 1.5893515348434448, "learning_rate": 1.4103475692364156e-05, "loss": 0.5184, "step": 14060 }, { "epoch": 0.384516517173485, "grad_norm": 1.8844985961914062, "learning_rate": 1.410266797678113e-05, "loss": 0.5033, "step": 14061 }, { "epoch": 0.38454386348720193, "grad_norm": 1.5907057523727417, "learning_rate": 1.4101860229013825e-05, "loss": 0.5064, "step": 14062 }, { "epoch": 0.38457120980091886, "grad_norm": 1.5136218070983887, "learning_rate": 1.410105244906858e-05, "loss": 0.548, "step": 14063 }, { "epoch": 0.3845985561146357, "grad_norm": 1.269828200340271, "learning_rate": 1.4100244636951734e-05, "loss": 0.3819, "step": 14064 }, { "epoch": 0.38462590242835265, "grad_norm": 1.1401700973510742, "learning_rate": 1.4099436792669623e-05, "loss": 0.5182, "step": 14065 }, { "epoch": 0.3846532487420696, "grad_norm": 1.5291403532028198, "learning_rate": 1.4098628916228582e-05, "loss": 0.5276, "step": 14066 }, { "epoch": 0.3846805950557865, "grad_norm": 1.3435635566711426, "learning_rate": 1.4097821007634955e-05, "loss": 0.5117, "step": 14067 }, { "epoch": 0.38470794136950337, "grad_norm": 1.6323721408843994, "learning_rate": 1.409701306689507e-05, "loss": 0.5226, "step": 14068 }, { "epoch": 0.3847352876832203, "grad_norm": 1.4764238595962524, "learning_rate": 1.4096205094015276e-05, "loss": 0.5557, "step": 14069 }, { "epoch": 0.3847626339969372, "grad_norm": 1.5814998149871826, "learning_rate": 1.4095397089001903e-05, "loss": 0.4829, "step": 14070 }, { "epoch": 0.38478998031065414, "grad_norm": 1.4647789001464844, "learning_rate": 1.4094589051861292e-05, "loss": 0.8439, "step": 14071 }, { "epoch": 0.384817326624371, "grad_norm": 1.0974873304367065, "learning_rate": 1.4093780982599784e-05, "loss": 0.4278, "step": 14072 }, { "epoch": 0.38484467293808794, "grad_norm": 1.400041937828064, "learning_rate": 1.4092972881223714e-05, "loss": 0.8542, "step": 14073 }, { "epoch": 0.38487201925180486, "grad_norm": 1.4898039102554321, "learning_rate": 1.4092164747739426e-05, "loss": 0.5678, "step": 14074 }, { "epoch": 0.3848993655655218, "grad_norm": 6.114817142486572, "learning_rate": 1.4091356582153258e-05, "loss": 0.8595, "step": 14075 }, { "epoch": 0.38492671187923866, "grad_norm": 1.732588291168213, "learning_rate": 1.4090548384471546e-05, "loss": 0.5028, "step": 14076 }, { "epoch": 0.3849540581929556, "grad_norm": 1.6723874807357788, "learning_rate": 1.4089740154700633e-05, "loss": 0.5518, "step": 14077 }, { "epoch": 0.3849814045066725, "grad_norm": 1.228325366973877, "learning_rate": 1.408893189284686e-05, "loss": 0.5308, "step": 14078 }, { "epoch": 0.38500875082038943, "grad_norm": 1.3847216367721558, "learning_rate": 1.4088123598916568e-05, "loss": 0.4882, "step": 14079 }, { "epoch": 0.3850360971341063, "grad_norm": 1.183264136314392, "learning_rate": 1.4087315272916095e-05, "loss": 0.5075, "step": 14080 }, { "epoch": 0.38506344344782323, "grad_norm": 1.2993589639663696, "learning_rate": 1.4086506914851785e-05, "loss": 0.4878, "step": 14081 }, { "epoch": 0.38509078976154015, "grad_norm": 1.9058729410171509, "learning_rate": 1.4085698524729978e-05, "loss": 0.3953, "step": 14082 }, { "epoch": 0.3851181360752571, "grad_norm": 1.3909180164337158, "learning_rate": 1.4084890102557019e-05, "loss": 0.8404, "step": 14083 }, { "epoch": 0.38514548238897395, "grad_norm": 1.6242332458496094, "learning_rate": 1.4084081648339243e-05, "loss": 0.5563, "step": 14084 }, { "epoch": 0.3851728287026909, "grad_norm": 1.122235655784607, "learning_rate": 1.4083273162082998e-05, "loss": 0.5811, "step": 14085 }, { "epoch": 0.3852001750164078, "grad_norm": 1.4714293479919434, "learning_rate": 1.4082464643794623e-05, "loss": 0.5372, "step": 14086 }, { "epoch": 0.3852275213301247, "grad_norm": 1.4068681001663208, "learning_rate": 1.4081656093480461e-05, "loss": 0.4598, "step": 14087 }, { "epoch": 0.3852548676438416, "grad_norm": 1.3452656269073486, "learning_rate": 1.4080847511146858e-05, "loss": 0.8108, "step": 14088 }, { "epoch": 0.3852822139575585, "grad_norm": 1.6887863874435425, "learning_rate": 1.4080038896800153e-05, "loss": 0.4287, "step": 14089 }, { "epoch": 0.38530956027127544, "grad_norm": 1.357131838798523, "learning_rate": 1.407923025044669e-05, "loss": 0.5048, "step": 14090 }, { "epoch": 0.38533690658499237, "grad_norm": 1.3209216594696045, "learning_rate": 1.4078421572092815e-05, "loss": 0.4457, "step": 14091 }, { "epoch": 0.38536425289870924, "grad_norm": 1.3855899572372437, "learning_rate": 1.4077612861744871e-05, "loss": 0.8621, "step": 14092 }, { "epoch": 0.38539159921242616, "grad_norm": 1.3617666959762573, "learning_rate": 1.4076804119409202e-05, "loss": 0.5577, "step": 14093 }, { "epoch": 0.3854189455261431, "grad_norm": 1.2671629190444946, "learning_rate": 1.4075995345092153e-05, "loss": 0.5002, "step": 14094 }, { "epoch": 0.38544629183986, "grad_norm": 1.762063980102539, "learning_rate": 1.4075186538800064e-05, "loss": 0.4468, "step": 14095 }, { "epoch": 0.3854736381535769, "grad_norm": 1.623097538948059, "learning_rate": 1.4074377700539286e-05, "loss": 0.8524, "step": 14096 }, { "epoch": 0.3855009844672938, "grad_norm": 1.8616994619369507, "learning_rate": 1.407356883031616e-05, "loss": 0.5312, "step": 14097 }, { "epoch": 0.38552833078101073, "grad_norm": 1.2950619459152222, "learning_rate": 1.4072759928137034e-05, "loss": 0.515, "step": 14098 }, { "epoch": 0.38555567709472766, "grad_norm": 1.3263213634490967, "learning_rate": 1.4071950994008253e-05, "loss": 0.8325, "step": 14099 }, { "epoch": 0.3855830234084445, "grad_norm": 1.3999801874160767, "learning_rate": 1.4071142027936163e-05, "loss": 0.4419, "step": 14100 }, { "epoch": 0.38561036972216145, "grad_norm": 1.3756260871887207, "learning_rate": 1.4070333029927106e-05, "loss": 0.5009, "step": 14101 }, { "epoch": 0.3856377160358784, "grad_norm": 1.4540387392044067, "learning_rate": 1.4069523999987435e-05, "loss": 0.5345, "step": 14102 }, { "epoch": 0.3856650623495953, "grad_norm": 1.2318341732025146, "learning_rate": 1.4068714938123492e-05, "loss": 0.5616, "step": 14103 }, { "epoch": 0.38569240866331217, "grad_norm": 0.938463032245636, "learning_rate": 1.4067905844341627e-05, "loss": 0.3695, "step": 14104 }, { "epoch": 0.3857197549770291, "grad_norm": 1.4069907665252686, "learning_rate": 1.4067096718648184e-05, "loss": 0.5307, "step": 14105 }, { "epoch": 0.385747101290746, "grad_norm": 1.258815884590149, "learning_rate": 1.4066287561049512e-05, "loss": 0.5485, "step": 14106 }, { "epoch": 0.38577444760446294, "grad_norm": 1.0852619409561157, "learning_rate": 1.4065478371551959e-05, "loss": 0.4956, "step": 14107 }, { "epoch": 0.3858017939181798, "grad_norm": 1.2573328018188477, "learning_rate": 1.4064669150161872e-05, "loss": 0.4909, "step": 14108 }, { "epoch": 0.38582914023189674, "grad_norm": 1.4729912281036377, "learning_rate": 1.4063859896885599e-05, "loss": 0.5552, "step": 14109 }, { "epoch": 0.38585648654561366, "grad_norm": 1.2269176244735718, "learning_rate": 1.4063050611729492e-05, "loss": 0.5441, "step": 14110 }, { "epoch": 0.3858838328593306, "grad_norm": 1.3060163259506226, "learning_rate": 1.4062241294699892e-05, "loss": 0.5491, "step": 14111 }, { "epoch": 0.38591117917304746, "grad_norm": 1.1999852657318115, "learning_rate": 1.4061431945803152e-05, "loss": 0.5215, "step": 14112 }, { "epoch": 0.3859385254867644, "grad_norm": 1.2727631330490112, "learning_rate": 1.4060622565045626e-05, "loss": 0.5312, "step": 14113 }, { "epoch": 0.3859658718004813, "grad_norm": 1.2827537059783936, "learning_rate": 1.4059813152433655e-05, "loss": 0.5089, "step": 14114 }, { "epoch": 0.38599321811419823, "grad_norm": 1.351755142211914, "learning_rate": 1.4059003707973594e-05, "loss": 0.5471, "step": 14115 }, { "epoch": 0.3860205644279151, "grad_norm": 1.3444175720214844, "learning_rate": 1.4058194231671789e-05, "loss": 0.5203, "step": 14116 }, { "epoch": 0.386047910741632, "grad_norm": 1.2151401042938232, "learning_rate": 1.4057384723534593e-05, "loss": 0.5372, "step": 14117 }, { "epoch": 0.38607525705534895, "grad_norm": 1.5357627868652344, "learning_rate": 1.4056575183568357e-05, "loss": 0.5595, "step": 14118 }, { "epoch": 0.3861026033690659, "grad_norm": 1.1019036769866943, "learning_rate": 1.405576561177943e-05, "loss": 0.5358, "step": 14119 }, { "epoch": 0.38612994968278275, "grad_norm": 1.1622562408447266, "learning_rate": 1.405495600817416e-05, "loss": 0.521, "step": 14120 }, { "epoch": 0.38615729599649967, "grad_norm": 1.3737088441848755, "learning_rate": 1.4054146372758908e-05, "loss": 0.5198, "step": 14121 }, { "epoch": 0.3861846423102166, "grad_norm": 1.23716139793396, "learning_rate": 1.4053336705540013e-05, "loss": 0.5261, "step": 14122 }, { "epoch": 0.38621198862393347, "grad_norm": 1.3337838649749756, "learning_rate": 1.4052527006523834e-05, "loss": 0.5673, "step": 14123 }, { "epoch": 0.3862393349376504, "grad_norm": 1.53171706199646, "learning_rate": 1.405171727571672e-05, "loss": 0.8433, "step": 14124 }, { "epoch": 0.3862666812513673, "grad_norm": 1.2338337898254395, "learning_rate": 1.4050907513125028e-05, "loss": 0.543, "step": 14125 }, { "epoch": 0.38629402756508424, "grad_norm": 1.1775951385498047, "learning_rate": 1.4050097718755104e-05, "loss": 0.5356, "step": 14126 }, { "epoch": 0.3863213738788011, "grad_norm": 1.3126548528671265, "learning_rate": 1.4049287892613302e-05, "loss": 0.5151, "step": 14127 }, { "epoch": 0.38634872019251804, "grad_norm": 1.304943323135376, "learning_rate": 1.4048478034705978e-05, "loss": 0.5359, "step": 14128 }, { "epoch": 0.38637606650623496, "grad_norm": 1.519454836845398, "learning_rate": 1.4047668145039483e-05, "loss": 0.5739, "step": 14129 }, { "epoch": 0.3864034128199519, "grad_norm": 1.239979863166809, "learning_rate": 1.404685822362017e-05, "loss": 0.5482, "step": 14130 }, { "epoch": 0.38643075913366876, "grad_norm": 1.7432868480682373, "learning_rate": 1.4046048270454393e-05, "loss": 0.5183, "step": 14131 }, { "epoch": 0.3864581054473857, "grad_norm": 1.2243942022323608, "learning_rate": 1.4045238285548508e-05, "loss": 0.5334, "step": 14132 }, { "epoch": 0.3864854517611026, "grad_norm": 1.110119104385376, "learning_rate": 1.4044428268908863e-05, "loss": 0.5129, "step": 14133 }, { "epoch": 0.38651279807481953, "grad_norm": 1.8608431816101074, "learning_rate": 1.4043618220541819e-05, "loss": 0.5252, "step": 14134 }, { "epoch": 0.3865401443885364, "grad_norm": 1.246018648147583, "learning_rate": 1.4042808140453728e-05, "loss": 0.4908, "step": 14135 }, { "epoch": 0.3865674907022533, "grad_norm": 1.2413607835769653, "learning_rate": 1.4041998028650944e-05, "loss": 0.5403, "step": 14136 }, { "epoch": 0.38659483701597025, "grad_norm": 1.0745224952697754, "learning_rate": 1.4041187885139824e-05, "loss": 0.5294, "step": 14137 }, { "epoch": 0.3866221833296872, "grad_norm": 1.3628877401351929, "learning_rate": 1.4040377709926723e-05, "loss": 0.535, "step": 14138 }, { "epoch": 0.38664952964340404, "grad_norm": 1.6309162378311157, "learning_rate": 1.4039567503017995e-05, "loss": 0.5258, "step": 14139 }, { "epoch": 0.38667687595712097, "grad_norm": 2.74739670753479, "learning_rate": 1.4038757264419996e-05, "loss": 0.5272, "step": 14140 }, { "epoch": 0.3867042222708379, "grad_norm": 1.5484659671783447, "learning_rate": 1.4037946994139083e-05, "loss": 0.7999, "step": 14141 }, { "epoch": 0.3867315685845548, "grad_norm": 1.1774094104766846, "learning_rate": 1.4037136692181613e-05, "loss": 0.5412, "step": 14142 }, { "epoch": 0.3867589148982717, "grad_norm": 1.3271639347076416, "learning_rate": 1.403632635855394e-05, "loss": 0.5415, "step": 14143 }, { "epoch": 0.3867862612119886, "grad_norm": 1.3145325183868408, "learning_rate": 1.4035515993262422e-05, "loss": 0.5385, "step": 14144 }, { "epoch": 0.38681360752570554, "grad_norm": 1.589069128036499, "learning_rate": 1.4034705596313421e-05, "loss": 0.5046, "step": 14145 }, { "epoch": 0.38684095383942246, "grad_norm": 1.1420222520828247, "learning_rate": 1.403389516771329e-05, "loss": 0.5231, "step": 14146 }, { "epoch": 0.38686830015313933, "grad_norm": 1.3764187097549438, "learning_rate": 1.4033084707468383e-05, "loss": 0.5364, "step": 14147 }, { "epoch": 0.38689564646685626, "grad_norm": 1.15119469165802, "learning_rate": 1.4032274215585064e-05, "loss": 0.5204, "step": 14148 }, { "epoch": 0.3869229927805732, "grad_norm": 1.4245855808258057, "learning_rate": 1.4031463692069687e-05, "loss": 0.5228, "step": 14149 }, { "epoch": 0.3869503390942901, "grad_norm": 1.4006850719451904, "learning_rate": 1.4030653136928613e-05, "loss": 0.5149, "step": 14150 }, { "epoch": 0.386977685408007, "grad_norm": 1.1292798519134521, "learning_rate": 1.4029842550168198e-05, "loss": 0.5307, "step": 14151 }, { "epoch": 0.3870050317217239, "grad_norm": 1.6754183769226074, "learning_rate": 1.4029031931794803e-05, "loss": 0.8648, "step": 14152 }, { "epoch": 0.3870323780354408, "grad_norm": 1.1909700632095337, "learning_rate": 1.4028221281814787e-05, "loss": 0.5084, "step": 14153 }, { "epoch": 0.38705972434915775, "grad_norm": 1.291251540184021, "learning_rate": 1.4027410600234507e-05, "loss": 0.5468, "step": 14154 }, { "epoch": 0.3870870706628746, "grad_norm": 1.5230294466018677, "learning_rate": 1.4026599887060325e-05, "loss": 0.5391, "step": 14155 }, { "epoch": 0.38711441697659155, "grad_norm": 1.3156354427337646, "learning_rate": 1.4025789142298603e-05, "loss": 0.4497, "step": 14156 }, { "epoch": 0.38714176329030847, "grad_norm": 1.2036696672439575, "learning_rate": 1.4024978365955693e-05, "loss": 0.5159, "step": 14157 }, { "epoch": 0.3871691096040254, "grad_norm": 1.4610729217529297, "learning_rate": 1.4024167558037964e-05, "loss": 0.4437, "step": 14158 }, { "epoch": 0.38719645591774227, "grad_norm": 1.2670772075653076, "learning_rate": 1.4023356718551773e-05, "loss": 0.5044, "step": 14159 }, { "epoch": 0.3872238022314592, "grad_norm": 1.2599447965621948, "learning_rate": 1.4022545847503479e-05, "loss": 0.5012, "step": 14160 }, { "epoch": 0.3872511485451761, "grad_norm": 1.3337712287902832, "learning_rate": 1.4021734944899445e-05, "loss": 0.4147, "step": 14161 }, { "epoch": 0.38727849485889304, "grad_norm": 1.2413142919540405, "learning_rate": 1.4020924010746033e-05, "loss": 0.5373, "step": 14162 }, { "epoch": 0.3873058411726099, "grad_norm": 1.3892194032669067, "learning_rate": 1.4020113045049604e-05, "loss": 0.5188, "step": 14163 }, { "epoch": 0.38733318748632684, "grad_norm": 1.27267587184906, "learning_rate": 1.4019302047816521e-05, "loss": 0.5373, "step": 14164 }, { "epoch": 0.38736053380004376, "grad_norm": 1.3102190494537354, "learning_rate": 1.4018491019053142e-05, "loss": 0.5144, "step": 14165 }, { "epoch": 0.3873878801137607, "grad_norm": 1.583027958869934, "learning_rate": 1.4017679958765831e-05, "loss": 0.5137, "step": 14166 }, { "epoch": 0.38741522642747755, "grad_norm": 1.8325852155685425, "learning_rate": 1.4016868866960955e-05, "loss": 0.6037, "step": 14167 }, { "epoch": 0.3874425727411945, "grad_norm": 1.7331300973892212, "learning_rate": 1.401605774364487e-05, "loss": 0.4513, "step": 14168 }, { "epoch": 0.3874699190549114, "grad_norm": 1.8244401216506958, "learning_rate": 1.4015246588823945e-05, "loss": 0.439, "step": 14169 }, { "epoch": 0.38749726536862833, "grad_norm": 1.2862870693206787, "learning_rate": 1.4014435402504539e-05, "loss": 0.8255, "step": 14170 }, { "epoch": 0.3875246116823452, "grad_norm": 1.35366952419281, "learning_rate": 1.4013624184693015e-05, "loss": 0.5353, "step": 14171 }, { "epoch": 0.3875519579960621, "grad_norm": 1.3397951126098633, "learning_rate": 1.4012812935395741e-05, "loss": 0.5039, "step": 14172 }, { "epoch": 0.38757930430977905, "grad_norm": 1.477320909500122, "learning_rate": 1.4012001654619078e-05, "loss": 0.5481, "step": 14173 }, { "epoch": 0.387606650623496, "grad_norm": 1.1863235235214233, "learning_rate": 1.4011190342369394e-05, "loss": 0.5343, "step": 14174 }, { "epoch": 0.38763399693721284, "grad_norm": 1.1673805713653564, "learning_rate": 1.4010378998653046e-05, "loss": 0.5593, "step": 14175 }, { "epoch": 0.38766134325092977, "grad_norm": 1.2466939687728882, "learning_rate": 1.4009567623476406e-05, "loss": 0.5537, "step": 14176 }, { "epoch": 0.3876886895646467, "grad_norm": 1.3522942066192627, "learning_rate": 1.4008756216845839e-05, "loss": 0.5458, "step": 14177 }, { "epoch": 0.3877160358783636, "grad_norm": 1.2298195362091064, "learning_rate": 1.4007944778767701e-05, "loss": 0.4254, "step": 14178 }, { "epoch": 0.3877433821920805, "grad_norm": 1.0644341707229614, "learning_rate": 1.4007133309248368e-05, "loss": 0.5037, "step": 14179 }, { "epoch": 0.3877707285057974, "grad_norm": 1.4656747579574585, "learning_rate": 1.4006321808294203e-05, "loss": 0.5355, "step": 14180 }, { "epoch": 0.38779807481951434, "grad_norm": 1.1752017736434937, "learning_rate": 1.4005510275911571e-05, "loss": 0.5506, "step": 14181 }, { "epoch": 0.38782542113323126, "grad_norm": 3.281090497970581, "learning_rate": 1.4004698712106834e-05, "loss": 0.3452, "step": 14182 }, { "epoch": 0.38785276744694813, "grad_norm": 1.4112629890441895, "learning_rate": 1.4003887116886368e-05, "loss": 0.4789, "step": 14183 }, { "epoch": 0.38788011376066506, "grad_norm": 1.4803739786148071, "learning_rate": 1.400307549025653e-05, "loss": 0.5087, "step": 14184 }, { "epoch": 0.387907460074382, "grad_norm": 1.1542601585388184, "learning_rate": 1.4002263832223693e-05, "loss": 0.5087, "step": 14185 }, { "epoch": 0.3879348063880989, "grad_norm": 1.4868662357330322, "learning_rate": 1.4001452142794225e-05, "loss": 0.4805, "step": 14186 }, { "epoch": 0.3879621527018158, "grad_norm": 1.1357359886169434, "learning_rate": 1.4000640421974488e-05, "loss": 0.508, "step": 14187 }, { "epoch": 0.3879894990155327, "grad_norm": 1.3054205179214478, "learning_rate": 1.3999828669770853e-05, "loss": 0.531, "step": 14188 }, { "epoch": 0.3880168453292496, "grad_norm": 1.109532117843628, "learning_rate": 1.399901688618969e-05, "loss": 0.5387, "step": 14189 }, { "epoch": 0.38804419164296655, "grad_norm": 1.552283763885498, "learning_rate": 1.3998205071237361e-05, "loss": 0.5033, "step": 14190 }, { "epoch": 0.3880715379566834, "grad_norm": 1.3472611904144287, "learning_rate": 1.3997393224920241e-05, "loss": 0.537, "step": 14191 }, { "epoch": 0.38809888427040035, "grad_norm": 1.2864549160003662, "learning_rate": 1.3996581347244696e-05, "loss": 0.5308, "step": 14192 }, { "epoch": 0.38812623058411727, "grad_norm": 1.191049337387085, "learning_rate": 1.3995769438217096e-05, "loss": 0.812, "step": 14193 }, { "epoch": 0.3881535768978342, "grad_norm": 1.2859023809432983, "learning_rate": 1.3994957497843806e-05, "loss": 0.5572, "step": 14194 }, { "epoch": 0.38818092321155107, "grad_norm": 1.3268235921859741, "learning_rate": 1.39941455261312e-05, "loss": 0.5278, "step": 14195 }, { "epoch": 0.388208269525268, "grad_norm": 1.3549302816390991, "learning_rate": 1.3993333523085648e-05, "loss": 0.5224, "step": 14196 }, { "epoch": 0.3882356158389849, "grad_norm": 1.9684975147247314, "learning_rate": 1.3992521488713515e-05, "loss": 0.5017, "step": 14197 }, { "epoch": 0.38826296215270184, "grad_norm": 1.3671059608459473, "learning_rate": 1.3991709423021181e-05, "loss": 0.4585, "step": 14198 }, { "epoch": 0.3882903084664187, "grad_norm": 1.1988258361816406, "learning_rate": 1.3990897326015004e-05, "loss": 0.5442, "step": 14199 }, { "epoch": 0.38831765478013563, "grad_norm": 1.4206252098083496, "learning_rate": 1.3990085197701362e-05, "loss": 0.388, "step": 14200 }, { "epoch": 0.38834500109385256, "grad_norm": 1.165128231048584, "learning_rate": 1.3989273038086626e-05, "loss": 0.5591, "step": 14201 }, { "epoch": 0.3883723474075695, "grad_norm": 1.3528181314468384, "learning_rate": 1.3988460847177165e-05, "loss": 0.5578, "step": 14202 }, { "epoch": 0.38839969372128635, "grad_norm": 1.3525433540344238, "learning_rate": 1.3987648624979351e-05, "loss": 0.542, "step": 14203 }, { "epoch": 0.3884270400350033, "grad_norm": 1.3599889278411865, "learning_rate": 1.3986836371499557e-05, "loss": 0.5452, "step": 14204 }, { "epoch": 0.3884543863487202, "grad_norm": 1.3736704587936401, "learning_rate": 1.398602408674415e-05, "loss": 0.5216, "step": 14205 }, { "epoch": 0.38848173266243713, "grad_norm": 1.4408559799194336, "learning_rate": 1.3985211770719507e-05, "loss": 0.845, "step": 14206 }, { "epoch": 0.388509078976154, "grad_norm": 1.3901067972183228, "learning_rate": 1.3984399423432002e-05, "loss": 0.549, "step": 14207 }, { "epoch": 0.3885364252898709, "grad_norm": 1.376672387123108, "learning_rate": 1.3983587044888003e-05, "loss": 0.4771, "step": 14208 }, { "epoch": 0.38856377160358785, "grad_norm": 1.4008976221084595, "learning_rate": 1.3982774635093882e-05, "loss": 0.5094, "step": 14209 }, { "epoch": 0.3885911179173048, "grad_norm": 1.596854567527771, "learning_rate": 1.398196219405602e-05, "loss": 0.4136, "step": 14210 }, { "epoch": 0.38861846423102164, "grad_norm": 1.4184434413909912, "learning_rate": 1.3981149721780782e-05, "loss": 0.4792, "step": 14211 }, { "epoch": 0.38864581054473857, "grad_norm": 1.6810580492019653, "learning_rate": 1.3980337218274546e-05, "loss": 0.4123, "step": 14212 }, { "epoch": 0.3886731568584555, "grad_norm": 1.1400514841079712, "learning_rate": 1.3979524683543684e-05, "loss": 0.5104, "step": 14213 }, { "epoch": 0.3887005031721724, "grad_norm": 1.5525330305099487, "learning_rate": 1.397871211759457e-05, "loss": 0.5494, "step": 14214 }, { "epoch": 0.3887278494858893, "grad_norm": 1.320683479309082, "learning_rate": 1.3977899520433581e-05, "loss": 0.4935, "step": 14215 }, { "epoch": 0.3887551957996062, "grad_norm": 1.158312439918518, "learning_rate": 1.397708689206709e-05, "loss": 0.522, "step": 14216 }, { "epoch": 0.38878254211332314, "grad_norm": 1.3774362802505493, "learning_rate": 1.397627423250147e-05, "loss": 0.4319, "step": 14217 }, { "epoch": 0.38880988842704006, "grad_norm": 1.6226664781570435, "learning_rate": 1.39754615417431e-05, "loss": 0.4532, "step": 14218 }, { "epoch": 0.38883723474075693, "grad_norm": 1.3181238174438477, "learning_rate": 1.397464881979835e-05, "loss": 0.4292, "step": 14219 }, { "epoch": 0.38886458105447386, "grad_norm": 1.4774318933486938, "learning_rate": 1.39738360666736e-05, "loss": 0.563, "step": 14220 }, { "epoch": 0.3888919273681908, "grad_norm": 3.4116876125335693, "learning_rate": 1.3973023282375224e-05, "loss": 0.3936, "step": 14221 }, { "epoch": 0.38891927368190765, "grad_norm": 1.3541245460510254, "learning_rate": 1.39722104669096e-05, "loss": 0.5395, "step": 14222 }, { "epoch": 0.3889466199956246, "grad_norm": 1.3349508047103882, "learning_rate": 1.39713976202831e-05, "loss": 0.5457, "step": 14223 }, { "epoch": 0.3889739663093415, "grad_norm": 1.4765888452529907, "learning_rate": 1.3970584742502105e-05, "loss": 0.5787, "step": 14224 }, { "epoch": 0.3890013126230584, "grad_norm": 1.467616081237793, "learning_rate": 1.396977183357299e-05, "loss": 0.5429, "step": 14225 }, { "epoch": 0.3890286589367753, "grad_norm": 1.20027494430542, "learning_rate": 1.3968958893502133e-05, "loss": 0.5289, "step": 14226 }, { "epoch": 0.3890560052504922, "grad_norm": 1.3530298471450806, "learning_rate": 1.3968145922295908e-05, "loss": 0.507, "step": 14227 }, { "epoch": 0.38908335156420915, "grad_norm": 1.4422239065170288, "learning_rate": 1.3967332919960699e-05, "loss": 0.5632, "step": 14228 }, { "epoch": 0.38911069787792607, "grad_norm": 1.3425780534744263, "learning_rate": 1.3966519886502875e-05, "loss": 0.4904, "step": 14229 }, { "epoch": 0.38913804419164294, "grad_norm": 1.373146891593933, "learning_rate": 1.3965706821928819e-05, "loss": 0.5432, "step": 14230 }, { "epoch": 0.38916539050535986, "grad_norm": 1.4311925172805786, "learning_rate": 1.3964893726244912e-05, "loss": 0.538, "step": 14231 }, { "epoch": 0.3891927368190768, "grad_norm": 1.4035171270370483, "learning_rate": 1.396408059945753e-05, "loss": 0.5352, "step": 14232 }, { "epoch": 0.3892200831327937, "grad_norm": 1.4558336734771729, "learning_rate": 1.3963267441573048e-05, "loss": 0.4335, "step": 14233 }, { "epoch": 0.3892474294465106, "grad_norm": 1.3097623586654663, "learning_rate": 1.3962454252597852e-05, "loss": 0.5551, "step": 14234 }, { "epoch": 0.3892747757602275, "grad_norm": 1.5512531995773315, "learning_rate": 1.3961641032538316e-05, "loss": 0.4526, "step": 14235 }, { "epoch": 0.38930212207394443, "grad_norm": 1.0926413536071777, "learning_rate": 1.396082778140082e-05, "loss": 0.5157, "step": 14236 }, { "epoch": 0.38932946838766136, "grad_norm": 1.0563852787017822, "learning_rate": 1.3960014499191746e-05, "loss": 0.5303, "step": 14237 }, { "epoch": 0.38935681470137823, "grad_norm": 1.6350619792938232, "learning_rate": 1.395920118591747e-05, "loss": 0.5238, "step": 14238 }, { "epoch": 0.38938416101509515, "grad_norm": 1.3706934452056885, "learning_rate": 1.395838784158438e-05, "loss": 0.5223, "step": 14239 }, { "epoch": 0.3894115073288121, "grad_norm": 1.3223416805267334, "learning_rate": 1.3957574466198847e-05, "loss": 0.4148, "step": 14240 }, { "epoch": 0.389438853642529, "grad_norm": 1.5457189083099365, "learning_rate": 1.3956761059767256e-05, "loss": 0.5274, "step": 14241 }, { "epoch": 0.3894661999562459, "grad_norm": 1.2611366510391235, "learning_rate": 1.395594762229599e-05, "loss": 0.5303, "step": 14242 }, { "epoch": 0.3894935462699628, "grad_norm": 1.3499947786331177, "learning_rate": 1.3955134153791428e-05, "loss": 0.5538, "step": 14243 }, { "epoch": 0.3895208925836797, "grad_norm": 1.2530219554901123, "learning_rate": 1.395432065425995e-05, "loss": 0.5469, "step": 14244 }, { "epoch": 0.38954823889739665, "grad_norm": 1.182725429534912, "learning_rate": 1.395350712370794e-05, "loss": 0.5053, "step": 14245 }, { "epoch": 0.3895755852111135, "grad_norm": 1.3703439235687256, "learning_rate": 1.3952693562141781e-05, "loss": 0.5192, "step": 14246 }, { "epoch": 0.38960293152483044, "grad_norm": 1.4592581987380981, "learning_rate": 1.3951879969567852e-05, "loss": 0.4725, "step": 14247 }, { "epoch": 0.38963027783854737, "grad_norm": 1.2183696031570435, "learning_rate": 1.3951066345992537e-05, "loss": 0.535, "step": 14248 }, { "epoch": 0.3896576241522643, "grad_norm": 1.2318177223205566, "learning_rate": 1.3950252691422217e-05, "loss": 0.8527, "step": 14249 }, { "epoch": 0.38968497046598116, "grad_norm": 1.1889578104019165, "learning_rate": 1.394943900586328e-05, "loss": 0.5444, "step": 14250 }, { "epoch": 0.3897123167796981, "grad_norm": 1.4329195022583008, "learning_rate": 1.3948625289322105e-05, "loss": 0.5115, "step": 14251 }, { "epoch": 0.389739663093415, "grad_norm": 1.4931093454360962, "learning_rate": 1.3947811541805073e-05, "loss": 0.4808, "step": 14252 }, { "epoch": 0.38976700940713194, "grad_norm": 1.518107533454895, "learning_rate": 1.3946997763318573e-05, "loss": 0.5782, "step": 14253 }, { "epoch": 0.3897943557208488, "grad_norm": 2.3488168716430664, "learning_rate": 1.3946183953868983e-05, "loss": 0.552, "step": 14254 }, { "epoch": 0.38982170203456573, "grad_norm": 1.1064116954803467, "learning_rate": 1.394537011346269e-05, "loss": 0.5227, "step": 14255 }, { "epoch": 0.38984904834828266, "grad_norm": 1.2215005159378052, "learning_rate": 1.3944556242106084e-05, "loss": 0.3705, "step": 14256 }, { "epoch": 0.3898763946619996, "grad_norm": 1.308031678199768, "learning_rate": 1.3943742339805539e-05, "loss": 0.5007, "step": 14257 }, { "epoch": 0.38990374097571645, "grad_norm": 1.3215794563293457, "learning_rate": 1.394292840656745e-05, "loss": 0.461, "step": 14258 }, { "epoch": 0.3899310872894334, "grad_norm": 1.6115083694458008, "learning_rate": 1.3942114442398193e-05, "loss": 0.5544, "step": 14259 }, { "epoch": 0.3899584336031503, "grad_norm": 1.419948697090149, "learning_rate": 1.394130044730416e-05, "loss": 0.5332, "step": 14260 }, { "epoch": 0.3899857799168672, "grad_norm": 1.3203072547912598, "learning_rate": 1.3940486421291735e-05, "loss": 0.5619, "step": 14261 }, { "epoch": 0.3900131262305841, "grad_norm": 1.5046032667160034, "learning_rate": 1.39396723643673e-05, "loss": 0.4583, "step": 14262 }, { "epoch": 0.390040472544301, "grad_norm": 1.2511566877365112, "learning_rate": 1.3938858276537245e-05, "loss": 0.5189, "step": 14263 }, { "epoch": 0.39006781885801795, "grad_norm": 1.252669334411621, "learning_rate": 1.3938044157807957e-05, "loss": 0.4515, "step": 14264 }, { "epoch": 0.39009516517173487, "grad_norm": 1.1646695137023926, "learning_rate": 1.3937230008185818e-05, "loss": 0.5478, "step": 14265 }, { "epoch": 0.39012251148545174, "grad_norm": 1.2683823108673096, "learning_rate": 1.3936415827677221e-05, "loss": 0.5403, "step": 14266 }, { "epoch": 0.39014985779916866, "grad_norm": 1.2252784967422485, "learning_rate": 1.3935601616288549e-05, "loss": 0.5054, "step": 14267 }, { "epoch": 0.3901772041128856, "grad_norm": 1.2026976346969604, "learning_rate": 1.3934787374026187e-05, "loss": 0.5167, "step": 14268 }, { "epoch": 0.3902045504266025, "grad_norm": 1.8987840414047241, "learning_rate": 1.393397310089653e-05, "loss": 0.8291, "step": 14269 }, { "epoch": 0.3902318967403194, "grad_norm": 1.2771967649459839, "learning_rate": 1.3933158796905958e-05, "loss": 0.4125, "step": 14270 }, { "epoch": 0.3902592430540363, "grad_norm": 1.430037021636963, "learning_rate": 1.3932344462060863e-05, "loss": 0.5099, "step": 14271 }, { "epoch": 0.39028658936775323, "grad_norm": 1.170590877532959, "learning_rate": 1.3931530096367634e-05, "loss": 0.5167, "step": 14272 }, { "epoch": 0.39031393568147016, "grad_norm": 1.3967523574829102, "learning_rate": 1.3930715699832653e-05, "loss": 0.4922, "step": 14273 }, { "epoch": 0.39034128199518703, "grad_norm": 1.2584764957427979, "learning_rate": 1.3929901272462316e-05, "loss": 0.5163, "step": 14274 }, { "epoch": 0.39036862830890395, "grad_norm": 1.3784375190734863, "learning_rate": 1.3929086814263011e-05, "loss": 0.5514, "step": 14275 }, { "epoch": 0.3903959746226209, "grad_norm": 1.1789852380752563, "learning_rate": 1.3928272325241123e-05, "loss": 0.5212, "step": 14276 }, { "epoch": 0.3904233209363378, "grad_norm": 1.239671230316162, "learning_rate": 1.3927457805403046e-05, "loss": 0.521, "step": 14277 }, { "epoch": 0.3904506672500547, "grad_norm": 1.0877597332000732, "learning_rate": 1.3926643254755168e-05, "loss": 0.5332, "step": 14278 }, { "epoch": 0.3904780135637716, "grad_norm": 1.1507753133773804, "learning_rate": 1.3925828673303875e-05, "loss": 0.5619, "step": 14279 }, { "epoch": 0.3905053598774885, "grad_norm": 2.0209896564483643, "learning_rate": 1.3925014061055566e-05, "loss": 0.4072, "step": 14280 }, { "epoch": 0.39053270619120545, "grad_norm": 1.2492605447769165, "learning_rate": 1.3924199418016625e-05, "loss": 0.5008, "step": 14281 }, { "epoch": 0.3905600525049223, "grad_norm": 1.169296145439148, "learning_rate": 1.3923384744193445e-05, "loss": 0.5392, "step": 14282 }, { "epoch": 0.39058739881863924, "grad_norm": 1.8282334804534912, "learning_rate": 1.3922570039592413e-05, "loss": 0.5538, "step": 14283 }, { "epoch": 0.39061474513235617, "grad_norm": 1.2532386779785156, "learning_rate": 1.3921755304219921e-05, "loss": 0.5442, "step": 14284 }, { "epoch": 0.3906420914460731, "grad_norm": 1.1424528360366821, "learning_rate": 1.3920940538082367e-05, "loss": 0.5011, "step": 14285 }, { "epoch": 0.39066943775978996, "grad_norm": 1.3200854063034058, "learning_rate": 1.3920125741186139e-05, "loss": 0.5213, "step": 14286 }, { "epoch": 0.3906967840735069, "grad_norm": 1.2604484558105469, "learning_rate": 1.3919310913537625e-05, "loss": 0.5366, "step": 14287 }, { "epoch": 0.3907241303872238, "grad_norm": 2.4487483501434326, "learning_rate": 1.391849605514322e-05, "loss": 0.5331, "step": 14288 }, { "epoch": 0.39075147670094074, "grad_norm": 1.366216778755188, "learning_rate": 1.3917681166009315e-05, "loss": 0.5252, "step": 14289 }, { "epoch": 0.3907788230146576, "grad_norm": 1.2670857906341553, "learning_rate": 1.3916866246142304e-05, "loss": 0.5858, "step": 14290 }, { "epoch": 0.39080616932837453, "grad_norm": 1.1322695016860962, "learning_rate": 1.3916051295548582e-05, "loss": 0.4899, "step": 14291 }, { "epoch": 0.39083351564209146, "grad_norm": 1.2795146703720093, "learning_rate": 1.3915236314234536e-05, "loss": 0.5283, "step": 14292 }, { "epoch": 0.3908608619558084, "grad_norm": 1.4845350980758667, "learning_rate": 1.3914421302206567e-05, "loss": 0.8541, "step": 14293 }, { "epoch": 0.39088820826952525, "grad_norm": 1.6848832368850708, "learning_rate": 1.3913606259471062e-05, "loss": 0.8641, "step": 14294 }, { "epoch": 0.3909155545832422, "grad_norm": 1.5219968557357788, "learning_rate": 1.3912791186034418e-05, "loss": 0.5278, "step": 14295 }, { "epoch": 0.3909429008969591, "grad_norm": 1.3326398134231567, "learning_rate": 1.3911976081903029e-05, "loss": 0.5325, "step": 14296 }, { "epoch": 0.390970247210676, "grad_norm": 1.3479267358779907, "learning_rate": 1.3911160947083287e-05, "loss": 0.4544, "step": 14297 }, { "epoch": 0.3909975935243929, "grad_norm": 1.2362680435180664, "learning_rate": 1.3910345781581586e-05, "loss": 0.5345, "step": 14298 }, { "epoch": 0.3910249398381098, "grad_norm": 1.2128171920776367, "learning_rate": 1.390953058540433e-05, "loss": 0.5775, "step": 14299 }, { "epoch": 0.39105228615182674, "grad_norm": 1.2649112939834595, "learning_rate": 1.39087153585579e-05, "loss": 0.5051, "step": 14300 }, { "epoch": 0.39107963246554367, "grad_norm": 1.3742470741271973, "learning_rate": 1.39079001010487e-05, "loss": 0.5224, "step": 14301 }, { "epoch": 0.39110697877926054, "grad_norm": 1.3987594842910767, "learning_rate": 1.3907084812883126e-05, "loss": 0.5265, "step": 14302 }, { "epoch": 0.39113432509297746, "grad_norm": 1.3082551956176758, "learning_rate": 1.3906269494067568e-05, "loss": 0.5092, "step": 14303 }, { "epoch": 0.3911616714066944, "grad_norm": 1.1831272840499878, "learning_rate": 1.390545414460843e-05, "loss": 0.5131, "step": 14304 }, { "epoch": 0.3911890177204113, "grad_norm": 1.3430157899856567, "learning_rate": 1.3904638764512097e-05, "loss": 0.3826, "step": 14305 }, { "epoch": 0.3912163640341282, "grad_norm": 1.3749207258224487, "learning_rate": 1.3903823353784973e-05, "loss": 0.5494, "step": 14306 }, { "epoch": 0.3912437103478451, "grad_norm": 1.1950570344924927, "learning_rate": 1.3903007912433457e-05, "loss": 0.5411, "step": 14307 }, { "epoch": 0.39127105666156203, "grad_norm": 2.0218381881713867, "learning_rate": 1.390219244046394e-05, "loss": 0.8234, "step": 14308 }, { "epoch": 0.39129840297527896, "grad_norm": 1.3937616348266602, "learning_rate": 1.390137693788282e-05, "loss": 0.5459, "step": 14309 }, { "epoch": 0.39132574928899583, "grad_norm": 1.4364421367645264, "learning_rate": 1.3900561404696498e-05, "loss": 0.4091, "step": 14310 }, { "epoch": 0.39135309560271275, "grad_norm": 1.3285386562347412, "learning_rate": 1.3899745840911369e-05, "loss": 0.4428, "step": 14311 }, { "epoch": 0.3913804419164297, "grad_norm": 1.1741068363189697, "learning_rate": 1.3898930246533832e-05, "loss": 0.5181, "step": 14312 }, { "epoch": 0.3914077882301466, "grad_norm": 1.1167752742767334, "learning_rate": 1.3898114621570281e-05, "loss": 0.541, "step": 14313 }, { "epoch": 0.39143513454386347, "grad_norm": 1.4845682382583618, "learning_rate": 1.3897298966027122e-05, "loss": 0.5419, "step": 14314 }, { "epoch": 0.3914624808575804, "grad_norm": 1.162101149559021, "learning_rate": 1.3896483279910748e-05, "loss": 0.5378, "step": 14315 }, { "epoch": 0.3914898271712973, "grad_norm": 1.0417486429214478, "learning_rate": 1.389566756322756e-05, "loss": 0.5331, "step": 14316 }, { "epoch": 0.39151717348501425, "grad_norm": 1.2876390218734741, "learning_rate": 1.3894851815983954e-05, "loss": 0.4766, "step": 14317 }, { "epoch": 0.3915445197987311, "grad_norm": 1.1989002227783203, "learning_rate": 1.3894036038186334e-05, "loss": 0.5224, "step": 14318 }, { "epoch": 0.39157186611244804, "grad_norm": 1.0864650011062622, "learning_rate": 1.3893220229841095e-05, "loss": 0.5245, "step": 14319 }, { "epoch": 0.39159921242616497, "grad_norm": 1.2036921977996826, "learning_rate": 1.3892404390954642e-05, "loss": 0.5418, "step": 14320 }, { "epoch": 0.3916265587398819, "grad_norm": 0.9373669028282166, "learning_rate": 1.389158852153337e-05, "loss": 0.5358, "step": 14321 }, { "epoch": 0.39165390505359876, "grad_norm": 1.130170226097107, "learning_rate": 1.3890772621583684e-05, "loss": 0.4856, "step": 14322 }, { "epoch": 0.3916812513673157, "grad_norm": 1.3481779098510742, "learning_rate": 1.3889956691111978e-05, "loss": 0.3862, "step": 14323 }, { "epoch": 0.3917085976810326, "grad_norm": 2.3618853092193604, "learning_rate": 1.3889140730124657e-05, "loss": 0.8597, "step": 14324 }, { "epoch": 0.3917359439947495, "grad_norm": 1.6269731521606445, "learning_rate": 1.388832473862812e-05, "loss": 0.8228, "step": 14325 }, { "epoch": 0.3917632903084664, "grad_norm": 1.1112488508224487, "learning_rate": 1.3887508716628775e-05, "loss": 0.4859, "step": 14326 }, { "epoch": 0.39179063662218333, "grad_norm": 1.4887641668319702, "learning_rate": 1.3886692664133015e-05, "loss": 0.5497, "step": 14327 }, { "epoch": 0.39181798293590026, "grad_norm": 1.684265375137329, "learning_rate": 1.3885876581147245e-05, "loss": 0.4701, "step": 14328 }, { "epoch": 0.3918453292496171, "grad_norm": 1.2963579893112183, "learning_rate": 1.3885060467677867e-05, "loss": 0.4413, "step": 14329 }, { "epoch": 0.39187267556333405, "grad_norm": 1.1282626390457153, "learning_rate": 1.3884244323731282e-05, "loss": 0.5369, "step": 14330 }, { "epoch": 0.391900021877051, "grad_norm": 1.2811137437820435, "learning_rate": 1.3883428149313894e-05, "loss": 0.5002, "step": 14331 }, { "epoch": 0.3919273681907679, "grad_norm": 1.1648259162902832, "learning_rate": 1.3882611944432105e-05, "loss": 0.5298, "step": 14332 }, { "epoch": 0.39195471450448477, "grad_norm": 1.5168641805648804, "learning_rate": 1.3881795709092317e-05, "loss": 0.5555, "step": 14333 }, { "epoch": 0.3919820608182017, "grad_norm": 1.1752567291259766, "learning_rate": 1.3880979443300935e-05, "loss": 0.4991, "step": 14334 }, { "epoch": 0.3920094071319186, "grad_norm": 1.461470365524292, "learning_rate": 1.3880163147064361e-05, "loss": 0.5412, "step": 14335 }, { "epoch": 0.39203675344563554, "grad_norm": 1.5550333261489868, "learning_rate": 1.3879346820389001e-05, "loss": 0.5381, "step": 14336 }, { "epoch": 0.3920640997593524, "grad_norm": 1.092944860458374, "learning_rate": 1.3878530463281255e-05, "loss": 0.4785, "step": 14337 }, { "epoch": 0.39209144607306934, "grad_norm": 3.2129509449005127, "learning_rate": 1.3877714075747528e-05, "loss": 0.9136, "step": 14338 }, { "epoch": 0.39211879238678626, "grad_norm": 1.5104111433029175, "learning_rate": 1.3876897657794231e-05, "loss": 0.551, "step": 14339 }, { "epoch": 0.3921461387005032, "grad_norm": 1.1669671535491943, "learning_rate": 1.3876081209427758e-05, "loss": 0.5447, "step": 14340 }, { "epoch": 0.39217348501422006, "grad_norm": 1.190467357635498, "learning_rate": 1.3875264730654519e-05, "loss": 0.5534, "step": 14341 }, { "epoch": 0.392200831327937, "grad_norm": 1.0649769306182861, "learning_rate": 1.387444822148092e-05, "loss": 0.5422, "step": 14342 }, { "epoch": 0.3922281776416539, "grad_norm": 1.169999361038208, "learning_rate": 1.3873631681913365e-05, "loss": 0.5363, "step": 14343 }, { "epoch": 0.39225552395537083, "grad_norm": 1.32610285282135, "learning_rate": 1.3872815111958258e-05, "loss": 0.383, "step": 14344 }, { "epoch": 0.3922828702690877, "grad_norm": 1.2867501974105835, "learning_rate": 1.387199851162201e-05, "loss": 0.5181, "step": 14345 }, { "epoch": 0.3923102165828046, "grad_norm": 1.2479262351989746, "learning_rate": 1.3871181880911018e-05, "loss": 0.538, "step": 14346 }, { "epoch": 0.39233756289652155, "grad_norm": 1.3424267768859863, "learning_rate": 1.3870365219831699e-05, "loss": 0.5303, "step": 14347 }, { "epoch": 0.3923649092102385, "grad_norm": 1.7967417240142822, "learning_rate": 1.3869548528390451e-05, "loss": 0.5069, "step": 14348 }, { "epoch": 0.39239225552395535, "grad_norm": 1.1584793329238892, "learning_rate": 1.3868731806593683e-05, "loss": 0.5213, "step": 14349 }, { "epoch": 0.39241960183767227, "grad_norm": 1.3530818223953247, "learning_rate": 1.3867915054447806e-05, "loss": 0.4679, "step": 14350 }, { "epoch": 0.3924469481513892, "grad_norm": 1.8540616035461426, "learning_rate": 1.3867098271959219e-05, "loss": 0.8706, "step": 14351 }, { "epoch": 0.3924742944651061, "grad_norm": 1.5693094730377197, "learning_rate": 1.3866281459134337e-05, "loss": 0.3687, "step": 14352 }, { "epoch": 0.392501640778823, "grad_norm": 1.5141375064849854, "learning_rate": 1.3865464615979567e-05, "loss": 0.4884, "step": 14353 }, { "epoch": 0.3925289870925399, "grad_norm": 1.3729645013809204, "learning_rate": 1.3864647742501312e-05, "loss": 0.5355, "step": 14354 }, { "epoch": 0.39255633340625684, "grad_norm": 1.4391013383865356, "learning_rate": 1.3863830838705985e-05, "loss": 0.4561, "step": 14355 }, { "epoch": 0.39258367971997377, "grad_norm": 1.1591377258300781, "learning_rate": 1.3863013904599991e-05, "loss": 0.4937, "step": 14356 }, { "epoch": 0.39261102603369064, "grad_norm": 1.3339520692825317, "learning_rate": 1.3862196940189743e-05, "loss": 0.5138, "step": 14357 }, { "epoch": 0.39263837234740756, "grad_norm": 1.095423936843872, "learning_rate": 1.3861379945481645e-05, "loss": 0.5162, "step": 14358 }, { "epoch": 0.3926657186611245, "grad_norm": 1.8806846141815186, "learning_rate": 1.3860562920482105e-05, "loss": 0.8516, "step": 14359 }, { "epoch": 0.3926930649748414, "grad_norm": 1.1153289079666138, "learning_rate": 1.3859745865197539e-05, "loss": 0.5384, "step": 14360 }, { "epoch": 0.3927204112885583, "grad_norm": 1.0810167789459229, "learning_rate": 1.3858928779634353e-05, "loss": 0.4963, "step": 14361 }, { "epoch": 0.3927477576022752, "grad_norm": 1.3216071128845215, "learning_rate": 1.3858111663798954e-05, "loss": 0.5301, "step": 14362 }, { "epoch": 0.39277510391599213, "grad_norm": 1.379103183746338, "learning_rate": 1.3857294517697758e-05, "loss": 0.5528, "step": 14363 }, { "epoch": 0.39280245022970905, "grad_norm": 1.3718669414520264, "learning_rate": 1.3856477341337172e-05, "loss": 0.5009, "step": 14364 }, { "epoch": 0.3928297965434259, "grad_norm": 1.2566453218460083, "learning_rate": 1.3855660134723603e-05, "loss": 0.8553, "step": 14365 }, { "epoch": 0.39285714285714285, "grad_norm": 1.37956702709198, "learning_rate": 1.3854842897863467e-05, "loss": 0.5298, "step": 14366 }, { "epoch": 0.3928844891708598, "grad_norm": 1.4129550457000732, "learning_rate": 1.3854025630763176e-05, "loss": 0.5343, "step": 14367 }, { "epoch": 0.3929118354845767, "grad_norm": 1.1928616762161255, "learning_rate": 1.3853208333429134e-05, "loss": 0.8272, "step": 14368 }, { "epoch": 0.39293918179829357, "grad_norm": 1.2549293041229248, "learning_rate": 1.385239100586776e-05, "loss": 0.4609, "step": 14369 }, { "epoch": 0.3929665281120105, "grad_norm": 1.2884029150009155, "learning_rate": 1.3851573648085461e-05, "loss": 0.5464, "step": 14370 }, { "epoch": 0.3929938744257274, "grad_norm": 1.2614245414733887, "learning_rate": 1.3850756260088652e-05, "loss": 0.575, "step": 14371 }, { "epoch": 0.39302122073944434, "grad_norm": 1.291399598121643, "learning_rate": 1.3849938841883742e-05, "loss": 0.8275, "step": 14372 }, { "epoch": 0.3930485670531612, "grad_norm": 1.4301393032073975, "learning_rate": 1.3849121393477148e-05, "loss": 0.5367, "step": 14373 }, { "epoch": 0.39307591336687814, "grad_norm": 1.7002391815185547, "learning_rate": 1.3848303914875278e-05, "loss": 0.4293, "step": 14374 }, { "epoch": 0.39310325968059506, "grad_norm": 1.6005210876464844, "learning_rate": 1.384748640608455e-05, "loss": 0.5483, "step": 14375 }, { "epoch": 0.393130605994312, "grad_norm": 1.6250368356704712, "learning_rate": 1.384666886711137e-05, "loss": 0.3838, "step": 14376 }, { "epoch": 0.39315795230802886, "grad_norm": 1.2263994216918945, "learning_rate": 1.3845851297962159e-05, "loss": 0.5403, "step": 14377 }, { "epoch": 0.3931852986217458, "grad_norm": 1.1607441902160645, "learning_rate": 1.3845033698643323e-05, "loss": 0.481, "step": 14378 }, { "epoch": 0.3932126449354627, "grad_norm": 1.2830027341842651, "learning_rate": 1.3844216069161281e-05, "loss": 0.5283, "step": 14379 }, { "epoch": 0.39323999124917963, "grad_norm": 1.1382333040237427, "learning_rate": 1.3843398409522446e-05, "loss": 0.3863, "step": 14380 }, { "epoch": 0.3932673375628965, "grad_norm": 1.2330104112625122, "learning_rate": 1.3842580719733232e-05, "loss": 0.5473, "step": 14381 }, { "epoch": 0.3932946838766134, "grad_norm": 1.4362123012542725, "learning_rate": 1.3841762999800052e-05, "loss": 0.5058, "step": 14382 }, { "epoch": 0.39332203019033035, "grad_norm": 1.176820158958435, "learning_rate": 1.3840945249729326e-05, "loss": 0.5676, "step": 14383 }, { "epoch": 0.3933493765040473, "grad_norm": 1.4929991960525513, "learning_rate": 1.3840127469527463e-05, "loss": 0.5467, "step": 14384 }, { "epoch": 0.39337672281776415, "grad_norm": 1.4475897550582886, "learning_rate": 1.3839309659200883e-05, "loss": 0.5204, "step": 14385 }, { "epoch": 0.39340406913148107, "grad_norm": 1.2996522188186646, "learning_rate": 1.3838491818755998e-05, "loss": 0.5438, "step": 14386 }, { "epoch": 0.393431415445198, "grad_norm": 1.4173439741134644, "learning_rate": 1.3837673948199221e-05, "loss": 0.5363, "step": 14387 }, { "epoch": 0.3934587617589149, "grad_norm": 1.432370901107788, "learning_rate": 1.3836856047536977e-05, "loss": 0.5612, "step": 14388 }, { "epoch": 0.3934861080726318, "grad_norm": 1.3433876037597656, "learning_rate": 1.3836038116775674e-05, "loss": 0.5657, "step": 14389 }, { "epoch": 0.3935134543863487, "grad_norm": 1.1179746389389038, "learning_rate": 1.3835220155921736e-05, "loss": 0.5194, "step": 14390 }, { "epoch": 0.39354080070006564, "grad_norm": 1.3347796201705933, "learning_rate": 1.383440216498157e-05, "loss": 0.5322, "step": 14391 }, { "epoch": 0.39356814701378257, "grad_norm": 1.4166598320007324, "learning_rate": 1.3833584143961603e-05, "loss": 0.5396, "step": 14392 }, { "epoch": 0.39359549332749943, "grad_norm": 1.012515664100647, "learning_rate": 1.3832766092868241e-05, "loss": 0.5012, "step": 14393 }, { "epoch": 0.39362283964121636, "grad_norm": 1.4762235879898071, "learning_rate": 1.3831948011707915e-05, "loss": 0.5505, "step": 14394 }, { "epoch": 0.3936501859549333, "grad_norm": 1.4151607751846313, "learning_rate": 1.383112990048703e-05, "loss": 0.4894, "step": 14395 }, { "epoch": 0.3936775322686502, "grad_norm": 1.2091950178146362, "learning_rate": 1.3830311759212011e-05, "loss": 0.5435, "step": 14396 }, { "epoch": 0.3937048785823671, "grad_norm": 1.5901341438293457, "learning_rate": 1.3829493587889273e-05, "loss": 0.5482, "step": 14397 }, { "epoch": 0.393732224896084, "grad_norm": 1.0470592975616455, "learning_rate": 1.3828675386525237e-05, "loss": 0.5507, "step": 14398 }, { "epoch": 0.39375957120980093, "grad_norm": 1.2427887916564941, "learning_rate": 1.3827857155126321e-05, "loss": 0.5554, "step": 14399 }, { "epoch": 0.39378691752351785, "grad_norm": 1.265754222869873, "learning_rate": 1.3827038893698939e-05, "loss": 0.5381, "step": 14400 }, { "epoch": 0.3938142638372347, "grad_norm": 1.1378061771392822, "learning_rate": 1.3826220602249519e-05, "loss": 0.5469, "step": 14401 }, { "epoch": 0.39384161015095165, "grad_norm": 1.5097182989120483, "learning_rate": 1.382540228078447e-05, "loss": 0.5499, "step": 14402 }, { "epoch": 0.3938689564646686, "grad_norm": 1.484836220741272, "learning_rate": 1.3824583929310218e-05, "loss": 0.5253, "step": 14403 }, { "epoch": 0.3938963027783855, "grad_norm": 1.4050413370132446, "learning_rate": 1.3823765547833184e-05, "loss": 0.5156, "step": 14404 }, { "epoch": 0.39392364909210237, "grad_norm": 1.3446952104568481, "learning_rate": 1.3822947136359783e-05, "loss": 0.5394, "step": 14405 }, { "epoch": 0.3939509954058193, "grad_norm": 1.8352540731430054, "learning_rate": 1.3822128694896437e-05, "loss": 0.5588, "step": 14406 }, { "epoch": 0.3939783417195362, "grad_norm": 1.3587557077407837, "learning_rate": 1.3821310223449568e-05, "loss": 0.8614, "step": 14407 }, { "epoch": 0.39400568803325314, "grad_norm": 1.1983373165130615, "learning_rate": 1.3820491722025597e-05, "loss": 0.5255, "step": 14408 }, { "epoch": 0.39403303434697, "grad_norm": 1.3011794090270996, "learning_rate": 1.3819673190630942e-05, "loss": 0.5368, "step": 14409 }, { "epoch": 0.39406038066068694, "grad_norm": 1.6860641241073608, "learning_rate": 1.381885462927203e-05, "loss": 0.5096, "step": 14410 }, { "epoch": 0.39408772697440386, "grad_norm": 1.4535167217254639, "learning_rate": 1.3818036037955271e-05, "loss": 0.4303, "step": 14411 }, { "epoch": 0.3941150732881208, "grad_norm": 1.4178482294082642, "learning_rate": 1.3817217416687101e-05, "loss": 0.5076, "step": 14412 }, { "epoch": 0.39414241960183766, "grad_norm": 1.121449589729309, "learning_rate": 1.3816398765473929e-05, "loss": 0.5221, "step": 14413 }, { "epoch": 0.3941697659155546, "grad_norm": 1.1989398002624512, "learning_rate": 1.3815580084322185e-05, "loss": 0.5222, "step": 14414 }, { "epoch": 0.3941971122292715, "grad_norm": 1.238604187965393, "learning_rate": 1.381476137323829e-05, "loss": 0.5507, "step": 14415 }, { "epoch": 0.39422445854298843, "grad_norm": 1.505856990814209, "learning_rate": 1.3813942632228663e-05, "loss": 0.533, "step": 14416 }, { "epoch": 0.3942518048567053, "grad_norm": 1.4109556674957275, "learning_rate": 1.381312386129973e-05, "loss": 0.5721, "step": 14417 }, { "epoch": 0.3942791511704222, "grad_norm": 1.5010665655136108, "learning_rate": 1.3812305060457915e-05, "loss": 0.5245, "step": 14418 }, { "epoch": 0.39430649748413915, "grad_norm": 1.3859370946884155, "learning_rate": 1.3811486229709638e-05, "loss": 0.5348, "step": 14419 }, { "epoch": 0.3943338437978561, "grad_norm": 1.4876044988632202, "learning_rate": 1.3810667369061326e-05, "loss": 0.4734, "step": 14420 }, { "epoch": 0.39436119011157295, "grad_norm": 1.34676194190979, "learning_rate": 1.3809848478519399e-05, "loss": 0.4978, "step": 14421 }, { "epoch": 0.39438853642528987, "grad_norm": 1.3962973356246948, "learning_rate": 1.3809029558090284e-05, "loss": 0.5778, "step": 14422 }, { "epoch": 0.3944158827390068, "grad_norm": 1.1744349002838135, "learning_rate": 1.3808210607780404e-05, "loss": 0.5, "step": 14423 }, { "epoch": 0.3944432290527237, "grad_norm": 1.1872293949127197, "learning_rate": 1.3807391627596182e-05, "loss": 0.5419, "step": 14424 }, { "epoch": 0.3944705753664406, "grad_norm": 1.4566794633865356, "learning_rate": 1.3806572617544047e-05, "loss": 0.5568, "step": 14425 }, { "epoch": 0.3944979216801575, "grad_norm": 1.6485769748687744, "learning_rate": 1.380575357763042e-05, "loss": 0.472, "step": 14426 }, { "epoch": 0.39452526799387444, "grad_norm": 1.3476616144180298, "learning_rate": 1.3804934507861728e-05, "loss": 0.5344, "step": 14427 }, { "epoch": 0.3945526143075913, "grad_norm": 1.6852906942367554, "learning_rate": 1.3804115408244392e-05, "loss": 0.5306, "step": 14428 }, { "epoch": 0.39457996062130823, "grad_norm": 1.388479232788086, "learning_rate": 1.3803296278784846e-05, "loss": 0.5137, "step": 14429 }, { "epoch": 0.39460730693502516, "grad_norm": 1.263107180595398, "learning_rate": 1.3802477119489507e-05, "loss": 0.5267, "step": 14430 }, { "epoch": 0.3946346532487421, "grad_norm": 1.2987877130508423, "learning_rate": 1.3801657930364809e-05, "loss": 0.5297, "step": 14431 }, { "epoch": 0.39466199956245895, "grad_norm": 1.5331772565841675, "learning_rate": 1.3800838711417171e-05, "loss": 0.5334, "step": 14432 }, { "epoch": 0.3946893458761759, "grad_norm": 1.1171009540557861, "learning_rate": 1.3800019462653023e-05, "loss": 0.4505, "step": 14433 }, { "epoch": 0.3947166921898928, "grad_norm": 1.1000325679779053, "learning_rate": 1.3799200184078794e-05, "loss": 0.5177, "step": 14434 }, { "epoch": 0.39474403850360973, "grad_norm": 1.4044811725616455, "learning_rate": 1.3798380875700908e-05, "loss": 0.526, "step": 14435 }, { "epoch": 0.3947713848173266, "grad_norm": 1.191253900527954, "learning_rate": 1.379756153752579e-05, "loss": 0.4956, "step": 14436 }, { "epoch": 0.3947987311310435, "grad_norm": 1.1628007888793945, "learning_rate": 1.3796742169559873e-05, "loss": 0.5326, "step": 14437 }, { "epoch": 0.39482607744476045, "grad_norm": 1.3643780946731567, "learning_rate": 1.3795922771809582e-05, "loss": 0.5573, "step": 14438 }, { "epoch": 0.3948534237584774, "grad_norm": 1.1762028932571411, "learning_rate": 1.3795103344281347e-05, "loss": 0.527, "step": 14439 }, { "epoch": 0.39488077007219424, "grad_norm": 0.9878478050231934, "learning_rate": 1.3794283886981592e-05, "loss": 0.4985, "step": 14440 }, { "epoch": 0.39490811638591117, "grad_norm": 1.3934271335601807, "learning_rate": 1.3793464399916747e-05, "loss": 0.5536, "step": 14441 }, { "epoch": 0.3949354626996281, "grad_norm": 1.4012407064437866, "learning_rate": 1.3792644883093245e-05, "loss": 0.5447, "step": 14442 }, { "epoch": 0.394962809013345, "grad_norm": 1.1881431341171265, "learning_rate": 1.3791825336517509e-05, "loss": 0.4927, "step": 14443 }, { "epoch": 0.3949901553270619, "grad_norm": 1.3617427349090576, "learning_rate": 1.379100576019597e-05, "loss": 0.4374, "step": 14444 }, { "epoch": 0.3950175016407788, "grad_norm": 1.4669283628463745, "learning_rate": 1.3790186154135059e-05, "loss": 0.562, "step": 14445 }, { "epoch": 0.39504484795449574, "grad_norm": 1.1262383460998535, "learning_rate": 1.3789366518341203e-05, "loss": 0.4852, "step": 14446 }, { "epoch": 0.39507219426821266, "grad_norm": 1.167023777961731, "learning_rate": 1.3788546852820833e-05, "loss": 0.5219, "step": 14447 }, { "epoch": 0.39509954058192953, "grad_norm": 1.365841269493103, "learning_rate": 1.3787727157580379e-05, "loss": 0.5179, "step": 14448 }, { "epoch": 0.39512688689564646, "grad_norm": 1.4114152193069458, "learning_rate": 1.3786907432626272e-05, "loss": 0.5353, "step": 14449 }, { "epoch": 0.3951542332093634, "grad_norm": 1.7399145364761353, "learning_rate": 1.3786087677964943e-05, "loss": 0.5609, "step": 14450 }, { "epoch": 0.3951815795230803, "grad_norm": 1.347641944885254, "learning_rate": 1.3785267893602818e-05, "loss": 0.5435, "step": 14451 }, { "epoch": 0.3952089258367972, "grad_norm": 1.4354088306427002, "learning_rate": 1.3784448079546336e-05, "loss": 0.4401, "step": 14452 }, { "epoch": 0.3952362721505141, "grad_norm": 1.3051680326461792, "learning_rate": 1.3783628235801922e-05, "loss": 0.5229, "step": 14453 }, { "epoch": 0.395263618464231, "grad_norm": 1.3777825832366943, "learning_rate": 1.3782808362376007e-05, "loss": 0.5387, "step": 14454 }, { "epoch": 0.39529096477794795, "grad_norm": 1.4118090867996216, "learning_rate": 1.3781988459275028e-05, "loss": 0.4559, "step": 14455 }, { "epoch": 0.3953183110916648, "grad_norm": 1.1830967664718628, "learning_rate": 1.3781168526505413e-05, "loss": 0.541, "step": 14456 }, { "epoch": 0.39534565740538175, "grad_norm": 1.2115424871444702, "learning_rate": 1.3780348564073594e-05, "loss": 0.5272, "step": 14457 }, { "epoch": 0.39537300371909867, "grad_norm": 2.051368236541748, "learning_rate": 1.3779528571986007e-05, "loss": 0.4607, "step": 14458 }, { "epoch": 0.3954003500328156, "grad_norm": 1.411289095878601, "learning_rate": 1.377870855024908e-05, "loss": 0.5834, "step": 14459 }, { "epoch": 0.39542769634653246, "grad_norm": 1.190443992614746, "learning_rate": 1.3777888498869247e-05, "loss": 0.5156, "step": 14460 }, { "epoch": 0.3954550426602494, "grad_norm": 1.4660753011703491, "learning_rate": 1.3777068417852945e-05, "loss": 0.454, "step": 14461 }, { "epoch": 0.3954823889739663, "grad_norm": 1.2514684200286865, "learning_rate": 1.3776248307206604e-05, "loss": 0.523, "step": 14462 }, { "epoch": 0.39550973528768324, "grad_norm": 1.540899634361267, "learning_rate": 1.3775428166936655e-05, "loss": 0.5414, "step": 14463 }, { "epoch": 0.3955370816014001, "grad_norm": 1.2531368732452393, "learning_rate": 1.3774607997049536e-05, "loss": 0.5215, "step": 14464 }, { "epoch": 0.39556442791511703, "grad_norm": 1.5892606973648071, "learning_rate": 1.377378779755168e-05, "loss": 0.5264, "step": 14465 }, { "epoch": 0.39559177422883396, "grad_norm": 2.66601300239563, "learning_rate": 1.377296756844952e-05, "loss": 0.4592, "step": 14466 }, { "epoch": 0.3956191205425509, "grad_norm": 1.0772218704223633, "learning_rate": 1.3772147309749491e-05, "loss": 0.5272, "step": 14467 }, { "epoch": 0.39564646685626775, "grad_norm": 1.2475639581680298, "learning_rate": 1.3771327021458028e-05, "loss": 0.5252, "step": 14468 }, { "epoch": 0.3956738131699847, "grad_norm": 1.1641147136688232, "learning_rate": 1.3770506703581568e-05, "loss": 0.5122, "step": 14469 }, { "epoch": 0.3957011594837016, "grad_norm": 1.332692265510559, "learning_rate": 1.3769686356126539e-05, "loss": 0.507, "step": 14470 }, { "epoch": 0.39572850579741853, "grad_norm": 1.2348564863204956, "learning_rate": 1.3768865979099384e-05, "loss": 0.5186, "step": 14471 }, { "epoch": 0.3957558521111354, "grad_norm": 1.4764138460159302, "learning_rate": 1.3768045572506539e-05, "loss": 0.5587, "step": 14472 }, { "epoch": 0.3957831984248523, "grad_norm": 1.3358018398284912, "learning_rate": 1.3767225136354431e-05, "loss": 0.5675, "step": 14473 }, { "epoch": 0.39581054473856925, "grad_norm": 1.2975788116455078, "learning_rate": 1.3766404670649508e-05, "loss": 0.5228, "step": 14474 }, { "epoch": 0.3958378910522862, "grad_norm": 1.4003150463104248, "learning_rate": 1.37655841753982e-05, "loss": 0.573, "step": 14475 }, { "epoch": 0.39586523736600304, "grad_norm": 1.3949183225631714, "learning_rate": 1.3764763650606938e-05, "loss": 0.4386, "step": 14476 }, { "epoch": 0.39589258367971997, "grad_norm": 1.5529226064682007, "learning_rate": 1.3763943096282169e-05, "loss": 0.8854, "step": 14477 }, { "epoch": 0.3959199299934369, "grad_norm": 1.2283896207809448, "learning_rate": 1.3763122512430324e-05, "loss": 0.5321, "step": 14478 }, { "epoch": 0.3959472763071538, "grad_norm": 1.3970685005187988, "learning_rate": 1.3762301899057846e-05, "loss": 0.5227, "step": 14479 }, { "epoch": 0.3959746226208707, "grad_norm": 1.3247528076171875, "learning_rate": 1.3761481256171166e-05, "loss": 0.5601, "step": 14480 }, { "epoch": 0.3960019689345876, "grad_norm": 1.2840814590454102, "learning_rate": 1.376066058377672e-05, "loss": 0.5254, "step": 14481 }, { "epoch": 0.39602931524830454, "grad_norm": 1.7444103956222534, "learning_rate": 1.3759839881880955e-05, "loss": 0.5061, "step": 14482 }, { "epoch": 0.39605666156202146, "grad_norm": 1.392191767692566, "learning_rate": 1.3759019150490303e-05, "loss": 0.5574, "step": 14483 }, { "epoch": 0.39608400787573833, "grad_norm": 1.1090432405471802, "learning_rate": 1.3758198389611204e-05, "loss": 0.5347, "step": 14484 }, { "epoch": 0.39611135418945526, "grad_norm": 1.2077898979187012, "learning_rate": 1.3757377599250096e-05, "loss": 0.5155, "step": 14485 }, { "epoch": 0.3961387005031722, "grad_norm": 1.3401201963424683, "learning_rate": 1.3756556779413417e-05, "loss": 0.5057, "step": 14486 }, { "epoch": 0.3961660468168891, "grad_norm": 1.2091618776321411, "learning_rate": 1.3755735930107608e-05, "loss": 0.5191, "step": 14487 }, { "epoch": 0.396193393130606, "grad_norm": 1.3064512014389038, "learning_rate": 1.3754915051339108e-05, "loss": 0.5335, "step": 14488 }, { "epoch": 0.3962207394443229, "grad_norm": 1.4727469682693481, "learning_rate": 1.3754094143114354e-05, "loss": 0.4741, "step": 14489 }, { "epoch": 0.3962480857580398, "grad_norm": 1.4314085245132446, "learning_rate": 1.3753273205439792e-05, "loss": 0.5138, "step": 14490 }, { "epoch": 0.39627543207175675, "grad_norm": 1.2795871496200562, "learning_rate": 1.3752452238321854e-05, "loss": 0.5185, "step": 14491 }, { "epoch": 0.3963027783854736, "grad_norm": 1.5466586351394653, "learning_rate": 1.3751631241766985e-05, "loss": 0.5685, "step": 14492 }, { "epoch": 0.39633012469919054, "grad_norm": 1.5660308599472046, "learning_rate": 1.3750810215781628e-05, "loss": 0.8489, "step": 14493 }, { "epoch": 0.39635747101290747, "grad_norm": 1.4148930311203003, "learning_rate": 1.3749989160372216e-05, "loss": 0.5415, "step": 14494 }, { "epoch": 0.3963848173266244, "grad_norm": 1.398826241493225, "learning_rate": 1.3749168075545196e-05, "loss": 0.4662, "step": 14495 }, { "epoch": 0.39641216364034126, "grad_norm": 1.27549409866333, "learning_rate": 1.374834696130701e-05, "loss": 0.5172, "step": 14496 }, { "epoch": 0.3964395099540582, "grad_norm": 1.2959668636322021, "learning_rate": 1.3747525817664094e-05, "loss": 0.5295, "step": 14497 }, { "epoch": 0.3964668562677751, "grad_norm": 1.3650199174880981, "learning_rate": 1.3746704644622894e-05, "loss": 0.5726, "step": 14498 }, { "epoch": 0.39649420258149204, "grad_norm": 1.4361878633499146, "learning_rate": 1.3745883442189852e-05, "loss": 0.5175, "step": 14499 }, { "epoch": 0.3965215488952089, "grad_norm": 1.195989966392517, "learning_rate": 1.3745062210371404e-05, "loss": 0.5177, "step": 14500 }, { "epoch": 0.39654889520892583, "grad_norm": 1.367326021194458, "learning_rate": 1.3744240949174002e-05, "loss": 0.5155, "step": 14501 }, { "epoch": 0.39657624152264276, "grad_norm": 1.2339223623275757, "learning_rate": 1.374341965860408e-05, "loss": 0.545, "step": 14502 }, { "epoch": 0.3966035878363597, "grad_norm": 1.5949087142944336, "learning_rate": 1.3742598338668085e-05, "loss": 0.4847, "step": 14503 }, { "epoch": 0.39663093415007655, "grad_norm": 1.3662521839141846, "learning_rate": 1.3741776989372459e-05, "loss": 0.5452, "step": 14504 }, { "epoch": 0.3966582804637935, "grad_norm": 1.486586332321167, "learning_rate": 1.3740955610723648e-05, "loss": 0.5311, "step": 14505 }, { "epoch": 0.3966856267775104, "grad_norm": 1.3287153244018555, "learning_rate": 1.3740134202728088e-05, "loss": 0.518, "step": 14506 }, { "epoch": 0.39671297309122733, "grad_norm": 2.1846697330474854, "learning_rate": 1.3739312765392234e-05, "loss": 0.8813, "step": 14507 }, { "epoch": 0.3967403194049442, "grad_norm": 1.322116732597351, "learning_rate": 1.3738491298722518e-05, "loss": 0.5239, "step": 14508 }, { "epoch": 0.3967676657186611, "grad_norm": 2.421674966812134, "learning_rate": 1.3737669802725395e-05, "loss": 0.5323, "step": 14509 }, { "epoch": 0.39679501203237805, "grad_norm": 1.5398521423339844, "learning_rate": 1.37368482774073e-05, "loss": 0.5471, "step": 14510 }, { "epoch": 0.39682235834609497, "grad_norm": 1.1419697999954224, "learning_rate": 1.3736026722774682e-05, "loss": 0.5279, "step": 14511 }, { "epoch": 0.39684970465981184, "grad_norm": 1.2863444089889526, "learning_rate": 1.373520513883399e-05, "loss": 0.5052, "step": 14512 }, { "epoch": 0.39687705097352877, "grad_norm": 1.192367672920227, "learning_rate": 1.373438352559166e-05, "loss": 0.4983, "step": 14513 }, { "epoch": 0.3969043972872457, "grad_norm": 1.4953619241714478, "learning_rate": 1.3733561883054145e-05, "loss": 0.4691, "step": 14514 }, { "epoch": 0.3969317436009626, "grad_norm": 1.9650053977966309, "learning_rate": 1.373274021122789e-05, "loss": 0.8136, "step": 14515 }, { "epoch": 0.3969590899146795, "grad_norm": 1.2321453094482422, "learning_rate": 1.3731918510119335e-05, "loss": 0.8318, "step": 14516 }, { "epoch": 0.3969864362283964, "grad_norm": 1.2031235694885254, "learning_rate": 1.373109677973493e-05, "loss": 0.542, "step": 14517 }, { "epoch": 0.39701378254211334, "grad_norm": 1.470884084701538, "learning_rate": 1.3730275020081121e-05, "loss": 0.509, "step": 14518 }, { "epoch": 0.39704112885583026, "grad_norm": 1.1753209829330444, "learning_rate": 1.3729453231164354e-05, "loss": 0.5113, "step": 14519 }, { "epoch": 0.39706847516954713, "grad_norm": 1.487809658050537, "learning_rate": 1.3728631412991079e-05, "loss": 0.4777, "step": 14520 }, { "epoch": 0.39709582148326406, "grad_norm": 1.5813959836959839, "learning_rate": 1.3727809565567736e-05, "loss": 0.4619, "step": 14521 }, { "epoch": 0.397123167796981, "grad_norm": 1.4463129043579102, "learning_rate": 1.3726987688900775e-05, "loss": 0.5295, "step": 14522 }, { "epoch": 0.3971505141106979, "grad_norm": 1.0798066854476929, "learning_rate": 1.3726165782996648e-05, "loss": 0.528, "step": 14523 }, { "epoch": 0.3971778604244148, "grad_norm": 1.6122575998306274, "learning_rate": 1.3725343847861796e-05, "loss": 0.5065, "step": 14524 }, { "epoch": 0.3972052067381317, "grad_norm": 1.2166574001312256, "learning_rate": 1.372452188350267e-05, "loss": 0.5223, "step": 14525 }, { "epoch": 0.3972325530518486, "grad_norm": 1.2668598890304565, "learning_rate": 1.3723699889925723e-05, "loss": 0.5408, "step": 14526 }, { "epoch": 0.3972598993655655, "grad_norm": 1.3143389225006104, "learning_rate": 1.3722877867137392e-05, "loss": 0.5254, "step": 14527 }, { "epoch": 0.3972872456792824, "grad_norm": 1.3752567768096924, "learning_rate": 1.3722055815144138e-05, "loss": 0.5358, "step": 14528 }, { "epoch": 0.39731459199299934, "grad_norm": 1.3272323608398438, "learning_rate": 1.3721233733952399e-05, "loss": 0.5296, "step": 14529 }, { "epoch": 0.39734193830671627, "grad_norm": 1.4447225332260132, "learning_rate": 1.3720411623568629e-05, "loss": 0.4605, "step": 14530 }, { "epoch": 0.39736928462043314, "grad_norm": 1.734365701675415, "learning_rate": 1.3719589483999278e-05, "loss": 0.8294, "step": 14531 }, { "epoch": 0.39739663093415006, "grad_norm": 1.3007845878601074, "learning_rate": 1.3718767315250794e-05, "loss": 0.5137, "step": 14532 }, { "epoch": 0.397423977247867, "grad_norm": 1.4194704294204712, "learning_rate": 1.3717945117329628e-05, "loss": 0.8514, "step": 14533 }, { "epoch": 0.3974513235615839, "grad_norm": 1.3730158805847168, "learning_rate": 1.3717122890242229e-05, "loss": 0.47, "step": 14534 }, { "epoch": 0.3974786698753008, "grad_norm": 1.6987833976745605, "learning_rate": 1.3716300633995045e-05, "loss": 0.5363, "step": 14535 }, { "epoch": 0.3975060161890177, "grad_norm": 1.134901762008667, "learning_rate": 1.3715478348594528e-05, "loss": 0.3844, "step": 14536 }, { "epoch": 0.39753336250273463, "grad_norm": 1.4007179737091064, "learning_rate": 1.3714656034047132e-05, "loss": 0.5801, "step": 14537 }, { "epoch": 0.39756070881645156, "grad_norm": 1.4135398864746094, "learning_rate": 1.3713833690359302e-05, "loss": 0.5263, "step": 14538 }, { "epoch": 0.3975880551301684, "grad_norm": 1.1712933778762817, "learning_rate": 1.3713011317537494e-05, "loss": 0.5015, "step": 14539 }, { "epoch": 0.39761540144388535, "grad_norm": 1.455385684967041, "learning_rate": 1.3712188915588156e-05, "loss": 0.4442, "step": 14540 }, { "epoch": 0.3976427477576023, "grad_norm": 1.2626358270645142, "learning_rate": 1.3711366484517741e-05, "loss": 0.5317, "step": 14541 }, { "epoch": 0.3976700940713192, "grad_norm": 1.4934568405151367, "learning_rate": 1.37105440243327e-05, "loss": 0.5153, "step": 14542 }, { "epoch": 0.39769744038503607, "grad_norm": 1.0951817035675049, "learning_rate": 1.3709721535039484e-05, "loss": 0.5174, "step": 14543 }, { "epoch": 0.397724786698753, "grad_norm": 1.7465012073516846, "learning_rate": 1.370889901664455e-05, "loss": 0.8299, "step": 14544 }, { "epoch": 0.3977521330124699, "grad_norm": 1.388634443283081, "learning_rate": 1.3708076469154344e-05, "loss": 0.563, "step": 14545 }, { "epoch": 0.39777947932618685, "grad_norm": 1.4482518434524536, "learning_rate": 1.3707253892575323e-05, "loss": 0.553, "step": 14546 }, { "epoch": 0.3978068256399037, "grad_norm": 1.3385629653930664, "learning_rate": 1.370643128691394e-05, "loss": 0.5477, "step": 14547 }, { "epoch": 0.39783417195362064, "grad_norm": 1.2650741338729858, "learning_rate": 1.3705608652176643e-05, "loss": 0.5231, "step": 14548 }, { "epoch": 0.39786151826733757, "grad_norm": 1.2826629877090454, "learning_rate": 1.3704785988369888e-05, "loss": 0.5199, "step": 14549 }, { "epoch": 0.3978888645810545, "grad_norm": 1.0889631509780884, "learning_rate": 1.3703963295500137e-05, "loss": 0.5152, "step": 14550 }, { "epoch": 0.39791621089477136, "grad_norm": 1.220336675643921, "learning_rate": 1.3703140573573829e-05, "loss": 0.5351, "step": 14551 }, { "epoch": 0.3979435572084883, "grad_norm": 1.2234750986099243, "learning_rate": 1.3702317822597428e-05, "loss": 0.5261, "step": 14552 }, { "epoch": 0.3979709035222052, "grad_norm": 2.317199468612671, "learning_rate": 1.3701495042577389e-05, "loss": 0.5143, "step": 14553 }, { "epoch": 0.39799824983592214, "grad_norm": 1.0827299356460571, "learning_rate": 1.3700672233520157e-05, "loss": 0.3701, "step": 14554 }, { "epoch": 0.398025596149639, "grad_norm": 1.2248296737670898, "learning_rate": 1.3699849395432198e-05, "loss": 0.5146, "step": 14555 }, { "epoch": 0.39805294246335593, "grad_norm": 1.6346849203109741, "learning_rate": 1.3699026528319958e-05, "loss": 0.5266, "step": 14556 }, { "epoch": 0.39808028877707285, "grad_norm": 1.3573434352874756, "learning_rate": 1.3698203632189896e-05, "loss": 0.555, "step": 14557 }, { "epoch": 0.3981076350907898, "grad_norm": 1.1470096111297607, "learning_rate": 1.3697380707048471e-05, "loss": 0.5131, "step": 14558 }, { "epoch": 0.39813498140450665, "grad_norm": 1.596848487854004, "learning_rate": 1.3696557752902131e-05, "loss": 0.5276, "step": 14559 }, { "epoch": 0.3981623277182236, "grad_norm": 1.3772716522216797, "learning_rate": 1.3695734769757337e-05, "loss": 0.531, "step": 14560 }, { "epoch": 0.3981896740319405, "grad_norm": 1.1741200685501099, "learning_rate": 1.3694911757620544e-05, "loss": 0.5101, "step": 14561 }, { "epoch": 0.3982170203456574, "grad_norm": 1.3896116018295288, "learning_rate": 1.3694088716498208e-05, "loss": 0.522, "step": 14562 }, { "epoch": 0.3982443666593743, "grad_norm": 1.221295952796936, "learning_rate": 1.3693265646396786e-05, "loss": 0.5131, "step": 14563 }, { "epoch": 0.3982717129730912, "grad_norm": 1.2541927099227905, "learning_rate": 1.3692442547322732e-05, "loss": 0.467, "step": 14564 }, { "epoch": 0.39829905928680814, "grad_norm": 1.2993499040603638, "learning_rate": 1.3691619419282506e-05, "loss": 0.5323, "step": 14565 }, { "epoch": 0.39832640560052507, "grad_norm": 1.4295337200164795, "learning_rate": 1.3690796262282568e-05, "loss": 0.5122, "step": 14566 }, { "epoch": 0.39835375191424194, "grad_norm": 1.345054030418396, "learning_rate": 1.3689973076329368e-05, "loss": 0.4761, "step": 14567 }, { "epoch": 0.39838109822795886, "grad_norm": 1.6202383041381836, "learning_rate": 1.3689149861429371e-05, "loss": 0.366, "step": 14568 }, { "epoch": 0.3984084445416758, "grad_norm": 1.5757315158843994, "learning_rate": 1.3688326617589032e-05, "loss": 0.4955, "step": 14569 }, { "epoch": 0.3984357908553927, "grad_norm": 1.3737746477127075, "learning_rate": 1.3687503344814808e-05, "loss": 0.5314, "step": 14570 }, { "epoch": 0.3984631371691096, "grad_norm": 1.7197980880737305, "learning_rate": 1.3686680043113155e-05, "loss": 0.508, "step": 14571 }, { "epoch": 0.3984904834828265, "grad_norm": 1.2641410827636719, "learning_rate": 1.3685856712490537e-05, "loss": 0.5331, "step": 14572 }, { "epoch": 0.39851782979654343, "grad_norm": 1.3279095888137817, "learning_rate": 1.368503335295341e-05, "loss": 0.4532, "step": 14573 }, { "epoch": 0.39854517611026036, "grad_norm": 1.073617696762085, "learning_rate": 1.3684209964508233e-05, "loss": 0.4714, "step": 14574 }, { "epoch": 0.3985725224239772, "grad_norm": 1.851225733757019, "learning_rate": 1.3683386547161466e-05, "loss": 0.5257, "step": 14575 }, { "epoch": 0.39859986873769415, "grad_norm": 1.1712746620178223, "learning_rate": 1.3682563100919567e-05, "loss": 0.5194, "step": 14576 }, { "epoch": 0.3986272150514111, "grad_norm": 1.1524412631988525, "learning_rate": 1.3681739625788998e-05, "loss": 0.5382, "step": 14577 }, { "epoch": 0.398654561365128, "grad_norm": 1.129299521446228, "learning_rate": 1.3680916121776218e-05, "loss": 0.4779, "step": 14578 }, { "epoch": 0.39868190767884487, "grad_norm": 1.3632351160049438, "learning_rate": 1.3680092588887686e-05, "loss": 0.5408, "step": 14579 }, { "epoch": 0.3987092539925618, "grad_norm": 1.3447109460830688, "learning_rate": 1.3679269027129864e-05, "loss": 0.5118, "step": 14580 }, { "epoch": 0.3987366003062787, "grad_norm": 1.356126070022583, "learning_rate": 1.3678445436509211e-05, "loss": 0.5018, "step": 14581 }, { "epoch": 0.39876394661999565, "grad_norm": 1.0546457767486572, "learning_rate": 1.3677621817032189e-05, "loss": 0.5112, "step": 14582 }, { "epoch": 0.3987912929337125, "grad_norm": 1.30805242061615, "learning_rate": 1.3676798168705258e-05, "loss": 0.5243, "step": 14583 }, { "epoch": 0.39881863924742944, "grad_norm": 1.5101255178451538, "learning_rate": 1.3675974491534883e-05, "loss": 0.5478, "step": 14584 }, { "epoch": 0.39884598556114637, "grad_norm": 1.2876356840133667, "learning_rate": 1.3675150785527522e-05, "loss": 0.5191, "step": 14585 }, { "epoch": 0.3988733318748633, "grad_norm": 1.0792709589004517, "learning_rate": 1.3674327050689637e-05, "loss": 0.5333, "step": 14586 }, { "epoch": 0.39890067818858016, "grad_norm": 1.2494311332702637, "learning_rate": 1.3673503287027687e-05, "loss": 0.5275, "step": 14587 }, { "epoch": 0.3989280245022971, "grad_norm": 1.1354660987854004, "learning_rate": 1.3672679494548143e-05, "loss": 0.4937, "step": 14588 }, { "epoch": 0.398955370816014, "grad_norm": 10.615572929382324, "learning_rate": 1.3671855673257457e-05, "loss": 0.8094, "step": 14589 }, { "epoch": 0.39898271712973093, "grad_norm": 1.297207236289978, "learning_rate": 1.36710318231621e-05, "loss": 0.5422, "step": 14590 }, { "epoch": 0.3990100634434478, "grad_norm": 1.229918360710144, "learning_rate": 1.367020794426853e-05, "loss": 0.534, "step": 14591 }, { "epoch": 0.39903740975716473, "grad_norm": 2.3224732875823975, "learning_rate": 1.366938403658321e-05, "loss": 0.3985, "step": 14592 }, { "epoch": 0.39906475607088165, "grad_norm": 1.1498565673828125, "learning_rate": 1.3668560100112608e-05, "loss": 0.378, "step": 14593 }, { "epoch": 0.3990921023845986, "grad_norm": 1.2406103610992432, "learning_rate": 1.3667736134863183e-05, "loss": 0.5188, "step": 14594 }, { "epoch": 0.39911944869831545, "grad_norm": 1.308142066001892, "learning_rate": 1.36669121408414e-05, "loss": 0.5388, "step": 14595 }, { "epoch": 0.3991467950120324, "grad_norm": 1.391387939453125, "learning_rate": 1.3666088118053725e-05, "loss": 0.5025, "step": 14596 }, { "epoch": 0.3991741413257493, "grad_norm": 1.1259562969207764, "learning_rate": 1.3665264066506619e-05, "loss": 0.5596, "step": 14597 }, { "epoch": 0.3992014876394662, "grad_norm": 1.2161606550216675, "learning_rate": 1.3664439986206548e-05, "loss": 0.5077, "step": 14598 }, { "epoch": 0.3992288339531831, "grad_norm": 1.215950608253479, "learning_rate": 1.3663615877159975e-05, "loss": 0.51, "step": 14599 }, { "epoch": 0.3992561802669, "grad_norm": 1.2804468870162964, "learning_rate": 1.3662791739373367e-05, "loss": 0.5406, "step": 14600 }, { "epoch": 0.39928352658061694, "grad_norm": 1.1913288831710815, "learning_rate": 1.366196757285319e-05, "loss": 0.5109, "step": 14601 }, { "epoch": 0.39931087289433387, "grad_norm": 1.156577467918396, "learning_rate": 1.3661143377605907e-05, "loss": 0.5322, "step": 14602 }, { "epoch": 0.39933821920805074, "grad_norm": 1.1730600595474243, "learning_rate": 1.3660319153637986e-05, "loss": 0.5495, "step": 14603 }, { "epoch": 0.39936556552176766, "grad_norm": 1.3014631271362305, "learning_rate": 1.3659494900955892e-05, "loss": 0.5379, "step": 14604 }, { "epoch": 0.3993929118354846, "grad_norm": 1.362352728843689, "learning_rate": 1.3658670619566088e-05, "loss": 0.5339, "step": 14605 }, { "epoch": 0.3994202581492015, "grad_norm": 1.139266014099121, "learning_rate": 1.3657846309475043e-05, "loss": 0.5219, "step": 14606 }, { "epoch": 0.3994476044629184, "grad_norm": 1.7498215436935425, "learning_rate": 1.3657021970689224e-05, "loss": 0.5311, "step": 14607 }, { "epoch": 0.3994749507766353, "grad_norm": 1.2543578147888184, "learning_rate": 1.3656197603215096e-05, "loss": 0.4303, "step": 14608 }, { "epoch": 0.39950229709035223, "grad_norm": 1.381786584854126, "learning_rate": 1.3655373207059128e-05, "loss": 0.5101, "step": 14609 }, { "epoch": 0.39952964340406916, "grad_norm": 1.2639673948287964, "learning_rate": 1.3654548782227784e-05, "loss": 0.5331, "step": 14610 }, { "epoch": 0.399556989717786, "grad_norm": 1.2871873378753662, "learning_rate": 1.3653724328727534e-05, "loss": 0.5587, "step": 14611 }, { "epoch": 0.39958433603150295, "grad_norm": 1.2148175239562988, "learning_rate": 1.3652899846564846e-05, "loss": 0.5136, "step": 14612 }, { "epoch": 0.3996116823452199, "grad_norm": 2.084885835647583, "learning_rate": 1.3652075335746184e-05, "loss": 0.4395, "step": 14613 }, { "epoch": 0.3996390286589368, "grad_norm": 1.2498401403427124, "learning_rate": 1.3651250796278019e-05, "loss": 0.5039, "step": 14614 }, { "epoch": 0.39966637497265367, "grad_norm": 1.5774890184402466, "learning_rate": 1.3650426228166822e-05, "loss": 0.5615, "step": 14615 }, { "epoch": 0.3996937212863706, "grad_norm": 1.4510412216186523, "learning_rate": 1.3649601631419059e-05, "loss": 0.5265, "step": 14616 }, { "epoch": 0.3997210676000875, "grad_norm": 1.2044854164123535, "learning_rate": 1.3648777006041196e-05, "loss": 0.4959, "step": 14617 }, { "epoch": 0.39974841391380445, "grad_norm": 1.4819972515106201, "learning_rate": 1.3647952352039704e-05, "loss": 0.3979, "step": 14618 }, { "epoch": 0.3997757602275213, "grad_norm": 1.3145877122879028, "learning_rate": 1.3647127669421052e-05, "loss": 0.555, "step": 14619 }, { "epoch": 0.39980310654123824, "grad_norm": 1.1337647438049316, "learning_rate": 1.3646302958191712e-05, "loss": 0.5145, "step": 14620 }, { "epoch": 0.39983045285495517, "grad_norm": 1.282774806022644, "learning_rate": 1.3645478218358153e-05, "loss": 0.5205, "step": 14621 }, { "epoch": 0.3998577991686721, "grad_norm": 1.5386754274368286, "learning_rate": 1.3644653449926839e-05, "loss": 0.5591, "step": 14622 }, { "epoch": 0.39988514548238896, "grad_norm": 1.3877259492874146, "learning_rate": 1.3643828652904247e-05, "loss": 0.436, "step": 14623 }, { "epoch": 0.3999124917961059, "grad_norm": 2.125103235244751, "learning_rate": 1.3643003827296844e-05, "loss": 0.5205, "step": 14624 }, { "epoch": 0.3999398381098228, "grad_norm": 1.3563592433929443, "learning_rate": 1.36421789731111e-05, "loss": 0.5153, "step": 14625 }, { "epoch": 0.39996718442353973, "grad_norm": 1.4336849451065063, "learning_rate": 1.364135409035349e-05, "loss": 0.4949, "step": 14626 }, { "epoch": 0.3999945307372566, "grad_norm": 1.5409868955612183, "learning_rate": 1.3640529179030476e-05, "loss": 0.4306, "step": 14627 }, { "epoch": 0.40002187705097353, "grad_norm": 1.3658193349838257, "learning_rate": 1.3639704239148541e-05, "loss": 0.508, "step": 14628 }, { "epoch": 0.40004922336469045, "grad_norm": 1.2148256301879883, "learning_rate": 1.3638879270714149e-05, "loss": 0.5162, "step": 14629 }, { "epoch": 0.4000765696784073, "grad_norm": 1.2143218517303467, "learning_rate": 1.3638054273733772e-05, "loss": 0.5098, "step": 14630 }, { "epoch": 0.40010391599212425, "grad_norm": 1.5042164325714111, "learning_rate": 1.3637229248213884e-05, "loss": 0.5519, "step": 14631 }, { "epoch": 0.4001312623058412, "grad_norm": 1.2913457155227661, "learning_rate": 1.3636404194160954e-05, "loss": 0.5491, "step": 14632 }, { "epoch": 0.4001586086195581, "grad_norm": 1.3601762056350708, "learning_rate": 1.3635579111581459e-05, "loss": 0.5201, "step": 14633 }, { "epoch": 0.40018595493327497, "grad_norm": 1.2377183437347412, "learning_rate": 1.363475400048187e-05, "loss": 0.5552, "step": 14634 }, { "epoch": 0.4002133012469919, "grad_norm": 1.2046115398406982, "learning_rate": 1.3633928860868654e-05, "loss": 0.5058, "step": 14635 }, { "epoch": 0.4002406475607088, "grad_norm": 1.0732293128967285, "learning_rate": 1.3633103692748295e-05, "loss": 0.4054, "step": 14636 }, { "epoch": 0.40026799387442574, "grad_norm": 1.36920166015625, "learning_rate": 1.3632278496127256e-05, "loss": 0.5607, "step": 14637 }, { "epoch": 0.4002953401881426, "grad_norm": 1.241595983505249, "learning_rate": 1.3631453271012016e-05, "loss": 0.793, "step": 14638 }, { "epoch": 0.40032268650185954, "grad_norm": 1.1987881660461426, "learning_rate": 1.3630628017409049e-05, "loss": 0.4201, "step": 14639 }, { "epoch": 0.40035003281557646, "grad_norm": 1.2988313436508179, "learning_rate": 1.3629802735324823e-05, "loss": 0.5358, "step": 14640 }, { "epoch": 0.4003773791292934, "grad_norm": 1.231135368347168, "learning_rate": 1.3628977424765817e-05, "loss": 0.5429, "step": 14641 }, { "epoch": 0.40040472544301026, "grad_norm": 1.1887320280075073, "learning_rate": 1.3628152085738506e-05, "loss": 0.8424, "step": 14642 }, { "epoch": 0.4004320717567272, "grad_norm": 1.6105403900146484, "learning_rate": 1.362732671824936e-05, "loss": 0.4615, "step": 14643 }, { "epoch": 0.4004594180704441, "grad_norm": 1.222265362739563, "learning_rate": 1.3626501322304858e-05, "loss": 0.4297, "step": 14644 }, { "epoch": 0.40048676438416103, "grad_norm": 1.1980440616607666, "learning_rate": 1.3625675897911474e-05, "loss": 0.5137, "step": 14645 }, { "epoch": 0.4005141106978779, "grad_norm": 1.2115236520767212, "learning_rate": 1.3624850445075684e-05, "loss": 0.5019, "step": 14646 }, { "epoch": 0.4005414570115948, "grad_norm": 1.151493787765503, "learning_rate": 1.3624024963803962e-05, "loss": 0.502, "step": 14647 }, { "epoch": 0.40056880332531175, "grad_norm": 1.1487191915512085, "learning_rate": 1.3623199454102782e-05, "loss": 0.4807, "step": 14648 }, { "epoch": 0.4005961496390287, "grad_norm": 1.384795904159546, "learning_rate": 1.3622373915978623e-05, "loss": 0.5208, "step": 14649 }, { "epoch": 0.40062349595274555, "grad_norm": 1.1529499292373657, "learning_rate": 1.3621548349437963e-05, "loss": 0.5276, "step": 14650 }, { "epoch": 0.40065084226646247, "grad_norm": 1.2445778846740723, "learning_rate": 1.362072275448727e-05, "loss": 0.5076, "step": 14651 }, { "epoch": 0.4006781885801794, "grad_norm": 1.3190470933914185, "learning_rate": 1.3619897131133031e-05, "loss": 0.5308, "step": 14652 }, { "epoch": 0.4007055348938963, "grad_norm": 1.1093238592147827, "learning_rate": 1.3619071479381716e-05, "loss": 0.4898, "step": 14653 }, { "epoch": 0.4007328812076132, "grad_norm": 1.3579649925231934, "learning_rate": 1.3618245799239802e-05, "loss": 0.5452, "step": 14654 }, { "epoch": 0.4007602275213301, "grad_norm": 1.6147212982177734, "learning_rate": 1.361742009071377e-05, "loss": 0.433, "step": 14655 }, { "epoch": 0.40078757383504704, "grad_norm": 1.3481106758117676, "learning_rate": 1.3616594353810095e-05, "loss": 0.5435, "step": 14656 }, { "epoch": 0.40081492014876396, "grad_norm": 1.4158101081848145, "learning_rate": 1.3615768588535255e-05, "loss": 0.5321, "step": 14657 }, { "epoch": 0.40084226646248083, "grad_norm": 1.804738998413086, "learning_rate": 1.3614942794895729e-05, "loss": 0.5273, "step": 14658 }, { "epoch": 0.40086961277619776, "grad_norm": 3.258269786834717, "learning_rate": 1.3614116972897992e-05, "loss": 0.4433, "step": 14659 }, { "epoch": 0.4008969590899147, "grad_norm": 1.314509391784668, "learning_rate": 1.3613291122548525e-05, "loss": 0.4693, "step": 14660 }, { "epoch": 0.4009243054036316, "grad_norm": 1.4382848739624023, "learning_rate": 1.3612465243853807e-05, "loss": 0.5288, "step": 14661 }, { "epoch": 0.4009516517173485, "grad_norm": 1.3420472145080566, "learning_rate": 1.3611639336820316e-05, "loss": 0.4421, "step": 14662 }, { "epoch": 0.4009789980310654, "grad_norm": 1.4871468544006348, "learning_rate": 1.3610813401454527e-05, "loss": 0.5087, "step": 14663 }, { "epoch": 0.40100634434478233, "grad_norm": 1.240291714668274, "learning_rate": 1.3609987437762929e-05, "loss": 0.5383, "step": 14664 }, { "epoch": 0.40103369065849925, "grad_norm": 1.5657154321670532, "learning_rate": 1.360916144575199e-05, "loss": 0.521, "step": 14665 }, { "epoch": 0.4010610369722161, "grad_norm": 1.1660127639770508, "learning_rate": 1.3608335425428199e-05, "loss": 0.5465, "step": 14666 }, { "epoch": 0.40108838328593305, "grad_norm": 1.3269087076187134, "learning_rate": 1.3607509376798029e-05, "loss": 0.5211, "step": 14667 }, { "epoch": 0.40111572959965, "grad_norm": 1.3752142190933228, "learning_rate": 1.3606683299867964e-05, "loss": 0.5344, "step": 14668 }, { "epoch": 0.4011430759133669, "grad_norm": 1.3810715675354004, "learning_rate": 1.3605857194644485e-05, "loss": 0.5696, "step": 14669 }, { "epoch": 0.40117042222708377, "grad_norm": 2.105309247970581, "learning_rate": 1.360503106113407e-05, "loss": 0.5619, "step": 14670 }, { "epoch": 0.4011977685408007, "grad_norm": 1.4611656665802002, "learning_rate": 1.3604204899343204e-05, "loss": 0.4966, "step": 14671 }, { "epoch": 0.4012251148545176, "grad_norm": 1.106955885887146, "learning_rate": 1.360337870927836e-05, "loss": 0.5141, "step": 14672 }, { "epoch": 0.40125246116823454, "grad_norm": 1.2937088012695312, "learning_rate": 1.3602552490946028e-05, "loss": 0.526, "step": 14673 }, { "epoch": 0.4012798074819514, "grad_norm": 1.0850695371627808, "learning_rate": 1.3601726244352686e-05, "loss": 0.5116, "step": 14674 }, { "epoch": 0.40130715379566834, "grad_norm": 1.1393448114395142, "learning_rate": 1.3600899969504812e-05, "loss": 0.5131, "step": 14675 }, { "epoch": 0.40133450010938526, "grad_norm": 1.165921688079834, "learning_rate": 1.360007366640889e-05, "loss": 0.4956, "step": 14676 }, { "epoch": 0.4013618464231022, "grad_norm": 1.1207544803619385, "learning_rate": 1.3599247335071408e-05, "loss": 0.5295, "step": 14677 }, { "epoch": 0.40138919273681906, "grad_norm": 1.426660418510437, "learning_rate": 1.3598420975498841e-05, "loss": 0.5433, "step": 14678 }, { "epoch": 0.401416539050536, "grad_norm": 1.4376810789108276, "learning_rate": 1.3597594587697673e-05, "loss": 0.5657, "step": 14679 }, { "epoch": 0.4014438853642529, "grad_norm": 1.419129729270935, "learning_rate": 1.359676817167439e-05, "loss": 0.8595, "step": 14680 }, { "epoch": 0.40147123167796983, "grad_norm": 1.4654011726379395, "learning_rate": 1.3595941727435471e-05, "loss": 0.512, "step": 14681 }, { "epoch": 0.4014985779916867, "grad_norm": 1.2919448614120483, "learning_rate": 1.3595115254987403e-05, "loss": 0.8401, "step": 14682 }, { "epoch": 0.4015259243054036, "grad_norm": 1.2182502746582031, "learning_rate": 1.3594288754336667e-05, "loss": 0.516, "step": 14683 }, { "epoch": 0.40155327061912055, "grad_norm": 4.517863750457764, "learning_rate": 1.3593462225489744e-05, "loss": 0.5513, "step": 14684 }, { "epoch": 0.4015806169328375, "grad_norm": 1.3819388151168823, "learning_rate": 1.3592635668453127e-05, "loss": 0.4629, "step": 14685 }, { "epoch": 0.40160796324655434, "grad_norm": 1.370044469833374, "learning_rate": 1.359180908323329e-05, "loss": 0.5088, "step": 14686 }, { "epoch": 0.40163530956027127, "grad_norm": 1.7206372022628784, "learning_rate": 1.3590982469836723e-05, "loss": 0.4567, "step": 14687 }, { "epoch": 0.4016626558739882, "grad_norm": 1.356869101524353, "learning_rate": 1.3590155828269911e-05, "loss": 0.5083, "step": 14688 }, { "epoch": 0.4016900021877051, "grad_norm": 1.29542875289917, "learning_rate": 1.3589329158539333e-05, "loss": 0.5293, "step": 14689 }, { "epoch": 0.401717348501422, "grad_norm": 1.218252420425415, "learning_rate": 1.358850246065148e-05, "loss": 0.5309, "step": 14690 }, { "epoch": 0.4017446948151389, "grad_norm": 1.3910727500915527, "learning_rate": 1.3587675734612837e-05, "loss": 0.5514, "step": 14691 }, { "epoch": 0.40177204112885584, "grad_norm": 1.6215250492095947, "learning_rate": 1.3586848980429884e-05, "loss": 0.8027, "step": 14692 }, { "epoch": 0.40179938744257276, "grad_norm": 1.2777422666549683, "learning_rate": 1.3586022198109113e-05, "loss": 0.5226, "step": 14693 }, { "epoch": 0.40182673375628963, "grad_norm": 1.1544629335403442, "learning_rate": 1.3585195387657005e-05, "loss": 0.5288, "step": 14694 }, { "epoch": 0.40185408007000656, "grad_norm": 1.2225496768951416, "learning_rate": 1.3584368549080047e-05, "loss": 0.5281, "step": 14695 }, { "epoch": 0.4018814263837235, "grad_norm": 1.1123461723327637, "learning_rate": 1.3583541682384728e-05, "loss": 0.5143, "step": 14696 }, { "epoch": 0.4019087726974404, "grad_norm": 1.1333295106887817, "learning_rate": 1.3582714787577534e-05, "loss": 0.5366, "step": 14697 }, { "epoch": 0.4019361190111573, "grad_norm": 1.0572757720947266, "learning_rate": 1.3581887864664948e-05, "loss": 0.4872, "step": 14698 }, { "epoch": 0.4019634653248742, "grad_norm": 1.2938233613967896, "learning_rate": 1.3581060913653462e-05, "loss": 0.56, "step": 14699 }, { "epoch": 0.40199081163859113, "grad_norm": 1.3450288772583008, "learning_rate": 1.3580233934549563e-05, "loss": 0.5275, "step": 14700 }, { "epoch": 0.40201815795230805, "grad_norm": 1.0301567316055298, "learning_rate": 1.3579406927359733e-05, "loss": 0.3818, "step": 14701 }, { "epoch": 0.4020455042660249, "grad_norm": 1.13711678981781, "learning_rate": 1.3578579892090465e-05, "loss": 0.5316, "step": 14702 }, { "epoch": 0.40207285057974185, "grad_norm": 1.354957103729248, "learning_rate": 1.3577752828748243e-05, "loss": 0.8274, "step": 14703 }, { "epoch": 0.4021001968934588, "grad_norm": 1.4076194763183594, "learning_rate": 1.3576925737339557e-05, "loss": 0.5223, "step": 14704 }, { "epoch": 0.4021275432071757, "grad_norm": 1.2925903797149658, "learning_rate": 1.3576098617870897e-05, "loss": 0.5555, "step": 14705 }, { "epoch": 0.40215488952089257, "grad_norm": 1.2125506401062012, "learning_rate": 1.357527147034875e-05, "loss": 0.5223, "step": 14706 }, { "epoch": 0.4021822358346095, "grad_norm": 1.3502459526062012, "learning_rate": 1.35744442947796e-05, "loss": 0.4568, "step": 14707 }, { "epoch": 0.4022095821483264, "grad_norm": 1.5314040184020996, "learning_rate": 1.3573617091169944e-05, "loss": 0.4136, "step": 14708 }, { "epoch": 0.40223692846204334, "grad_norm": 1.2697019577026367, "learning_rate": 1.3572789859526268e-05, "loss": 0.3831, "step": 14709 }, { "epoch": 0.4022642747757602, "grad_norm": 1.2059342861175537, "learning_rate": 1.3571962599855061e-05, "loss": 0.5031, "step": 14710 }, { "epoch": 0.40229162108947714, "grad_norm": 1.3802831172943115, "learning_rate": 1.3571135312162814e-05, "loss": 0.5586, "step": 14711 }, { "epoch": 0.40231896740319406, "grad_norm": 1.4430598020553589, "learning_rate": 1.3570307996456014e-05, "loss": 0.547, "step": 14712 }, { "epoch": 0.402346313716911, "grad_norm": 1.221372127532959, "learning_rate": 1.356948065274115e-05, "loss": 0.5403, "step": 14713 }, { "epoch": 0.40237366003062786, "grad_norm": 1.0938177108764648, "learning_rate": 1.3568653281024714e-05, "loss": 0.5106, "step": 14714 }, { "epoch": 0.4024010063443448, "grad_norm": 1.5151236057281494, "learning_rate": 1.3567825881313203e-05, "loss": 0.5419, "step": 14715 }, { "epoch": 0.4024283526580617, "grad_norm": 1.23844575881958, "learning_rate": 1.3566998453613098e-05, "loss": 0.5232, "step": 14716 }, { "epoch": 0.40245569897177863, "grad_norm": 1.4037293195724487, "learning_rate": 1.3566170997930896e-05, "loss": 0.4999, "step": 14717 }, { "epoch": 0.4024830452854955, "grad_norm": 1.1280189752578735, "learning_rate": 1.3565343514273086e-05, "loss": 0.5441, "step": 14718 }, { "epoch": 0.4025103915992124, "grad_norm": 1.527491569519043, "learning_rate": 1.3564516002646157e-05, "loss": 0.4593, "step": 14719 }, { "epoch": 0.40253773791292935, "grad_norm": 1.3354570865631104, "learning_rate": 1.3563688463056606e-05, "loss": 0.8759, "step": 14720 }, { "epoch": 0.4025650842266463, "grad_norm": 1.3730231523513794, "learning_rate": 1.3562860895510919e-05, "loss": 0.5349, "step": 14721 }, { "epoch": 0.40259243054036314, "grad_norm": 1.7209731340408325, "learning_rate": 1.3562033300015594e-05, "loss": 0.5201, "step": 14722 }, { "epoch": 0.40261977685408007, "grad_norm": 1.3879365921020508, "learning_rate": 1.3561205676577118e-05, "loss": 0.5272, "step": 14723 }, { "epoch": 0.402647123167797, "grad_norm": 1.3321664333343506, "learning_rate": 1.3560378025201986e-05, "loss": 0.4557, "step": 14724 }, { "epoch": 0.4026744694815139, "grad_norm": 1.4098930358886719, "learning_rate": 1.3559550345896693e-05, "loss": 0.5078, "step": 14725 }, { "epoch": 0.4027018157952308, "grad_norm": 1.877592921257019, "learning_rate": 1.3558722638667728e-05, "loss": 0.3616, "step": 14726 }, { "epoch": 0.4027291621089477, "grad_norm": 1.4849624633789062, "learning_rate": 1.3557894903521584e-05, "loss": 0.4544, "step": 14727 }, { "epoch": 0.40275650842266464, "grad_norm": 1.1490106582641602, "learning_rate": 1.3557067140464757e-05, "loss": 0.3584, "step": 14728 }, { "epoch": 0.4027838547363815, "grad_norm": 1.4653408527374268, "learning_rate": 1.3556239349503737e-05, "loss": 0.4526, "step": 14729 }, { "epoch": 0.40281120105009843, "grad_norm": 1.2380354404449463, "learning_rate": 1.3555411530645022e-05, "loss": 0.488, "step": 14730 }, { "epoch": 0.40283854736381536, "grad_norm": 1.4889278411865234, "learning_rate": 1.3554583683895105e-05, "loss": 0.5671, "step": 14731 }, { "epoch": 0.4028658936775323, "grad_norm": 1.5003387928009033, "learning_rate": 1.3553755809260478e-05, "loss": 0.5372, "step": 14732 }, { "epoch": 0.40289323999124915, "grad_norm": 1.2640340328216553, "learning_rate": 1.3552927906747634e-05, "loss": 0.5291, "step": 14733 }, { "epoch": 0.4029205863049661, "grad_norm": 1.5068882703781128, "learning_rate": 1.3552099976363078e-05, "loss": 0.5288, "step": 14734 }, { "epoch": 0.402947932618683, "grad_norm": 1.2544585466384888, "learning_rate": 1.355127201811329e-05, "loss": 0.5149, "step": 14735 }, { "epoch": 0.4029752789323999, "grad_norm": 1.3165910243988037, "learning_rate": 1.3550444032004778e-05, "loss": 0.5399, "step": 14736 }, { "epoch": 0.4030026252461168, "grad_norm": 1.6983838081359863, "learning_rate": 1.3549616018044028e-05, "loss": 0.43, "step": 14737 }, { "epoch": 0.4030299715598337, "grad_norm": 1.5069905519485474, "learning_rate": 1.3548787976237538e-05, "loss": 0.5312, "step": 14738 }, { "epoch": 0.40305731787355065, "grad_norm": 1.0102373361587524, "learning_rate": 1.354795990659181e-05, "loss": 0.5146, "step": 14739 }, { "epoch": 0.40308466418726757, "grad_norm": 1.3109338283538818, "learning_rate": 1.354713180911333e-05, "loss": 0.4962, "step": 14740 }, { "epoch": 0.40311201050098444, "grad_norm": 1.4319671392440796, "learning_rate": 1.3546303683808603e-05, "loss": 0.43, "step": 14741 }, { "epoch": 0.40313935681470137, "grad_norm": 1.4301334619522095, "learning_rate": 1.354547553068412e-05, "loss": 0.5161, "step": 14742 }, { "epoch": 0.4031667031284183, "grad_norm": 1.3505191802978516, "learning_rate": 1.3544647349746378e-05, "loss": 0.5465, "step": 14743 }, { "epoch": 0.4031940494421352, "grad_norm": 1.3719135522842407, "learning_rate": 1.354381914100188e-05, "loss": 0.3717, "step": 14744 }, { "epoch": 0.4032213957558521, "grad_norm": 1.3287676572799683, "learning_rate": 1.3542990904457114e-05, "loss": 0.4957, "step": 14745 }, { "epoch": 0.403248742069569, "grad_norm": 1.2791504859924316, "learning_rate": 1.3542162640118582e-05, "loss": 0.4923, "step": 14746 }, { "epoch": 0.40327608838328594, "grad_norm": 1.2929232120513916, "learning_rate": 1.3541334347992783e-05, "loss": 0.5119, "step": 14747 }, { "epoch": 0.40330343469700286, "grad_norm": 1.3057342767715454, "learning_rate": 1.3540506028086208e-05, "loss": 0.509, "step": 14748 }, { "epoch": 0.40333078101071973, "grad_norm": 1.265636920928955, "learning_rate": 1.3539677680405363e-05, "loss": 0.5393, "step": 14749 }, { "epoch": 0.40335812732443665, "grad_norm": 1.314112901687622, "learning_rate": 1.3538849304956742e-05, "loss": 0.5449, "step": 14750 }, { "epoch": 0.4033854736381536, "grad_norm": 1.527377963066101, "learning_rate": 1.3538020901746845e-05, "loss": 0.4462, "step": 14751 }, { "epoch": 0.4034128199518705, "grad_norm": 1.3246104717254639, "learning_rate": 1.3537192470782169e-05, "loss": 0.5419, "step": 14752 }, { "epoch": 0.4034401662655874, "grad_norm": 2.3326830863952637, "learning_rate": 1.3536364012069211e-05, "loss": 0.5469, "step": 14753 }, { "epoch": 0.4034675125793043, "grad_norm": 1.7013144493103027, "learning_rate": 1.3535535525614476e-05, "loss": 0.5063, "step": 14754 }, { "epoch": 0.4034948588930212, "grad_norm": 1.6543375253677368, "learning_rate": 1.3534707011424459e-05, "loss": 0.5049, "step": 14755 }, { "epoch": 0.40352220520673815, "grad_norm": 1.5542014837265015, "learning_rate": 1.3533878469505659e-05, "loss": 0.8656, "step": 14756 }, { "epoch": 0.403549551520455, "grad_norm": 1.0990597009658813, "learning_rate": 1.3533049899864579e-05, "loss": 0.502, "step": 14757 }, { "epoch": 0.40357689783417194, "grad_norm": 1.4536471366882324, "learning_rate": 1.3532221302507715e-05, "loss": 0.4403, "step": 14758 }, { "epoch": 0.40360424414788887, "grad_norm": 1.2473267316818237, "learning_rate": 1.3531392677441571e-05, "loss": 0.5293, "step": 14759 }, { "epoch": 0.4036315904616058, "grad_norm": 1.3429628610610962, "learning_rate": 1.3530564024672644e-05, "loss": 0.5069, "step": 14760 }, { "epoch": 0.40365893677532266, "grad_norm": 1.4678465127944946, "learning_rate": 1.3529735344207438e-05, "loss": 0.5066, "step": 14761 }, { "epoch": 0.4036862830890396, "grad_norm": 1.6377195119857788, "learning_rate": 1.3528906636052448e-05, "loss": 0.441, "step": 14762 }, { "epoch": 0.4037136294027565, "grad_norm": 1.1832534074783325, "learning_rate": 1.3528077900214179e-05, "loss": 0.5067, "step": 14763 }, { "epoch": 0.40374097571647344, "grad_norm": 1.1840406656265259, "learning_rate": 1.3527249136699134e-05, "loss": 0.5006, "step": 14764 }, { "epoch": 0.4037683220301903, "grad_norm": 1.1311237812042236, "learning_rate": 1.3526420345513809e-05, "loss": 0.5142, "step": 14765 }, { "epoch": 0.40379566834390723, "grad_norm": 1.3419266939163208, "learning_rate": 1.3525591526664712e-05, "loss": 0.5292, "step": 14766 }, { "epoch": 0.40382301465762416, "grad_norm": 1.409993052482605, "learning_rate": 1.3524762680158338e-05, "loss": 0.3759, "step": 14767 }, { "epoch": 0.4038503609713411, "grad_norm": 1.3540658950805664, "learning_rate": 1.3523933806001197e-05, "loss": 0.5317, "step": 14768 }, { "epoch": 0.40387770728505795, "grad_norm": 1.3575447797775269, "learning_rate": 1.3523104904199786e-05, "loss": 0.5436, "step": 14769 }, { "epoch": 0.4039050535987749, "grad_norm": 1.3312532901763916, "learning_rate": 1.3522275974760606e-05, "loss": 0.544, "step": 14770 }, { "epoch": 0.4039323999124918, "grad_norm": 4.6058549880981445, "learning_rate": 1.3521447017690165e-05, "loss": 0.3611, "step": 14771 }, { "epoch": 0.4039597462262087, "grad_norm": 1.3380138874053955, "learning_rate": 1.352061803299496e-05, "loss": 0.5152, "step": 14772 }, { "epoch": 0.4039870925399256, "grad_norm": 1.2881803512573242, "learning_rate": 1.3519789020681499e-05, "loss": 0.4945, "step": 14773 }, { "epoch": 0.4040144388536425, "grad_norm": 1.5257261991500854, "learning_rate": 1.3518959980756285e-05, "loss": 0.4623, "step": 14774 }, { "epoch": 0.40404178516735945, "grad_norm": 1.3021459579467773, "learning_rate": 1.351813091322582e-05, "loss": 0.5389, "step": 14775 }, { "epoch": 0.40406913148107637, "grad_norm": 1.4400761127471924, "learning_rate": 1.3517301818096607e-05, "loss": 0.8376, "step": 14776 }, { "epoch": 0.40409647779479324, "grad_norm": 1.5345346927642822, "learning_rate": 1.351647269537515e-05, "loss": 0.5143, "step": 14777 }, { "epoch": 0.40412382410851017, "grad_norm": 1.7532196044921875, "learning_rate": 1.3515643545067955e-05, "loss": 0.5162, "step": 14778 }, { "epoch": 0.4041511704222271, "grad_norm": 1.3747950792312622, "learning_rate": 1.351481436718153e-05, "loss": 0.5013, "step": 14779 }, { "epoch": 0.404178516735944, "grad_norm": 1.3664884567260742, "learning_rate": 1.3513985161722371e-05, "loss": 0.5136, "step": 14780 }, { "epoch": 0.4042058630496609, "grad_norm": 1.686801791191101, "learning_rate": 1.3513155928696988e-05, "loss": 0.4177, "step": 14781 }, { "epoch": 0.4042332093633778, "grad_norm": 1.3235081434249878, "learning_rate": 1.3512326668111888e-05, "loss": 0.5195, "step": 14782 }, { "epoch": 0.40426055567709474, "grad_norm": 1.4739044904708862, "learning_rate": 1.351149737997357e-05, "loss": 0.5381, "step": 14783 }, { "epoch": 0.40428790199081166, "grad_norm": 1.3981997966766357, "learning_rate": 1.3510668064288544e-05, "loss": 0.5122, "step": 14784 }, { "epoch": 0.40431524830452853, "grad_norm": 1.3912569284439087, "learning_rate": 1.3509838721063318e-05, "loss": 0.5284, "step": 14785 }, { "epoch": 0.40434259461824545, "grad_norm": 1.4833101034164429, "learning_rate": 1.3509009350304392e-05, "loss": 0.546, "step": 14786 }, { "epoch": 0.4043699409319624, "grad_norm": 1.5039983987808228, "learning_rate": 1.3508179952018275e-05, "loss": 0.5038, "step": 14787 }, { "epoch": 0.4043972872456793, "grad_norm": 1.236428141593933, "learning_rate": 1.3507350526211474e-05, "loss": 0.8382, "step": 14788 }, { "epoch": 0.4044246335593962, "grad_norm": 1.1643755435943604, "learning_rate": 1.3506521072890495e-05, "loss": 0.5277, "step": 14789 }, { "epoch": 0.4044519798731131, "grad_norm": 1.6633354425430298, "learning_rate": 1.3505691592061846e-05, "loss": 0.5465, "step": 14790 }, { "epoch": 0.40447932618683, "grad_norm": 1.1848187446594238, "learning_rate": 1.3504862083732034e-05, "loss": 0.546, "step": 14791 }, { "epoch": 0.40450667250054695, "grad_norm": 1.370037317276001, "learning_rate": 1.3504032547907562e-05, "loss": 0.5146, "step": 14792 }, { "epoch": 0.4045340188142638, "grad_norm": 1.3292771577835083, "learning_rate": 1.3503202984594945e-05, "loss": 0.514, "step": 14793 }, { "epoch": 0.40456136512798074, "grad_norm": 1.525355339050293, "learning_rate": 1.3502373393800683e-05, "loss": 0.537, "step": 14794 }, { "epoch": 0.40458871144169767, "grad_norm": 1.4437873363494873, "learning_rate": 1.350154377553129e-05, "loss": 0.467, "step": 14795 }, { "epoch": 0.4046160577554146, "grad_norm": 1.298466682434082, "learning_rate": 1.3500714129793268e-05, "loss": 0.5429, "step": 14796 }, { "epoch": 0.40464340406913146, "grad_norm": 2.2135300636291504, "learning_rate": 1.3499884456593133e-05, "loss": 0.5005, "step": 14797 }, { "epoch": 0.4046707503828484, "grad_norm": 1.2264405488967896, "learning_rate": 1.3499054755937386e-05, "loss": 0.5429, "step": 14798 }, { "epoch": 0.4046980966965653, "grad_norm": 1.5222755670547485, "learning_rate": 1.3498225027832543e-05, "loss": 0.5437, "step": 14799 }, { "epoch": 0.40472544301028224, "grad_norm": 1.4231786727905273, "learning_rate": 1.3497395272285106e-05, "loss": 0.5362, "step": 14800 }, { "epoch": 0.4047527893239991, "grad_norm": 1.644899845123291, "learning_rate": 1.3496565489301587e-05, "loss": 0.4668, "step": 14801 }, { "epoch": 0.40478013563771603, "grad_norm": 1.8344229459762573, "learning_rate": 1.3495735678888497e-05, "loss": 0.5696, "step": 14802 }, { "epoch": 0.40480748195143296, "grad_norm": 1.279524803161621, "learning_rate": 1.3494905841052343e-05, "loss": 0.8082, "step": 14803 }, { "epoch": 0.4048348282651499, "grad_norm": 1.0961498022079468, "learning_rate": 1.3494075975799638e-05, "loss": 0.5261, "step": 14804 }, { "epoch": 0.40486217457886675, "grad_norm": 1.2966512441635132, "learning_rate": 1.3493246083136891e-05, "loss": 0.5144, "step": 14805 }, { "epoch": 0.4048895208925837, "grad_norm": 1.5831470489501953, "learning_rate": 1.349241616307061e-05, "loss": 0.5323, "step": 14806 }, { "epoch": 0.4049168672063006, "grad_norm": 1.3214566707611084, "learning_rate": 1.3491586215607307e-05, "loss": 0.5088, "step": 14807 }, { "epoch": 0.4049442135200175, "grad_norm": 1.276100993156433, "learning_rate": 1.3490756240753494e-05, "loss": 0.528, "step": 14808 }, { "epoch": 0.4049715598337344, "grad_norm": 1.5039259195327759, "learning_rate": 1.3489926238515678e-05, "loss": 0.4431, "step": 14809 }, { "epoch": 0.4049989061474513, "grad_norm": 5.35667085647583, "learning_rate": 1.3489096208900375e-05, "loss": 0.4247, "step": 14810 }, { "epoch": 0.40502625246116825, "grad_norm": 1.406874418258667, "learning_rate": 1.3488266151914093e-05, "loss": 0.5004, "step": 14811 }, { "epoch": 0.40505359877488517, "grad_norm": 1.5388505458831787, "learning_rate": 1.3487436067563347e-05, "loss": 0.4839, "step": 14812 }, { "epoch": 0.40508094508860204, "grad_norm": 1.6470531225204468, "learning_rate": 1.3486605955854644e-05, "loss": 0.5424, "step": 14813 }, { "epoch": 0.40510829140231897, "grad_norm": 1.5405066013336182, "learning_rate": 1.34857758167945e-05, "loss": 0.4474, "step": 14814 }, { "epoch": 0.4051356377160359, "grad_norm": 1.342915415763855, "learning_rate": 1.3484945650389426e-05, "loss": 0.508, "step": 14815 }, { "epoch": 0.4051629840297528, "grad_norm": 1.2248613834381104, "learning_rate": 1.3484115456645932e-05, "loss": 0.5383, "step": 14816 }, { "epoch": 0.4051903303434697, "grad_norm": 1.239653468132019, "learning_rate": 1.3483285235570533e-05, "loss": 0.5188, "step": 14817 }, { "epoch": 0.4052176766571866, "grad_norm": 1.2851135730743408, "learning_rate": 1.3482454987169745e-05, "loss": 0.4969, "step": 14818 }, { "epoch": 0.40524502297090353, "grad_norm": 2.0271949768066406, "learning_rate": 1.3481624711450075e-05, "loss": 0.5094, "step": 14819 }, { "epoch": 0.40527236928462046, "grad_norm": 3.2445523738861084, "learning_rate": 1.3480794408418039e-05, "loss": 0.3969, "step": 14820 }, { "epoch": 0.40529971559833733, "grad_norm": 2.1767418384552, "learning_rate": 1.347996407808015e-05, "loss": 0.5794, "step": 14821 }, { "epoch": 0.40532706191205425, "grad_norm": 1.317954182624817, "learning_rate": 1.3479133720442921e-05, "loss": 0.5388, "step": 14822 }, { "epoch": 0.4053544082257712, "grad_norm": 1.209659457206726, "learning_rate": 1.3478303335512872e-05, "loss": 0.8164, "step": 14823 }, { "epoch": 0.4053817545394881, "grad_norm": 1.450978398323059, "learning_rate": 1.3477472923296508e-05, "loss": 0.5281, "step": 14824 }, { "epoch": 0.405409100853205, "grad_norm": 1.0762439966201782, "learning_rate": 1.3476642483800348e-05, "loss": 0.5123, "step": 14825 }, { "epoch": 0.4054364471669219, "grad_norm": 2.602968692779541, "learning_rate": 1.3475812017030907e-05, "loss": 0.4217, "step": 14826 }, { "epoch": 0.4054637934806388, "grad_norm": 1.2980189323425293, "learning_rate": 1.3474981522994698e-05, "loss": 0.823, "step": 14827 }, { "epoch": 0.40549113979435575, "grad_norm": 1.1469933986663818, "learning_rate": 1.347415100169824e-05, "loss": 0.5063, "step": 14828 }, { "epoch": 0.4055184861080726, "grad_norm": 1.722838282585144, "learning_rate": 1.3473320453148039e-05, "loss": 0.5198, "step": 14829 }, { "epoch": 0.40554583242178954, "grad_norm": 1.232016682624817, "learning_rate": 1.3472489877350621e-05, "loss": 0.5105, "step": 14830 }, { "epoch": 0.40557317873550647, "grad_norm": 1.1713790893554688, "learning_rate": 1.3471659274312497e-05, "loss": 0.5353, "step": 14831 }, { "epoch": 0.40560052504922334, "grad_norm": 1.4611014127731323, "learning_rate": 1.3470828644040183e-05, "loss": 0.4193, "step": 14832 }, { "epoch": 0.40562787136294026, "grad_norm": 1.4867030382156372, "learning_rate": 1.3469997986540195e-05, "loss": 0.5358, "step": 14833 }, { "epoch": 0.4056552176766572, "grad_norm": 1.3078091144561768, "learning_rate": 1.346916730181905e-05, "loss": 0.4521, "step": 14834 }, { "epoch": 0.4056825639903741, "grad_norm": 1.2182097434997559, "learning_rate": 1.3468336589883261e-05, "loss": 0.5306, "step": 14835 }, { "epoch": 0.405709910304091, "grad_norm": 1.233120322227478, "learning_rate": 1.3467505850739351e-05, "loss": 0.4331, "step": 14836 }, { "epoch": 0.4057372566178079, "grad_norm": 1.041638970375061, "learning_rate": 1.346667508439383e-05, "loss": 0.5327, "step": 14837 }, { "epoch": 0.40576460293152483, "grad_norm": 1.189841389656067, "learning_rate": 1.346584429085322e-05, "loss": 0.5296, "step": 14838 }, { "epoch": 0.40579194924524176, "grad_norm": 1.3510887622833252, "learning_rate": 1.3465013470124038e-05, "loss": 0.5618, "step": 14839 }, { "epoch": 0.4058192955589586, "grad_norm": 8.694178581237793, "learning_rate": 1.3464182622212798e-05, "loss": 0.7779, "step": 14840 }, { "epoch": 0.40584664187267555, "grad_norm": 1.28368079662323, "learning_rate": 1.346335174712602e-05, "loss": 0.4908, "step": 14841 }, { "epoch": 0.4058739881863925, "grad_norm": 1.6481810808181763, "learning_rate": 1.3462520844870226e-05, "loss": 0.5229, "step": 14842 }, { "epoch": 0.4059013345001094, "grad_norm": 1.0466729402542114, "learning_rate": 1.3461689915451927e-05, "loss": 0.5299, "step": 14843 }, { "epoch": 0.40592868081382627, "grad_norm": 1.3262513875961304, "learning_rate": 1.346085895887765e-05, "loss": 0.5302, "step": 14844 }, { "epoch": 0.4059560271275432, "grad_norm": 1.2917691469192505, "learning_rate": 1.3460027975153903e-05, "loss": 0.5251, "step": 14845 }, { "epoch": 0.4059833734412601, "grad_norm": 1.5722386837005615, "learning_rate": 1.3459196964287209e-05, "loss": 0.3904, "step": 14846 }, { "epoch": 0.40601071975497705, "grad_norm": 1.103576898574829, "learning_rate": 1.3458365926284093e-05, "loss": 0.5, "step": 14847 }, { "epoch": 0.4060380660686939, "grad_norm": 1.5161774158477783, "learning_rate": 1.3457534861151066e-05, "loss": 0.4844, "step": 14848 }, { "epoch": 0.40606541238241084, "grad_norm": 1.3181020021438599, "learning_rate": 1.3456703768894657e-05, "loss": 0.4753, "step": 14849 }, { "epoch": 0.40609275869612776, "grad_norm": 1.2567706108093262, "learning_rate": 1.3455872649521375e-05, "loss": 0.5295, "step": 14850 }, { "epoch": 0.4061201050098447, "grad_norm": 1.3316470384597778, "learning_rate": 1.3455041503037744e-05, "loss": 0.5251, "step": 14851 }, { "epoch": 0.40614745132356156, "grad_norm": 4.480909824371338, "learning_rate": 1.3454210329450286e-05, "loss": 0.8754, "step": 14852 }, { "epoch": 0.4061747976372785, "grad_norm": 1.7536377906799316, "learning_rate": 1.345337912876552e-05, "loss": 0.8372, "step": 14853 }, { "epoch": 0.4062021439509954, "grad_norm": 1.172338604927063, "learning_rate": 1.3452547900989968e-05, "loss": 0.4846, "step": 14854 }, { "epoch": 0.40622949026471233, "grad_norm": 1.3329614400863647, "learning_rate": 1.3451716646130148e-05, "loss": 0.5475, "step": 14855 }, { "epoch": 0.4062568365784292, "grad_norm": 1.3199207782745361, "learning_rate": 1.345088536419258e-05, "loss": 0.8274, "step": 14856 }, { "epoch": 0.40628418289214613, "grad_norm": 1.527101993560791, "learning_rate": 1.345005405518379e-05, "loss": 0.4422, "step": 14857 }, { "epoch": 0.40631152920586305, "grad_norm": 1.4603737592697144, "learning_rate": 1.3449222719110298e-05, "loss": 0.4478, "step": 14858 }, { "epoch": 0.40633887551958, "grad_norm": 1.2592225074768066, "learning_rate": 1.3448391355978622e-05, "loss": 0.489, "step": 14859 }, { "epoch": 0.40636622183329685, "grad_norm": 1.3765991926193237, "learning_rate": 1.3447559965795289e-05, "loss": 0.5312, "step": 14860 }, { "epoch": 0.4063935681470138, "grad_norm": 1.3283051252365112, "learning_rate": 1.3446728548566817e-05, "loss": 0.5572, "step": 14861 }, { "epoch": 0.4064209144607307, "grad_norm": 1.4956552982330322, "learning_rate": 1.3445897104299728e-05, "loss": 0.8527, "step": 14862 }, { "epoch": 0.4064482607744476, "grad_norm": 1.532206654548645, "learning_rate": 1.3445065633000548e-05, "loss": 0.5328, "step": 14863 }, { "epoch": 0.4064756070881645, "grad_norm": 1.288705587387085, "learning_rate": 1.3444234134675796e-05, "loss": 0.5468, "step": 14864 }, { "epoch": 0.4065029534018814, "grad_norm": 1.4966849088668823, "learning_rate": 1.3443402609331995e-05, "loss": 0.5, "step": 14865 }, { "epoch": 0.40653029971559834, "grad_norm": 1.4518846273422241, "learning_rate": 1.3442571056975674e-05, "loss": 0.5309, "step": 14866 }, { "epoch": 0.40655764602931527, "grad_norm": 1.374060034751892, "learning_rate": 1.3441739477613351e-05, "loss": 0.5537, "step": 14867 }, { "epoch": 0.40658499234303214, "grad_norm": 1.5093843936920166, "learning_rate": 1.344090787125155e-05, "loss": 0.5153, "step": 14868 }, { "epoch": 0.40661233865674906, "grad_norm": 1.0963469743728638, "learning_rate": 1.3440076237896792e-05, "loss": 0.5246, "step": 14869 }, { "epoch": 0.406639684970466, "grad_norm": 1.321397304534912, "learning_rate": 1.3439244577555606e-05, "loss": 0.4807, "step": 14870 }, { "epoch": 0.4066670312841829, "grad_norm": 1.5906678438186646, "learning_rate": 1.3438412890234517e-05, "loss": 0.3979, "step": 14871 }, { "epoch": 0.4066943775978998, "grad_norm": 1.0661060810089111, "learning_rate": 1.3437581175940042e-05, "loss": 0.4166, "step": 14872 }, { "epoch": 0.4067217239116167, "grad_norm": 2.1751720905303955, "learning_rate": 1.3436749434678711e-05, "loss": 0.4586, "step": 14873 }, { "epoch": 0.40674907022533363, "grad_norm": 4.2870683670043945, "learning_rate": 1.3435917666457052e-05, "loss": 0.4305, "step": 14874 }, { "epoch": 0.40677641653905056, "grad_norm": 1.3078843355178833, "learning_rate": 1.3435085871281581e-05, "loss": 0.8181, "step": 14875 }, { "epoch": 0.4068037628527674, "grad_norm": 1.4619190692901611, "learning_rate": 1.343425404915883e-05, "loss": 0.5516, "step": 14876 }, { "epoch": 0.40683110916648435, "grad_norm": 1.2765361070632935, "learning_rate": 1.3433422200095323e-05, "loss": 0.518, "step": 14877 }, { "epoch": 0.4068584554802013, "grad_norm": 3.633305788040161, "learning_rate": 1.3432590324097582e-05, "loss": 0.5255, "step": 14878 }, { "epoch": 0.4068858017939182, "grad_norm": 1.6644266843795776, "learning_rate": 1.3431758421172141e-05, "loss": 0.5553, "step": 14879 }, { "epoch": 0.40691314810763507, "grad_norm": 4.749633312225342, "learning_rate": 1.3430926491325518e-05, "loss": 0.4291, "step": 14880 }, { "epoch": 0.406940494421352, "grad_norm": 1.459166169166565, "learning_rate": 1.343009453456424e-05, "loss": 0.5485, "step": 14881 }, { "epoch": 0.4069678407350689, "grad_norm": 1.5516738891601562, "learning_rate": 1.3429262550894841e-05, "loss": 0.5717, "step": 14882 }, { "epoch": 0.40699518704878584, "grad_norm": 1.7358191013336182, "learning_rate": 1.3428430540323839e-05, "loss": 0.4591, "step": 14883 }, { "epoch": 0.4070225333625027, "grad_norm": 1.1118782758712769, "learning_rate": 1.3427598502857763e-05, "loss": 0.5369, "step": 14884 }, { "epoch": 0.40704987967621964, "grad_norm": 1.2573528289794922, "learning_rate": 1.3426766438503146e-05, "loss": 0.5604, "step": 14885 }, { "epoch": 0.40707722598993656, "grad_norm": 1.4288760423660278, "learning_rate": 1.3425934347266508e-05, "loss": 0.5497, "step": 14886 }, { "epoch": 0.4071045723036535, "grad_norm": 1.543030858039856, "learning_rate": 1.342510222915438e-05, "loss": 0.5447, "step": 14887 }, { "epoch": 0.40713191861737036, "grad_norm": 1.3793846368789673, "learning_rate": 1.3424270084173288e-05, "loss": 0.5498, "step": 14888 }, { "epoch": 0.4071592649310873, "grad_norm": 1.372512698173523, "learning_rate": 1.342343791232976e-05, "loss": 0.8103, "step": 14889 }, { "epoch": 0.4071866112448042, "grad_norm": 1.178017497062683, "learning_rate": 1.3422605713630326e-05, "loss": 0.5366, "step": 14890 }, { "epoch": 0.40721395755852113, "grad_norm": 1.4724314212799072, "learning_rate": 1.3421773488081513e-05, "loss": 0.5183, "step": 14891 }, { "epoch": 0.407241303872238, "grad_norm": 2.336768865585327, "learning_rate": 1.3420941235689848e-05, "loss": 0.5078, "step": 14892 }, { "epoch": 0.40726865018595493, "grad_norm": 1.1560779809951782, "learning_rate": 1.3420108956461866e-05, "loss": 0.812, "step": 14893 }, { "epoch": 0.40729599649967185, "grad_norm": 1.3937854766845703, "learning_rate": 1.3419276650404089e-05, "loss": 0.3527, "step": 14894 }, { "epoch": 0.4073233428133888, "grad_norm": 1.8022481203079224, "learning_rate": 1.3418444317523047e-05, "loss": 0.5168, "step": 14895 }, { "epoch": 0.40735068912710565, "grad_norm": 1.412137508392334, "learning_rate": 1.3417611957825275e-05, "loss": 0.5527, "step": 14896 }, { "epoch": 0.4073780354408226, "grad_norm": 1.5889034271240234, "learning_rate": 1.3416779571317298e-05, "loss": 0.5256, "step": 14897 }, { "epoch": 0.4074053817545395, "grad_norm": 1.4024723768234253, "learning_rate": 1.341594715800565e-05, "loss": 0.4488, "step": 14898 }, { "epoch": 0.4074327280682564, "grad_norm": 1.4960429668426514, "learning_rate": 1.3415114717896853e-05, "loss": 0.5407, "step": 14899 }, { "epoch": 0.4074600743819733, "grad_norm": 1.246357798576355, "learning_rate": 1.3414282250997443e-05, "loss": 0.5003, "step": 14900 }, { "epoch": 0.4074874206956902, "grad_norm": 1.2750532627105713, "learning_rate": 1.3413449757313952e-05, "loss": 0.5176, "step": 14901 }, { "epoch": 0.40751476700940714, "grad_norm": 1.0534297227859497, "learning_rate": 1.3412617236852907e-05, "loss": 0.3762, "step": 14902 }, { "epoch": 0.40754211332312407, "grad_norm": 1.3440659046173096, "learning_rate": 1.341178468962084e-05, "loss": 0.4088, "step": 14903 }, { "epoch": 0.40756945963684094, "grad_norm": 1.2830132246017456, "learning_rate": 1.3410952115624283e-05, "loss": 0.5318, "step": 14904 }, { "epoch": 0.40759680595055786, "grad_norm": 1.3267478942871094, "learning_rate": 1.3410119514869764e-05, "loss": 0.5188, "step": 14905 }, { "epoch": 0.4076241522642748, "grad_norm": 1.2541160583496094, "learning_rate": 1.3409286887363817e-05, "loss": 0.5153, "step": 14906 }, { "epoch": 0.4076514985779917, "grad_norm": 1.3518328666687012, "learning_rate": 1.3408454233112977e-05, "loss": 0.5664, "step": 14907 }, { "epoch": 0.4076788448917086, "grad_norm": 1.2691960334777832, "learning_rate": 1.340762155212377e-05, "loss": 0.4808, "step": 14908 }, { "epoch": 0.4077061912054255, "grad_norm": 1.7355822324752808, "learning_rate": 1.3406788844402736e-05, "loss": 0.5484, "step": 14909 }, { "epoch": 0.40773353751914243, "grad_norm": 1.1579817533493042, "learning_rate": 1.3405956109956398e-05, "loss": 0.5435, "step": 14910 }, { "epoch": 0.40776088383285936, "grad_norm": 1.3045775890350342, "learning_rate": 1.340512334879129e-05, "loss": 0.4945, "step": 14911 }, { "epoch": 0.4077882301465762, "grad_norm": 1.2929494380950928, "learning_rate": 1.3404290560913955e-05, "loss": 0.4557, "step": 14912 }, { "epoch": 0.40781557646029315, "grad_norm": 1.2101023197174072, "learning_rate": 1.3403457746330912e-05, "loss": 0.4956, "step": 14913 }, { "epoch": 0.4078429227740101, "grad_norm": 1.3224941492080688, "learning_rate": 1.3402624905048704e-05, "loss": 0.535, "step": 14914 }, { "epoch": 0.407870269087727, "grad_norm": 1.2076051235198975, "learning_rate": 1.3401792037073862e-05, "loss": 0.5449, "step": 14915 }, { "epoch": 0.40789761540144387, "grad_norm": 1.3034799098968506, "learning_rate": 1.3400959142412915e-05, "loss": 0.5197, "step": 14916 }, { "epoch": 0.4079249617151608, "grad_norm": 1.4250216484069824, "learning_rate": 1.3400126221072404e-05, "loss": 0.5452, "step": 14917 }, { "epoch": 0.4079523080288777, "grad_norm": 1.2683712244033813, "learning_rate": 1.3399293273058858e-05, "loss": 0.5225, "step": 14918 }, { "epoch": 0.40797965434259464, "grad_norm": 1.4327049255371094, "learning_rate": 1.3398460298378812e-05, "loss": 0.5299, "step": 14919 }, { "epoch": 0.4080070006563115, "grad_norm": 1.318810224533081, "learning_rate": 1.3397627297038809e-05, "loss": 0.5625, "step": 14920 }, { "epoch": 0.40803434697002844, "grad_norm": 1.3538025617599487, "learning_rate": 1.3396794269045368e-05, "loss": 0.473, "step": 14921 }, { "epoch": 0.40806169328374536, "grad_norm": 1.193458914756775, "learning_rate": 1.3395961214405034e-05, "loss": 0.4955, "step": 14922 }, { "epoch": 0.4080890395974623, "grad_norm": 1.2269099950790405, "learning_rate": 1.339512813312434e-05, "loss": 0.5045, "step": 14923 }, { "epoch": 0.40811638591117916, "grad_norm": 1.5741839408874512, "learning_rate": 1.3394295025209821e-05, "loss": 0.5425, "step": 14924 }, { "epoch": 0.4081437322248961, "grad_norm": 1.2464417219161987, "learning_rate": 1.3393461890668014e-05, "loss": 0.8217, "step": 14925 }, { "epoch": 0.408171078538613, "grad_norm": 1.4113847017288208, "learning_rate": 1.339262872950545e-05, "loss": 0.5357, "step": 14926 }, { "epoch": 0.40819842485232993, "grad_norm": 1.5028102397918701, "learning_rate": 1.3391795541728672e-05, "loss": 0.4507, "step": 14927 }, { "epoch": 0.4082257711660468, "grad_norm": 1.2867834568023682, "learning_rate": 1.3390962327344213e-05, "loss": 0.5377, "step": 14928 }, { "epoch": 0.4082531174797637, "grad_norm": 1.21572744846344, "learning_rate": 1.3390129086358606e-05, "loss": 0.5454, "step": 14929 }, { "epoch": 0.40828046379348065, "grad_norm": 1.3860937356948853, "learning_rate": 1.338929581877839e-05, "loss": 0.5333, "step": 14930 }, { "epoch": 0.4083078101071976, "grad_norm": 1.2336970567703247, "learning_rate": 1.3388462524610107e-05, "loss": 0.5316, "step": 14931 }, { "epoch": 0.40833515642091445, "grad_norm": 1.412678599357605, "learning_rate": 1.3387629203860284e-05, "loss": 0.4615, "step": 14932 }, { "epoch": 0.40836250273463137, "grad_norm": 1.2034744024276733, "learning_rate": 1.3386795856535466e-05, "loss": 0.5266, "step": 14933 }, { "epoch": 0.4083898490483483, "grad_norm": 1.1999400854110718, "learning_rate": 1.3385962482642185e-05, "loss": 0.5335, "step": 14934 }, { "epoch": 0.40841719536206517, "grad_norm": 1.131591558456421, "learning_rate": 1.3385129082186983e-05, "loss": 0.5326, "step": 14935 }, { "epoch": 0.4084445416757821, "grad_norm": 1.1801037788391113, "learning_rate": 1.3384295655176396e-05, "loss": 0.5062, "step": 14936 }, { "epoch": 0.408471887989499, "grad_norm": 1.3229159116744995, "learning_rate": 1.3383462201616963e-05, "loss": 0.5155, "step": 14937 }, { "epoch": 0.40849923430321594, "grad_norm": 1.1521611213684082, "learning_rate": 1.338262872151522e-05, "loss": 0.5514, "step": 14938 }, { "epoch": 0.4085265806169328, "grad_norm": 1.000489354133606, "learning_rate": 1.3381795214877706e-05, "loss": 0.5176, "step": 14939 }, { "epoch": 0.40855392693064974, "grad_norm": 1.2373093366622925, "learning_rate": 1.3380961681710962e-05, "loss": 0.5221, "step": 14940 }, { "epoch": 0.40858127324436666, "grad_norm": 1.529296875, "learning_rate": 1.3380128122021522e-05, "loss": 0.4472, "step": 14941 }, { "epoch": 0.4086086195580836, "grad_norm": 1.1963739395141602, "learning_rate": 1.3379294535815932e-05, "loss": 0.5291, "step": 14942 }, { "epoch": 0.40863596587180046, "grad_norm": 1.5005841255187988, "learning_rate": 1.3378460923100724e-05, "loss": 0.4333, "step": 14943 }, { "epoch": 0.4086633121855174, "grad_norm": 1.3816585540771484, "learning_rate": 1.3377627283882446e-05, "loss": 0.5155, "step": 14944 }, { "epoch": 0.4086906584992343, "grad_norm": 1.257348895072937, "learning_rate": 1.3376793618167628e-05, "loss": 0.5221, "step": 14945 }, { "epoch": 0.40871800481295123, "grad_norm": 1.21913743019104, "learning_rate": 1.3375959925962814e-05, "loss": 0.5155, "step": 14946 }, { "epoch": 0.4087453511266681, "grad_norm": 1.4095515012741089, "learning_rate": 1.3375126207274547e-05, "loss": 0.5267, "step": 14947 }, { "epoch": 0.408772697440385, "grad_norm": 1.2101658582687378, "learning_rate": 1.3374292462109361e-05, "loss": 0.5396, "step": 14948 }, { "epoch": 0.40880004375410195, "grad_norm": 1.656929850578308, "learning_rate": 1.3373458690473805e-05, "loss": 0.5213, "step": 14949 }, { "epoch": 0.4088273900678189, "grad_norm": 1.4385257959365845, "learning_rate": 1.3372624892374413e-05, "loss": 0.5378, "step": 14950 }, { "epoch": 0.40885473638153574, "grad_norm": 1.3945567607879639, "learning_rate": 1.3371791067817725e-05, "loss": 0.5363, "step": 14951 }, { "epoch": 0.40888208269525267, "grad_norm": 1.1449142694473267, "learning_rate": 1.3370957216810289e-05, "loss": 0.5118, "step": 14952 }, { "epoch": 0.4089094290089696, "grad_norm": 1.2584744691848755, "learning_rate": 1.337012333935864e-05, "loss": 0.5141, "step": 14953 }, { "epoch": 0.4089367753226865, "grad_norm": 1.2354881763458252, "learning_rate": 1.3369289435469322e-05, "loss": 0.534, "step": 14954 }, { "epoch": 0.4089641216364034, "grad_norm": 1.6026965379714966, "learning_rate": 1.336845550514888e-05, "loss": 0.4619, "step": 14955 }, { "epoch": 0.4089914679501203, "grad_norm": 1.2404245138168335, "learning_rate": 1.3367621548403851e-05, "loss": 0.5326, "step": 14956 }, { "epoch": 0.40901881426383724, "grad_norm": 1.5781731605529785, "learning_rate": 1.3366787565240776e-05, "loss": 0.5587, "step": 14957 }, { "epoch": 0.40904616057755416, "grad_norm": 1.4997342824935913, "learning_rate": 1.3365953555666202e-05, "loss": 0.4351, "step": 14958 }, { "epoch": 0.40907350689127103, "grad_norm": 1.2626532316207886, "learning_rate": 1.3365119519686669e-05, "loss": 0.5514, "step": 14959 }, { "epoch": 0.40910085320498796, "grad_norm": 1.5385583639144897, "learning_rate": 1.3364285457308718e-05, "loss": 0.5073, "step": 14960 }, { "epoch": 0.4091281995187049, "grad_norm": 1.6408592462539673, "learning_rate": 1.33634513685389e-05, "loss": 0.8139, "step": 14961 }, { "epoch": 0.4091555458324218, "grad_norm": 1.5439695119857788, "learning_rate": 1.3362617253383748e-05, "loss": 0.4602, "step": 14962 }, { "epoch": 0.4091828921461387, "grad_norm": 1.2490692138671875, "learning_rate": 1.3361783111849813e-05, "loss": 0.5431, "step": 14963 }, { "epoch": 0.4092102384598556, "grad_norm": 1.3877196311950684, "learning_rate": 1.3360948943943632e-05, "loss": 0.5115, "step": 14964 }, { "epoch": 0.4092375847735725, "grad_norm": 1.2288447618484497, "learning_rate": 1.3360114749671756e-05, "loss": 0.5536, "step": 14965 }, { "epoch": 0.40926493108728945, "grad_norm": 1.1246132850646973, "learning_rate": 1.3359280529040724e-05, "loss": 0.5167, "step": 14966 }, { "epoch": 0.4092922774010063, "grad_norm": 1.143784761428833, "learning_rate": 1.335844628205708e-05, "loss": 0.5582, "step": 14967 }, { "epoch": 0.40931962371472325, "grad_norm": 1.609472393989563, "learning_rate": 1.3357612008727374e-05, "loss": 0.4453, "step": 14968 }, { "epoch": 0.40934697002844017, "grad_norm": 1.366606593132019, "learning_rate": 1.3356777709058144e-05, "loss": 0.5069, "step": 14969 }, { "epoch": 0.4093743163421571, "grad_norm": 1.554384708404541, "learning_rate": 1.3355943383055935e-05, "loss": 0.4875, "step": 14970 }, { "epoch": 0.40940166265587397, "grad_norm": 1.7527629137039185, "learning_rate": 1.33551090307273e-05, "loss": 0.3765, "step": 14971 }, { "epoch": 0.4094290089695909, "grad_norm": 1.7856359481811523, "learning_rate": 1.3354274652078774e-05, "loss": 0.5366, "step": 14972 }, { "epoch": 0.4094563552833078, "grad_norm": 1.5487688779830933, "learning_rate": 1.3353440247116912e-05, "loss": 0.4449, "step": 14973 }, { "epoch": 0.40948370159702474, "grad_norm": 1.1976649761199951, "learning_rate": 1.3352605815848254e-05, "loss": 0.5318, "step": 14974 }, { "epoch": 0.4095110479107416, "grad_norm": 1.255475640296936, "learning_rate": 1.3351771358279343e-05, "loss": 0.5667, "step": 14975 }, { "epoch": 0.40953839422445854, "grad_norm": 1.2463386058807373, "learning_rate": 1.3350936874416731e-05, "loss": 0.5248, "step": 14976 }, { "epoch": 0.40956574053817546, "grad_norm": 1.1881659030914307, "learning_rate": 1.3350102364266966e-05, "loss": 0.5352, "step": 14977 }, { "epoch": 0.4095930868518924, "grad_norm": 1.2345982789993286, "learning_rate": 1.3349267827836584e-05, "loss": 0.5471, "step": 14978 }, { "epoch": 0.40962043316560925, "grad_norm": 1.1673790216445923, "learning_rate": 1.3348433265132145e-05, "loss": 0.5184, "step": 14979 }, { "epoch": 0.4096477794793262, "grad_norm": 1.0452113151550293, "learning_rate": 1.3347598676160185e-05, "loss": 0.5398, "step": 14980 }, { "epoch": 0.4096751257930431, "grad_norm": 1.375754714012146, "learning_rate": 1.3346764060927255e-05, "loss": 0.5373, "step": 14981 }, { "epoch": 0.40970247210676003, "grad_norm": 1.323702096939087, "learning_rate": 1.3345929419439907e-05, "loss": 0.5079, "step": 14982 }, { "epoch": 0.4097298184204769, "grad_norm": 1.134111762046814, "learning_rate": 1.3345094751704682e-05, "loss": 0.5245, "step": 14983 }, { "epoch": 0.4097571647341938, "grad_norm": 1.2324274778366089, "learning_rate": 1.3344260057728127e-05, "loss": 0.4153, "step": 14984 }, { "epoch": 0.40978451104791075, "grad_norm": 1.1777218580245972, "learning_rate": 1.3343425337516798e-05, "loss": 0.8149, "step": 14985 }, { "epoch": 0.4098118573616277, "grad_norm": 1.2367913722991943, "learning_rate": 1.3342590591077235e-05, "loss": 0.5279, "step": 14986 }, { "epoch": 0.40983920367534454, "grad_norm": 1.133294701576233, "learning_rate": 1.3341755818415992e-05, "loss": 0.511, "step": 14987 }, { "epoch": 0.40986654998906147, "grad_norm": 1.1641355752944946, "learning_rate": 1.3340921019539612e-05, "loss": 0.5119, "step": 14988 }, { "epoch": 0.4098938963027784, "grad_norm": 1.2024344205856323, "learning_rate": 1.334008619445465e-05, "loss": 0.4599, "step": 14989 }, { "epoch": 0.4099212426164953, "grad_norm": 1.3335202932357788, "learning_rate": 1.3339251343167652e-05, "loss": 0.5322, "step": 14990 }, { "epoch": 0.4099485889302122, "grad_norm": 1.2645806074142456, "learning_rate": 1.3338416465685165e-05, "loss": 0.5382, "step": 14991 }, { "epoch": 0.4099759352439291, "grad_norm": 1.181830883026123, "learning_rate": 1.3337581562013739e-05, "loss": 0.4989, "step": 14992 }, { "epoch": 0.41000328155764604, "grad_norm": 1.3919811248779297, "learning_rate": 1.3336746632159928e-05, "loss": 0.5729, "step": 14993 }, { "epoch": 0.41003062787136296, "grad_norm": 1.130130648612976, "learning_rate": 1.3335911676130275e-05, "loss": 0.5143, "step": 14994 }, { "epoch": 0.41005797418507983, "grad_norm": 1.3405287265777588, "learning_rate": 1.3335076693931337e-05, "loss": 0.5424, "step": 14995 }, { "epoch": 0.41008532049879676, "grad_norm": 1.547120451927185, "learning_rate": 1.333424168556966e-05, "loss": 0.517, "step": 14996 }, { "epoch": 0.4101126668125137, "grad_norm": 1.0753867626190186, "learning_rate": 1.3333406651051795e-05, "loss": 0.5044, "step": 14997 }, { "epoch": 0.4101400131262306, "grad_norm": 1.1710660457611084, "learning_rate": 1.3332571590384293e-05, "loss": 0.5033, "step": 14998 }, { "epoch": 0.4101673594399475, "grad_norm": 1.5550124645233154, "learning_rate": 1.3331736503573704e-05, "loss": 0.4582, "step": 14999 }, { "epoch": 0.4101947057536644, "grad_norm": 1.5215222835540771, "learning_rate": 1.333090139062658e-05, "loss": 0.4682, "step": 15000 }, { "epoch": 0.4102220520673813, "grad_norm": 1.551518440246582, "learning_rate": 1.3330066251549475e-05, "loss": 0.4276, "step": 15001 }, { "epoch": 0.41024939838109825, "grad_norm": 1.294064998626709, "learning_rate": 1.3329231086348933e-05, "loss": 0.5277, "step": 15002 }, { "epoch": 0.4102767446948151, "grad_norm": 1.2593672275543213, "learning_rate": 1.3328395895031512e-05, "loss": 0.4476, "step": 15003 }, { "epoch": 0.41030409100853205, "grad_norm": 1.8613996505737305, "learning_rate": 1.3327560677603764e-05, "loss": 0.4612, "step": 15004 }, { "epoch": 0.41033143732224897, "grad_norm": 1.2305083274841309, "learning_rate": 1.3326725434072235e-05, "loss": 0.5143, "step": 15005 }, { "epoch": 0.4103587836359659, "grad_norm": 1.2904589176177979, "learning_rate": 1.3325890164443485e-05, "loss": 0.5433, "step": 15006 }, { "epoch": 0.41038612994968277, "grad_norm": 1.5591403245925903, "learning_rate": 1.3325054868724058e-05, "loss": 0.3957, "step": 15007 }, { "epoch": 0.4104134762633997, "grad_norm": 1.4005814790725708, "learning_rate": 1.3324219546920516e-05, "loss": 0.5288, "step": 15008 }, { "epoch": 0.4104408225771166, "grad_norm": 1.1478670835494995, "learning_rate": 1.3323384199039407e-05, "loss": 0.527, "step": 15009 }, { "epoch": 0.41046816889083354, "grad_norm": 1.5202850103378296, "learning_rate": 1.332254882508728e-05, "loss": 0.4598, "step": 15010 }, { "epoch": 0.4104955152045504, "grad_norm": 1.379805088043213, "learning_rate": 1.3321713425070695e-05, "loss": 0.5318, "step": 15011 }, { "epoch": 0.41052286151826733, "grad_norm": 1.4885612726211548, "learning_rate": 1.3320877998996205e-05, "loss": 0.5587, "step": 15012 }, { "epoch": 0.41055020783198426, "grad_norm": 1.5135624408721924, "learning_rate": 1.3320042546870357e-05, "loss": 0.5129, "step": 15013 }, { "epoch": 0.4105775541457012, "grad_norm": 2.77585768699646, "learning_rate": 1.3319207068699714e-05, "loss": 0.4478, "step": 15014 }, { "epoch": 0.41060490045941805, "grad_norm": 1.5922518968582153, "learning_rate": 1.3318371564490823e-05, "loss": 0.5109, "step": 15015 }, { "epoch": 0.410632246773135, "grad_norm": 1.2756803035736084, "learning_rate": 1.3317536034250242e-05, "loss": 0.5079, "step": 15016 }, { "epoch": 0.4106595930868519, "grad_norm": 1.4173444509506226, "learning_rate": 1.3316700477984523e-05, "loss": 0.5125, "step": 15017 }, { "epoch": 0.41068693940056883, "grad_norm": 1.5730252265930176, "learning_rate": 1.3315864895700224e-05, "loss": 0.5034, "step": 15018 }, { "epoch": 0.4107142857142857, "grad_norm": 2.268209218978882, "learning_rate": 1.3315029287403896e-05, "loss": 0.3662, "step": 15019 }, { "epoch": 0.4107416320280026, "grad_norm": 1.2042784690856934, "learning_rate": 1.3314193653102099e-05, "loss": 0.522, "step": 15020 }, { "epoch": 0.41076897834171955, "grad_norm": 1.5155537128448486, "learning_rate": 1.3313357992801384e-05, "loss": 0.8538, "step": 15021 }, { "epoch": 0.4107963246554365, "grad_norm": 1.4353055953979492, "learning_rate": 1.3312522306508308e-05, "loss": 0.522, "step": 15022 }, { "epoch": 0.41082367096915334, "grad_norm": 1.5152232646942139, "learning_rate": 1.3311686594229427e-05, "loss": 0.4396, "step": 15023 }, { "epoch": 0.41085101728287027, "grad_norm": 1.4348762035369873, "learning_rate": 1.3310850855971296e-05, "loss": 0.5058, "step": 15024 }, { "epoch": 0.4108783635965872, "grad_norm": 1.3145970106124878, "learning_rate": 1.3310015091740474e-05, "loss": 0.5112, "step": 15025 }, { "epoch": 0.4109057099103041, "grad_norm": 1.338209629058838, "learning_rate": 1.3309179301543513e-05, "loss": 0.5069, "step": 15026 }, { "epoch": 0.410933056224021, "grad_norm": 1.3418898582458496, "learning_rate": 1.3308343485386972e-05, "loss": 0.5276, "step": 15027 }, { "epoch": 0.4109604025377379, "grad_norm": 1.5214353799819946, "learning_rate": 1.3307507643277408e-05, "loss": 0.531, "step": 15028 }, { "epoch": 0.41098774885145484, "grad_norm": 1.236169695854187, "learning_rate": 1.3306671775221375e-05, "loss": 0.5379, "step": 15029 }, { "epoch": 0.41101509516517176, "grad_norm": 1.204457402229309, "learning_rate": 1.3305835881225436e-05, "loss": 0.5448, "step": 15030 }, { "epoch": 0.41104244147888863, "grad_norm": 1.744781732559204, "learning_rate": 1.3304999961296143e-05, "loss": 0.5279, "step": 15031 }, { "epoch": 0.41106978779260556, "grad_norm": 1.536025047302246, "learning_rate": 1.3304164015440056e-05, "loss": 0.5317, "step": 15032 }, { "epoch": 0.4110971341063225, "grad_norm": 1.219922661781311, "learning_rate": 1.3303328043663732e-05, "loss": 0.4882, "step": 15033 }, { "epoch": 0.41112448042003935, "grad_norm": 1.4264836311340332, "learning_rate": 1.3302492045973728e-05, "loss": 0.5574, "step": 15034 }, { "epoch": 0.4111518267337563, "grad_norm": 1.1873071193695068, "learning_rate": 1.3301656022376603e-05, "loss": 0.5208, "step": 15035 }, { "epoch": 0.4111791730474732, "grad_norm": 1.5599652528762817, "learning_rate": 1.3300819972878918e-05, "loss": 0.5503, "step": 15036 }, { "epoch": 0.4112065193611901, "grad_norm": 1.3235911130905151, "learning_rate": 1.3299983897487227e-05, "loss": 0.5315, "step": 15037 }, { "epoch": 0.411233865674907, "grad_norm": 1.388359785079956, "learning_rate": 1.3299147796208092e-05, "loss": 0.5289, "step": 15038 }, { "epoch": 0.4112612119886239, "grad_norm": 3.702449083328247, "learning_rate": 1.3298311669048072e-05, "loss": 0.4236, "step": 15039 }, { "epoch": 0.41128855830234085, "grad_norm": 1.3505351543426514, "learning_rate": 1.3297475516013723e-05, "loss": 0.53, "step": 15040 }, { "epoch": 0.41131590461605777, "grad_norm": 1.3323079347610474, "learning_rate": 1.3296639337111609e-05, "loss": 0.504, "step": 15041 }, { "epoch": 0.41134325092977464, "grad_norm": 1.3283721208572388, "learning_rate": 1.3295803132348285e-05, "loss": 0.5448, "step": 15042 }, { "epoch": 0.41137059724349156, "grad_norm": 1.1395800113677979, "learning_rate": 1.3294966901730316e-05, "loss": 0.5526, "step": 15043 }, { "epoch": 0.4113979435572085, "grad_norm": 1.2865060567855835, "learning_rate": 1.3294130645264259e-05, "loss": 0.461, "step": 15044 }, { "epoch": 0.4114252898709254, "grad_norm": 1.4582492113113403, "learning_rate": 1.329329436295667e-05, "loss": 0.4738, "step": 15045 }, { "epoch": 0.4114526361846423, "grad_norm": 1.284399390220642, "learning_rate": 1.3292458054814115e-05, "loss": 0.5307, "step": 15046 }, { "epoch": 0.4114799824983592, "grad_norm": 1.3062177896499634, "learning_rate": 1.3291621720843155e-05, "loss": 0.5633, "step": 15047 }, { "epoch": 0.41150732881207613, "grad_norm": 1.4223542213439941, "learning_rate": 1.3290785361050347e-05, "loss": 0.5361, "step": 15048 }, { "epoch": 0.41153467512579306, "grad_norm": 1.496664047241211, "learning_rate": 1.3289948975442254e-05, "loss": 0.5333, "step": 15049 }, { "epoch": 0.41156202143950993, "grad_norm": 1.2118899822235107, "learning_rate": 1.3289112564025439e-05, "loss": 0.5004, "step": 15050 }, { "epoch": 0.41158936775322685, "grad_norm": 1.4193521738052368, "learning_rate": 1.3288276126806459e-05, "loss": 0.5299, "step": 15051 }, { "epoch": 0.4116167140669438, "grad_norm": 1.1521296501159668, "learning_rate": 1.3287439663791881e-05, "loss": 0.4876, "step": 15052 }, { "epoch": 0.4116440603806607, "grad_norm": 1.2816591262817383, "learning_rate": 1.3286603174988261e-05, "loss": 0.5357, "step": 15053 }, { "epoch": 0.4116714066943776, "grad_norm": 1.2405121326446533, "learning_rate": 1.3285766660402164e-05, "loss": 0.5155, "step": 15054 }, { "epoch": 0.4116987530080945, "grad_norm": 1.5549122095108032, "learning_rate": 1.3284930120040157e-05, "loss": 0.4349, "step": 15055 }, { "epoch": 0.4117260993218114, "grad_norm": 2.196012496948242, "learning_rate": 1.3284093553908793e-05, "loss": 0.534, "step": 15056 }, { "epoch": 0.41175344563552835, "grad_norm": 1.3276079893112183, "learning_rate": 1.3283256962014641e-05, "loss": 0.8224, "step": 15057 }, { "epoch": 0.4117807919492452, "grad_norm": 1.3578420877456665, "learning_rate": 1.3282420344364262e-05, "loss": 0.5078, "step": 15058 }, { "epoch": 0.41180813826296214, "grad_norm": 1.1623202562332153, "learning_rate": 1.3281583700964218e-05, "loss": 0.5034, "step": 15059 }, { "epoch": 0.41183548457667907, "grad_norm": 1.887856125831604, "learning_rate": 1.3280747031821075e-05, "loss": 0.5244, "step": 15060 }, { "epoch": 0.411862830890396, "grad_norm": 1.7810814380645752, "learning_rate": 1.3279910336941396e-05, "loss": 0.5113, "step": 15061 }, { "epoch": 0.41189017720411286, "grad_norm": 1.288704514503479, "learning_rate": 1.327907361633174e-05, "loss": 0.8073, "step": 15062 }, { "epoch": 0.4119175235178298, "grad_norm": 1.283984661102295, "learning_rate": 1.3278236869998678e-05, "loss": 0.5202, "step": 15063 }, { "epoch": 0.4119448698315467, "grad_norm": 1.4512372016906738, "learning_rate": 1.3277400097948767e-05, "loss": 0.5223, "step": 15064 }, { "epoch": 0.41197221614526364, "grad_norm": 1.2824933528900146, "learning_rate": 1.3276563300188574e-05, "loss": 0.5184, "step": 15065 }, { "epoch": 0.4119995624589805, "grad_norm": 1.3075274229049683, "learning_rate": 1.3275726476724668e-05, "loss": 0.5258, "step": 15066 }, { "epoch": 0.41202690877269743, "grad_norm": 1.454040765762329, "learning_rate": 1.3274889627563608e-05, "loss": 0.5073, "step": 15067 }, { "epoch": 0.41205425508641436, "grad_norm": 1.2329082489013672, "learning_rate": 1.3274052752711959e-05, "loss": 0.517, "step": 15068 }, { "epoch": 0.4120816014001313, "grad_norm": 1.4746416807174683, "learning_rate": 1.3273215852176288e-05, "loss": 0.4007, "step": 15069 }, { "epoch": 0.41210894771384815, "grad_norm": 1.2899448871612549, "learning_rate": 1.3272378925963158e-05, "loss": 0.5336, "step": 15070 }, { "epoch": 0.4121362940275651, "grad_norm": 1.2441585063934326, "learning_rate": 1.3271541974079143e-05, "loss": 0.5087, "step": 15071 }, { "epoch": 0.412163640341282, "grad_norm": 1.0902475118637085, "learning_rate": 1.3270704996530794e-05, "loss": 0.503, "step": 15072 }, { "epoch": 0.4121909866549989, "grad_norm": 1.720130443572998, "learning_rate": 1.326986799332469e-05, "loss": 0.5236, "step": 15073 }, { "epoch": 0.4122183329687158, "grad_norm": 1.254644751548767, "learning_rate": 1.326903096446739e-05, "loss": 0.8677, "step": 15074 }, { "epoch": 0.4122456792824327, "grad_norm": 1.1684716939926147, "learning_rate": 1.3268193909965461e-05, "loss": 0.5144, "step": 15075 }, { "epoch": 0.41227302559614964, "grad_norm": 1.1462407112121582, "learning_rate": 1.3267356829825474e-05, "loss": 0.4899, "step": 15076 }, { "epoch": 0.41230037190986657, "grad_norm": 1.358588695526123, "learning_rate": 1.3266519724053988e-05, "loss": 0.5277, "step": 15077 }, { "epoch": 0.41232771822358344, "grad_norm": 1.451712727546692, "learning_rate": 1.3265682592657579e-05, "loss": 0.5226, "step": 15078 }, { "epoch": 0.41235506453730036, "grad_norm": 1.6529830694198608, "learning_rate": 1.3264845435642804e-05, "loss": 0.5094, "step": 15079 }, { "epoch": 0.4123824108510173, "grad_norm": 1.7934682369232178, "learning_rate": 1.3264008253016238e-05, "loss": 0.4717, "step": 15080 }, { "epoch": 0.4124097571647342, "grad_norm": 1.6107971668243408, "learning_rate": 1.3263171044784444e-05, "loss": 0.4874, "step": 15081 }, { "epoch": 0.4124371034784511, "grad_norm": 1.367987871170044, "learning_rate": 1.3262333810953996e-05, "loss": 0.5159, "step": 15082 }, { "epoch": 0.412464449792168, "grad_norm": 1.3035951852798462, "learning_rate": 1.3261496551531452e-05, "loss": 0.5243, "step": 15083 }, { "epoch": 0.41249179610588493, "grad_norm": 1.5784000158309937, "learning_rate": 1.3260659266523389e-05, "loss": 0.5596, "step": 15084 }, { "epoch": 0.41251914241960186, "grad_norm": 3.1679890155792236, "learning_rate": 1.325982195593637e-05, "loss": 0.5473, "step": 15085 }, { "epoch": 0.41254648873331873, "grad_norm": 1.2954621315002441, "learning_rate": 1.3258984619776965e-05, "loss": 0.5444, "step": 15086 }, { "epoch": 0.41257383504703565, "grad_norm": 1.3005300760269165, "learning_rate": 1.3258147258051743e-05, "loss": 0.531, "step": 15087 }, { "epoch": 0.4126011813607526, "grad_norm": 1.2836706638336182, "learning_rate": 1.3257309870767272e-05, "loss": 0.4641, "step": 15088 }, { "epoch": 0.4126285276744695, "grad_norm": 1.8889530897140503, "learning_rate": 1.3256472457930122e-05, "loss": 0.5448, "step": 15089 }, { "epoch": 0.4126558739881864, "grad_norm": 1.346344232559204, "learning_rate": 1.3255635019546864e-05, "loss": 0.5569, "step": 15090 }, { "epoch": 0.4126832203019033, "grad_norm": 1.54905366897583, "learning_rate": 1.3254797555624061e-05, "loss": 0.4477, "step": 15091 }, { "epoch": 0.4127105666156202, "grad_norm": 1.4627411365509033, "learning_rate": 1.325396006616829e-05, "loss": 0.5236, "step": 15092 }, { "epoch": 0.41273791292933715, "grad_norm": 1.3949825763702393, "learning_rate": 1.325312255118612e-05, "loss": 0.5768, "step": 15093 }, { "epoch": 0.412765259243054, "grad_norm": 1.8453515768051147, "learning_rate": 1.3252285010684119e-05, "loss": 0.5125, "step": 15094 }, { "epoch": 0.41279260555677094, "grad_norm": 1.3019148111343384, "learning_rate": 1.3251447444668855e-05, "loss": 0.5064, "step": 15095 }, { "epoch": 0.41281995187048787, "grad_norm": 1.622086763381958, "learning_rate": 1.3250609853146903e-05, "loss": 0.3725, "step": 15096 }, { "epoch": 0.4128472981842048, "grad_norm": 1.949761986732483, "learning_rate": 1.324977223612483e-05, "loss": 0.391, "step": 15097 }, { "epoch": 0.41287464449792166, "grad_norm": 1.3266351222991943, "learning_rate": 1.3248934593609208e-05, "loss": 0.827, "step": 15098 }, { "epoch": 0.4129019908116386, "grad_norm": 1.5019384622573853, "learning_rate": 1.3248096925606609e-05, "loss": 0.5411, "step": 15099 }, { "epoch": 0.4129293371253555, "grad_norm": 1.6214090585708618, "learning_rate": 1.3247259232123603e-05, "loss": 0.5825, "step": 15100 }, { "epoch": 0.41295668343907244, "grad_norm": 1.2192752361297607, "learning_rate": 1.3246421513166763e-05, "loss": 0.8264, "step": 15101 }, { "epoch": 0.4129840297527893, "grad_norm": 1.6761658191680908, "learning_rate": 1.3245583768742658e-05, "loss": 0.5131, "step": 15102 }, { "epoch": 0.41301137606650623, "grad_norm": 1.33306884765625, "learning_rate": 1.3244745998857864e-05, "loss": 0.5128, "step": 15103 }, { "epoch": 0.41303872238022316, "grad_norm": 1.3366934061050415, "learning_rate": 1.3243908203518951e-05, "loss": 0.5313, "step": 15104 }, { "epoch": 0.4130660686939401, "grad_norm": 1.6385345458984375, "learning_rate": 1.3243070382732488e-05, "loss": 0.5258, "step": 15105 }, { "epoch": 0.41309341500765695, "grad_norm": 1.1508790254592896, "learning_rate": 1.3242232536505056e-05, "loss": 0.517, "step": 15106 }, { "epoch": 0.4131207613213739, "grad_norm": 1.4476592540740967, "learning_rate": 1.3241394664843216e-05, "loss": 0.5802, "step": 15107 }, { "epoch": 0.4131481076350908, "grad_norm": 2.3096187114715576, "learning_rate": 1.324055676775355e-05, "loss": 0.5336, "step": 15108 }, { "epoch": 0.4131754539488077, "grad_norm": 1.9122430086135864, "learning_rate": 1.323971884524263e-05, "loss": 0.5517, "step": 15109 }, { "epoch": 0.4132028002625246, "grad_norm": 1.429819107055664, "learning_rate": 1.3238880897317024e-05, "loss": 0.5411, "step": 15110 }, { "epoch": 0.4132301465762415, "grad_norm": 1.2744965553283691, "learning_rate": 1.3238042923983311e-05, "loss": 0.4123, "step": 15111 }, { "epoch": 0.41325749288995844, "grad_norm": 1.4066241979599, "learning_rate": 1.3237204925248061e-05, "loss": 0.5163, "step": 15112 }, { "epoch": 0.41328483920367537, "grad_norm": 1.53683340549469, "learning_rate": 1.3236366901117852e-05, "loss": 0.5401, "step": 15113 }, { "epoch": 0.41331218551739224, "grad_norm": 1.713454008102417, "learning_rate": 1.3235528851599253e-05, "loss": 0.5304, "step": 15114 }, { "epoch": 0.41333953183110916, "grad_norm": 1.2328394651412964, "learning_rate": 1.3234690776698841e-05, "loss": 0.5323, "step": 15115 }, { "epoch": 0.4133668781448261, "grad_norm": 1.256150484085083, "learning_rate": 1.323385267642319e-05, "loss": 0.5131, "step": 15116 }, { "epoch": 0.413394224458543, "grad_norm": 1.3439347743988037, "learning_rate": 1.3233014550778878e-05, "loss": 0.3504, "step": 15117 }, { "epoch": 0.4134215707722599, "grad_norm": 1.305364966392517, "learning_rate": 1.3232176399772474e-05, "loss": 0.5117, "step": 15118 }, { "epoch": 0.4134489170859768, "grad_norm": 1.5101544857025146, "learning_rate": 1.3231338223410554e-05, "loss": 0.4596, "step": 15119 }, { "epoch": 0.41347626339969373, "grad_norm": 1.313578724861145, "learning_rate": 1.32305000216997e-05, "loss": 0.54, "step": 15120 }, { "epoch": 0.41350360971341066, "grad_norm": 1.708877682685852, "learning_rate": 1.3229661794646478e-05, "loss": 0.5292, "step": 15121 }, { "epoch": 0.4135309560271275, "grad_norm": 1.1507022380828857, "learning_rate": 1.322882354225747e-05, "loss": 0.5053, "step": 15122 }, { "epoch": 0.41355830234084445, "grad_norm": 1.2375415563583374, "learning_rate": 1.3227985264539248e-05, "loss": 0.5549, "step": 15123 }, { "epoch": 0.4135856486545614, "grad_norm": 1.813653588294983, "learning_rate": 1.3227146961498391e-05, "loss": 0.3882, "step": 15124 }, { "epoch": 0.4136129949682783, "grad_norm": 1.2322384119033813, "learning_rate": 1.3226308633141476e-05, "loss": 0.5325, "step": 15125 }, { "epoch": 0.41364034128199517, "grad_norm": 1.4065167903900146, "learning_rate": 1.3225470279475077e-05, "loss": 0.5094, "step": 15126 }, { "epoch": 0.4136676875957121, "grad_norm": 1.21066153049469, "learning_rate": 1.3224631900505768e-05, "loss": 0.535, "step": 15127 }, { "epoch": 0.413695033909429, "grad_norm": 1.7089202404022217, "learning_rate": 1.3223793496240135e-05, "loss": 0.5507, "step": 15128 }, { "epoch": 0.41372238022314595, "grad_norm": 1.310011625289917, "learning_rate": 1.3222955066684745e-05, "loss": 0.5044, "step": 15129 }, { "epoch": 0.4137497265368628, "grad_norm": 1.4454195499420166, "learning_rate": 1.3222116611846183e-05, "loss": 0.4961, "step": 15130 }, { "epoch": 0.41377707285057974, "grad_norm": 1.5530283451080322, "learning_rate": 1.3221278131731021e-05, "loss": 0.5557, "step": 15131 }, { "epoch": 0.41380441916429667, "grad_norm": 1.2895129919052124, "learning_rate": 1.3220439626345838e-05, "loss": 0.857, "step": 15132 }, { "epoch": 0.4138317654780136, "grad_norm": 1.445925235748291, "learning_rate": 1.3219601095697214e-05, "loss": 0.5042, "step": 15133 }, { "epoch": 0.41385911179173046, "grad_norm": 1.551151990890503, "learning_rate": 1.3218762539791724e-05, "loss": 0.5342, "step": 15134 }, { "epoch": 0.4138864581054474, "grad_norm": 1.1340219974517822, "learning_rate": 1.3217923958635947e-05, "loss": 0.486, "step": 15135 }, { "epoch": 0.4139138044191643, "grad_norm": 1.520597219467163, "learning_rate": 1.3217085352236466e-05, "loss": 0.3811, "step": 15136 }, { "epoch": 0.4139411507328812, "grad_norm": 1.1612962484359741, "learning_rate": 1.321624672059985e-05, "loss": 0.5117, "step": 15137 }, { "epoch": 0.4139684970465981, "grad_norm": 1.4646186828613281, "learning_rate": 1.3215408063732686e-05, "loss": 0.5384, "step": 15138 }, { "epoch": 0.41399584336031503, "grad_norm": 1.4218319654464722, "learning_rate": 1.3214569381641554e-05, "loss": 0.5153, "step": 15139 }, { "epoch": 0.41402318967403196, "grad_norm": 1.612484097480774, "learning_rate": 1.3213730674333025e-05, "loss": 0.5303, "step": 15140 }, { "epoch": 0.4140505359877488, "grad_norm": 1.569235920906067, "learning_rate": 1.3212891941813688e-05, "loss": 0.3553, "step": 15141 }, { "epoch": 0.41407788230146575, "grad_norm": 1.5645827054977417, "learning_rate": 1.3212053184090114e-05, "loss": 0.5391, "step": 15142 }, { "epoch": 0.4141052286151827, "grad_norm": 1.566786289215088, "learning_rate": 1.3211214401168886e-05, "loss": 0.4965, "step": 15143 }, { "epoch": 0.4141325749288996, "grad_norm": 1.227700114250183, "learning_rate": 1.3210375593056587e-05, "loss": 0.5104, "step": 15144 }, { "epoch": 0.41415992124261647, "grad_norm": 1.88059401512146, "learning_rate": 1.3209536759759795e-05, "loss": 0.5513, "step": 15145 }, { "epoch": 0.4141872675563334, "grad_norm": 1.4897058010101318, "learning_rate": 1.320869790128509e-05, "loss": 0.5353, "step": 15146 }, { "epoch": 0.4142146138700503, "grad_norm": 1.300400972366333, "learning_rate": 1.3207859017639055e-05, "loss": 0.513, "step": 15147 }, { "epoch": 0.41424196018376724, "grad_norm": 1.240501880645752, "learning_rate": 1.3207020108828266e-05, "loss": 0.558, "step": 15148 }, { "epoch": 0.4142693064974841, "grad_norm": 1.4492993354797363, "learning_rate": 1.3206181174859309e-05, "loss": 0.5439, "step": 15149 }, { "epoch": 0.41429665281120104, "grad_norm": 1.258111596107483, "learning_rate": 1.3205342215738763e-05, "loss": 0.5593, "step": 15150 }, { "epoch": 0.41432399912491796, "grad_norm": 1.9202077388763428, "learning_rate": 1.3204503231473207e-05, "loss": 0.4419, "step": 15151 }, { "epoch": 0.4143513454386349, "grad_norm": 1.226405143737793, "learning_rate": 1.3203664222069228e-05, "loss": 0.5039, "step": 15152 }, { "epoch": 0.41437869175235176, "grad_norm": 4.666604995727539, "learning_rate": 1.3202825187533401e-05, "loss": 0.3929, "step": 15153 }, { "epoch": 0.4144060380660687, "grad_norm": 2.0824382305145264, "learning_rate": 1.3201986127872312e-05, "loss": 0.5325, "step": 15154 }, { "epoch": 0.4144333843797856, "grad_norm": 1.312958002090454, "learning_rate": 1.3201147043092545e-05, "loss": 0.526, "step": 15155 }, { "epoch": 0.41446073069350253, "grad_norm": 1.0464255809783936, "learning_rate": 1.320030793320068e-05, "loss": 0.5177, "step": 15156 }, { "epoch": 0.4144880770072194, "grad_norm": 1.0967415571212769, "learning_rate": 1.3199468798203299e-05, "loss": 0.5125, "step": 15157 }, { "epoch": 0.4145154233209363, "grad_norm": 1.4604982137680054, "learning_rate": 1.3198629638106987e-05, "loss": 0.5285, "step": 15158 }, { "epoch": 0.41454276963465325, "grad_norm": 1.4021151065826416, "learning_rate": 1.3197790452918324e-05, "loss": 0.5162, "step": 15159 }, { "epoch": 0.4145701159483702, "grad_norm": 1.1972049474716187, "learning_rate": 1.31969512426439e-05, "loss": 0.4878, "step": 15160 }, { "epoch": 0.41459746226208705, "grad_norm": 1.2249027490615845, "learning_rate": 1.3196112007290287e-05, "loss": 0.5009, "step": 15161 }, { "epoch": 0.41462480857580397, "grad_norm": 1.2689549922943115, "learning_rate": 1.3195272746864073e-05, "loss": 0.532, "step": 15162 }, { "epoch": 0.4146521548895209, "grad_norm": 1.1246273517608643, "learning_rate": 1.3194433461371849e-05, "loss": 0.5213, "step": 15163 }, { "epoch": 0.4146795012032378, "grad_norm": 1.4051743745803833, "learning_rate": 1.3193594150820193e-05, "loss": 0.5236, "step": 15164 }, { "epoch": 0.4147068475169547, "grad_norm": 1.3370013236999512, "learning_rate": 1.3192754815215689e-05, "loss": 0.5439, "step": 15165 }, { "epoch": 0.4147341938306716, "grad_norm": 1.3935785293579102, "learning_rate": 1.3191915454564923e-05, "loss": 0.406, "step": 15166 }, { "epoch": 0.41476154014438854, "grad_norm": 1.2415132522583008, "learning_rate": 1.3191076068874477e-05, "loss": 0.5171, "step": 15167 }, { "epoch": 0.41478888645810547, "grad_norm": 1.474615216255188, "learning_rate": 1.3190236658150937e-05, "loss": 0.5195, "step": 15168 }, { "epoch": 0.41481623277182234, "grad_norm": 5.095984935760498, "learning_rate": 1.3189397222400889e-05, "loss": 0.8343, "step": 15169 }, { "epoch": 0.41484357908553926, "grad_norm": 1.344285488128662, "learning_rate": 1.3188557761630917e-05, "loss": 0.4948, "step": 15170 }, { "epoch": 0.4148709253992562, "grad_norm": 1.1953591108322144, "learning_rate": 1.3187718275847608e-05, "loss": 0.5171, "step": 15171 }, { "epoch": 0.4148982717129731, "grad_norm": 1.467898964881897, "learning_rate": 1.3186878765057543e-05, "loss": 0.5123, "step": 15172 }, { "epoch": 0.41492561802669, "grad_norm": 1.261940598487854, "learning_rate": 1.3186039229267311e-05, "loss": 0.4862, "step": 15173 }, { "epoch": 0.4149529643404069, "grad_norm": 1.8156583309173584, "learning_rate": 1.3185199668483502e-05, "loss": 0.5114, "step": 15174 }, { "epoch": 0.41498031065412383, "grad_norm": 1.3292285203933716, "learning_rate": 1.3184360082712695e-05, "loss": 0.4994, "step": 15175 }, { "epoch": 0.41500765696784075, "grad_norm": 1.6572387218475342, "learning_rate": 1.318352047196148e-05, "loss": 0.5091, "step": 15176 }, { "epoch": 0.4150350032815576, "grad_norm": 1.1503244638442993, "learning_rate": 1.3182680836236444e-05, "loss": 0.5455, "step": 15177 }, { "epoch": 0.41506234959527455, "grad_norm": 1.2210774421691895, "learning_rate": 1.318184117554417e-05, "loss": 0.5506, "step": 15178 }, { "epoch": 0.4150896959089915, "grad_norm": 1.1239533424377441, "learning_rate": 1.318100148989125e-05, "loss": 0.4821, "step": 15179 }, { "epoch": 0.4151170422227084, "grad_norm": 1.0505205392837524, "learning_rate": 1.3180161779284263e-05, "loss": 0.5237, "step": 15180 }, { "epoch": 0.41514438853642527, "grad_norm": 1.1367107629776, "learning_rate": 1.3179322043729807e-05, "loss": 0.5025, "step": 15181 }, { "epoch": 0.4151717348501422, "grad_norm": 1.2879725694656372, "learning_rate": 1.3178482283234467e-05, "loss": 0.5285, "step": 15182 }, { "epoch": 0.4151990811638591, "grad_norm": 1.3741977214813232, "learning_rate": 1.3177642497804821e-05, "loss": 0.5137, "step": 15183 }, { "epoch": 0.41522642747757604, "grad_norm": 1.1992571353912354, "learning_rate": 1.317680268744747e-05, "loss": 0.8091, "step": 15184 }, { "epoch": 0.4152537737912929, "grad_norm": 1.3985962867736816, "learning_rate": 1.3175962852168992e-05, "loss": 0.5238, "step": 15185 }, { "epoch": 0.41528112010500984, "grad_norm": 1.0980342626571655, "learning_rate": 1.3175122991975982e-05, "loss": 0.4903, "step": 15186 }, { "epoch": 0.41530846641872676, "grad_norm": 1.521716594696045, "learning_rate": 1.3174283106875026e-05, "loss": 0.4332, "step": 15187 }, { "epoch": 0.4153358127324437, "grad_norm": 1.2042492628097534, "learning_rate": 1.3173443196872708e-05, "loss": 0.5218, "step": 15188 }, { "epoch": 0.41536315904616056, "grad_norm": 1.3357245922088623, "learning_rate": 1.3172603261975623e-05, "loss": 0.537, "step": 15189 }, { "epoch": 0.4153905053598775, "grad_norm": 1.1667271852493286, "learning_rate": 1.317176330219036e-05, "loss": 0.5531, "step": 15190 }, { "epoch": 0.4154178516735944, "grad_norm": 1.300919771194458, "learning_rate": 1.3170923317523505e-05, "loss": 0.3761, "step": 15191 }, { "epoch": 0.41544519798731133, "grad_norm": 1.278822660446167, "learning_rate": 1.3170083307981649e-05, "loss": 0.5478, "step": 15192 }, { "epoch": 0.4154725443010282, "grad_norm": 1.667192816734314, "learning_rate": 1.3169243273571381e-05, "loss": 0.4189, "step": 15193 }, { "epoch": 0.4154998906147451, "grad_norm": 1.3425085544586182, "learning_rate": 1.3168403214299292e-05, "loss": 0.3905, "step": 15194 }, { "epoch": 0.41552723692846205, "grad_norm": 1.240820288658142, "learning_rate": 1.3167563130171974e-05, "loss": 0.4229, "step": 15195 }, { "epoch": 0.415554583242179, "grad_norm": 1.3118422031402588, "learning_rate": 1.316672302119601e-05, "loss": 0.5446, "step": 15196 }, { "epoch": 0.41558192955589585, "grad_norm": 1.4461662769317627, "learning_rate": 1.3165882887377998e-05, "loss": 0.4261, "step": 15197 }, { "epoch": 0.41560927586961277, "grad_norm": 1.5936344861984253, "learning_rate": 1.3165042728724527e-05, "loss": 0.5289, "step": 15198 }, { "epoch": 0.4156366221833297, "grad_norm": 1.633467197418213, "learning_rate": 1.3164202545242185e-05, "loss": 0.5381, "step": 15199 }, { "epoch": 0.4156639684970466, "grad_norm": 2.0384445190429688, "learning_rate": 1.3163362336937561e-05, "loss": 0.4066, "step": 15200 }, { "epoch": 0.4156913148107635, "grad_norm": 1.235308289527893, "learning_rate": 1.3162522103817257e-05, "loss": 0.5186, "step": 15201 }, { "epoch": 0.4157186611244804, "grad_norm": 1.197823166847229, "learning_rate": 1.3161681845887853e-05, "loss": 0.5201, "step": 15202 }, { "epoch": 0.41574600743819734, "grad_norm": 1.1824002265930176, "learning_rate": 1.3160841563155943e-05, "loss": 0.5632, "step": 15203 }, { "epoch": 0.41577335375191427, "grad_norm": 1.1071995496749878, "learning_rate": 1.3160001255628125e-05, "loss": 0.5573, "step": 15204 }, { "epoch": 0.41580070006563113, "grad_norm": 2.9141390323638916, "learning_rate": 1.3159160923310981e-05, "loss": 0.4314, "step": 15205 }, { "epoch": 0.41582804637934806, "grad_norm": 1.299595594406128, "learning_rate": 1.3158320566211114e-05, "loss": 0.5241, "step": 15206 }, { "epoch": 0.415855392693065, "grad_norm": 1.3278862237930298, "learning_rate": 1.3157480184335107e-05, "loss": 0.5331, "step": 15207 }, { "epoch": 0.4158827390067819, "grad_norm": 1.172577977180481, "learning_rate": 1.3156639777689557e-05, "loss": 0.5541, "step": 15208 }, { "epoch": 0.4159100853204988, "grad_norm": 1.3830595016479492, "learning_rate": 1.3155799346281058e-05, "loss": 0.5543, "step": 15209 }, { "epoch": 0.4159374316342157, "grad_norm": 1.1470378637313843, "learning_rate": 1.3154958890116201e-05, "loss": 0.5168, "step": 15210 }, { "epoch": 0.41596477794793263, "grad_norm": 1.3863718509674072, "learning_rate": 1.3154118409201577e-05, "loss": 0.5282, "step": 15211 }, { "epoch": 0.41599212426164955, "grad_norm": 1.245318055152893, "learning_rate": 1.3153277903543784e-05, "loss": 0.5477, "step": 15212 }, { "epoch": 0.4160194705753664, "grad_norm": 1.1657459735870361, "learning_rate": 1.3152437373149414e-05, "loss": 0.4972, "step": 15213 }, { "epoch": 0.41604681688908335, "grad_norm": 1.1978594064712524, "learning_rate": 1.3151596818025059e-05, "loss": 0.5335, "step": 15214 }, { "epoch": 0.4160741632028003, "grad_norm": 1.0324872732162476, "learning_rate": 1.3150756238177314e-05, "loss": 0.4943, "step": 15215 }, { "epoch": 0.4161015095165172, "grad_norm": 1.273918628692627, "learning_rate": 1.3149915633612771e-05, "loss": 0.5177, "step": 15216 }, { "epoch": 0.41612885583023407, "grad_norm": 1.5816739797592163, "learning_rate": 1.3149075004338028e-05, "loss": 0.5294, "step": 15217 }, { "epoch": 0.416156202143951, "grad_norm": 1.2852271795272827, "learning_rate": 1.3148234350359678e-05, "loss": 0.5197, "step": 15218 }, { "epoch": 0.4161835484576679, "grad_norm": 1.2793793678283691, "learning_rate": 1.3147393671684316e-05, "loss": 0.8664, "step": 15219 }, { "epoch": 0.41621089477138484, "grad_norm": 1.5431517362594604, "learning_rate": 1.3146552968318537e-05, "loss": 0.5595, "step": 15220 }, { "epoch": 0.4162382410851017, "grad_norm": 1.8222711086273193, "learning_rate": 1.3145712240268935e-05, "loss": 0.3983, "step": 15221 }, { "epoch": 0.41626558739881864, "grad_norm": 4.440171718597412, "learning_rate": 1.3144871487542104e-05, "loss": 0.5318, "step": 15222 }, { "epoch": 0.41629293371253556, "grad_norm": 1.4191980361938477, "learning_rate": 1.3144030710144642e-05, "loss": 0.406, "step": 15223 }, { "epoch": 0.4163202800262525, "grad_norm": 1.137546420097351, "learning_rate": 1.3143189908083143e-05, "loss": 0.5547, "step": 15224 }, { "epoch": 0.41634762633996936, "grad_norm": 1.1264572143554688, "learning_rate": 1.3142349081364208e-05, "loss": 0.4986, "step": 15225 }, { "epoch": 0.4163749726536863, "grad_norm": 1.1225203275680542, "learning_rate": 1.3141508229994424e-05, "loss": 0.5237, "step": 15226 }, { "epoch": 0.4164023189674032, "grad_norm": 2.730825662612915, "learning_rate": 1.3140667353980392e-05, "loss": 0.5269, "step": 15227 }, { "epoch": 0.41642966528112013, "grad_norm": 1.7427318096160889, "learning_rate": 1.3139826453328712e-05, "loss": 0.5125, "step": 15228 }, { "epoch": 0.416457011594837, "grad_norm": 1.9210090637207031, "learning_rate": 1.3138985528045973e-05, "loss": 0.8508, "step": 15229 }, { "epoch": 0.4164843579085539, "grad_norm": 1.274668574333191, "learning_rate": 1.3138144578138781e-05, "loss": 0.5416, "step": 15230 }, { "epoch": 0.41651170422227085, "grad_norm": 1.4022220373153687, "learning_rate": 1.3137303603613725e-05, "loss": 0.5233, "step": 15231 }, { "epoch": 0.4165390505359878, "grad_norm": 1.3183549642562866, "learning_rate": 1.3136462604477402e-05, "loss": 0.4989, "step": 15232 }, { "epoch": 0.41656639684970465, "grad_norm": 1.1440482139587402, "learning_rate": 1.313562158073642e-05, "loss": 0.4957, "step": 15233 }, { "epoch": 0.41659374316342157, "grad_norm": 2.2198047637939453, "learning_rate": 1.3134780532397362e-05, "loss": 0.5107, "step": 15234 }, { "epoch": 0.4166210894771385, "grad_norm": 1.6430822610855103, "learning_rate": 1.3133939459466835e-05, "loss": 0.5537, "step": 15235 }, { "epoch": 0.4166484357908554, "grad_norm": 1.3902287483215332, "learning_rate": 1.313309836195144e-05, "loss": 0.5331, "step": 15236 }, { "epoch": 0.4166757821045723, "grad_norm": 1.1174795627593994, "learning_rate": 1.3132257239857764e-05, "loss": 0.492, "step": 15237 }, { "epoch": 0.4167031284182892, "grad_norm": 1.2159395217895508, "learning_rate": 1.3131416093192413e-05, "loss": 0.5219, "step": 15238 }, { "epoch": 0.41673047473200614, "grad_norm": 1.3651844263076782, "learning_rate": 1.3130574921961985e-05, "loss": 0.4315, "step": 15239 }, { "epoch": 0.416757821045723, "grad_norm": 1.1953741312026978, "learning_rate": 1.3129733726173078e-05, "loss": 0.5264, "step": 15240 }, { "epoch": 0.41678516735943993, "grad_norm": 1.2661837339401245, "learning_rate": 1.312889250583229e-05, "loss": 0.5382, "step": 15241 }, { "epoch": 0.41681251367315686, "grad_norm": 1.3362786769866943, "learning_rate": 1.312805126094622e-05, "loss": 0.5177, "step": 15242 }, { "epoch": 0.4168398599868738, "grad_norm": 1.4601491689682007, "learning_rate": 1.3127209991521465e-05, "loss": 0.8568, "step": 15243 }, { "epoch": 0.41686720630059065, "grad_norm": 1.919486403465271, "learning_rate": 1.3126368697564635e-05, "loss": 0.4209, "step": 15244 }, { "epoch": 0.4168945526143076, "grad_norm": 1.179800033569336, "learning_rate": 1.3125527379082317e-05, "loss": 0.5211, "step": 15245 }, { "epoch": 0.4169218989280245, "grad_norm": 1.315576434135437, "learning_rate": 1.3124686036081118e-05, "loss": 0.5461, "step": 15246 }, { "epoch": 0.41694924524174143, "grad_norm": 1.7157247066497803, "learning_rate": 1.3123844668567639e-05, "loss": 0.5507, "step": 15247 }, { "epoch": 0.4169765915554583, "grad_norm": 2.1608335971832275, "learning_rate": 1.3123003276548474e-05, "loss": 0.5025, "step": 15248 }, { "epoch": 0.4170039378691752, "grad_norm": 1.097132682800293, "learning_rate": 1.3122161860030229e-05, "loss": 0.5495, "step": 15249 }, { "epoch": 0.41703128418289215, "grad_norm": 1.3124710321426392, "learning_rate": 1.3121320419019502e-05, "loss": 0.5314, "step": 15250 }, { "epoch": 0.4170586304966091, "grad_norm": 1.353520393371582, "learning_rate": 1.3120478953522892e-05, "loss": 0.7968, "step": 15251 }, { "epoch": 0.41708597681032594, "grad_norm": 1.153419017791748, "learning_rate": 1.3119637463547009e-05, "loss": 0.5349, "step": 15252 }, { "epoch": 0.41711332312404287, "grad_norm": 1.37563955783844, "learning_rate": 1.3118795949098444e-05, "loss": 0.4854, "step": 15253 }, { "epoch": 0.4171406694377598, "grad_norm": 1.6087119579315186, "learning_rate": 1.3117954410183804e-05, "loss": 0.4664, "step": 15254 }, { "epoch": 0.4171680157514767, "grad_norm": 1.15144681930542, "learning_rate": 1.311711284680969e-05, "loss": 0.3879, "step": 15255 }, { "epoch": 0.4171953620651936, "grad_norm": 1.1680488586425781, "learning_rate": 1.31162712589827e-05, "loss": 0.5172, "step": 15256 }, { "epoch": 0.4172227083789105, "grad_norm": 1.278076171875, "learning_rate": 1.311542964670944e-05, "loss": 0.4517, "step": 15257 }, { "epoch": 0.41725005469262744, "grad_norm": 1.28750479221344, "learning_rate": 1.3114588009996512e-05, "loss": 0.4372, "step": 15258 }, { "epoch": 0.41727740100634436, "grad_norm": 1.1976839303970337, "learning_rate": 1.3113746348850518e-05, "loss": 0.5229, "step": 15259 }, { "epoch": 0.41730474732006123, "grad_norm": 1.1903852224349976, "learning_rate": 1.3112904663278059e-05, "loss": 0.4924, "step": 15260 }, { "epoch": 0.41733209363377816, "grad_norm": 1.3454234600067139, "learning_rate": 1.311206295328574e-05, "loss": 0.5315, "step": 15261 }, { "epoch": 0.4173594399474951, "grad_norm": 1.3663616180419922, "learning_rate": 1.311122121888016e-05, "loss": 0.5365, "step": 15262 }, { "epoch": 0.417386786261212, "grad_norm": 1.5662016868591309, "learning_rate": 1.3110379460067927e-05, "loss": 0.5321, "step": 15263 }, { "epoch": 0.4174141325749289, "grad_norm": 1.3037079572677612, "learning_rate": 1.3109537676855644e-05, "loss": 0.4429, "step": 15264 }, { "epoch": 0.4174414788886458, "grad_norm": 1.3190205097198486, "learning_rate": 1.3108695869249912e-05, "loss": 0.536, "step": 15265 }, { "epoch": 0.4174688252023627, "grad_norm": 1.3015637397766113, "learning_rate": 1.3107854037257335e-05, "loss": 0.5216, "step": 15266 }, { "epoch": 0.41749617151607965, "grad_norm": 1.3885340690612793, "learning_rate": 1.3107012180884517e-05, "loss": 0.5298, "step": 15267 }, { "epoch": 0.4175235178297965, "grad_norm": 1.2452571392059326, "learning_rate": 1.3106170300138067e-05, "loss": 0.4963, "step": 15268 }, { "epoch": 0.41755086414351344, "grad_norm": 1.2225757837295532, "learning_rate": 1.3105328395024582e-05, "loss": 0.5307, "step": 15269 }, { "epoch": 0.41757821045723037, "grad_norm": 1.1328766345977783, "learning_rate": 1.3104486465550668e-05, "loss": 0.5265, "step": 15270 }, { "epoch": 0.4176055567709473, "grad_norm": 1.1916108131408691, "learning_rate": 1.3103644511722935e-05, "loss": 0.517, "step": 15271 }, { "epoch": 0.41763290308466416, "grad_norm": 1.8201760053634644, "learning_rate": 1.3102802533547983e-05, "loss": 0.5454, "step": 15272 }, { "epoch": 0.4176602493983811, "grad_norm": 1.5363539457321167, "learning_rate": 1.3101960531032419e-05, "loss": 0.4018, "step": 15273 }, { "epoch": 0.417687595712098, "grad_norm": 1.3359603881835938, "learning_rate": 1.3101118504182846e-05, "loss": 0.5145, "step": 15274 }, { "epoch": 0.41771494202581494, "grad_norm": 1.345646619796753, "learning_rate": 1.3100276453005871e-05, "loss": 0.5477, "step": 15275 }, { "epoch": 0.4177422883395318, "grad_norm": 1.8466538190841675, "learning_rate": 1.30994343775081e-05, "loss": 0.3685, "step": 15276 }, { "epoch": 0.41776963465324873, "grad_norm": 1.199968934059143, "learning_rate": 1.3098592277696138e-05, "loss": 0.3898, "step": 15277 }, { "epoch": 0.41779698096696566, "grad_norm": 1.9432594776153564, "learning_rate": 1.3097750153576594e-05, "loss": 0.5424, "step": 15278 }, { "epoch": 0.4178243272806826, "grad_norm": 1.2947908639907837, "learning_rate": 1.3096908005156071e-05, "loss": 0.5274, "step": 15279 }, { "epoch": 0.41785167359439945, "grad_norm": 1.2148913145065308, "learning_rate": 1.3096065832441174e-05, "loss": 0.519, "step": 15280 }, { "epoch": 0.4178790199081164, "grad_norm": 1.1060307025909424, "learning_rate": 1.3095223635438512e-05, "loss": 0.5236, "step": 15281 }, { "epoch": 0.4179063662218333, "grad_norm": 1.1172807216644287, "learning_rate": 1.3094381414154695e-05, "loss": 0.5209, "step": 15282 }, { "epoch": 0.41793371253555023, "grad_norm": 1.2340251207351685, "learning_rate": 1.3093539168596324e-05, "loss": 0.521, "step": 15283 }, { "epoch": 0.4179610588492671, "grad_norm": 1.5004655122756958, "learning_rate": 1.309269689877001e-05, "loss": 0.5268, "step": 15284 }, { "epoch": 0.417988405162984, "grad_norm": 1.2990514039993286, "learning_rate": 1.3091854604682359e-05, "loss": 0.8499, "step": 15285 }, { "epoch": 0.41801575147670095, "grad_norm": 1.2257920503616333, "learning_rate": 1.3091012286339977e-05, "loss": 0.5391, "step": 15286 }, { "epoch": 0.4180430977904179, "grad_norm": 1.2230408191680908, "learning_rate": 1.3090169943749475e-05, "loss": 0.8434, "step": 15287 }, { "epoch": 0.41807044410413474, "grad_norm": 1.1913126707077026, "learning_rate": 1.3089327576917458e-05, "loss": 0.5002, "step": 15288 }, { "epoch": 0.41809779041785167, "grad_norm": 1.2218929529190063, "learning_rate": 1.3088485185850539e-05, "loss": 0.8424, "step": 15289 }, { "epoch": 0.4181251367315686, "grad_norm": 1.407668113708496, "learning_rate": 1.3087642770555321e-05, "loss": 0.5055, "step": 15290 }, { "epoch": 0.4181524830452855, "grad_norm": 1.024469256401062, "learning_rate": 1.3086800331038414e-05, "loss": 0.4876, "step": 15291 }, { "epoch": 0.4181798293590024, "grad_norm": 2.538264274597168, "learning_rate": 1.3085957867306427e-05, "loss": 0.5447, "step": 15292 }, { "epoch": 0.4182071756727193, "grad_norm": 1.2983475923538208, "learning_rate": 1.3085115379365971e-05, "loss": 0.5081, "step": 15293 }, { "epoch": 0.41823452198643624, "grad_norm": 1.5490748882293701, "learning_rate": 1.308427286722365e-05, "loss": 0.5565, "step": 15294 }, { "epoch": 0.41826186830015316, "grad_norm": 1.4158316850662231, "learning_rate": 1.308343033088608e-05, "loss": 0.5615, "step": 15295 }, { "epoch": 0.41828921461387003, "grad_norm": 1.5568876266479492, "learning_rate": 1.3082587770359862e-05, "loss": 0.507, "step": 15296 }, { "epoch": 0.41831656092758696, "grad_norm": 1.2976696491241455, "learning_rate": 1.3081745185651613e-05, "loss": 0.4515, "step": 15297 }, { "epoch": 0.4183439072413039, "grad_norm": 1.236626148223877, "learning_rate": 1.3080902576767942e-05, "loss": 0.5085, "step": 15298 }, { "epoch": 0.4183712535550208, "grad_norm": 0.9968411922454834, "learning_rate": 1.3080059943715454e-05, "loss": 0.5041, "step": 15299 }, { "epoch": 0.4183985998687377, "grad_norm": 1.505017638206482, "learning_rate": 1.3079217286500763e-05, "loss": 0.4212, "step": 15300 }, { "epoch": 0.4184259461824546, "grad_norm": 1.0803296566009521, "learning_rate": 1.3078374605130482e-05, "loss": 0.5382, "step": 15301 }, { "epoch": 0.4184532924961715, "grad_norm": 1.3891308307647705, "learning_rate": 1.3077531899611216e-05, "loss": 0.5501, "step": 15302 }, { "epoch": 0.41848063880988845, "grad_norm": 1.578153133392334, "learning_rate": 1.307668916994958e-05, "loss": 0.4447, "step": 15303 }, { "epoch": 0.4185079851236053, "grad_norm": 1.3257570266723633, "learning_rate": 1.3075846416152183e-05, "loss": 0.5118, "step": 15304 }, { "epoch": 0.41853533143732224, "grad_norm": 1.1581677198410034, "learning_rate": 1.3075003638225634e-05, "loss": 0.5395, "step": 15305 }, { "epoch": 0.41856267775103917, "grad_norm": 1.1376194953918457, "learning_rate": 1.3074160836176551e-05, "loss": 0.5249, "step": 15306 }, { "epoch": 0.4185900240647561, "grad_norm": 1.2200416326522827, "learning_rate": 1.307331801001154e-05, "loss": 0.4663, "step": 15307 }, { "epoch": 0.41861737037847296, "grad_norm": 1.8571025133132935, "learning_rate": 1.3072475159737211e-05, "loss": 0.3825, "step": 15308 }, { "epoch": 0.4186447166921899, "grad_norm": 1.4066832065582275, "learning_rate": 1.3071632285360183e-05, "loss": 0.5379, "step": 15309 }, { "epoch": 0.4186720630059068, "grad_norm": 1.1614296436309814, "learning_rate": 1.3070789386887061e-05, "loss": 0.5086, "step": 15310 }, { "epoch": 0.41869940931962374, "grad_norm": 1.3251057863235474, "learning_rate": 1.3069946464324462e-05, "loss": 0.5227, "step": 15311 }, { "epoch": 0.4187267556333406, "grad_norm": 1.5618938207626343, "learning_rate": 1.3069103517678997e-05, "loss": 0.4663, "step": 15312 }, { "epoch": 0.41875410194705753, "grad_norm": 1.3302267789840698, "learning_rate": 1.3068260546957276e-05, "loss": 0.4438, "step": 15313 }, { "epoch": 0.41878144826077446, "grad_norm": 2.0823347568511963, "learning_rate": 1.3067417552165919e-05, "loss": 0.5113, "step": 15314 }, { "epoch": 0.4188087945744914, "grad_norm": 1.5819884538650513, "learning_rate": 1.306657453331153e-05, "loss": 0.5188, "step": 15315 }, { "epoch": 0.41883614088820825, "grad_norm": 1.2466716766357422, "learning_rate": 1.306573149040073e-05, "loss": 0.5338, "step": 15316 }, { "epoch": 0.4188634872019252, "grad_norm": 1.368223786354065, "learning_rate": 1.306488842344013e-05, "loss": 0.5234, "step": 15317 }, { "epoch": 0.4188908335156421, "grad_norm": 1.4314595460891724, "learning_rate": 1.306404533243634e-05, "loss": 0.4961, "step": 15318 }, { "epoch": 0.418918179829359, "grad_norm": 1.4693453311920166, "learning_rate": 1.3063202217395979e-05, "loss": 0.549, "step": 15319 }, { "epoch": 0.4189455261430759, "grad_norm": 1.1595979928970337, "learning_rate": 1.3062359078325657e-05, "loss": 0.3779, "step": 15320 }, { "epoch": 0.4189728724567928, "grad_norm": 1.4029405117034912, "learning_rate": 1.3061515915231991e-05, "loss": 0.551, "step": 15321 }, { "epoch": 0.41900021877050975, "grad_norm": 1.2916250228881836, "learning_rate": 1.3060672728121594e-05, "loss": 0.536, "step": 15322 }, { "epoch": 0.41902756508422667, "grad_norm": 1.133302927017212, "learning_rate": 1.3059829517001078e-05, "loss": 0.529, "step": 15323 }, { "epoch": 0.41905491139794354, "grad_norm": 1.3010776042938232, "learning_rate": 1.3058986281877066e-05, "loss": 0.8215, "step": 15324 }, { "epoch": 0.41908225771166047, "grad_norm": 1.1240763664245605, "learning_rate": 1.3058143022756164e-05, "loss": 0.5517, "step": 15325 }, { "epoch": 0.4191096040253774, "grad_norm": 1.44508957862854, "learning_rate": 1.3057299739644992e-05, "loss": 0.5597, "step": 15326 }, { "epoch": 0.4191369503390943, "grad_norm": 1.2887639999389648, "learning_rate": 1.3056456432550162e-05, "loss": 0.538, "step": 15327 }, { "epoch": 0.4191642966528112, "grad_norm": 1.2479808330535889, "learning_rate": 1.3055613101478292e-05, "loss": 0.5543, "step": 15328 }, { "epoch": 0.4191916429665281, "grad_norm": 1.0807812213897705, "learning_rate": 1.3054769746435998e-05, "loss": 0.5296, "step": 15329 }, { "epoch": 0.41921898928024504, "grad_norm": 1.584537386894226, "learning_rate": 1.3053926367429894e-05, "loss": 0.521, "step": 15330 }, { "epoch": 0.41924633559396196, "grad_norm": 3.2854106426239014, "learning_rate": 1.30530829644666e-05, "loss": 0.5062, "step": 15331 }, { "epoch": 0.41927368190767883, "grad_norm": 1.2220921516418457, "learning_rate": 1.3052239537552726e-05, "loss": 0.5248, "step": 15332 }, { "epoch": 0.41930102822139576, "grad_norm": 2.159006357192993, "learning_rate": 1.3051396086694894e-05, "loss": 0.5141, "step": 15333 }, { "epoch": 0.4193283745351127, "grad_norm": 1.337810754776001, "learning_rate": 1.3050552611899717e-05, "loss": 0.4255, "step": 15334 }, { "epoch": 0.4193557208488296, "grad_norm": 1.7085022926330566, "learning_rate": 1.3049709113173811e-05, "loss": 0.5255, "step": 15335 }, { "epoch": 0.4193830671625465, "grad_norm": 1.4694578647613525, "learning_rate": 1.30488655905238e-05, "loss": 0.5308, "step": 15336 }, { "epoch": 0.4194104134762634, "grad_norm": 1.5048080682754517, "learning_rate": 1.3048022043956295e-05, "loss": 0.4527, "step": 15337 }, { "epoch": 0.4194377597899803, "grad_norm": 1.522107481956482, "learning_rate": 1.3047178473477915e-05, "loss": 0.5246, "step": 15338 }, { "epoch": 0.4194651061036972, "grad_norm": 1.4286115169525146, "learning_rate": 1.3046334879095274e-05, "loss": 0.5482, "step": 15339 }, { "epoch": 0.4194924524174141, "grad_norm": 1.4633146524429321, "learning_rate": 1.3045491260814998e-05, "loss": 0.5062, "step": 15340 }, { "epoch": 0.41951979873113104, "grad_norm": 1.4551541805267334, "learning_rate": 1.3044647618643697e-05, "loss": 0.5268, "step": 15341 }, { "epoch": 0.41954714504484797, "grad_norm": 1.403708577156067, "learning_rate": 1.3043803952587996e-05, "loss": 0.5281, "step": 15342 }, { "epoch": 0.41957449135856484, "grad_norm": 1.4945389032363892, "learning_rate": 1.3042960262654504e-05, "loss": 0.5786, "step": 15343 }, { "epoch": 0.41960183767228176, "grad_norm": 1.4388827085494995, "learning_rate": 1.304211654884985e-05, "loss": 0.4318, "step": 15344 }, { "epoch": 0.4196291839859987, "grad_norm": 1.2890703678131104, "learning_rate": 1.3041272811180645e-05, "loss": 0.4077, "step": 15345 }, { "epoch": 0.4196565302997156, "grad_norm": 1.5885839462280273, "learning_rate": 1.304042904965351e-05, "loss": 0.4268, "step": 15346 }, { "epoch": 0.4196838766134325, "grad_norm": 1.9572492837905884, "learning_rate": 1.3039585264275068e-05, "loss": 0.5154, "step": 15347 }, { "epoch": 0.4197112229271494, "grad_norm": 1.3639625310897827, "learning_rate": 1.3038741455051934e-05, "loss": 0.828, "step": 15348 }, { "epoch": 0.41973856924086633, "grad_norm": 1.2961877584457397, "learning_rate": 1.3037897621990728e-05, "loss": 0.5068, "step": 15349 }, { "epoch": 0.41976591555458326, "grad_norm": 1.4482704401016235, "learning_rate": 1.3037053765098069e-05, "loss": 0.544, "step": 15350 }, { "epoch": 0.4197932618683001, "grad_norm": 1.123721718788147, "learning_rate": 1.3036209884380577e-05, "loss": 0.5095, "step": 15351 }, { "epoch": 0.41982060818201705, "grad_norm": 1.4604146480560303, "learning_rate": 1.3035365979844876e-05, "loss": 0.548, "step": 15352 }, { "epoch": 0.419847954495734, "grad_norm": 1.3867555856704712, "learning_rate": 1.3034522051497583e-05, "loss": 0.4465, "step": 15353 }, { "epoch": 0.4198753008094509, "grad_norm": 1.7153502702713013, "learning_rate": 1.3033678099345315e-05, "loss": 0.5105, "step": 15354 }, { "epoch": 0.41990264712316777, "grad_norm": 1.1979992389678955, "learning_rate": 1.30328341233947e-05, "loss": 0.5337, "step": 15355 }, { "epoch": 0.4199299934368847, "grad_norm": 1.3203603029251099, "learning_rate": 1.3031990123652352e-05, "loss": 0.4961, "step": 15356 }, { "epoch": 0.4199573397506016, "grad_norm": 1.2288925647735596, "learning_rate": 1.3031146100124895e-05, "loss": 0.5001, "step": 15357 }, { "epoch": 0.41998468606431855, "grad_norm": 1.3382682800292969, "learning_rate": 1.303030205281895e-05, "loss": 0.5164, "step": 15358 }, { "epoch": 0.4200120323780354, "grad_norm": 1.0584776401519775, "learning_rate": 1.3029457981741139e-05, "loss": 0.5104, "step": 15359 }, { "epoch": 0.42003937869175234, "grad_norm": 1.275160789489746, "learning_rate": 1.3028613886898087e-05, "loss": 0.5061, "step": 15360 }, { "epoch": 0.42006672500546927, "grad_norm": 1.4075350761413574, "learning_rate": 1.3027769768296406e-05, "loss": 0.4986, "step": 15361 }, { "epoch": 0.4200940713191862, "grad_norm": 1.2853548526763916, "learning_rate": 1.3026925625942724e-05, "loss": 0.4976, "step": 15362 }, { "epoch": 0.42012141763290306, "grad_norm": 1.3048967123031616, "learning_rate": 1.3026081459843662e-05, "loss": 0.518, "step": 15363 }, { "epoch": 0.42014876394662, "grad_norm": 1.6277915239334106, "learning_rate": 1.3025237270005845e-05, "loss": 0.5258, "step": 15364 }, { "epoch": 0.4201761102603369, "grad_norm": 1.3162872791290283, "learning_rate": 1.3024393056435889e-05, "loss": 0.4609, "step": 15365 }, { "epoch": 0.42020345657405384, "grad_norm": 1.6100174188613892, "learning_rate": 1.3023548819140423e-05, "loss": 0.5283, "step": 15366 }, { "epoch": 0.4202308028877707, "grad_norm": 1.2070714235305786, "learning_rate": 1.3022704558126066e-05, "loss": 0.5419, "step": 15367 }, { "epoch": 0.42025814920148763, "grad_norm": 1.4967535734176636, "learning_rate": 1.3021860273399445e-05, "loss": 0.4809, "step": 15368 }, { "epoch": 0.42028549551520455, "grad_norm": 1.512344241142273, "learning_rate": 1.3021015964967178e-05, "loss": 0.5084, "step": 15369 }, { "epoch": 0.4203128418289215, "grad_norm": 1.2958365678787231, "learning_rate": 1.3020171632835893e-05, "loss": 0.5249, "step": 15370 }, { "epoch": 0.42034018814263835, "grad_norm": 1.4766418933868408, "learning_rate": 1.301932727701221e-05, "loss": 0.3852, "step": 15371 }, { "epoch": 0.4203675344563553, "grad_norm": 1.2566908597946167, "learning_rate": 1.3018482897502755e-05, "loss": 0.522, "step": 15372 }, { "epoch": 0.4203948807700722, "grad_norm": 1.468946099281311, "learning_rate": 1.3017638494314153e-05, "loss": 0.4588, "step": 15373 }, { "epoch": 0.4204222270837891, "grad_norm": 1.4532272815704346, "learning_rate": 1.3016794067453022e-05, "loss": 0.4479, "step": 15374 }, { "epoch": 0.420449573397506, "grad_norm": 1.2356971502304077, "learning_rate": 1.301594961692599e-05, "loss": 0.5252, "step": 15375 }, { "epoch": 0.4204769197112229, "grad_norm": 1.4057276248931885, "learning_rate": 1.3015105142739687e-05, "loss": 0.4061, "step": 15376 }, { "epoch": 0.42050426602493984, "grad_norm": 1.5595731735229492, "learning_rate": 1.301426064490073e-05, "loss": 0.5333, "step": 15377 }, { "epoch": 0.42053161233865677, "grad_norm": 1.3659796714782715, "learning_rate": 1.3013416123415748e-05, "loss": 0.5349, "step": 15378 }, { "epoch": 0.42055895865237364, "grad_norm": 1.3391222953796387, "learning_rate": 1.3012571578291367e-05, "loss": 0.5285, "step": 15379 }, { "epoch": 0.42058630496609056, "grad_norm": 2.1320223808288574, "learning_rate": 1.3011727009534205e-05, "loss": 0.5028, "step": 15380 }, { "epoch": 0.4206136512798075, "grad_norm": 1.289774775505066, "learning_rate": 1.3010882417150892e-05, "loss": 0.5136, "step": 15381 }, { "epoch": 0.4206409975935244, "grad_norm": 1.259008765220642, "learning_rate": 1.3010037801148056e-05, "loss": 0.5165, "step": 15382 }, { "epoch": 0.4206683439072413, "grad_norm": 1.1869044303894043, "learning_rate": 1.3009193161532319e-05, "loss": 0.4886, "step": 15383 }, { "epoch": 0.4206956902209582, "grad_norm": 1.1393071413040161, "learning_rate": 1.3008348498310308e-05, "loss": 0.5561, "step": 15384 }, { "epoch": 0.42072303653467513, "grad_norm": 1.1967891454696655, "learning_rate": 1.3007503811488653e-05, "loss": 0.5005, "step": 15385 }, { "epoch": 0.42075038284839206, "grad_norm": 1.595193862915039, "learning_rate": 1.3006659101073975e-05, "loss": 0.4953, "step": 15386 }, { "epoch": 0.4207777291621089, "grad_norm": 1.2074270248413086, "learning_rate": 1.3005814367072902e-05, "loss": 0.5206, "step": 15387 }, { "epoch": 0.42080507547582585, "grad_norm": 1.2505955696105957, "learning_rate": 1.3004969609492061e-05, "loss": 0.541, "step": 15388 }, { "epoch": 0.4208324217895428, "grad_norm": 1.3695862293243408, "learning_rate": 1.3004124828338078e-05, "loss": 0.521, "step": 15389 }, { "epoch": 0.4208597681032597, "grad_norm": 1.3519165515899658, "learning_rate": 1.3003280023617586e-05, "loss": 0.8327, "step": 15390 }, { "epoch": 0.42088711441697657, "grad_norm": 1.3583439588546753, "learning_rate": 1.3002435195337202e-05, "loss": 0.5377, "step": 15391 }, { "epoch": 0.4209144607306935, "grad_norm": 1.1425198316574097, "learning_rate": 1.3001590343503563e-05, "loss": 0.4845, "step": 15392 }, { "epoch": 0.4209418070444104, "grad_norm": 1.4658418893814087, "learning_rate": 1.300074546812329e-05, "loss": 0.5083, "step": 15393 }, { "epoch": 0.42096915335812735, "grad_norm": 1.1971994638442993, "learning_rate": 1.2999900569203012e-05, "loss": 0.4525, "step": 15394 }, { "epoch": 0.4209964996718442, "grad_norm": 1.3100337982177734, "learning_rate": 1.2999055646749362e-05, "loss": 0.5308, "step": 15395 }, { "epoch": 0.42102384598556114, "grad_norm": 0.9938175678253174, "learning_rate": 1.2998210700768966e-05, "loss": 0.5078, "step": 15396 }, { "epoch": 0.42105119229927807, "grad_norm": 1.9675854444503784, "learning_rate": 1.2997365731268446e-05, "loss": 0.5252, "step": 15397 }, { "epoch": 0.421078538612995, "grad_norm": 1.1348949670791626, "learning_rate": 1.2996520738254436e-05, "loss": 0.4919, "step": 15398 }, { "epoch": 0.42110588492671186, "grad_norm": 1.4040400981903076, "learning_rate": 1.2995675721733566e-05, "loss": 0.5417, "step": 15399 }, { "epoch": 0.4211332312404288, "grad_norm": 1.3904372453689575, "learning_rate": 1.299483068171246e-05, "loss": 0.5053, "step": 15400 }, { "epoch": 0.4211605775541457, "grad_norm": 1.1567535400390625, "learning_rate": 1.2993985618197752e-05, "loss": 0.5461, "step": 15401 }, { "epoch": 0.42118792386786263, "grad_norm": 1.4015370607376099, "learning_rate": 1.299314053119607e-05, "loss": 0.5149, "step": 15402 }, { "epoch": 0.4212152701815795, "grad_norm": 1.1187206506729126, "learning_rate": 1.2992295420714042e-05, "loss": 0.5038, "step": 15403 }, { "epoch": 0.42124261649529643, "grad_norm": 1.3363990783691406, "learning_rate": 1.29914502867583e-05, "loss": 0.8154, "step": 15404 }, { "epoch": 0.42126996280901335, "grad_norm": 1.3590140342712402, "learning_rate": 1.2990605129335469e-05, "loss": 0.523, "step": 15405 }, { "epoch": 0.4212973091227303, "grad_norm": 1.1787861585617065, "learning_rate": 1.2989759948452188e-05, "loss": 0.533, "step": 15406 }, { "epoch": 0.42132465543644715, "grad_norm": 2.2277615070343018, "learning_rate": 1.2988914744115078e-05, "loss": 0.8201, "step": 15407 }, { "epoch": 0.4213520017501641, "grad_norm": 1.5339032411575317, "learning_rate": 1.298806951633077e-05, "loss": 0.8313, "step": 15408 }, { "epoch": 0.421379348063881, "grad_norm": 1.610568642616272, "learning_rate": 1.2987224265105902e-05, "loss": 0.5519, "step": 15409 }, { "epoch": 0.4214066943775979, "grad_norm": 1.1660298109054565, "learning_rate": 1.2986378990447099e-05, "loss": 0.5646, "step": 15410 }, { "epoch": 0.4214340406913148, "grad_norm": 1.1634924411773682, "learning_rate": 1.2985533692360994e-05, "loss": 0.5199, "step": 15411 }, { "epoch": 0.4214613870050317, "grad_norm": 1.516531229019165, "learning_rate": 1.298468837085422e-05, "loss": 0.5374, "step": 15412 }, { "epoch": 0.42148873331874864, "grad_norm": 1.1499074697494507, "learning_rate": 1.29838430259334e-05, "loss": 0.7892, "step": 15413 }, { "epoch": 0.42151607963246557, "grad_norm": 1.9341166019439697, "learning_rate": 1.2982997657605175e-05, "loss": 0.3951, "step": 15414 }, { "epoch": 0.42154342594618244, "grad_norm": 1.2618721723556519, "learning_rate": 1.298215226587617e-05, "loss": 0.4401, "step": 15415 }, { "epoch": 0.42157077225989936, "grad_norm": 1.2744548320770264, "learning_rate": 1.2981306850753021e-05, "loss": 0.5502, "step": 15416 }, { "epoch": 0.4215981185736163, "grad_norm": 1.2778208255767822, "learning_rate": 1.298046141224236e-05, "loss": 0.5199, "step": 15417 }, { "epoch": 0.4216254648873332, "grad_norm": 5.667043685913086, "learning_rate": 1.2979615950350817e-05, "loss": 0.513, "step": 15418 }, { "epoch": 0.4216528112010501, "grad_norm": 1.1723490953445435, "learning_rate": 1.2978770465085024e-05, "loss": 0.4761, "step": 15419 }, { "epoch": 0.421680157514767, "grad_norm": 1.1629009246826172, "learning_rate": 1.2977924956451617e-05, "loss": 0.5083, "step": 15420 }, { "epoch": 0.42170750382848393, "grad_norm": 1.136297345161438, "learning_rate": 1.2977079424457226e-05, "loss": 0.4895, "step": 15421 }, { "epoch": 0.42173485014220086, "grad_norm": 1.293371319770813, "learning_rate": 1.2976233869108486e-05, "loss": 0.3849, "step": 15422 }, { "epoch": 0.4217621964559177, "grad_norm": 1.6701595783233643, "learning_rate": 1.2975388290412027e-05, "loss": 0.375, "step": 15423 }, { "epoch": 0.42178954276963465, "grad_norm": 1.1786895990371704, "learning_rate": 1.2974542688374485e-05, "loss": 0.4847, "step": 15424 }, { "epoch": 0.4218168890833516, "grad_norm": 3.422816276550293, "learning_rate": 1.2973697063002493e-05, "loss": 0.4212, "step": 15425 }, { "epoch": 0.4218442353970685, "grad_norm": 1.1079421043395996, "learning_rate": 1.2972851414302683e-05, "loss": 0.5126, "step": 15426 }, { "epoch": 0.42187158171078537, "grad_norm": 1.8034149408340454, "learning_rate": 1.2972005742281693e-05, "loss": 0.8169, "step": 15427 }, { "epoch": 0.4218989280245023, "grad_norm": 1.312480092048645, "learning_rate": 1.2971160046946152e-05, "loss": 0.5196, "step": 15428 }, { "epoch": 0.4219262743382192, "grad_norm": 1.3863062858581543, "learning_rate": 1.2970314328302696e-05, "loss": 0.5277, "step": 15429 }, { "epoch": 0.42195362065193615, "grad_norm": 1.2831768989562988, "learning_rate": 1.2969468586357962e-05, "loss": 0.54, "step": 15430 }, { "epoch": 0.421980966965653, "grad_norm": 1.1497544050216675, "learning_rate": 1.2968622821118584e-05, "loss": 0.815, "step": 15431 }, { "epoch": 0.42200831327936994, "grad_norm": 1.350076675415039, "learning_rate": 1.2967777032591194e-05, "loss": 0.8247, "step": 15432 }, { "epoch": 0.42203565959308686, "grad_norm": 1.4102896451950073, "learning_rate": 1.2966931220782429e-05, "loss": 0.5201, "step": 15433 }, { "epoch": 0.4220630059068038, "grad_norm": 1.2882463932037354, "learning_rate": 1.2966085385698919e-05, "loss": 0.4713, "step": 15434 }, { "epoch": 0.42209035222052066, "grad_norm": 1.350157618522644, "learning_rate": 1.2965239527347308e-05, "loss": 0.4402, "step": 15435 }, { "epoch": 0.4221176985342376, "grad_norm": 1.479596734046936, "learning_rate": 1.2964393645734227e-05, "loss": 0.535, "step": 15436 }, { "epoch": 0.4221450448479545, "grad_norm": 1.5470759868621826, "learning_rate": 1.2963547740866311e-05, "loss": 0.5705, "step": 15437 }, { "epoch": 0.42217239116167143, "grad_norm": 1.4731985330581665, "learning_rate": 1.2962701812750198e-05, "loss": 0.4279, "step": 15438 }, { "epoch": 0.4221997374753883, "grad_norm": 1.4049186706542969, "learning_rate": 1.2961855861392523e-05, "loss": 0.8432, "step": 15439 }, { "epoch": 0.42222708378910523, "grad_norm": 1.567075252532959, "learning_rate": 1.2961009886799922e-05, "loss": 0.4638, "step": 15440 }, { "epoch": 0.42225443010282215, "grad_norm": 1.5087472200393677, "learning_rate": 1.2960163888979032e-05, "loss": 0.5235, "step": 15441 }, { "epoch": 0.422281776416539, "grad_norm": 1.342159628868103, "learning_rate": 1.295931786793649e-05, "loss": 0.5323, "step": 15442 }, { "epoch": 0.42230912273025595, "grad_norm": 1.1758722066879272, "learning_rate": 1.2958471823678928e-05, "loss": 0.5284, "step": 15443 }, { "epoch": 0.4223364690439729, "grad_norm": 1.3197845220565796, "learning_rate": 1.2957625756212993e-05, "loss": 0.5291, "step": 15444 }, { "epoch": 0.4223638153576898, "grad_norm": 1.5429675579071045, "learning_rate": 1.2956779665545312e-05, "loss": 0.52, "step": 15445 }, { "epoch": 0.42239116167140667, "grad_norm": 1.4421937465667725, "learning_rate": 1.295593355168253e-05, "loss": 0.5271, "step": 15446 }, { "epoch": 0.4224185079851236, "grad_norm": 1.2916373014450073, "learning_rate": 1.2955087414631278e-05, "loss": 0.4423, "step": 15447 }, { "epoch": 0.4224458542988405, "grad_norm": 1.1892560720443726, "learning_rate": 1.2954241254398202e-05, "loss": 0.4886, "step": 15448 }, { "epoch": 0.42247320061255744, "grad_norm": 1.2531508207321167, "learning_rate": 1.2953395070989931e-05, "loss": 0.5267, "step": 15449 }, { "epoch": 0.4225005469262743, "grad_norm": 1.3207660913467407, "learning_rate": 1.2952548864413106e-05, "loss": 0.5213, "step": 15450 }, { "epoch": 0.42252789323999124, "grad_norm": 1.5807843208312988, "learning_rate": 1.2951702634674366e-05, "loss": 0.5404, "step": 15451 }, { "epoch": 0.42255523955370816, "grad_norm": 1.4996182918548584, "learning_rate": 1.2950856381780353e-05, "loss": 0.4955, "step": 15452 }, { "epoch": 0.4225825858674251, "grad_norm": 1.3876025676727295, "learning_rate": 1.2950010105737698e-05, "loss": 0.4314, "step": 15453 }, { "epoch": 0.42260993218114196, "grad_norm": 1.4701576232910156, "learning_rate": 1.2949163806553045e-05, "loss": 0.5443, "step": 15454 }, { "epoch": 0.4226372784948589, "grad_norm": 1.8890082836151123, "learning_rate": 1.2948317484233033e-05, "loss": 0.5366, "step": 15455 }, { "epoch": 0.4226646248085758, "grad_norm": 1.5586657524108887, "learning_rate": 1.29474711387843e-05, "loss": 0.8247, "step": 15456 }, { "epoch": 0.42269197112229273, "grad_norm": 1.2993898391723633, "learning_rate": 1.2946624770213486e-05, "loss": 0.8598, "step": 15457 }, { "epoch": 0.4227193174360096, "grad_norm": 1.5518397092819214, "learning_rate": 1.2945778378527227e-05, "loss": 0.4201, "step": 15458 }, { "epoch": 0.4227466637497265, "grad_norm": 1.280914068222046, "learning_rate": 1.2944931963732166e-05, "loss": 0.443, "step": 15459 }, { "epoch": 0.42277401006344345, "grad_norm": 1.5359374284744263, "learning_rate": 1.2944085525834946e-05, "loss": 0.4988, "step": 15460 }, { "epoch": 0.4228013563771604, "grad_norm": 1.1695011854171753, "learning_rate": 1.2943239064842202e-05, "loss": 0.5022, "step": 15461 }, { "epoch": 0.42282870269087725, "grad_norm": 1.1391545534133911, "learning_rate": 1.2942392580760573e-05, "loss": 0.4977, "step": 15462 }, { "epoch": 0.42285604900459417, "grad_norm": 1.2354235649108887, "learning_rate": 1.2941546073596706e-05, "loss": 0.4717, "step": 15463 }, { "epoch": 0.4228833953183111, "grad_norm": 1.1639561653137207, "learning_rate": 1.2940699543357236e-05, "loss": 0.5083, "step": 15464 }, { "epoch": 0.422910741632028, "grad_norm": 2.1964266300201416, "learning_rate": 1.2939852990048807e-05, "loss": 0.5017, "step": 15465 }, { "epoch": 0.4229380879457449, "grad_norm": 1.2183078527450562, "learning_rate": 1.293900641367806e-05, "loss": 0.4923, "step": 15466 }, { "epoch": 0.4229654342594618, "grad_norm": 1.6459167003631592, "learning_rate": 1.2938159814251633e-05, "loss": 0.8133, "step": 15467 }, { "epoch": 0.42299278057317874, "grad_norm": 1.1717066764831543, "learning_rate": 1.2937313191776168e-05, "loss": 0.5324, "step": 15468 }, { "epoch": 0.42302012688689566, "grad_norm": 1.358285665512085, "learning_rate": 1.2936466546258308e-05, "loss": 0.5329, "step": 15469 }, { "epoch": 0.42304747320061253, "grad_norm": 1.205925464630127, "learning_rate": 1.2935619877704695e-05, "loss": 0.5355, "step": 15470 }, { "epoch": 0.42307481951432946, "grad_norm": 1.3567920923233032, "learning_rate": 1.293477318612197e-05, "loss": 0.4727, "step": 15471 }, { "epoch": 0.4231021658280464, "grad_norm": 1.1411832571029663, "learning_rate": 1.2933926471516776e-05, "loss": 0.5394, "step": 15472 }, { "epoch": 0.4231295121417633, "grad_norm": 1.5855798721313477, "learning_rate": 1.2933079733895753e-05, "loss": 0.5054, "step": 15473 }, { "epoch": 0.4231568584554802, "grad_norm": 1.1659201383590698, "learning_rate": 1.2932232973265546e-05, "loss": 0.4984, "step": 15474 }, { "epoch": 0.4231842047691971, "grad_norm": 1.2468059062957764, "learning_rate": 1.2931386189632796e-05, "loss": 0.5265, "step": 15475 }, { "epoch": 0.42321155108291403, "grad_norm": 1.1411993503570557, "learning_rate": 1.2930539383004147e-05, "loss": 0.5349, "step": 15476 }, { "epoch": 0.42323889739663095, "grad_norm": 2.6456711292266846, "learning_rate": 1.2929692553386239e-05, "loss": 0.5436, "step": 15477 }, { "epoch": 0.4232662437103478, "grad_norm": 1.1561458110809326, "learning_rate": 1.292884570078572e-05, "loss": 0.4944, "step": 15478 }, { "epoch": 0.42329359002406475, "grad_norm": 1.3000568151474, "learning_rate": 1.2927998825209232e-05, "loss": 0.5523, "step": 15479 }, { "epoch": 0.4233209363377817, "grad_norm": 1.2978096008300781, "learning_rate": 1.2927151926663412e-05, "loss": 0.524, "step": 15480 }, { "epoch": 0.4233482826514986, "grad_norm": 1.423198938369751, "learning_rate": 1.2926305005154913e-05, "loss": 0.5548, "step": 15481 }, { "epoch": 0.42337562896521547, "grad_norm": 1.4174516201019287, "learning_rate": 1.2925458060690375e-05, "loss": 0.4169, "step": 15482 }, { "epoch": 0.4234029752789324, "grad_norm": 1.3200265169143677, "learning_rate": 1.2924611093276442e-05, "loss": 0.4181, "step": 15483 }, { "epoch": 0.4234303215926493, "grad_norm": 1.313969612121582, "learning_rate": 1.2923764102919756e-05, "loss": 0.8258, "step": 15484 }, { "epoch": 0.42345766790636624, "grad_norm": 2.7778100967407227, "learning_rate": 1.2922917089626964e-05, "loss": 0.5314, "step": 15485 }, { "epoch": 0.4234850142200831, "grad_norm": 5.253312587738037, "learning_rate": 1.292207005340471e-05, "loss": 0.5186, "step": 15486 }, { "epoch": 0.42351236053380004, "grad_norm": 1.3238716125488281, "learning_rate": 1.2921222994259641e-05, "loss": 0.4821, "step": 15487 }, { "epoch": 0.42353970684751696, "grad_norm": 1.3647226095199585, "learning_rate": 1.2920375912198394e-05, "loss": 0.5693, "step": 15488 }, { "epoch": 0.4235670531612339, "grad_norm": 1.359679102897644, "learning_rate": 1.2919528807227624e-05, "loss": 0.4907, "step": 15489 }, { "epoch": 0.42359439947495076, "grad_norm": 1.3456976413726807, "learning_rate": 1.2918681679353973e-05, "loss": 0.5171, "step": 15490 }, { "epoch": 0.4236217457886677, "grad_norm": 1.254921793937683, "learning_rate": 1.2917834528584084e-05, "loss": 0.5599, "step": 15491 }, { "epoch": 0.4236490921023846, "grad_norm": 1.6002389192581177, "learning_rate": 1.2916987354924606e-05, "loss": 0.3834, "step": 15492 }, { "epoch": 0.42367643841610153, "grad_norm": 2.073758363723755, "learning_rate": 1.291614015838218e-05, "loss": 0.531, "step": 15493 }, { "epoch": 0.4237037847298184, "grad_norm": 1.432607650756836, "learning_rate": 1.2915292938963455e-05, "loss": 0.5456, "step": 15494 }, { "epoch": 0.4237311310435353, "grad_norm": 1.8574535846710205, "learning_rate": 1.2914445696675081e-05, "loss": 0.5111, "step": 15495 }, { "epoch": 0.42375847735725225, "grad_norm": 1.3432761430740356, "learning_rate": 1.2913598431523697e-05, "loss": 0.5044, "step": 15496 }, { "epoch": 0.4237858236709692, "grad_norm": 1.1511211395263672, "learning_rate": 1.2912751143515955e-05, "loss": 0.4794, "step": 15497 }, { "epoch": 0.42381316998468604, "grad_norm": 1.579987645149231, "learning_rate": 1.29119038326585e-05, "loss": 0.5219, "step": 15498 }, { "epoch": 0.42384051629840297, "grad_norm": 1.267292857170105, "learning_rate": 1.2911056498957978e-05, "loss": 0.5308, "step": 15499 }, { "epoch": 0.4238678626121199, "grad_norm": 1.3851608037948608, "learning_rate": 1.291020914242104e-05, "loss": 0.5344, "step": 15500 }, { "epoch": 0.4238952089258368, "grad_norm": 1.2029619216918945, "learning_rate": 1.2909361763054329e-05, "loss": 0.5093, "step": 15501 }, { "epoch": 0.4239225552395537, "grad_norm": 1.5077089071273804, "learning_rate": 1.2908514360864493e-05, "loss": 0.5381, "step": 15502 }, { "epoch": 0.4239499015532706, "grad_norm": 1.1496050357818604, "learning_rate": 1.2907666935858183e-05, "loss": 0.5241, "step": 15503 }, { "epoch": 0.42397724786698754, "grad_norm": 1.209323763847351, "learning_rate": 1.290681948804204e-05, "loss": 0.5057, "step": 15504 }, { "epoch": 0.42400459418070446, "grad_norm": 1.7492445707321167, "learning_rate": 1.2905972017422717e-05, "loss": 0.462, "step": 15505 }, { "epoch": 0.42403194049442133, "grad_norm": 1.5285117626190186, "learning_rate": 1.2905124524006862e-05, "loss": 0.5024, "step": 15506 }, { "epoch": 0.42405928680813826, "grad_norm": 1.091795802116394, "learning_rate": 1.2904277007801122e-05, "loss": 0.5134, "step": 15507 }, { "epoch": 0.4240866331218552, "grad_norm": 1.2342619895935059, "learning_rate": 1.2903429468812147e-05, "loss": 0.5389, "step": 15508 }, { "epoch": 0.4241139794355721, "grad_norm": 1.1421409845352173, "learning_rate": 1.2902581907046586e-05, "loss": 0.5138, "step": 15509 }, { "epoch": 0.424141325749289, "grad_norm": 1.3021776676177979, "learning_rate": 1.2901734322511083e-05, "loss": 0.5307, "step": 15510 }, { "epoch": 0.4241686720630059, "grad_norm": 1.4636974334716797, "learning_rate": 1.2900886715212295e-05, "loss": 0.7976, "step": 15511 }, { "epoch": 0.4241960183767228, "grad_norm": 1.3213938474655151, "learning_rate": 1.2900039085156866e-05, "loss": 0.502, "step": 15512 }, { "epoch": 0.42422336469043975, "grad_norm": 1.3175228834152222, "learning_rate": 1.2899191432351446e-05, "loss": 0.5272, "step": 15513 }, { "epoch": 0.4242507110041566, "grad_norm": 1.1394221782684326, "learning_rate": 1.2898343756802685e-05, "loss": 0.5243, "step": 15514 }, { "epoch": 0.42427805731787355, "grad_norm": 1.2800800800323486, "learning_rate": 1.2897496058517231e-05, "loss": 0.3388, "step": 15515 }, { "epoch": 0.42430540363159047, "grad_norm": 1.231205940246582, "learning_rate": 1.2896648337501739e-05, "loss": 0.5252, "step": 15516 }, { "epoch": 0.4243327499453074, "grad_norm": 1.2438102960586548, "learning_rate": 1.2895800593762855e-05, "loss": 0.5354, "step": 15517 }, { "epoch": 0.42436009625902427, "grad_norm": 1.1472270488739014, "learning_rate": 1.2894952827307232e-05, "loss": 0.3832, "step": 15518 }, { "epoch": 0.4243874425727412, "grad_norm": 1.2078962326049805, "learning_rate": 1.2894105038141515e-05, "loss": 0.482, "step": 15519 }, { "epoch": 0.4244147888864581, "grad_norm": 1.4607868194580078, "learning_rate": 1.2893257226272361e-05, "loss": 0.5536, "step": 15520 }, { "epoch": 0.42444213520017504, "grad_norm": 1.4840807914733887, "learning_rate": 1.2892409391706419e-05, "loss": 0.4138, "step": 15521 }, { "epoch": 0.4244694815138919, "grad_norm": 1.3470304012298584, "learning_rate": 1.289156153445034e-05, "loss": 0.534, "step": 15522 }, { "epoch": 0.42449682782760884, "grad_norm": 1.3608216047286987, "learning_rate": 1.2890713654510772e-05, "loss": 0.515, "step": 15523 }, { "epoch": 0.42452417414132576, "grad_norm": 1.4593902826309204, "learning_rate": 1.2889865751894369e-05, "loss": 0.5179, "step": 15524 }, { "epoch": 0.4245515204550427, "grad_norm": 1.165553092956543, "learning_rate": 1.288901782660778e-05, "loss": 0.5216, "step": 15525 }, { "epoch": 0.42457886676875956, "grad_norm": 1.2207300662994385, "learning_rate": 1.2888169878657664e-05, "loss": 0.5069, "step": 15526 }, { "epoch": 0.4246062130824765, "grad_norm": 1.3887159824371338, "learning_rate": 1.2887321908050665e-05, "loss": 0.5639, "step": 15527 }, { "epoch": 0.4246335593961934, "grad_norm": 1.4701652526855469, "learning_rate": 1.288647391479344e-05, "loss": 0.5252, "step": 15528 }, { "epoch": 0.42466090570991033, "grad_norm": 1.539513111114502, "learning_rate": 1.288562589889264e-05, "loss": 0.3612, "step": 15529 }, { "epoch": 0.4246882520236272, "grad_norm": 1.4509984254837036, "learning_rate": 1.2884777860354915e-05, "loss": 0.4595, "step": 15530 }, { "epoch": 0.4247155983373441, "grad_norm": 1.559032917022705, "learning_rate": 1.288392979918692e-05, "loss": 0.495, "step": 15531 }, { "epoch": 0.42474294465106105, "grad_norm": 1.5455999374389648, "learning_rate": 1.2883081715395306e-05, "loss": 0.4456, "step": 15532 }, { "epoch": 0.424770290964778, "grad_norm": 1.1768579483032227, "learning_rate": 1.2882233608986732e-05, "loss": 0.5282, "step": 15533 }, { "epoch": 0.42479763727849484, "grad_norm": 1.5636274814605713, "learning_rate": 1.288138547996784e-05, "loss": 0.8319, "step": 15534 }, { "epoch": 0.42482498359221177, "grad_norm": 1.1832034587860107, "learning_rate": 1.2880537328345298e-05, "loss": 0.5246, "step": 15535 }, { "epoch": 0.4248523299059287, "grad_norm": 1.3751786947250366, "learning_rate": 1.2879689154125745e-05, "loss": 0.5237, "step": 15536 }, { "epoch": 0.4248796762196456, "grad_norm": 1.1522713899612427, "learning_rate": 1.2878840957315843e-05, "loss": 0.5186, "step": 15537 }, { "epoch": 0.4249070225333625, "grad_norm": 1.3395720720291138, "learning_rate": 1.2877992737922243e-05, "loss": 0.5125, "step": 15538 }, { "epoch": 0.4249343688470794, "grad_norm": 1.6532933712005615, "learning_rate": 1.2877144495951602e-05, "loss": 0.4947, "step": 15539 }, { "epoch": 0.42496171516079634, "grad_norm": 1.1673839092254639, "learning_rate": 1.2876296231410568e-05, "loss": 0.475, "step": 15540 }, { "epoch": 0.42498906147451326, "grad_norm": 1.2993048429489136, "learning_rate": 1.2875447944305803e-05, "loss": 0.502, "step": 15541 }, { "epoch": 0.42501640778823013, "grad_norm": 1.4663851261138916, "learning_rate": 1.2874599634643956e-05, "loss": 0.5438, "step": 15542 }, { "epoch": 0.42504375410194706, "grad_norm": 1.2439477443695068, "learning_rate": 1.2873751302431685e-05, "loss": 0.5184, "step": 15543 }, { "epoch": 0.425071100415664, "grad_norm": 1.2374765872955322, "learning_rate": 1.2872902947675646e-05, "loss": 0.4926, "step": 15544 }, { "epoch": 0.42509844672938085, "grad_norm": 1.410728096961975, "learning_rate": 1.2872054570382489e-05, "loss": 0.5263, "step": 15545 }, { "epoch": 0.4251257930430978, "grad_norm": 1.5300356149673462, "learning_rate": 1.2871206170558872e-05, "loss": 0.5245, "step": 15546 }, { "epoch": 0.4251531393568147, "grad_norm": 1.2442736625671387, "learning_rate": 1.287035774821145e-05, "loss": 0.5473, "step": 15547 }, { "epoch": 0.4251804856705316, "grad_norm": 2.6891183853149414, "learning_rate": 1.2869509303346882e-05, "loss": 0.5025, "step": 15548 }, { "epoch": 0.4252078319842485, "grad_norm": 1.4620509147644043, "learning_rate": 1.286866083597182e-05, "loss": 0.7823, "step": 15549 }, { "epoch": 0.4252351782979654, "grad_norm": 1.194940209388733, "learning_rate": 1.286781234609292e-05, "loss": 0.508, "step": 15550 }, { "epoch": 0.42526252461168235, "grad_norm": 1.404173731803894, "learning_rate": 1.2866963833716838e-05, "loss": 0.5084, "step": 15551 }, { "epoch": 0.42528987092539927, "grad_norm": 1.1429177522659302, "learning_rate": 1.2866115298850235e-05, "loss": 0.5444, "step": 15552 }, { "epoch": 0.42531721723911614, "grad_norm": 1.171634316444397, "learning_rate": 1.2865266741499765e-05, "loss": 0.512, "step": 15553 }, { "epoch": 0.42534456355283307, "grad_norm": 1.35221266746521, "learning_rate": 1.286441816167208e-05, "loss": 0.5229, "step": 15554 }, { "epoch": 0.42537190986655, "grad_norm": 1.2531416416168213, "learning_rate": 1.2863569559373843e-05, "loss": 0.5214, "step": 15555 }, { "epoch": 0.4253992561802669, "grad_norm": 1.6428459882736206, "learning_rate": 1.2862720934611708e-05, "loss": 0.4991, "step": 15556 }, { "epoch": 0.4254266024939838, "grad_norm": 1.1377137899398804, "learning_rate": 1.2861872287392333e-05, "loss": 0.5296, "step": 15557 }, { "epoch": 0.4254539488077007, "grad_norm": 1.2269744873046875, "learning_rate": 1.2861023617722372e-05, "loss": 0.5342, "step": 15558 }, { "epoch": 0.42548129512141764, "grad_norm": 1.1766934394836426, "learning_rate": 1.2860174925608489e-05, "loss": 0.5051, "step": 15559 }, { "epoch": 0.42550864143513456, "grad_norm": 1.5193312168121338, "learning_rate": 1.285932621105734e-05, "loss": 0.5233, "step": 15560 }, { "epoch": 0.42553598774885143, "grad_norm": 1.3674211502075195, "learning_rate": 1.285847747407558e-05, "loss": 0.5132, "step": 15561 }, { "epoch": 0.42556333406256835, "grad_norm": 1.0377748012542725, "learning_rate": 1.2857628714669867e-05, "loss": 0.506, "step": 15562 }, { "epoch": 0.4255906803762853, "grad_norm": 1.542595624923706, "learning_rate": 1.2856779932846863e-05, "loss": 0.5143, "step": 15563 }, { "epoch": 0.4256180266900022, "grad_norm": 1.3861639499664307, "learning_rate": 1.2855931128613223e-05, "loss": 0.5214, "step": 15564 }, { "epoch": 0.4256453730037191, "grad_norm": 1.4315744638442993, "learning_rate": 1.285508230197561e-05, "loss": 0.5562, "step": 15565 }, { "epoch": 0.425672719317436, "grad_norm": 1.4215023517608643, "learning_rate": 1.2854233452940677e-05, "loss": 0.8593, "step": 15566 }, { "epoch": 0.4257000656311529, "grad_norm": 1.194598913192749, "learning_rate": 1.2853384581515085e-05, "loss": 0.5222, "step": 15567 }, { "epoch": 0.42572741194486985, "grad_norm": 1.3400990962982178, "learning_rate": 1.2852535687705498e-05, "loss": 0.5462, "step": 15568 }, { "epoch": 0.4257547582585867, "grad_norm": 1.2341383695602417, "learning_rate": 1.2851686771518568e-05, "loss": 0.5104, "step": 15569 }, { "epoch": 0.42578210457230364, "grad_norm": 1.2894405126571655, "learning_rate": 1.2850837832960959e-05, "loss": 0.3875, "step": 15570 }, { "epoch": 0.42580945088602057, "grad_norm": 1.2420281171798706, "learning_rate": 1.284998887203933e-05, "loss": 0.5457, "step": 15571 }, { "epoch": 0.4258367971997375, "grad_norm": 1.2051855325698853, "learning_rate": 1.284913988876034e-05, "loss": 0.4977, "step": 15572 }, { "epoch": 0.42586414351345436, "grad_norm": 1.5630275011062622, "learning_rate": 1.2848290883130647e-05, "loss": 0.5245, "step": 15573 }, { "epoch": 0.4258914898271713, "grad_norm": 1.1051223278045654, "learning_rate": 1.2847441855156917e-05, "loss": 0.5526, "step": 15574 }, { "epoch": 0.4259188361408882, "grad_norm": 1.175080418586731, "learning_rate": 1.2846592804845805e-05, "loss": 0.5038, "step": 15575 }, { "epoch": 0.42594618245460514, "grad_norm": 1.315028429031372, "learning_rate": 1.2845743732203974e-05, "loss": 0.5375, "step": 15576 }, { "epoch": 0.425973528768322, "grad_norm": 1.3670483827590942, "learning_rate": 1.2844894637238085e-05, "loss": 0.5219, "step": 15577 }, { "epoch": 0.42600087508203893, "grad_norm": 1.301662802696228, "learning_rate": 1.2844045519954796e-05, "loss": 0.4674, "step": 15578 }, { "epoch": 0.42602822139575586, "grad_norm": 1.2038383483886719, "learning_rate": 1.2843196380360773e-05, "loss": 0.5137, "step": 15579 }, { "epoch": 0.4260555677094728, "grad_norm": 1.1844724416732788, "learning_rate": 1.2842347218462675e-05, "loss": 0.4926, "step": 15580 }, { "epoch": 0.42608291402318965, "grad_norm": 1.1362009048461914, "learning_rate": 1.2841498034267158e-05, "loss": 0.3656, "step": 15581 }, { "epoch": 0.4261102603369066, "grad_norm": 1.4427117109298706, "learning_rate": 1.2840648827780893e-05, "loss": 0.7965, "step": 15582 }, { "epoch": 0.4261376066506235, "grad_norm": 1.2770988941192627, "learning_rate": 1.2839799599010536e-05, "loss": 0.4716, "step": 15583 }, { "epoch": 0.4261649529643404, "grad_norm": 1.2832146883010864, "learning_rate": 1.2838950347962751e-05, "loss": 0.8644, "step": 15584 }, { "epoch": 0.4261922992780573, "grad_norm": 1.6867969036102295, "learning_rate": 1.28381010746442e-05, "loss": 0.5371, "step": 15585 }, { "epoch": 0.4262196455917742, "grad_norm": 1.1793588399887085, "learning_rate": 1.2837251779061544e-05, "loss": 0.5223, "step": 15586 }, { "epoch": 0.42624699190549115, "grad_norm": 1.2496576309204102, "learning_rate": 1.2836402461221447e-05, "loss": 0.8209, "step": 15587 }, { "epoch": 0.42627433821920807, "grad_norm": 1.3886559009552002, "learning_rate": 1.2835553121130571e-05, "loss": 0.4457, "step": 15588 }, { "epoch": 0.42630168453292494, "grad_norm": 1.1740041971206665, "learning_rate": 1.2834703758795576e-05, "loss": 0.489, "step": 15589 }, { "epoch": 0.42632903084664187, "grad_norm": 1.5280271768569946, "learning_rate": 1.2833854374223132e-05, "loss": 0.5479, "step": 15590 }, { "epoch": 0.4263563771603588, "grad_norm": 1.3768761157989502, "learning_rate": 1.2833004967419893e-05, "loss": 0.5107, "step": 15591 }, { "epoch": 0.4263837234740757, "grad_norm": 1.3500691652297974, "learning_rate": 1.283215553839253e-05, "loss": 0.5309, "step": 15592 }, { "epoch": 0.4264110697877926, "grad_norm": 1.3479944467544556, "learning_rate": 1.2831306087147706e-05, "loss": 0.4208, "step": 15593 }, { "epoch": 0.4264384161015095, "grad_norm": 1.7603691816329956, "learning_rate": 1.2830456613692077e-05, "loss": 0.5216, "step": 15594 }, { "epoch": 0.42646576241522643, "grad_norm": 1.505030870437622, "learning_rate": 1.2829607118032317e-05, "loss": 0.4492, "step": 15595 }, { "epoch": 0.42649310872894336, "grad_norm": 1.8908820152282715, "learning_rate": 1.2828757600175084e-05, "loss": 0.524, "step": 15596 }, { "epoch": 0.42652045504266023, "grad_norm": 1.270939588546753, "learning_rate": 1.2827908060127044e-05, "loss": 0.4997, "step": 15597 }, { "epoch": 0.42654780135637715, "grad_norm": 1.3887627124786377, "learning_rate": 1.2827058497894862e-05, "loss": 0.4373, "step": 15598 }, { "epoch": 0.4265751476700941, "grad_norm": 1.1924155950546265, "learning_rate": 1.2826208913485199e-05, "loss": 0.5474, "step": 15599 }, { "epoch": 0.426602493983811, "grad_norm": 1.4177043437957764, "learning_rate": 1.2825359306904725e-05, "loss": 0.4937, "step": 15600 }, { "epoch": 0.4266298402975279, "grad_norm": 1.1475259065628052, "learning_rate": 1.2824509678160099e-05, "loss": 0.4932, "step": 15601 }, { "epoch": 0.4266571866112448, "grad_norm": 1.2545607089996338, "learning_rate": 1.2823660027257994e-05, "loss": 0.8242, "step": 15602 }, { "epoch": 0.4266845329249617, "grad_norm": 1.426695704460144, "learning_rate": 1.2822810354205068e-05, "loss": 0.5493, "step": 15603 }, { "epoch": 0.42671187923867865, "grad_norm": 1.2889145612716675, "learning_rate": 1.282196065900799e-05, "loss": 0.5277, "step": 15604 }, { "epoch": 0.4267392255523955, "grad_norm": 1.4394651651382446, "learning_rate": 1.2821110941673425e-05, "loss": 0.525, "step": 15605 }, { "epoch": 0.42676657186611244, "grad_norm": 1.2245124578475952, "learning_rate": 1.282026120220804e-05, "loss": 0.4175, "step": 15606 }, { "epoch": 0.42679391817982937, "grad_norm": 1.0975244045257568, "learning_rate": 1.2819411440618497e-05, "loss": 0.564, "step": 15607 }, { "epoch": 0.4268212644935463, "grad_norm": 1.5660054683685303, "learning_rate": 1.2818561656911465e-05, "loss": 0.551, "step": 15608 }, { "epoch": 0.42684861080726316, "grad_norm": 1.4655605554580688, "learning_rate": 1.2817711851093612e-05, "loss": 0.5395, "step": 15609 }, { "epoch": 0.4268759571209801, "grad_norm": 1.8015177249908447, "learning_rate": 1.28168620231716e-05, "loss": 0.373, "step": 15610 }, { "epoch": 0.426903303434697, "grad_norm": 1.2297723293304443, "learning_rate": 1.28160121731521e-05, "loss": 0.5283, "step": 15611 }, { "epoch": 0.42693064974841394, "grad_norm": 1.2376482486724854, "learning_rate": 1.2815162301041774e-05, "loss": 0.4379, "step": 15612 }, { "epoch": 0.4269579960621308, "grad_norm": 1.3214069604873657, "learning_rate": 1.2814312406847295e-05, "loss": 0.5184, "step": 15613 }, { "epoch": 0.42698534237584773, "grad_norm": 1.2419787645339966, "learning_rate": 1.2813462490575326e-05, "loss": 0.5362, "step": 15614 }, { "epoch": 0.42701268868956466, "grad_norm": 1.3825675249099731, "learning_rate": 1.2812612552232535e-05, "loss": 0.5572, "step": 15615 }, { "epoch": 0.4270400350032816, "grad_norm": 1.2289930582046509, "learning_rate": 1.2811762591825592e-05, "loss": 0.5459, "step": 15616 }, { "epoch": 0.42706738131699845, "grad_norm": 1.298408031463623, "learning_rate": 1.2810912609361161e-05, "loss": 0.555, "step": 15617 }, { "epoch": 0.4270947276307154, "grad_norm": 1.1795767545700073, "learning_rate": 1.2810062604845911e-05, "loss": 0.5102, "step": 15618 }, { "epoch": 0.4271220739444323, "grad_norm": 1.1945204734802246, "learning_rate": 1.2809212578286515e-05, "loss": 0.5425, "step": 15619 }, { "epoch": 0.4271494202581492, "grad_norm": 1.1751797199249268, "learning_rate": 1.2808362529689633e-05, "loss": 0.508, "step": 15620 }, { "epoch": 0.4271767665718661, "grad_norm": 1.1001116037368774, "learning_rate": 1.2807512459061937e-05, "loss": 0.5074, "step": 15621 }, { "epoch": 0.427204112885583, "grad_norm": 1.5917346477508545, "learning_rate": 1.2806662366410098e-05, "loss": 0.5194, "step": 15622 }, { "epoch": 0.42723145919929995, "grad_norm": 1.2392557859420776, "learning_rate": 1.2805812251740782e-05, "loss": 0.5347, "step": 15623 }, { "epoch": 0.42725880551301687, "grad_norm": 1.3332079648971558, "learning_rate": 1.280496211506066e-05, "loss": 0.5315, "step": 15624 }, { "epoch": 0.42728615182673374, "grad_norm": 1.213044285774231, "learning_rate": 1.2804111956376397e-05, "loss": 0.5216, "step": 15625 }, { "epoch": 0.42731349814045066, "grad_norm": 1.2402431964874268, "learning_rate": 1.2803261775694667e-05, "loss": 0.5007, "step": 15626 }, { "epoch": 0.4273408444541676, "grad_norm": 0.956289529800415, "learning_rate": 1.2802411573022134e-05, "loss": 0.5152, "step": 15627 }, { "epoch": 0.4273681907678845, "grad_norm": 1.208454966545105, "learning_rate": 1.2801561348365474e-05, "loss": 0.5472, "step": 15628 }, { "epoch": 0.4273955370816014, "grad_norm": 0.9983574151992798, "learning_rate": 1.2800711101731352e-05, "loss": 0.5233, "step": 15629 }, { "epoch": 0.4274228833953183, "grad_norm": 1.6078866720199585, "learning_rate": 1.2799860833126441e-05, "loss": 0.5143, "step": 15630 }, { "epoch": 0.42745022970903523, "grad_norm": 1.3193103075027466, "learning_rate": 1.2799010542557406e-05, "loss": 0.5217, "step": 15631 }, { "epoch": 0.42747757602275216, "grad_norm": 1.3829563856124878, "learning_rate": 1.2798160230030926e-05, "loss": 0.5197, "step": 15632 }, { "epoch": 0.42750492233646903, "grad_norm": 1.315222978591919, "learning_rate": 1.2797309895553664e-05, "loss": 0.8267, "step": 15633 }, { "epoch": 0.42753226865018595, "grad_norm": 1.4193285703659058, "learning_rate": 1.2796459539132293e-05, "loss": 0.518, "step": 15634 }, { "epoch": 0.4275596149639029, "grad_norm": 1.2853724956512451, "learning_rate": 1.2795609160773484e-05, "loss": 0.5004, "step": 15635 }, { "epoch": 0.4275869612776198, "grad_norm": 1.3753817081451416, "learning_rate": 1.2794758760483907e-05, "loss": 0.8373, "step": 15636 }, { "epoch": 0.4276143075913367, "grad_norm": 1.6796848773956299, "learning_rate": 1.2793908338270236e-05, "loss": 0.5615, "step": 15637 }, { "epoch": 0.4276416539050536, "grad_norm": 1.2007218599319458, "learning_rate": 1.279305789413914e-05, "loss": 0.5127, "step": 15638 }, { "epoch": 0.4276690002187705, "grad_norm": 1.1463825702667236, "learning_rate": 1.2792207428097289e-05, "loss": 0.5272, "step": 15639 }, { "epoch": 0.42769634653248745, "grad_norm": 1.200708031654358, "learning_rate": 1.2791356940151359e-05, "loss": 0.5265, "step": 15640 }, { "epoch": 0.4277236928462043, "grad_norm": 1.374193549156189, "learning_rate": 1.2790506430308021e-05, "loss": 0.524, "step": 15641 }, { "epoch": 0.42775103915992124, "grad_norm": 1.3306556940078735, "learning_rate": 1.278965589857394e-05, "loss": 0.5269, "step": 15642 }, { "epoch": 0.42777838547363817, "grad_norm": 1.2907623052597046, "learning_rate": 1.2788805344955796e-05, "loss": 0.4907, "step": 15643 }, { "epoch": 0.42780573178735504, "grad_norm": 1.1933856010437012, "learning_rate": 1.278795476946026e-05, "loss": 0.4868, "step": 15644 }, { "epoch": 0.42783307810107196, "grad_norm": 1.105292797088623, "learning_rate": 1.2787104172094002e-05, "loss": 0.522, "step": 15645 }, { "epoch": 0.4278604244147889, "grad_norm": 1.1799306869506836, "learning_rate": 1.2786253552863697e-05, "loss": 0.5214, "step": 15646 }, { "epoch": 0.4278877707285058, "grad_norm": 1.1984035968780518, "learning_rate": 1.2785402911776017e-05, "loss": 0.5072, "step": 15647 }, { "epoch": 0.4279151170422227, "grad_norm": 1.2920494079589844, "learning_rate": 1.2784552248837634e-05, "loss": 0.5229, "step": 15648 }, { "epoch": 0.4279424633559396, "grad_norm": 1.4217618703842163, "learning_rate": 1.2783701564055223e-05, "loss": 0.528, "step": 15649 }, { "epoch": 0.42796980966965653, "grad_norm": 1.1954424381256104, "learning_rate": 1.2782850857435456e-05, "loss": 0.5267, "step": 15650 }, { "epoch": 0.42799715598337346, "grad_norm": 3.241356611251831, "learning_rate": 1.2782000128985006e-05, "loss": 0.526, "step": 15651 }, { "epoch": 0.4280245022970903, "grad_norm": 1.070550799369812, "learning_rate": 1.2781149378710551e-05, "loss": 0.5076, "step": 15652 }, { "epoch": 0.42805184861080725, "grad_norm": 1.1889501810073853, "learning_rate": 1.2780298606618762e-05, "loss": 0.5299, "step": 15653 }, { "epoch": 0.4280791949245242, "grad_norm": 1.1558480262756348, "learning_rate": 1.2779447812716312e-05, "loss": 0.4949, "step": 15654 }, { "epoch": 0.4281065412382411, "grad_norm": 1.285825252532959, "learning_rate": 1.2778596997009874e-05, "loss": 0.3887, "step": 15655 }, { "epoch": 0.42813388755195797, "grad_norm": 1.3582420349121094, "learning_rate": 1.2777746159506123e-05, "loss": 0.545, "step": 15656 }, { "epoch": 0.4281612338656749, "grad_norm": 1.1947427988052368, "learning_rate": 1.2776895300211742e-05, "loss": 0.5002, "step": 15657 }, { "epoch": 0.4281885801793918, "grad_norm": 1.1983650922775269, "learning_rate": 1.2776044419133393e-05, "loss": 0.5123, "step": 15658 }, { "epoch": 0.42821592649310875, "grad_norm": 1.6233681440353394, "learning_rate": 1.2775193516277759e-05, "loss": 0.5149, "step": 15659 }, { "epoch": 0.4282432728068256, "grad_norm": 1.228857398033142, "learning_rate": 1.2774342591651516e-05, "loss": 0.4637, "step": 15660 }, { "epoch": 0.42827061912054254, "grad_norm": 1.2619229555130005, "learning_rate": 1.2773491645261332e-05, "loss": 0.5174, "step": 15661 }, { "epoch": 0.42829796543425946, "grad_norm": 1.2580089569091797, "learning_rate": 1.2772640677113886e-05, "loss": 0.5161, "step": 15662 }, { "epoch": 0.4283253117479764, "grad_norm": 2.8144760131835938, "learning_rate": 1.2771789687215857e-05, "loss": 0.8248, "step": 15663 }, { "epoch": 0.42835265806169326, "grad_norm": 1.984387755393982, "learning_rate": 1.2770938675573915e-05, "loss": 0.4108, "step": 15664 }, { "epoch": 0.4283800043754102, "grad_norm": 1.5135656595230103, "learning_rate": 1.2770087642194742e-05, "loss": 0.8463, "step": 15665 }, { "epoch": 0.4284073506891271, "grad_norm": 1.193019986152649, "learning_rate": 1.2769236587085007e-05, "loss": 0.5044, "step": 15666 }, { "epoch": 0.42843469700284403, "grad_norm": 1.477731704711914, "learning_rate": 1.2768385510251392e-05, "loss": 0.5284, "step": 15667 }, { "epoch": 0.4284620433165609, "grad_norm": 1.2117481231689453, "learning_rate": 1.2767534411700573e-05, "loss": 0.4902, "step": 15668 }, { "epoch": 0.42848938963027783, "grad_norm": 1.501856803894043, "learning_rate": 1.2766683291439225e-05, "loss": 0.8246, "step": 15669 }, { "epoch": 0.42851673594399475, "grad_norm": 1.3296256065368652, "learning_rate": 1.2765832149474025e-05, "loss": 0.5483, "step": 15670 }, { "epoch": 0.4285440822577117, "grad_norm": 1.409029483795166, "learning_rate": 1.276498098581165e-05, "loss": 0.5444, "step": 15671 }, { "epoch": 0.42857142857142855, "grad_norm": 1.6480640172958374, "learning_rate": 1.2764129800458778e-05, "loss": 0.499, "step": 15672 }, { "epoch": 0.4285987748851455, "grad_norm": 1.2286978960037231, "learning_rate": 1.2763278593422086e-05, "loss": 0.5266, "step": 15673 }, { "epoch": 0.4286261211988624, "grad_norm": 1.6399006843566895, "learning_rate": 1.2762427364708247e-05, "loss": 0.5182, "step": 15674 }, { "epoch": 0.4286534675125793, "grad_norm": 1.2402665615081787, "learning_rate": 1.2761576114323946e-05, "loss": 0.5373, "step": 15675 }, { "epoch": 0.4286808138262962, "grad_norm": 1.5612362623214722, "learning_rate": 1.2760724842275861e-05, "loss": 0.5475, "step": 15676 }, { "epoch": 0.4287081601400131, "grad_norm": 1.26643967628479, "learning_rate": 1.2759873548570662e-05, "loss": 0.5082, "step": 15677 }, { "epoch": 0.42873550645373004, "grad_norm": 1.415327787399292, "learning_rate": 1.2759022233215033e-05, "loss": 0.5299, "step": 15678 }, { "epoch": 0.42876285276744697, "grad_norm": 2.173762321472168, "learning_rate": 1.2758170896215652e-05, "loss": 0.5278, "step": 15679 }, { "epoch": 0.42879019908116384, "grad_norm": 1.313781976699829, "learning_rate": 1.2757319537579193e-05, "loss": 0.4289, "step": 15680 }, { "epoch": 0.42881754539488076, "grad_norm": 1.541194200515747, "learning_rate": 1.2756468157312341e-05, "loss": 0.5528, "step": 15681 }, { "epoch": 0.4288448917085977, "grad_norm": 1.678147315979004, "learning_rate": 1.2755616755421771e-05, "loss": 0.8377, "step": 15682 }, { "epoch": 0.4288722380223146, "grad_norm": 1.1175081729888916, "learning_rate": 1.2754765331914163e-05, "loss": 0.523, "step": 15683 }, { "epoch": 0.4288995843360315, "grad_norm": 1.483663558959961, "learning_rate": 1.2753913886796198e-05, "loss": 0.3527, "step": 15684 }, { "epoch": 0.4289269306497484, "grad_norm": 1.2598594427108765, "learning_rate": 1.2753062420074551e-05, "loss": 0.4298, "step": 15685 }, { "epoch": 0.42895427696346533, "grad_norm": 1.3655327558517456, "learning_rate": 1.2752210931755905e-05, "loss": 0.5384, "step": 15686 }, { "epoch": 0.42898162327718226, "grad_norm": 1.2868940830230713, "learning_rate": 1.275135942184694e-05, "loss": 0.5362, "step": 15687 }, { "epoch": 0.4290089695908991, "grad_norm": 1.361261010169983, "learning_rate": 1.2750507890354334e-05, "loss": 0.5104, "step": 15688 }, { "epoch": 0.42903631590461605, "grad_norm": 1.1731096506118774, "learning_rate": 1.2749656337284768e-05, "loss": 0.5178, "step": 15689 }, { "epoch": 0.429063662218333, "grad_norm": 3.211552381515503, "learning_rate": 1.2748804762644921e-05, "loss": 0.404, "step": 15690 }, { "epoch": 0.4290910085320499, "grad_norm": 1.5256896018981934, "learning_rate": 1.2747953166441476e-05, "loss": 0.4307, "step": 15691 }, { "epoch": 0.42911835484576677, "grad_norm": 1.2754909992218018, "learning_rate": 1.274710154868111e-05, "loss": 0.4776, "step": 15692 }, { "epoch": 0.4291457011594837, "grad_norm": 1.3300597667694092, "learning_rate": 1.2746249909370505e-05, "loss": 0.8126, "step": 15693 }, { "epoch": 0.4291730474732006, "grad_norm": 1.3841458559036255, "learning_rate": 1.2745398248516346e-05, "loss": 0.4879, "step": 15694 }, { "epoch": 0.42920039378691754, "grad_norm": 1.2941709756851196, "learning_rate": 1.2744546566125306e-05, "loss": 0.5091, "step": 15695 }, { "epoch": 0.4292277401006344, "grad_norm": 1.4677997827529907, "learning_rate": 1.2743694862204073e-05, "loss": 0.5126, "step": 15696 }, { "epoch": 0.42925508641435134, "grad_norm": 1.2979341745376587, "learning_rate": 1.2742843136759324e-05, "loss": 0.7881, "step": 15697 }, { "epoch": 0.42928243272806826, "grad_norm": 1.2784572839736938, "learning_rate": 1.2741991389797743e-05, "loss": 0.549, "step": 15698 }, { "epoch": 0.4293097790417852, "grad_norm": 1.3155725002288818, "learning_rate": 1.2741139621326011e-05, "loss": 0.5257, "step": 15699 }, { "epoch": 0.42933712535550206, "grad_norm": 1.2756237983703613, "learning_rate": 1.2740287831350813e-05, "loss": 0.5318, "step": 15700 }, { "epoch": 0.429364471669219, "grad_norm": 1.3257051706314087, "learning_rate": 1.2739436019878825e-05, "loss": 0.3896, "step": 15701 }, { "epoch": 0.4293918179829359, "grad_norm": 1.4058418273925781, "learning_rate": 1.2738584186916732e-05, "loss": 0.4408, "step": 15702 }, { "epoch": 0.42941916429665283, "grad_norm": 1.7185612916946411, "learning_rate": 1.273773233247122e-05, "loss": 0.4464, "step": 15703 }, { "epoch": 0.4294465106103697, "grad_norm": 1.2202494144439697, "learning_rate": 1.2736880456548965e-05, "loss": 0.5204, "step": 15704 }, { "epoch": 0.4294738569240866, "grad_norm": 1.367515206336975, "learning_rate": 1.2736028559156655e-05, "loss": 0.5388, "step": 15705 }, { "epoch": 0.42950120323780355, "grad_norm": 1.1887015104293823, "learning_rate": 1.273517664030097e-05, "loss": 0.4918, "step": 15706 }, { "epoch": 0.4295285495515205, "grad_norm": 1.992445468902588, "learning_rate": 1.2734324699988593e-05, "loss": 0.5852, "step": 15707 }, { "epoch": 0.42955589586523735, "grad_norm": 1.2095359563827515, "learning_rate": 1.2733472738226211e-05, "loss": 0.5334, "step": 15708 }, { "epoch": 0.42958324217895427, "grad_norm": 1.3903321027755737, "learning_rate": 1.2732620755020504e-05, "loss": 0.5146, "step": 15709 }, { "epoch": 0.4296105884926712, "grad_norm": 1.4030178785324097, "learning_rate": 1.2731768750378155e-05, "loss": 0.5272, "step": 15710 }, { "epoch": 0.4296379348063881, "grad_norm": 1.3916714191436768, "learning_rate": 1.2730916724305851e-05, "loss": 0.4347, "step": 15711 }, { "epoch": 0.429665281120105, "grad_norm": 1.384179711341858, "learning_rate": 1.2730064676810272e-05, "loss": 0.5693, "step": 15712 }, { "epoch": 0.4296926274338219, "grad_norm": 1.3730285167694092, "learning_rate": 1.2729212607898102e-05, "loss": 0.4581, "step": 15713 }, { "epoch": 0.42971997374753884, "grad_norm": 1.208251953125, "learning_rate": 1.2728360517576032e-05, "loss": 0.5056, "step": 15714 }, { "epoch": 0.42974732006125577, "grad_norm": 1.0311508178710938, "learning_rate": 1.2727508405850739e-05, "loss": 0.5143, "step": 15715 }, { "epoch": 0.42977466637497264, "grad_norm": 1.7642756700515747, "learning_rate": 1.2726656272728908e-05, "loss": 0.5079, "step": 15716 }, { "epoch": 0.42980201268868956, "grad_norm": 1.2942441701889038, "learning_rate": 1.272580411821723e-05, "loss": 0.5028, "step": 15717 }, { "epoch": 0.4298293590024065, "grad_norm": 1.5022860765457153, "learning_rate": 1.272495194232238e-05, "loss": 0.5411, "step": 15718 }, { "epoch": 0.4298567053161234, "grad_norm": 1.2856556177139282, "learning_rate": 1.2724099745051058e-05, "loss": 0.4818, "step": 15719 }, { "epoch": 0.4298840516298403, "grad_norm": 1.1913464069366455, "learning_rate": 1.2723247526409932e-05, "loss": 0.5358, "step": 15720 }, { "epoch": 0.4299113979435572, "grad_norm": 1.6023170948028564, "learning_rate": 1.2722395286405697e-05, "loss": 0.5374, "step": 15721 }, { "epoch": 0.42993874425727413, "grad_norm": 1.1503106355667114, "learning_rate": 1.272154302504504e-05, "loss": 0.54, "step": 15722 }, { "epoch": 0.42996609057099106, "grad_norm": 1.169364094734192, "learning_rate": 1.2720690742334641e-05, "loss": 0.4016, "step": 15723 }, { "epoch": 0.4299934368847079, "grad_norm": 1.256098747253418, "learning_rate": 1.2719838438281188e-05, "loss": 0.5368, "step": 15724 }, { "epoch": 0.43002078319842485, "grad_norm": 1.1920263767242432, "learning_rate": 1.2718986112891374e-05, "loss": 0.5019, "step": 15725 }, { "epoch": 0.4300481295121418, "grad_norm": 1.3007347583770752, "learning_rate": 1.2718133766171872e-05, "loss": 0.5137, "step": 15726 }, { "epoch": 0.4300754758258587, "grad_norm": 1.4153028726577759, "learning_rate": 1.2717281398129382e-05, "loss": 0.5177, "step": 15727 }, { "epoch": 0.43010282213957557, "grad_norm": 1.6190673112869263, "learning_rate": 1.2716429008770579e-05, "loss": 0.5406, "step": 15728 }, { "epoch": 0.4301301684532925, "grad_norm": 1.286976933479309, "learning_rate": 1.2715576598102158e-05, "loss": 0.4837, "step": 15729 }, { "epoch": 0.4301575147670094, "grad_norm": 1.5284160375595093, "learning_rate": 1.2714724166130802e-05, "loss": 0.5163, "step": 15730 }, { "epoch": 0.43018486108072634, "grad_norm": 1.1309545040130615, "learning_rate": 1.2713871712863197e-05, "loss": 0.495, "step": 15731 }, { "epoch": 0.4302122073944432, "grad_norm": 1.0718159675598145, "learning_rate": 1.2713019238306035e-05, "loss": 0.5186, "step": 15732 }, { "epoch": 0.43023955370816014, "grad_norm": 1.655587077140808, "learning_rate": 1.2712166742465998e-05, "loss": 0.4253, "step": 15733 }, { "epoch": 0.43026690002187706, "grad_norm": 1.3761539459228516, "learning_rate": 1.2711314225349778e-05, "loss": 0.4879, "step": 15734 }, { "epoch": 0.430294246335594, "grad_norm": 1.3079211711883545, "learning_rate": 1.2710461686964059e-05, "loss": 0.5593, "step": 15735 }, { "epoch": 0.43032159264931086, "grad_norm": 1.804384469985962, "learning_rate": 1.2709609127315532e-05, "loss": 0.5106, "step": 15736 }, { "epoch": 0.4303489389630278, "grad_norm": 1.529929518699646, "learning_rate": 1.2708756546410884e-05, "loss": 0.4597, "step": 15737 }, { "epoch": 0.4303762852767447, "grad_norm": 1.3197290897369385, "learning_rate": 1.2707903944256806e-05, "loss": 0.5557, "step": 15738 }, { "epoch": 0.43040363159046163, "grad_norm": 1.1833434104919434, "learning_rate": 1.2707051320859978e-05, "loss": 0.4956, "step": 15739 }, { "epoch": 0.4304309779041785, "grad_norm": 1.2384766340255737, "learning_rate": 1.2706198676227097e-05, "loss": 0.5031, "step": 15740 }, { "epoch": 0.4304583242178954, "grad_norm": 1.5818133354187012, "learning_rate": 1.270534601036485e-05, "loss": 0.4901, "step": 15741 }, { "epoch": 0.43048567053161235, "grad_norm": 1.4355441331863403, "learning_rate": 1.2704493323279922e-05, "loss": 0.4526, "step": 15742 }, { "epoch": 0.4305130168453293, "grad_norm": 1.186509370803833, "learning_rate": 1.2703640614979009e-05, "loss": 0.5508, "step": 15743 }, { "epoch": 0.43054036315904615, "grad_norm": 1.107344388961792, "learning_rate": 1.2702787885468794e-05, "loss": 0.5166, "step": 15744 }, { "epoch": 0.43056770947276307, "grad_norm": 6.211559295654297, "learning_rate": 1.2701935134755968e-05, "loss": 0.8411, "step": 15745 }, { "epoch": 0.43059505578648, "grad_norm": 1.4310426712036133, "learning_rate": 1.2701082362847224e-05, "loss": 0.4234, "step": 15746 }, { "epoch": 0.43062240210019687, "grad_norm": 1.5794340372085571, "learning_rate": 1.2700229569749248e-05, "loss": 0.3914, "step": 15747 }, { "epoch": 0.4306497484139138, "grad_norm": 1.3789353370666504, "learning_rate": 1.2699376755468728e-05, "loss": 0.5111, "step": 15748 }, { "epoch": 0.4306770947276307, "grad_norm": 2.9396021366119385, "learning_rate": 1.2698523920012362e-05, "loss": 0.8194, "step": 15749 }, { "epoch": 0.43070444104134764, "grad_norm": 1.374864101409912, "learning_rate": 1.2697671063386832e-05, "loss": 0.4338, "step": 15750 }, { "epoch": 0.4307317873550645, "grad_norm": 1.500123143196106, "learning_rate": 1.269681818559883e-05, "loss": 0.528, "step": 15751 }, { "epoch": 0.43075913366878144, "grad_norm": 3.125574827194214, "learning_rate": 1.2695965286655053e-05, "loss": 0.5578, "step": 15752 }, { "epoch": 0.43078647998249836, "grad_norm": 1.574867844581604, "learning_rate": 1.2695112366562184e-05, "loss": 0.4421, "step": 15753 }, { "epoch": 0.4308138262962153, "grad_norm": 1.9258906841278076, "learning_rate": 1.2694259425326916e-05, "loss": 0.7968, "step": 15754 }, { "epoch": 0.43084117260993215, "grad_norm": 1.261525273323059, "learning_rate": 1.2693406462955941e-05, "loss": 0.5245, "step": 15755 }, { "epoch": 0.4308685189236491, "grad_norm": 1.2086094617843628, "learning_rate": 1.2692553479455952e-05, "loss": 0.4942, "step": 15756 }, { "epoch": 0.430895865237366, "grad_norm": 1.3891332149505615, "learning_rate": 1.2691700474833639e-05, "loss": 0.5373, "step": 15757 }, { "epoch": 0.43092321155108293, "grad_norm": 2.0763542652130127, "learning_rate": 1.2690847449095692e-05, "loss": 0.4496, "step": 15758 }, { "epoch": 0.4309505578647998, "grad_norm": 8.953393936157227, "learning_rate": 1.2689994402248804e-05, "loss": 0.514, "step": 15759 }, { "epoch": 0.4309779041785167, "grad_norm": 1.3003230094909668, "learning_rate": 1.2689141334299668e-05, "loss": 0.5015, "step": 15760 }, { "epoch": 0.43100525049223365, "grad_norm": 1.3054969310760498, "learning_rate": 1.2688288245254974e-05, "loss": 0.5505, "step": 15761 }, { "epoch": 0.4310325968059506, "grad_norm": 1.7303036451339722, "learning_rate": 1.2687435135121417e-05, "loss": 0.3931, "step": 15762 }, { "epoch": 0.43105994311966744, "grad_norm": 1.6800720691680908, "learning_rate": 1.2686582003905688e-05, "loss": 0.5914, "step": 15763 }, { "epoch": 0.43108728943338437, "grad_norm": 1.5032249689102173, "learning_rate": 1.2685728851614478e-05, "loss": 0.549, "step": 15764 }, { "epoch": 0.4311146357471013, "grad_norm": 5.827508449554443, "learning_rate": 1.2684875678254483e-05, "loss": 0.3975, "step": 15765 }, { "epoch": 0.4311419820608182, "grad_norm": 1.5964527130126953, "learning_rate": 1.268402248383239e-05, "loss": 0.5335, "step": 15766 }, { "epoch": 0.4311693283745351, "grad_norm": 1.1246669292449951, "learning_rate": 1.2683169268354897e-05, "loss": 0.5089, "step": 15767 }, { "epoch": 0.431196674688252, "grad_norm": 1.712471842765808, "learning_rate": 1.2682316031828699e-05, "loss": 0.4777, "step": 15768 }, { "epoch": 0.43122402100196894, "grad_norm": 1.487117886543274, "learning_rate": 1.2681462774260482e-05, "loss": 0.8091, "step": 15769 }, { "epoch": 0.43125136731568586, "grad_norm": 1.1964855194091797, "learning_rate": 1.2680609495656946e-05, "loss": 0.5146, "step": 15770 }, { "epoch": 0.43127871362940273, "grad_norm": 1.3183155059814453, "learning_rate": 1.2679756196024786e-05, "loss": 0.5143, "step": 15771 }, { "epoch": 0.43130605994311966, "grad_norm": 1.3774513006210327, "learning_rate": 1.2678902875370689e-05, "loss": 0.5129, "step": 15772 }, { "epoch": 0.4313334062568366, "grad_norm": 1.368309736251831, "learning_rate": 1.2678049533701356e-05, "loss": 0.5105, "step": 15773 }, { "epoch": 0.4313607525705535, "grad_norm": 1.11027991771698, "learning_rate": 1.2677196171023476e-05, "loss": 0.5056, "step": 15774 }, { "epoch": 0.4313880988842704, "grad_norm": 1.1715490818023682, "learning_rate": 1.2676342787343742e-05, "loss": 0.53, "step": 15775 }, { "epoch": 0.4314154451979873, "grad_norm": 1.3623536825180054, "learning_rate": 1.2675489382668858e-05, "loss": 0.5267, "step": 15776 }, { "epoch": 0.4314427915117042, "grad_norm": 1.5090540647506714, "learning_rate": 1.267463595700551e-05, "loss": 0.5234, "step": 15777 }, { "epoch": 0.43147013782542115, "grad_norm": 1.3526278734207153, "learning_rate": 1.2673782510360395e-05, "loss": 0.5284, "step": 15778 }, { "epoch": 0.431497484139138, "grad_norm": 1.1173592805862427, "learning_rate": 1.2672929042740211e-05, "loss": 0.5168, "step": 15779 }, { "epoch": 0.43152483045285495, "grad_norm": 1.1637437343597412, "learning_rate": 1.267207555415165e-05, "loss": 0.5287, "step": 15780 }, { "epoch": 0.43155217676657187, "grad_norm": 1.1404554843902588, "learning_rate": 1.2671222044601408e-05, "loss": 0.5267, "step": 15781 }, { "epoch": 0.4315795230802888, "grad_norm": 1.465814471244812, "learning_rate": 1.2670368514096184e-05, "loss": 0.523, "step": 15782 }, { "epoch": 0.43160686939400567, "grad_norm": 1.8499755859375, "learning_rate": 1.2669514962642664e-05, "loss": 0.4283, "step": 15783 }, { "epoch": 0.4316342157077226, "grad_norm": 1.1454920768737793, "learning_rate": 1.2668661390247557e-05, "loss": 0.5319, "step": 15784 }, { "epoch": 0.4316615620214395, "grad_norm": 1.233643889427185, "learning_rate": 1.2667807796917547e-05, "loss": 0.5279, "step": 15785 }, { "epoch": 0.43168890833515644, "grad_norm": 2.1204137802124023, "learning_rate": 1.2666954182659336e-05, "loss": 0.5464, "step": 15786 }, { "epoch": 0.4317162546488733, "grad_norm": 1.4553362131118774, "learning_rate": 1.2666100547479622e-05, "loss": 0.4349, "step": 15787 }, { "epoch": 0.43174360096259023, "grad_norm": 1.4670896530151367, "learning_rate": 1.26652468913851e-05, "loss": 0.5416, "step": 15788 }, { "epoch": 0.43177094727630716, "grad_norm": 1.2555689811706543, "learning_rate": 1.2664393214382465e-05, "loss": 0.5311, "step": 15789 }, { "epoch": 0.4317982935900241, "grad_norm": 1.4432722330093384, "learning_rate": 1.2663539516478416e-05, "loss": 0.8105, "step": 15790 }, { "epoch": 0.43182563990374095, "grad_norm": 1.1246834993362427, "learning_rate": 1.2662685797679648e-05, "loss": 0.5382, "step": 15791 }, { "epoch": 0.4318529862174579, "grad_norm": 1.265248417854309, "learning_rate": 1.266183205799286e-05, "loss": 0.5115, "step": 15792 }, { "epoch": 0.4318803325311748, "grad_norm": 1.1319150924682617, "learning_rate": 1.2660978297424748e-05, "loss": 0.5247, "step": 15793 }, { "epoch": 0.43190767884489173, "grad_norm": 3.0480730533599854, "learning_rate": 1.266012451598201e-05, "loss": 0.424, "step": 15794 }, { "epoch": 0.4319350251586086, "grad_norm": 1.3537726402282715, "learning_rate": 1.2659270713671346e-05, "loss": 0.3881, "step": 15795 }, { "epoch": 0.4319623714723255, "grad_norm": 1.457499623298645, "learning_rate": 1.2658416890499447e-05, "loss": 0.5193, "step": 15796 }, { "epoch": 0.43198971778604245, "grad_norm": 1.1413861513137817, "learning_rate": 1.2657563046473023e-05, "loss": 0.5133, "step": 15797 }, { "epoch": 0.4320170640997594, "grad_norm": 1.3552677631378174, "learning_rate": 1.2656709181598758e-05, "loss": 0.5049, "step": 15798 }, { "epoch": 0.43204441041347624, "grad_norm": 1.450575828552246, "learning_rate": 1.2655855295883361e-05, "loss": 0.5311, "step": 15799 }, { "epoch": 0.43207175672719317, "grad_norm": 1.7733412981033325, "learning_rate": 1.2655001389333523e-05, "loss": 0.4279, "step": 15800 }, { "epoch": 0.4320991030409101, "grad_norm": 1.418789267539978, "learning_rate": 1.265414746195595e-05, "loss": 0.5487, "step": 15801 }, { "epoch": 0.432126449354627, "grad_norm": 1.1775223016738892, "learning_rate": 1.2653293513757337e-05, "loss": 0.4787, "step": 15802 }, { "epoch": 0.4321537956683439, "grad_norm": 1.4683825969696045, "learning_rate": 1.2652439544744382e-05, "loss": 0.5414, "step": 15803 }, { "epoch": 0.4321811419820608, "grad_norm": 1.7687718868255615, "learning_rate": 1.2651585554923784e-05, "loss": 0.3802, "step": 15804 }, { "epoch": 0.43220848829577774, "grad_norm": 1.25227952003479, "learning_rate": 1.2650731544302244e-05, "loss": 0.5267, "step": 15805 }, { "epoch": 0.43223583460949466, "grad_norm": 2.0130603313446045, "learning_rate": 1.2649877512886462e-05, "loss": 0.4721, "step": 15806 }, { "epoch": 0.43226318092321153, "grad_norm": 1.5559242963790894, "learning_rate": 1.2649023460683134e-05, "loss": 0.5258, "step": 15807 }, { "epoch": 0.43229052723692846, "grad_norm": 1.6845194101333618, "learning_rate": 1.2648169387698968e-05, "loss": 0.4518, "step": 15808 }, { "epoch": 0.4323178735506454, "grad_norm": 1.283078908920288, "learning_rate": 1.2647315293940654e-05, "loss": 0.5265, "step": 15809 }, { "epoch": 0.4323452198643623, "grad_norm": 1.4142489433288574, "learning_rate": 1.2646461179414895e-05, "loss": 0.5188, "step": 15810 }, { "epoch": 0.4323725661780792, "grad_norm": 2.81939435005188, "learning_rate": 1.2645607044128396e-05, "loss": 0.375, "step": 15811 }, { "epoch": 0.4323999124917961, "grad_norm": 1.1857686042785645, "learning_rate": 1.2644752888087855e-05, "loss": 0.5209, "step": 15812 }, { "epoch": 0.432427258805513, "grad_norm": 1.3944464921951294, "learning_rate": 1.2643898711299969e-05, "loss": 0.5624, "step": 15813 }, { "epoch": 0.43245460511922995, "grad_norm": 1.3549249172210693, "learning_rate": 1.2643044513771444e-05, "loss": 0.3924, "step": 15814 }, { "epoch": 0.4324819514329468, "grad_norm": 1.453642725944519, "learning_rate": 1.2642190295508975e-05, "loss": 0.5161, "step": 15815 }, { "epoch": 0.43250929774666375, "grad_norm": 1.574524164199829, "learning_rate": 1.2641336056519272e-05, "loss": 0.5476, "step": 15816 }, { "epoch": 0.43253664406038067, "grad_norm": 1.485734224319458, "learning_rate": 1.264048179680903e-05, "loss": 0.4927, "step": 15817 }, { "epoch": 0.4325639903740976, "grad_norm": 1.393242597579956, "learning_rate": 1.263962751638495e-05, "loss": 0.533, "step": 15818 }, { "epoch": 0.43259133668781447, "grad_norm": 1.2026655673980713, "learning_rate": 1.2638773215253735e-05, "loss": 0.5204, "step": 15819 }, { "epoch": 0.4326186830015314, "grad_norm": 1.465981364250183, "learning_rate": 1.2637918893422085e-05, "loss": 0.4271, "step": 15820 }, { "epoch": 0.4326460293152483, "grad_norm": 1.0872682332992554, "learning_rate": 1.2637064550896705e-05, "loss": 0.5275, "step": 15821 }, { "epoch": 0.43267337562896524, "grad_norm": 1.4000520706176758, "learning_rate": 1.2636210187684297e-05, "loss": 0.4591, "step": 15822 }, { "epoch": 0.4327007219426821, "grad_norm": 2.071505069732666, "learning_rate": 1.263535580379156e-05, "loss": 0.4401, "step": 15823 }, { "epoch": 0.43272806825639903, "grad_norm": 1.2770200967788696, "learning_rate": 1.2634501399225199e-05, "loss": 0.5285, "step": 15824 }, { "epoch": 0.43275541457011596, "grad_norm": 1.3804055452346802, "learning_rate": 1.2633646973991918e-05, "loss": 0.8562, "step": 15825 }, { "epoch": 0.4327827608838329, "grad_norm": 1.2046626806259155, "learning_rate": 1.2632792528098414e-05, "loss": 0.5327, "step": 15826 }, { "epoch": 0.43281010719754975, "grad_norm": 1.3385523557662964, "learning_rate": 1.2631938061551396e-05, "loss": 0.8264, "step": 15827 }, { "epoch": 0.4328374535112667, "grad_norm": 1.4758076667785645, "learning_rate": 1.2631083574357563e-05, "loss": 0.5261, "step": 15828 }, { "epoch": 0.4328647998249836, "grad_norm": 1.5364981889724731, "learning_rate": 1.263022906652362e-05, "loss": 0.4254, "step": 15829 }, { "epoch": 0.43289214613870053, "grad_norm": 2.4845123291015625, "learning_rate": 1.2629374538056274e-05, "loss": 0.4297, "step": 15830 }, { "epoch": 0.4329194924524174, "grad_norm": 1.3738104104995728, "learning_rate": 1.262851998896222e-05, "loss": 0.4203, "step": 15831 }, { "epoch": 0.4329468387661343, "grad_norm": 1.2567832469940186, "learning_rate": 1.2627665419248167e-05, "loss": 0.5226, "step": 15832 }, { "epoch": 0.43297418507985125, "grad_norm": 1.3095426559448242, "learning_rate": 1.2626810828920821e-05, "loss": 0.5223, "step": 15833 }, { "epoch": 0.4330015313935682, "grad_norm": 3.4961769580841064, "learning_rate": 1.262595621798688e-05, "loss": 0.5137, "step": 15834 }, { "epoch": 0.43302887770728504, "grad_norm": 1.2720774412155151, "learning_rate": 1.2625101586453054e-05, "loss": 0.5071, "step": 15835 }, { "epoch": 0.43305622402100197, "grad_norm": 1.4356920719146729, "learning_rate": 1.2624246934326046e-05, "loss": 0.5434, "step": 15836 }, { "epoch": 0.4330835703347189, "grad_norm": 1.1854839324951172, "learning_rate": 1.2623392261612556e-05, "loss": 0.5208, "step": 15837 }, { "epoch": 0.4331109166484358, "grad_norm": 1.3799391984939575, "learning_rate": 1.2622537568319295e-05, "loss": 0.8345, "step": 15838 }, { "epoch": 0.4331382629621527, "grad_norm": 1.3115994930267334, "learning_rate": 1.2621682854452963e-05, "loss": 0.5448, "step": 15839 }, { "epoch": 0.4331656092758696, "grad_norm": 1.3498036861419678, "learning_rate": 1.2620828120020265e-05, "loss": 0.531, "step": 15840 }, { "epoch": 0.43319295558958654, "grad_norm": 1.5197923183441162, "learning_rate": 1.261997336502791e-05, "loss": 0.5497, "step": 15841 }, { "epoch": 0.43322030190330346, "grad_norm": 4.490693092346191, "learning_rate": 1.2619118589482602e-05, "loss": 0.3984, "step": 15842 }, { "epoch": 0.43324764821702033, "grad_norm": 1.140896201133728, "learning_rate": 1.2618263793391043e-05, "loss": 0.4903, "step": 15843 }, { "epoch": 0.43327499453073726, "grad_norm": 1.3259083032608032, "learning_rate": 1.2617408976759945e-05, "loss": 0.5236, "step": 15844 }, { "epoch": 0.4333023408444542, "grad_norm": 1.2516472339630127, "learning_rate": 1.2616554139596008e-05, "loss": 0.4942, "step": 15845 }, { "epoch": 0.4333296871581711, "grad_norm": 1.3644341230392456, "learning_rate": 1.2615699281905944e-05, "loss": 0.5189, "step": 15846 }, { "epoch": 0.433357033471888, "grad_norm": 1.1508026123046875, "learning_rate": 1.2614844403696452e-05, "loss": 0.5135, "step": 15847 }, { "epoch": 0.4333843797856049, "grad_norm": 1.2245314121246338, "learning_rate": 1.2613989504974238e-05, "loss": 0.5091, "step": 15848 }, { "epoch": 0.4334117260993218, "grad_norm": 1.716536045074463, "learning_rate": 1.2613134585746018e-05, "loss": 0.4033, "step": 15849 }, { "epoch": 0.4334390724130387, "grad_norm": 1.3550217151641846, "learning_rate": 1.2612279646018489e-05, "loss": 0.4201, "step": 15850 }, { "epoch": 0.4334664187267556, "grad_norm": 1.6547722816467285, "learning_rate": 1.2611424685798364e-05, "loss": 0.5191, "step": 15851 }, { "epoch": 0.43349376504047255, "grad_norm": 1.5738219022750854, "learning_rate": 1.2610569705092346e-05, "loss": 0.4974, "step": 15852 }, { "epoch": 0.43352111135418947, "grad_norm": 1.9629294872283936, "learning_rate": 1.2609714703907141e-05, "loss": 0.5703, "step": 15853 }, { "epoch": 0.43354845766790634, "grad_norm": 1.3785593509674072, "learning_rate": 1.2608859682249462e-05, "loss": 0.5417, "step": 15854 }, { "epoch": 0.43357580398162326, "grad_norm": 1.6106094121932983, "learning_rate": 1.260800464012601e-05, "loss": 0.4887, "step": 15855 }, { "epoch": 0.4336031502953402, "grad_norm": 1.0347281694412231, "learning_rate": 1.2607149577543498e-05, "loss": 0.5154, "step": 15856 }, { "epoch": 0.4336304966090571, "grad_norm": 1.1517302989959717, "learning_rate": 1.2606294494508631e-05, "loss": 0.3399, "step": 15857 }, { "epoch": 0.433657842922774, "grad_norm": 1.9042558670043945, "learning_rate": 1.2605439391028116e-05, "loss": 0.4646, "step": 15858 }, { "epoch": 0.4336851892364909, "grad_norm": 1.50614333152771, "learning_rate": 1.2604584267108661e-05, "loss": 0.5499, "step": 15859 }, { "epoch": 0.43371253555020783, "grad_norm": 1.077449917793274, "learning_rate": 1.260372912275698e-05, "loss": 0.5119, "step": 15860 }, { "epoch": 0.43373988186392476, "grad_norm": 1.3966946601867676, "learning_rate": 1.2602873957979771e-05, "loss": 0.5069, "step": 15861 }, { "epoch": 0.43376722817764163, "grad_norm": 1.1432963609695435, "learning_rate": 1.2602018772783753e-05, "loss": 0.4929, "step": 15862 }, { "epoch": 0.43379457449135855, "grad_norm": 1.3722504377365112, "learning_rate": 1.2601163567175627e-05, "loss": 0.5358, "step": 15863 }, { "epoch": 0.4338219208050755, "grad_norm": 1.1389527320861816, "learning_rate": 1.2600308341162107e-05, "loss": 0.5081, "step": 15864 }, { "epoch": 0.4338492671187924, "grad_norm": 1.2652697563171387, "learning_rate": 1.2599453094749898e-05, "loss": 0.4405, "step": 15865 }, { "epoch": 0.4338766134325093, "grad_norm": 1.234337329864502, "learning_rate": 1.2598597827945714e-05, "loss": 0.5304, "step": 15866 }, { "epoch": 0.4339039597462262, "grad_norm": 1.2664250135421753, "learning_rate": 1.2597742540756257e-05, "loss": 0.5251, "step": 15867 }, { "epoch": 0.4339313060599431, "grad_norm": 1.2070103883743286, "learning_rate": 1.2596887233188244e-05, "loss": 0.5319, "step": 15868 }, { "epoch": 0.43395865237366005, "grad_norm": 1.1987009048461914, "learning_rate": 1.259603190524838e-05, "loss": 0.5026, "step": 15869 }, { "epoch": 0.4339859986873769, "grad_norm": 1.3198553323745728, "learning_rate": 1.2595176556943379e-05, "loss": 0.5182, "step": 15870 }, { "epoch": 0.43401334500109384, "grad_norm": 1.1314916610717773, "learning_rate": 1.2594321188279947e-05, "loss": 0.4879, "step": 15871 }, { "epoch": 0.43404069131481077, "grad_norm": 3.3723363876342773, "learning_rate": 1.2593465799264792e-05, "loss": 0.4245, "step": 15872 }, { "epoch": 0.4340680376285277, "grad_norm": 1.28179931640625, "learning_rate": 1.2592610389904634e-05, "loss": 0.5112, "step": 15873 }, { "epoch": 0.43409538394224456, "grad_norm": 1.2725975513458252, "learning_rate": 1.259175496020617e-05, "loss": 0.5056, "step": 15874 }, { "epoch": 0.4341227302559615, "grad_norm": 1.8772709369659424, "learning_rate": 1.2590899510176122e-05, "loss": 0.3936, "step": 15875 }, { "epoch": 0.4341500765696784, "grad_norm": 1.6244480609893799, "learning_rate": 1.2590044039821196e-05, "loss": 0.4415, "step": 15876 }, { "epoch": 0.43417742288339534, "grad_norm": 1.2682024240493774, "learning_rate": 1.25891885491481e-05, "loss": 0.5289, "step": 15877 }, { "epoch": 0.4342047691971122, "grad_norm": 1.5180162191390991, "learning_rate": 1.2588333038163554e-05, "loss": 0.5371, "step": 15878 }, { "epoch": 0.43423211551082913, "grad_norm": 1.2193492650985718, "learning_rate": 1.2587477506874261e-05, "loss": 0.518, "step": 15879 }, { "epoch": 0.43425946182454606, "grad_norm": 1.1971843242645264, "learning_rate": 1.2586621955286936e-05, "loss": 0.497, "step": 15880 }, { "epoch": 0.434286808138263, "grad_norm": 1.4534969329833984, "learning_rate": 1.2585766383408289e-05, "loss": 0.4444, "step": 15881 }, { "epoch": 0.43431415445197985, "grad_norm": 1.329601526260376, "learning_rate": 1.2584910791245033e-05, "loss": 0.4275, "step": 15882 }, { "epoch": 0.4343415007656968, "grad_norm": 1.5188292264938354, "learning_rate": 1.2584055178803878e-05, "loss": 0.8419, "step": 15883 }, { "epoch": 0.4343688470794137, "grad_norm": 1.1024748086929321, "learning_rate": 1.2583199546091539e-05, "loss": 0.549, "step": 15884 }, { "epoch": 0.4343961933931306, "grad_norm": 1.320876955986023, "learning_rate": 1.2582343893114724e-05, "loss": 0.5109, "step": 15885 }, { "epoch": 0.4344235397068475, "grad_norm": 2.503200054168701, "learning_rate": 1.258148821988015e-05, "loss": 0.4074, "step": 15886 }, { "epoch": 0.4344508860205644, "grad_norm": 1.3132153749465942, "learning_rate": 1.258063252639453e-05, "loss": 0.5175, "step": 15887 }, { "epoch": 0.43447823233428134, "grad_norm": 1.2184275388717651, "learning_rate": 1.257977681266457e-05, "loss": 0.5343, "step": 15888 }, { "epoch": 0.43450557864799827, "grad_norm": 1.3692823648452759, "learning_rate": 1.2578921078696986e-05, "loss": 0.521, "step": 15889 }, { "epoch": 0.43453292496171514, "grad_norm": 1.25847327709198, "learning_rate": 1.2578065324498497e-05, "loss": 0.5147, "step": 15890 }, { "epoch": 0.43456027127543206, "grad_norm": 1.3690133094787598, "learning_rate": 1.2577209550075807e-05, "loss": 0.7914, "step": 15891 }, { "epoch": 0.434587617589149, "grad_norm": 1.6237174272537231, "learning_rate": 1.2576353755435633e-05, "loss": 0.5292, "step": 15892 }, { "epoch": 0.4346149639028659, "grad_norm": 2.0516786575317383, "learning_rate": 1.2575497940584689e-05, "loss": 0.3845, "step": 15893 }, { "epoch": 0.4346423102165828, "grad_norm": 1.2399574518203735, "learning_rate": 1.257464210552969e-05, "loss": 0.49, "step": 15894 }, { "epoch": 0.4346696565302997, "grad_norm": 1.120643973350525, "learning_rate": 1.257378625027735e-05, "loss": 0.5019, "step": 15895 }, { "epoch": 0.43469700284401663, "grad_norm": 1.4044058322906494, "learning_rate": 1.2572930374834376e-05, "loss": 0.8381, "step": 15896 }, { "epoch": 0.43472434915773356, "grad_norm": 1.3280614614486694, "learning_rate": 1.257207447920749e-05, "loss": 0.5374, "step": 15897 }, { "epoch": 0.43475169547145043, "grad_norm": 1.1687779426574707, "learning_rate": 1.2571218563403406e-05, "loss": 0.516, "step": 15898 }, { "epoch": 0.43477904178516735, "grad_norm": 1.7056620121002197, "learning_rate": 1.257036262742883e-05, "loss": 0.319, "step": 15899 }, { "epoch": 0.4348063880988843, "grad_norm": 1.190202236175537, "learning_rate": 1.256950667129049e-05, "loss": 0.5212, "step": 15900 }, { "epoch": 0.4348337344126012, "grad_norm": 2.862145185470581, "learning_rate": 1.2568650694995089e-05, "loss": 0.559, "step": 15901 }, { "epoch": 0.43486108072631807, "grad_norm": 1.6395034790039062, "learning_rate": 1.2567794698549344e-05, "loss": 0.4915, "step": 15902 }, { "epoch": 0.434888427040035, "grad_norm": 1.7299731969833374, "learning_rate": 1.2566938681959977e-05, "loss": 0.4321, "step": 15903 }, { "epoch": 0.4349157733537519, "grad_norm": 1.1162325143814087, "learning_rate": 1.2566082645233698e-05, "loss": 0.5155, "step": 15904 }, { "epoch": 0.43494311966746885, "grad_norm": 1.9684514999389648, "learning_rate": 1.2565226588377221e-05, "loss": 0.5226, "step": 15905 }, { "epoch": 0.4349704659811857, "grad_norm": 1.5241270065307617, "learning_rate": 1.2564370511397266e-05, "loss": 0.4822, "step": 15906 }, { "epoch": 0.43499781229490264, "grad_norm": 1.5471330881118774, "learning_rate": 1.2563514414300541e-05, "loss": 0.4062, "step": 15907 }, { "epoch": 0.43502515860861957, "grad_norm": 2.6500635147094727, "learning_rate": 1.2562658297093768e-05, "loss": 0.4572, "step": 15908 }, { "epoch": 0.4350525049223365, "grad_norm": 1.4105480909347534, "learning_rate": 1.2561802159783668e-05, "loss": 0.5249, "step": 15909 }, { "epoch": 0.43507985123605336, "grad_norm": 1.3632100820541382, "learning_rate": 1.2560946002376943e-05, "loss": 0.3998, "step": 15910 }, { "epoch": 0.4351071975497703, "grad_norm": 1.6415607929229736, "learning_rate": 1.2560089824880324e-05, "loss": 0.5245, "step": 15911 }, { "epoch": 0.4351345438634872, "grad_norm": 1.4800342321395874, "learning_rate": 1.2559233627300516e-05, "loss": 0.4919, "step": 15912 }, { "epoch": 0.43516189017720414, "grad_norm": 1.1327478885650635, "learning_rate": 1.255837740964424e-05, "loss": 0.4856, "step": 15913 }, { "epoch": 0.435189236490921, "grad_norm": 1.230973720550537, "learning_rate": 1.2557521171918217e-05, "loss": 0.5337, "step": 15914 }, { "epoch": 0.43521658280463793, "grad_norm": 1.2973512411117554, "learning_rate": 1.2556664914129157e-05, "loss": 0.5297, "step": 15915 }, { "epoch": 0.43524392911835486, "grad_norm": 1.0294314622879028, "learning_rate": 1.2555808636283784e-05, "loss": 0.4934, "step": 15916 }, { "epoch": 0.4352712754320718, "grad_norm": 1.1100550889968872, "learning_rate": 1.2554952338388807e-05, "loss": 0.5021, "step": 15917 }, { "epoch": 0.43529862174578865, "grad_norm": 1.4788548946380615, "learning_rate": 1.255409602045095e-05, "loss": 0.5451, "step": 15918 }, { "epoch": 0.4353259680595056, "grad_norm": 1.230049967765808, "learning_rate": 1.255323968247693e-05, "loss": 0.5357, "step": 15919 }, { "epoch": 0.4353533143732225, "grad_norm": 1.2646729946136475, "learning_rate": 1.255238332447346e-05, "loss": 0.5263, "step": 15920 }, { "epoch": 0.4353806606869394, "grad_norm": 1.2806494235992432, "learning_rate": 1.255152694644726e-05, "loss": 0.5502, "step": 15921 }, { "epoch": 0.4354080070006563, "grad_norm": 1.141638159751892, "learning_rate": 1.2550670548405056e-05, "loss": 0.4664, "step": 15922 }, { "epoch": 0.4354353533143732, "grad_norm": 1.3799710273742676, "learning_rate": 1.2549814130353557e-05, "loss": 0.5123, "step": 15923 }, { "epoch": 0.43546269962809014, "grad_norm": 1.2744710445404053, "learning_rate": 1.254895769229948e-05, "loss": 0.5263, "step": 15924 }, { "epoch": 0.43549004594180707, "grad_norm": 1.5839574337005615, "learning_rate": 1.254810123424955e-05, "loss": 0.5525, "step": 15925 }, { "epoch": 0.43551739225552394, "grad_norm": 1.5008814334869385, "learning_rate": 1.2547244756210481e-05, "loss": 0.523, "step": 15926 }, { "epoch": 0.43554473856924086, "grad_norm": 1.6480754613876343, "learning_rate": 1.2546388258188998e-05, "loss": 0.4666, "step": 15927 }, { "epoch": 0.4355720848829578, "grad_norm": 1.2332731485366821, "learning_rate": 1.2545531740191811e-05, "loss": 0.4975, "step": 15928 }, { "epoch": 0.4355994311966747, "grad_norm": 1.132010579109192, "learning_rate": 1.2544675202225645e-05, "loss": 0.5164, "step": 15929 }, { "epoch": 0.4356267775103916, "grad_norm": 1.236875295639038, "learning_rate": 1.2543818644297222e-05, "loss": 0.4192, "step": 15930 }, { "epoch": 0.4356541238241085, "grad_norm": 1.685882329940796, "learning_rate": 1.254296206641325e-05, "loss": 0.8657, "step": 15931 }, { "epoch": 0.43568147013782543, "grad_norm": 1.4187675714492798, "learning_rate": 1.2542105468580461e-05, "loss": 0.5577, "step": 15932 }, { "epoch": 0.43570881645154236, "grad_norm": 1.1146690845489502, "learning_rate": 1.2541248850805572e-05, "loss": 0.5269, "step": 15933 }, { "epoch": 0.4357361627652592, "grad_norm": 1.3313993215560913, "learning_rate": 1.2540392213095296e-05, "loss": 0.4889, "step": 15934 }, { "epoch": 0.43576350907897615, "grad_norm": 1.235858678817749, "learning_rate": 1.2539535555456363e-05, "loss": 0.4915, "step": 15935 }, { "epoch": 0.4357908553926931, "grad_norm": 1.6926357746124268, "learning_rate": 1.2538678877895484e-05, "loss": 0.4529, "step": 15936 }, { "epoch": 0.43581820170641, "grad_norm": 1.3995434045791626, "learning_rate": 1.2537822180419386e-05, "loss": 0.501, "step": 15937 }, { "epoch": 0.43584554802012687, "grad_norm": 1.6794872283935547, "learning_rate": 1.2536965463034787e-05, "loss": 0.4207, "step": 15938 }, { "epoch": 0.4358728943338438, "grad_norm": 1.1472264528274536, "learning_rate": 1.2536108725748409e-05, "loss": 0.5253, "step": 15939 }, { "epoch": 0.4359002406475607, "grad_norm": 1.6120877265930176, "learning_rate": 1.253525196856697e-05, "loss": 0.5219, "step": 15940 }, { "epoch": 0.43592758696127765, "grad_norm": 1.253839373588562, "learning_rate": 1.2534395191497195e-05, "loss": 0.8319, "step": 15941 }, { "epoch": 0.4359549332749945, "grad_norm": 1.1233543157577515, "learning_rate": 1.2533538394545799e-05, "loss": 0.3996, "step": 15942 }, { "epoch": 0.43598227958871144, "grad_norm": 1.241683006286621, "learning_rate": 1.253268157771951e-05, "loss": 0.5187, "step": 15943 }, { "epoch": 0.43600962590242837, "grad_norm": 1.1173415184020996, "learning_rate": 1.2531824741025047e-05, "loss": 0.4911, "step": 15944 }, { "epoch": 0.4360369722161453, "grad_norm": 1.5380806922912598, "learning_rate": 1.2530967884469129e-05, "loss": 0.8289, "step": 15945 }, { "epoch": 0.43606431852986216, "grad_norm": 1.186927318572998, "learning_rate": 1.2530111008058482e-05, "loss": 0.5289, "step": 15946 }, { "epoch": 0.4360916648435791, "grad_norm": 1.154800534248352, "learning_rate": 1.2529254111799828e-05, "loss": 0.3923, "step": 15947 }, { "epoch": 0.436119011157296, "grad_norm": 1.2456676959991455, "learning_rate": 1.2528397195699883e-05, "loss": 0.496, "step": 15948 }, { "epoch": 0.4361463574710129, "grad_norm": 2.5387821197509766, "learning_rate": 1.2527540259765378e-05, "loss": 0.4167, "step": 15949 }, { "epoch": 0.4361737037847298, "grad_norm": 1.407707929611206, "learning_rate": 1.2526683304003027e-05, "loss": 0.5478, "step": 15950 }, { "epoch": 0.43620105009844673, "grad_norm": 2.009204626083374, "learning_rate": 1.2525826328419557e-05, "loss": 0.5066, "step": 15951 }, { "epoch": 0.43622839641216365, "grad_norm": 1.4619587659835815, "learning_rate": 1.2524969333021694e-05, "loss": 0.4261, "step": 15952 }, { "epoch": 0.4362557427258805, "grad_norm": 1.13182532787323, "learning_rate": 1.2524112317816152e-05, "loss": 0.527, "step": 15953 }, { "epoch": 0.43628308903959745, "grad_norm": 1.4513914585113525, "learning_rate": 1.2523255282809665e-05, "loss": 0.5408, "step": 15954 }, { "epoch": 0.4363104353533144, "grad_norm": 1.4375964403152466, "learning_rate": 1.2522398228008947e-05, "loss": 0.5482, "step": 15955 }, { "epoch": 0.4363377816670313, "grad_norm": 1.2902628183364868, "learning_rate": 1.2521541153420724e-05, "loss": 0.5064, "step": 15956 }, { "epoch": 0.43636512798074817, "grad_norm": 1.2592780590057373, "learning_rate": 1.2520684059051722e-05, "loss": 0.5641, "step": 15957 }, { "epoch": 0.4363924742944651, "grad_norm": 1.169589638710022, "learning_rate": 1.2519826944908666e-05, "loss": 0.506, "step": 15958 }, { "epoch": 0.436419820608182, "grad_norm": 1.4086148738861084, "learning_rate": 1.2518969810998272e-05, "loss": 0.4324, "step": 15959 }, { "epoch": 0.43644716692189894, "grad_norm": 1.4914933443069458, "learning_rate": 1.251811265732727e-05, "loss": 0.5285, "step": 15960 }, { "epoch": 0.4364745132356158, "grad_norm": 1.5582845211029053, "learning_rate": 1.2517255483902382e-05, "loss": 0.5572, "step": 15961 }, { "epoch": 0.43650185954933274, "grad_norm": 1.5358284711837769, "learning_rate": 1.2516398290730334e-05, "loss": 0.5077, "step": 15962 }, { "epoch": 0.43652920586304966, "grad_norm": 1.2514216899871826, "learning_rate": 1.251554107781785e-05, "loss": 0.5359, "step": 15963 }, { "epoch": 0.4365565521767666, "grad_norm": 1.1463607549667358, "learning_rate": 1.2514683845171654e-05, "loss": 0.4961, "step": 15964 }, { "epoch": 0.43658389849048346, "grad_norm": 1.4067734479904175, "learning_rate": 1.2513826592798471e-05, "loss": 0.429, "step": 15965 }, { "epoch": 0.4366112448042004, "grad_norm": 1.312724232673645, "learning_rate": 1.2512969320705024e-05, "loss": 0.5384, "step": 15966 }, { "epoch": 0.4366385911179173, "grad_norm": 1.3991292715072632, "learning_rate": 1.2512112028898043e-05, "loss": 0.5079, "step": 15967 }, { "epoch": 0.43666593743163423, "grad_norm": 1.179039716720581, "learning_rate": 1.2511254717384249e-05, "loss": 0.3487, "step": 15968 }, { "epoch": 0.4366932837453511, "grad_norm": 1.2488843202590942, "learning_rate": 1.2510397386170366e-05, "loss": 0.4276, "step": 15969 }, { "epoch": 0.436720630059068, "grad_norm": 1.4192893505096436, "learning_rate": 1.2509540035263126e-05, "loss": 0.5054, "step": 15970 }, { "epoch": 0.43674797637278495, "grad_norm": 1.4958040714263916, "learning_rate": 1.2508682664669247e-05, "loss": 0.532, "step": 15971 }, { "epoch": 0.4367753226865019, "grad_norm": 1.1488429307937622, "learning_rate": 1.250782527439546e-05, "loss": 0.502, "step": 15972 }, { "epoch": 0.43680266900021875, "grad_norm": 1.684788465499878, "learning_rate": 1.2506967864448489e-05, "loss": 0.3813, "step": 15973 }, { "epoch": 0.43683001531393567, "grad_norm": 1.1032631397247314, "learning_rate": 1.2506110434835061e-05, "loss": 0.4952, "step": 15974 }, { "epoch": 0.4368573616276526, "grad_norm": 1.4078794717788696, "learning_rate": 1.2505252985561905e-05, "loss": 0.3951, "step": 15975 }, { "epoch": 0.4368847079413695, "grad_norm": 1.5517983436584473, "learning_rate": 1.2504395516635742e-05, "loss": 0.4675, "step": 15976 }, { "epoch": 0.4369120542550864, "grad_norm": 3.3986239433288574, "learning_rate": 1.25035380280633e-05, "loss": 0.387, "step": 15977 }, { "epoch": 0.4369394005688033, "grad_norm": 1.4547171592712402, "learning_rate": 1.2502680519851306e-05, "loss": 0.4403, "step": 15978 }, { "epoch": 0.43696674688252024, "grad_norm": 1.8216700553894043, "learning_rate": 1.250182299200649e-05, "loss": 0.5173, "step": 15979 }, { "epoch": 0.43699409319623717, "grad_norm": 1.6931558847427368, "learning_rate": 1.2500965444535576e-05, "loss": 0.5275, "step": 15980 }, { "epoch": 0.43702143950995403, "grad_norm": 1.3631858825683594, "learning_rate": 1.250010787744529e-05, "loss": 0.5209, "step": 15981 }, { "epoch": 0.43704878582367096, "grad_norm": 1.476719856262207, "learning_rate": 1.2499250290742362e-05, "loss": 0.4977, "step": 15982 }, { "epoch": 0.4370761321373879, "grad_norm": 1.1861056089401245, "learning_rate": 1.249839268443352e-05, "loss": 0.8216, "step": 15983 }, { "epoch": 0.4371034784511048, "grad_norm": 1.4470144510269165, "learning_rate": 1.2497535058525492e-05, "loss": 0.5007, "step": 15984 }, { "epoch": 0.4371308247648217, "grad_norm": 1.4730168581008911, "learning_rate": 1.2496677413025e-05, "loss": 0.4245, "step": 15985 }, { "epoch": 0.4371581710785386, "grad_norm": 1.2437838315963745, "learning_rate": 1.2495819747938778e-05, "loss": 0.459, "step": 15986 }, { "epoch": 0.43718551739225553, "grad_norm": 1.2439236640930176, "learning_rate": 1.2494962063273555e-05, "loss": 0.5118, "step": 15987 }, { "epoch": 0.43721286370597245, "grad_norm": 1.3522472381591797, "learning_rate": 1.2494104359036054e-05, "loss": 0.4999, "step": 15988 }, { "epoch": 0.4372402100196893, "grad_norm": 1.3635504245758057, "learning_rate": 1.249324663523301e-05, "loss": 0.3796, "step": 15989 }, { "epoch": 0.43726755633340625, "grad_norm": 1.7593140602111816, "learning_rate": 1.2492388891871143e-05, "loss": 0.5306, "step": 15990 }, { "epoch": 0.4372949026471232, "grad_norm": 1.5172817707061768, "learning_rate": 1.2491531128957189e-05, "loss": 0.5033, "step": 15991 }, { "epoch": 0.4373222489608401, "grad_norm": 1.3053008317947388, "learning_rate": 1.2490673346497872e-05, "loss": 0.4987, "step": 15992 }, { "epoch": 0.43734959527455697, "grad_norm": 1.8668577671051025, "learning_rate": 1.2489815544499929e-05, "loss": 0.5242, "step": 15993 }, { "epoch": 0.4373769415882739, "grad_norm": 1.462943196296692, "learning_rate": 1.2488957722970079e-05, "loss": 0.5158, "step": 15994 }, { "epoch": 0.4374042879019908, "grad_norm": 1.4741915464401245, "learning_rate": 1.2488099881915059e-05, "loss": 0.5696, "step": 15995 }, { "epoch": 0.43743163421570774, "grad_norm": 1.3786232471466064, "learning_rate": 1.2487242021341593e-05, "loss": 0.5011, "step": 15996 }, { "epoch": 0.4374589805294246, "grad_norm": 1.5883370637893677, "learning_rate": 1.2486384141256414e-05, "loss": 0.5146, "step": 15997 }, { "epoch": 0.43748632684314154, "grad_norm": 1.2555983066558838, "learning_rate": 1.2485526241666252e-05, "loss": 0.5374, "step": 15998 }, { "epoch": 0.43751367315685846, "grad_norm": 1.7254563570022583, "learning_rate": 1.2484668322577836e-05, "loss": 0.5314, "step": 15999 }, { "epoch": 0.4375410194705754, "grad_norm": 1.191306233406067, "learning_rate": 1.2483810383997897e-05, "loss": 0.5139, "step": 16000 }, { "epoch": 0.43756836578429226, "grad_norm": 1.1641030311584473, "learning_rate": 1.2482952425933161e-05, "loss": 0.5152, "step": 16001 }, { "epoch": 0.4375957120980092, "grad_norm": 1.3288042545318604, "learning_rate": 1.2482094448390362e-05, "loss": 0.4445, "step": 16002 }, { "epoch": 0.4376230584117261, "grad_norm": 1.3822425603866577, "learning_rate": 1.2481236451376234e-05, "loss": 0.5728, "step": 16003 }, { "epoch": 0.43765040472544303, "grad_norm": 1.1551629304885864, "learning_rate": 1.2480378434897505e-05, "loss": 0.5269, "step": 16004 }, { "epoch": 0.4376777510391599, "grad_norm": 1.4868543148040771, "learning_rate": 1.2479520398960905e-05, "loss": 0.5303, "step": 16005 }, { "epoch": 0.4377050973528768, "grad_norm": 1.245854139328003, "learning_rate": 1.2478662343573163e-05, "loss": 0.5154, "step": 16006 }, { "epoch": 0.43773244366659375, "grad_norm": 1.1142199039459229, "learning_rate": 1.2477804268741012e-05, "loss": 0.5104, "step": 16007 }, { "epoch": 0.4377597899803107, "grad_norm": 1.326043963432312, "learning_rate": 1.2476946174471186e-05, "loss": 0.547, "step": 16008 }, { "epoch": 0.43778713629402755, "grad_norm": 1.299804925918579, "learning_rate": 1.2476088060770412e-05, "loss": 0.8139, "step": 16009 }, { "epoch": 0.43781448260774447, "grad_norm": 1.1315339803695679, "learning_rate": 1.2475229927645425e-05, "loss": 0.4723, "step": 16010 }, { "epoch": 0.4378418289214614, "grad_norm": 1.5979222059249878, "learning_rate": 1.247437177510296e-05, "loss": 0.5406, "step": 16011 }, { "epoch": 0.4378691752351783, "grad_norm": 1.2197147607803345, "learning_rate": 1.2473513603149738e-05, "loss": 0.4982, "step": 16012 }, { "epoch": 0.4378965215488952, "grad_norm": 1.5266886949539185, "learning_rate": 1.24726554117925e-05, "loss": 0.5088, "step": 16013 }, { "epoch": 0.4379238678626121, "grad_norm": 1.2829055786132812, "learning_rate": 1.2471797201037981e-05, "loss": 0.5274, "step": 16014 }, { "epoch": 0.43795121417632904, "grad_norm": 1.2430412769317627, "learning_rate": 1.2470938970892903e-05, "loss": 0.5265, "step": 16015 }, { "epoch": 0.43797856049004597, "grad_norm": 1.2961474657058716, "learning_rate": 1.2470080721364007e-05, "loss": 0.4847, "step": 16016 }, { "epoch": 0.43800590680376283, "grad_norm": 1.1641031503677368, "learning_rate": 1.246922245245802e-05, "loss": 0.5066, "step": 16017 }, { "epoch": 0.43803325311747976, "grad_norm": 1.4188191890716553, "learning_rate": 1.246836416418168e-05, "loss": 0.4262, "step": 16018 }, { "epoch": 0.4380605994311967, "grad_norm": 1.7954237461090088, "learning_rate": 1.2467505856541718e-05, "loss": 0.5024, "step": 16019 }, { "epoch": 0.4380879457449136, "grad_norm": 1.335499882698059, "learning_rate": 1.2466647529544863e-05, "loss": 0.5414, "step": 16020 }, { "epoch": 0.4381152920586305, "grad_norm": 1.3532333374023438, "learning_rate": 1.2465789183197856e-05, "loss": 0.4714, "step": 16021 }, { "epoch": 0.4381426383723474, "grad_norm": 1.30252206325531, "learning_rate": 1.2464930817507427e-05, "loss": 0.5303, "step": 16022 }, { "epoch": 0.43816998468606433, "grad_norm": 1.275940179824829, "learning_rate": 1.2464072432480308e-05, "loss": 0.5284, "step": 16023 }, { "epoch": 0.43819733099978125, "grad_norm": 1.1910724639892578, "learning_rate": 1.2463214028123237e-05, "loss": 0.552, "step": 16024 }, { "epoch": 0.4382246773134981, "grad_norm": 1.3101435899734497, "learning_rate": 1.2462355604442941e-05, "loss": 0.5092, "step": 16025 }, { "epoch": 0.43825202362721505, "grad_norm": 1.144687533378601, "learning_rate": 1.2461497161446161e-05, "loss": 0.4849, "step": 16026 }, { "epoch": 0.438279369940932, "grad_norm": 1.254090666770935, "learning_rate": 1.2460638699139629e-05, "loss": 0.5362, "step": 16027 }, { "epoch": 0.4383067162546489, "grad_norm": 1.4260683059692383, "learning_rate": 1.2459780217530078e-05, "loss": 0.8438, "step": 16028 }, { "epoch": 0.43833406256836577, "grad_norm": 1.3807995319366455, "learning_rate": 1.2458921716624242e-05, "loss": 0.5133, "step": 16029 }, { "epoch": 0.4383614088820827, "grad_norm": 1.1685484647750854, "learning_rate": 1.245806319642886e-05, "loss": 0.5271, "step": 16030 }, { "epoch": 0.4383887551957996, "grad_norm": 1.0845143795013428, "learning_rate": 1.245720465695066e-05, "loss": 0.4947, "step": 16031 }, { "epoch": 0.43841610150951654, "grad_norm": 1.2595454454421997, "learning_rate": 1.2456346098196383e-05, "loss": 0.8093, "step": 16032 }, { "epoch": 0.4384434478232334, "grad_norm": 1.1081011295318604, "learning_rate": 1.2455487520172764e-05, "loss": 0.5371, "step": 16033 }, { "epoch": 0.43847079413695034, "grad_norm": 1.1802164316177368, "learning_rate": 1.2454628922886534e-05, "loss": 0.8365, "step": 16034 }, { "epoch": 0.43849814045066726, "grad_norm": 1.2311495542526245, "learning_rate": 1.2453770306344433e-05, "loss": 0.5211, "step": 16035 }, { "epoch": 0.4385254867643842, "grad_norm": 2.747250556945801, "learning_rate": 1.2452911670553191e-05, "loss": 0.5169, "step": 16036 }, { "epoch": 0.43855283307810106, "grad_norm": 1.228387475013733, "learning_rate": 1.245205301551955e-05, "loss": 0.5396, "step": 16037 }, { "epoch": 0.438580179391818, "grad_norm": 1.2719796895980835, "learning_rate": 1.2451194341250244e-05, "loss": 0.4963, "step": 16038 }, { "epoch": 0.4386075257055349, "grad_norm": 1.5835179090499878, "learning_rate": 1.2450335647752005e-05, "loss": 0.449, "step": 16039 }, { "epoch": 0.43863487201925183, "grad_norm": 1.2907037734985352, "learning_rate": 1.2449476935031576e-05, "loss": 0.527, "step": 16040 }, { "epoch": 0.4386622183329687, "grad_norm": 1.187851905822754, "learning_rate": 1.2448618203095687e-05, "loss": 0.5113, "step": 16041 }, { "epoch": 0.4386895646466856, "grad_norm": 1.177147626876831, "learning_rate": 1.2447759451951077e-05, "loss": 0.5201, "step": 16042 }, { "epoch": 0.43871691096040255, "grad_norm": 1.4206401109695435, "learning_rate": 1.2446900681604486e-05, "loss": 0.4707, "step": 16043 }, { "epoch": 0.4387442572741195, "grad_norm": 1.4587230682373047, "learning_rate": 1.2446041892062646e-05, "loss": 0.4827, "step": 16044 }, { "epoch": 0.43877160358783635, "grad_norm": 1.2206685543060303, "learning_rate": 1.2445183083332295e-05, "loss": 0.4897, "step": 16045 }, { "epoch": 0.43879894990155327, "grad_norm": 1.2258858680725098, "learning_rate": 1.2444324255420174e-05, "loss": 0.4327, "step": 16046 }, { "epoch": 0.4388262962152702, "grad_norm": 2.023669719696045, "learning_rate": 1.2443465408333015e-05, "loss": 0.5298, "step": 16047 }, { "epoch": 0.4388536425289871, "grad_norm": 1.1312834024429321, "learning_rate": 1.2442606542077556e-05, "loss": 0.5367, "step": 16048 }, { "epoch": 0.438880988842704, "grad_norm": 1.647179126739502, "learning_rate": 1.2441747656660539e-05, "loss": 0.3926, "step": 16049 }, { "epoch": 0.4389083351564209, "grad_norm": 1.0919057130813599, "learning_rate": 1.2440888752088696e-05, "loss": 0.4939, "step": 16050 }, { "epoch": 0.43893568147013784, "grad_norm": 1.3503963947296143, "learning_rate": 1.2440029828368766e-05, "loss": 0.4329, "step": 16051 }, { "epoch": 0.4389630277838547, "grad_norm": 1.1975877285003662, "learning_rate": 1.2439170885507495e-05, "loss": 0.5185, "step": 16052 }, { "epoch": 0.43899037409757163, "grad_norm": 1.7342760562896729, "learning_rate": 1.243831192351161e-05, "loss": 0.5258, "step": 16053 }, { "epoch": 0.43901772041128856, "grad_norm": 1.3668606281280518, "learning_rate": 1.2437452942387857e-05, "loss": 0.5133, "step": 16054 }, { "epoch": 0.4390450667250055, "grad_norm": 1.3180656433105469, "learning_rate": 1.243659394214297e-05, "loss": 0.5305, "step": 16055 }, { "epoch": 0.43907241303872235, "grad_norm": 1.5301094055175781, "learning_rate": 1.2435734922783688e-05, "loss": 0.5565, "step": 16056 }, { "epoch": 0.4390997593524393, "grad_norm": 1.2319649457931519, "learning_rate": 1.2434875884316753e-05, "loss": 0.4778, "step": 16057 }, { "epoch": 0.4391271056661562, "grad_norm": 1.3535892963409424, "learning_rate": 1.24340168267489e-05, "loss": 0.3953, "step": 16058 }, { "epoch": 0.43915445197987313, "grad_norm": 1.5703678131103516, "learning_rate": 1.2433157750086873e-05, "loss": 0.8074, "step": 16059 }, { "epoch": 0.43918179829359, "grad_norm": 1.2361584901809692, "learning_rate": 1.2432298654337406e-05, "loss": 0.5007, "step": 16060 }, { "epoch": 0.4392091446073069, "grad_norm": 1.3151979446411133, "learning_rate": 1.243143953950724e-05, "loss": 0.5313, "step": 16061 }, { "epoch": 0.43923649092102385, "grad_norm": 1.3890154361724854, "learning_rate": 1.2430580405603117e-05, "loss": 0.5465, "step": 16062 }, { "epoch": 0.4392638372347408, "grad_norm": 1.2165366411209106, "learning_rate": 1.2429721252631778e-05, "loss": 0.5071, "step": 16063 }, { "epoch": 0.43929118354845764, "grad_norm": 1.612459421157837, "learning_rate": 1.2428862080599952e-05, "loss": 0.5479, "step": 16064 }, { "epoch": 0.43931852986217457, "grad_norm": 1.3041539192199707, "learning_rate": 1.2428002889514393e-05, "loss": 0.5245, "step": 16065 }, { "epoch": 0.4393458761758915, "grad_norm": 1.6093153953552246, "learning_rate": 1.242714367938183e-05, "loss": 0.824, "step": 16066 }, { "epoch": 0.4393732224896084, "grad_norm": 1.359727382659912, "learning_rate": 1.2426284450209007e-05, "loss": 0.3616, "step": 16067 }, { "epoch": 0.4394005688033253, "grad_norm": 1.1846710443496704, "learning_rate": 1.242542520200267e-05, "loss": 0.5213, "step": 16068 }, { "epoch": 0.4394279151170422, "grad_norm": 1.8248499631881714, "learning_rate": 1.2424565934769552e-05, "loss": 0.5504, "step": 16069 }, { "epoch": 0.43945526143075914, "grad_norm": 1.1791177988052368, "learning_rate": 1.2423706648516398e-05, "loss": 0.8388, "step": 16070 }, { "epoch": 0.43948260774447606, "grad_norm": 1.4307879209518433, "learning_rate": 1.2422847343249947e-05, "loss": 0.5265, "step": 16071 }, { "epoch": 0.43950995405819293, "grad_norm": 1.6103047132492065, "learning_rate": 1.2421988018976937e-05, "loss": 0.5511, "step": 16072 }, { "epoch": 0.43953730037190986, "grad_norm": 1.3712191581726074, "learning_rate": 1.2421128675704117e-05, "loss": 0.5198, "step": 16073 }, { "epoch": 0.4395646466856268, "grad_norm": 1.6253081560134888, "learning_rate": 1.2420269313438222e-05, "loss": 0.8259, "step": 16074 }, { "epoch": 0.4395919929993437, "grad_norm": 1.333573818206787, "learning_rate": 1.2419409932185996e-05, "loss": 0.541, "step": 16075 }, { "epoch": 0.4396193393130606, "grad_norm": 1.3316866159439087, "learning_rate": 1.241855053195418e-05, "loss": 0.5584, "step": 16076 }, { "epoch": 0.4396466856267775, "grad_norm": 1.2004809379577637, "learning_rate": 1.2417691112749516e-05, "loss": 0.8173, "step": 16077 }, { "epoch": 0.4396740319404944, "grad_norm": 1.0066649913787842, "learning_rate": 1.2416831674578746e-05, "loss": 0.4969, "step": 16078 }, { "epoch": 0.43970137825421135, "grad_norm": 1.3031554222106934, "learning_rate": 1.241597221744861e-05, "loss": 0.507, "step": 16079 }, { "epoch": 0.4397287245679282, "grad_norm": 1.3310285806655884, "learning_rate": 1.2415112741365852e-05, "loss": 0.4297, "step": 16080 }, { "epoch": 0.43975607088164514, "grad_norm": 1.2505615949630737, "learning_rate": 1.2414253246337217e-05, "loss": 0.4907, "step": 16081 }, { "epoch": 0.43978341719536207, "grad_norm": 1.3159550428390503, "learning_rate": 1.2413393732369443e-05, "loss": 0.5487, "step": 16082 }, { "epoch": 0.439810763509079, "grad_norm": 1.21958327293396, "learning_rate": 1.2412534199469273e-05, "loss": 0.5104, "step": 16083 }, { "epoch": 0.43983810982279586, "grad_norm": 1.38229238986969, "learning_rate": 1.2411674647643454e-05, "loss": 0.5163, "step": 16084 }, { "epoch": 0.4398654561365128, "grad_norm": 1.259186863899231, "learning_rate": 1.2410815076898724e-05, "loss": 0.4962, "step": 16085 }, { "epoch": 0.4398928024502297, "grad_norm": 1.1845537424087524, "learning_rate": 1.2409955487241827e-05, "loss": 0.5114, "step": 16086 }, { "epoch": 0.43992014876394664, "grad_norm": 1.263247013092041, "learning_rate": 1.2409095878679512e-05, "loss": 0.5079, "step": 16087 }, { "epoch": 0.4399474950776635, "grad_norm": 1.4314806461334229, "learning_rate": 1.2408236251218513e-05, "loss": 0.5026, "step": 16088 }, { "epoch": 0.43997484139138043, "grad_norm": 1.3672642707824707, "learning_rate": 1.2407376604865579e-05, "loss": 0.517, "step": 16089 }, { "epoch": 0.44000218770509736, "grad_norm": 1.4306918382644653, "learning_rate": 1.2406516939627456e-05, "loss": 0.5185, "step": 16090 }, { "epoch": 0.4400295340188143, "grad_norm": 2.0097696781158447, "learning_rate": 1.2405657255510881e-05, "loss": 0.5083, "step": 16091 }, { "epoch": 0.44005688033253115, "grad_norm": 1.5280570983886719, "learning_rate": 1.2404797552522606e-05, "loss": 0.4972, "step": 16092 }, { "epoch": 0.4400842266462481, "grad_norm": 1.696977138519287, "learning_rate": 1.2403937830669368e-05, "loss": 0.522, "step": 16093 }, { "epoch": 0.440111572959965, "grad_norm": 1.2583811283111572, "learning_rate": 1.2403078089957914e-05, "loss": 0.5259, "step": 16094 }, { "epoch": 0.44013891927368193, "grad_norm": 1.4796563386917114, "learning_rate": 1.240221833039499e-05, "loss": 0.3943, "step": 16095 }, { "epoch": 0.4401662655873988, "grad_norm": 1.5650122165679932, "learning_rate": 1.2401358551987338e-05, "loss": 0.5221, "step": 16096 }, { "epoch": 0.4401936119011157, "grad_norm": 1.3380459547042847, "learning_rate": 1.2400498754741705e-05, "loss": 0.5213, "step": 16097 }, { "epoch": 0.44022095821483265, "grad_norm": 1.8025621175765991, "learning_rate": 1.2399638938664836e-05, "loss": 0.8283, "step": 16098 }, { "epoch": 0.44024830452854957, "grad_norm": 1.2221295833587646, "learning_rate": 1.2398779103763474e-05, "loss": 0.5128, "step": 16099 }, { "epoch": 0.44027565084226644, "grad_norm": 1.3093502521514893, "learning_rate": 1.2397919250044366e-05, "loss": 0.5037, "step": 16100 }, { "epoch": 0.44030299715598337, "grad_norm": 1.1454112529754639, "learning_rate": 1.2397059377514253e-05, "loss": 0.5358, "step": 16101 }, { "epoch": 0.4403303434697003, "grad_norm": 1.3917022943496704, "learning_rate": 1.2396199486179885e-05, "loss": 0.5331, "step": 16102 }, { "epoch": 0.4403576897834172, "grad_norm": 1.1738624572753906, "learning_rate": 1.2395339576048008e-05, "loss": 0.3815, "step": 16103 }, { "epoch": 0.4403850360971341, "grad_norm": 2.005535840988159, "learning_rate": 1.2394479647125366e-05, "loss": 0.7968, "step": 16104 }, { "epoch": 0.440412382410851, "grad_norm": 1.2263413667678833, "learning_rate": 1.2393619699418703e-05, "loss": 0.5083, "step": 16105 }, { "epoch": 0.44043972872456794, "grad_norm": 1.3560938835144043, "learning_rate": 1.2392759732934769e-05, "loss": 0.5308, "step": 16106 }, { "epoch": 0.44046707503828486, "grad_norm": 1.3126609325408936, "learning_rate": 1.2391899747680307e-05, "loss": 0.546, "step": 16107 }, { "epoch": 0.44049442135200173, "grad_norm": 1.3905137777328491, "learning_rate": 1.2391039743662065e-05, "loss": 0.4931, "step": 16108 }, { "epoch": 0.44052176766571866, "grad_norm": 1.0179907083511353, "learning_rate": 1.239017972088679e-05, "loss": 0.4903, "step": 16109 }, { "epoch": 0.4405491139794356, "grad_norm": 1.1778064966201782, "learning_rate": 1.2389319679361224e-05, "loss": 0.5467, "step": 16110 }, { "epoch": 0.4405764602931525, "grad_norm": 1.4486418962478638, "learning_rate": 1.2388459619092124e-05, "loss": 0.8462, "step": 16111 }, { "epoch": 0.4406038066068694, "grad_norm": 1.1912736892700195, "learning_rate": 1.2387599540086226e-05, "loss": 0.4978, "step": 16112 }, { "epoch": 0.4406311529205863, "grad_norm": 1.2883113622665405, "learning_rate": 1.2386739442350285e-05, "loss": 0.5232, "step": 16113 }, { "epoch": 0.4406584992343032, "grad_norm": 1.326464056968689, "learning_rate": 1.238587932589104e-05, "loss": 0.4424, "step": 16114 }, { "epoch": 0.44068584554802015, "grad_norm": 1.5788142681121826, "learning_rate": 1.2385019190715244e-05, "loss": 0.3703, "step": 16115 }, { "epoch": 0.440713191861737, "grad_norm": 1.3081101179122925, "learning_rate": 1.2384159036829647e-05, "loss": 0.5233, "step": 16116 }, { "epoch": 0.44074053817545394, "grad_norm": 1.2571660280227661, "learning_rate": 1.2383298864240994e-05, "loss": 0.5336, "step": 16117 }, { "epoch": 0.44076788448917087, "grad_norm": 7.16123104095459, "learning_rate": 1.2382438672956027e-05, "loss": 0.5146, "step": 16118 }, { "epoch": 0.4407952308028878, "grad_norm": 1.3940662145614624, "learning_rate": 1.2381578462981502e-05, "loss": 0.5267, "step": 16119 }, { "epoch": 0.44082257711660466, "grad_norm": 1.1603165864944458, "learning_rate": 1.2380718234324164e-05, "loss": 0.5207, "step": 16120 }, { "epoch": 0.4408499234303216, "grad_norm": 1.2619447708129883, "learning_rate": 1.237985798699076e-05, "loss": 0.5523, "step": 16121 }, { "epoch": 0.4408772697440385, "grad_norm": 1.6282098293304443, "learning_rate": 1.2378997720988043e-05, "loss": 0.4891, "step": 16122 }, { "epoch": 0.44090461605775544, "grad_norm": 3.7917771339416504, "learning_rate": 1.2378137436322755e-05, "loss": 0.5404, "step": 16123 }, { "epoch": 0.4409319623714723, "grad_norm": 1.6551358699798584, "learning_rate": 1.237727713300165e-05, "loss": 0.5567, "step": 16124 }, { "epoch": 0.44095930868518923, "grad_norm": 1.5483434200286865, "learning_rate": 1.2376416811031475e-05, "loss": 0.4916, "step": 16125 }, { "epoch": 0.44098665499890616, "grad_norm": 1.2836062908172607, "learning_rate": 1.2375556470418977e-05, "loss": 0.5198, "step": 16126 }, { "epoch": 0.4410140013126231, "grad_norm": 1.3118584156036377, "learning_rate": 1.237469611117091e-05, "loss": 0.547, "step": 16127 }, { "epoch": 0.44104134762633995, "grad_norm": 1.1365371942520142, "learning_rate": 1.2373835733294018e-05, "loss": 0.4997, "step": 16128 }, { "epoch": 0.4410686939400569, "grad_norm": 1.454143762588501, "learning_rate": 1.2372975336795052e-05, "loss": 0.4448, "step": 16129 }, { "epoch": 0.4410960402537738, "grad_norm": 1.541582465171814, "learning_rate": 1.2372114921680765e-05, "loss": 0.5246, "step": 16130 }, { "epoch": 0.4411233865674907, "grad_norm": 1.3419164419174194, "learning_rate": 1.2371254487957902e-05, "loss": 0.5084, "step": 16131 }, { "epoch": 0.4411507328812076, "grad_norm": 1.9993559122085571, "learning_rate": 1.2370394035633217e-05, "loss": 0.8641, "step": 16132 }, { "epoch": 0.4411780791949245, "grad_norm": 1.0902912616729736, "learning_rate": 1.2369533564713457e-05, "loss": 0.539, "step": 16133 }, { "epoch": 0.44120542550864145, "grad_norm": 1.492004632949829, "learning_rate": 1.2368673075205372e-05, "loss": 0.7956, "step": 16134 }, { "epoch": 0.44123277182235837, "grad_norm": 1.5765823125839233, "learning_rate": 1.2367812567115716e-05, "loss": 0.4428, "step": 16135 }, { "epoch": 0.44126011813607524, "grad_norm": 1.3786301612854004, "learning_rate": 1.2366952040451235e-05, "loss": 0.4038, "step": 16136 }, { "epoch": 0.44128746444979217, "grad_norm": 1.2944380044937134, "learning_rate": 1.2366091495218679e-05, "loss": 0.5101, "step": 16137 }, { "epoch": 0.4413148107635091, "grad_norm": 1.0671428442001343, "learning_rate": 1.2365230931424805e-05, "loss": 0.529, "step": 16138 }, { "epoch": 0.441342157077226, "grad_norm": 1.1738965511322021, "learning_rate": 1.2364370349076356e-05, "loss": 0.5166, "step": 16139 }, { "epoch": 0.4413695033909429, "grad_norm": 1.1441771984100342, "learning_rate": 1.236350974818009e-05, "loss": 0.5721, "step": 16140 }, { "epoch": 0.4413968497046598, "grad_norm": 1.1431856155395508, "learning_rate": 1.2362649128742753e-05, "loss": 0.5325, "step": 16141 }, { "epoch": 0.44142419601837674, "grad_norm": 1.229574203491211, "learning_rate": 1.2361788490771101e-05, "loss": 0.4858, "step": 16142 }, { "epoch": 0.44145154233209366, "grad_norm": 4.034395217895508, "learning_rate": 1.2360927834271884e-05, "loss": 0.4906, "step": 16143 }, { "epoch": 0.44147888864581053, "grad_norm": 1.2181934118270874, "learning_rate": 1.2360067159251847e-05, "loss": 0.4883, "step": 16144 }, { "epoch": 0.44150623495952745, "grad_norm": 1.3456400632858276, "learning_rate": 1.235920646571775e-05, "loss": 0.514, "step": 16145 }, { "epoch": 0.4415335812732444, "grad_norm": 1.3897651433944702, "learning_rate": 1.2358345753676343e-05, "loss": 0.5461, "step": 16146 }, { "epoch": 0.4415609275869613, "grad_norm": 1.43019700050354, "learning_rate": 1.2357485023134376e-05, "loss": 0.4977, "step": 16147 }, { "epoch": 0.4415882739006782, "grad_norm": 1.3866676092147827, "learning_rate": 1.2356624274098603e-05, "loss": 0.5677, "step": 16148 }, { "epoch": 0.4416156202143951, "grad_norm": 1.4619839191436768, "learning_rate": 1.2355763506575777e-05, "loss": 0.4361, "step": 16149 }, { "epoch": 0.441642966528112, "grad_norm": 1.0544790029525757, "learning_rate": 1.2354902720572647e-05, "loss": 0.5283, "step": 16150 }, { "epoch": 0.4416703128418289, "grad_norm": 1.40537691116333, "learning_rate": 1.2354041916095973e-05, "loss": 0.5197, "step": 16151 }, { "epoch": 0.4416976591555458, "grad_norm": 1.2797132730484009, "learning_rate": 1.23531810931525e-05, "loss": 0.4334, "step": 16152 }, { "epoch": 0.44172500546926274, "grad_norm": 1.2367630004882812, "learning_rate": 1.2352320251748981e-05, "loss": 0.5275, "step": 16153 }, { "epoch": 0.44175235178297967, "grad_norm": 1.25504469871521, "learning_rate": 1.2351459391892175e-05, "loss": 0.5124, "step": 16154 }, { "epoch": 0.44177969809669654, "grad_norm": 1.5646493434906006, "learning_rate": 1.235059851358883e-05, "loss": 0.5258, "step": 16155 }, { "epoch": 0.44180704441041346, "grad_norm": 1.170433759689331, "learning_rate": 1.23497376168457e-05, "loss": 0.5013, "step": 16156 }, { "epoch": 0.4418343907241304, "grad_norm": 1.2266287803649902, "learning_rate": 1.2348876701669541e-05, "loss": 0.4909, "step": 16157 }, { "epoch": 0.4418617370378473, "grad_norm": 1.2073452472686768, "learning_rate": 1.2348015768067105e-05, "loss": 0.5163, "step": 16158 }, { "epoch": 0.4418890833515642, "grad_norm": 1.1012636423110962, "learning_rate": 1.2347154816045147e-05, "loss": 0.4855, "step": 16159 }, { "epoch": 0.4419164296652811, "grad_norm": 1.442776083946228, "learning_rate": 1.234629384561042e-05, "loss": 0.5298, "step": 16160 }, { "epoch": 0.44194377597899803, "grad_norm": 1.4065883159637451, "learning_rate": 1.2345432856769677e-05, "loss": 0.5378, "step": 16161 }, { "epoch": 0.44197112229271496, "grad_norm": 2.3712916374206543, "learning_rate": 1.2344571849529674e-05, "loss": 0.8415, "step": 16162 }, { "epoch": 0.4419984686064318, "grad_norm": 2.8931925296783447, "learning_rate": 1.2343710823897163e-05, "loss": 0.3361, "step": 16163 }, { "epoch": 0.44202581492014875, "grad_norm": 1.3636436462402344, "learning_rate": 1.23428497798789e-05, "loss": 0.5125, "step": 16164 }, { "epoch": 0.4420531612338657, "grad_norm": 1.305904746055603, "learning_rate": 1.2341988717481643e-05, "loss": 0.8012, "step": 16165 }, { "epoch": 0.4420805075475826, "grad_norm": 1.2042250633239746, "learning_rate": 1.2341127636712142e-05, "loss": 0.5148, "step": 16166 }, { "epoch": 0.44210785386129947, "grad_norm": 1.3389065265655518, "learning_rate": 1.2340266537577154e-05, "loss": 0.5224, "step": 16167 }, { "epoch": 0.4421352001750164, "grad_norm": 1.258165717124939, "learning_rate": 1.2339405420083432e-05, "loss": 0.545, "step": 16168 }, { "epoch": 0.4421625464887333, "grad_norm": 2.0600154399871826, "learning_rate": 1.2338544284237737e-05, "loss": 0.8573, "step": 16169 }, { "epoch": 0.44218989280245025, "grad_norm": 1.4823784828186035, "learning_rate": 1.2337683130046816e-05, "loss": 0.5175, "step": 16170 }, { "epoch": 0.4422172391161671, "grad_norm": 1.411520004272461, "learning_rate": 1.233682195751743e-05, "loss": 0.537, "step": 16171 }, { "epoch": 0.44224458542988404, "grad_norm": 1.1891872882843018, "learning_rate": 1.2335960766656335e-05, "loss": 0.5107, "step": 16172 }, { "epoch": 0.44227193174360097, "grad_norm": 1.3283318281173706, "learning_rate": 1.2335099557470283e-05, "loss": 0.5294, "step": 16173 }, { "epoch": 0.4422992780573179, "grad_norm": 1.2551062107086182, "learning_rate": 1.233423832996603e-05, "loss": 0.5261, "step": 16174 }, { "epoch": 0.44232662437103476, "grad_norm": 1.4273030757904053, "learning_rate": 1.2333377084150338e-05, "loss": 0.5104, "step": 16175 }, { "epoch": 0.4423539706847517, "grad_norm": 1.2708168029785156, "learning_rate": 1.233251582002996e-05, "loss": 0.5077, "step": 16176 }, { "epoch": 0.4423813169984686, "grad_norm": 1.3420363664627075, "learning_rate": 1.2331654537611647e-05, "loss": 0.508, "step": 16177 }, { "epoch": 0.44240866331218553, "grad_norm": 1.310447335243225, "learning_rate": 1.2330793236902166e-05, "loss": 0.5103, "step": 16178 }, { "epoch": 0.4424360096259024, "grad_norm": 1.2232469320297241, "learning_rate": 1.2329931917908265e-05, "loss": 0.539, "step": 16179 }, { "epoch": 0.44246335593961933, "grad_norm": 1.4459688663482666, "learning_rate": 1.2329070580636704e-05, "loss": 0.5097, "step": 16180 }, { "epoch": 0.44249070225333625, "grad_norm": 1.6817774772644043, "learning_rate": 1.2328209225094242e-05, "loss": 0.5182, "step": 16181 }, { "epoch": 0.4425180485670532, "grad_norm": 1.6116368770599365, "learning_rate": 1.232734785128763e-05, "loss": 0.3932, "step": 16182 }, { "epoch": 0.44254539488077005, "grad_norm": 1.2841583490371704, "learning_rate": 1.232648645922363e-05, "loss": 0.5586, "step": 16183 }, { "epoch": 0.442572741194487, "grad_norm": 1.4124811887741089, "learning_rate": 1.2325625048909e-05, "loss": 0.8334, "step": 16184 }, { "epoch": 0.4426000875082039, "grad_norm": 1.7205796241760254, "learning_rate": 1.2324763620350494e-05, "loss": 0.4519, "step": 16185 }, { "epoch": 0.4426274338219208, "grad_norm": 1.3474715948104858, "learning_rate": 1.2323902173554874e-05, "loss": 0.5001, "step": 16186 }, { "epoch": 0.4426547801356377, "grad_norm": 2.759945869445801, "learning_rate": 1.2323040708528894e-05, "loss": 0.5044, "step": 16187 }, { "epoch": 0.4426821264493546, "grad_norm": 1.678653359413147, "learning_rate": 1.2322179225279317e-05, "loss": 0.459, "step": 16188 }, { "epoch": 0.44270947276307154, "grad_norm": 1.70966374874115, "learning_rate": 1.2321317723812893e-05, "loss": 0.5203, "step": 16189 }, { "epoch": 0.44273681907678847, "grad_norm": 1.2788487672805786, "learning_rate": 1.2320456204136387e-05, "loss": 0.4803, "step": 16190 }, { "epoch": 0.44276416539050534, "grad_norm": 1.171846866607666, "learning_rate": 1.2319594666256552e-05, "loss": 0.3875, "step": 16191 }, { "epoch": 0.44279151170422226, "grad_norm": 2.070239305496216, "learning_rate": 1.2318733110180152e-05, "loss": 0.5092, "step": 16192 }, { "epoch": 0.4428188580179392, "grad_norm": 1.3917903900146484, "learning_rate": 1.2317871535913943e-05, "loss": 0.4818, "step": 16193 }, { "epoch": 0.4428462043316561, "grad_norm": 2.2785043716430664, "learning_rate": 1.2317009943464684e-05, "loss": 0.5362, "step": 16194 }, { "epoch": 0.442873550645373, "grad_norm": 1.7335596084594727, "learning_rate": 1.2316148332839135e-05, "loss": 0.4246, "step": 16195 }, { "epoch": 0.4429008969590899, "grad_norm": 1.2851496934890747, "learning_rate": 1.2315286704044054e-05, "loss": 0.5388, "step": 16196 }, { "epoch": 0.44292824327280683, "grad_norm": 1.5240341424942017, "learning_rate": 1.2314425057086199e-05, "loss": 0.8258, "step": 16197 }, { "epoch": 0.44295558958652376, "grad_norm": 1.3512697219848633, "learning_rate": 1.2313563391972332e-05, "loss": 0.5113, "step": 16198 }, { "epoch": 0.4429829359002406, "grad_norm": 1.5575034618377686, "learning_rate": 1.2312701708709209e-05, "loss": 0.4006, "step": 16199 }, { "epoch": 0.44301028221395755, "grad_norm": 1.280192494392395, "learning_rate": 1.2311840007303595e-05, "loss": 0.489, "step": 16200 }, { "epoch": 0.4430376285276745, "grad_norm": 1.2968133687973022, "learning_rate": 1.2310978287762245e-05, "loss": 0.5212, "step": 16201 }, { "epoch": 0.4430649748413914, "grad_norm": 1.2579840421676636, "learning_rate": 1.231011655009192e-05, "loss": 0.4908, "step": 16202 }, { "epoch": 0.44309232115510827, "grad_norm": 1.3146567344665527, "learning_rate": 1.2309254794299383e-05, "loss": 0.4183, "step": 16203 }, { "epoch": 0.4431196674688252, "grad_norm": 1.4145582914352417, "learning_rate": 1.2308393020391393e-05, "loss": 0.4995, "step": 16204 }, { "epoch": 0.4431470137825421, "grad_norm": 1.4222660064697266, "learning_rate": 1.2307531228374708e-05, "loss": 0.5374, "step": 16205 }, { "epoch": 0.44317436009625905, "grad_norm": 1.2777340412139893, "learning_rate": 1.230666941825609e-05, "loss": 0.5352, "step": 16206 }, { "epoch": 0.4432017064099759, "grad_norm": 1.445657730102539, "learning_rate": 1.2305807590042298e-05, "loss": 0.5416, "step": 16207 }, { "epoch": 0.44322905272369284, "grad_norm": 1.4171947240829468, "learning_rate": 1.2304945743740096e-05, "loss": 0.8418, "step": 16208 }, { "epoch": 0.44325639903740977, "grad_norm": 1.2470285892486572, "learning_rate": 1.2304083879356241e-05, "loss": 0.5162, "step": 16209 }, { "epoch": 0.4432837453511267, "grad_norm": 1.296076774597168, "learning_rate": 1.2303221996897499e-05, "loss": 0.8336, "step": 16210 }, { "epoch": 0.44331109166484356, "grad_norm": 1.5692238807678223, "learning_rate": 1.2302360096370626e-05, "loss": 0.5081, "step": 16211 }, { "epoch": 0.4433384379785605, "grad_norm": 1.2835181951522827, "learning_rate": 1.2301498177782389e-05, "loss": 0.4957, "step": 16212 }, { "epoch": 0.4433657842922774, "grad_norm": 1.3203530311584473, "learning_rate": 1.2300636241139544e-05, "loss": 0.5284, "step": 16213 }, { "epoch": 0.44339313060599433, "grad_norm": 1.5678430795669556, "learning_rate": 1.2299774286448858e-05, "loss": 0.3799, "step": 16214 }, { "epoch": 0.4434204769197112, "grad_norm": 1.9979641437530518, "learning_rate": 1.2298912313717086e-05, "loss": 0.5294, "step": 16215 }, { "epoch": 0.44344782323342813, "grad_norm": 2.461834669113159, "learning_rate": 1.2298050322950998e-05, "loss": 0.5357, "step": 16216 }, { "epoch": 0.44347516954714505, "grad_norm": 1.3314330577850342, "learning_rate": 1.229718831415735e-05, "loss": 0.503, "step": 16217 }, { "epoch": 0.443502515860862, "grad_norm": 1.6035268306732178, "learning_rate": 1.2296326287342904e-05, "loss": 0.4684, "step": 16218 }, { "epoch": 0.44352986217457885, "grad_norm": 1.3278350830078125, "learning_rate": 1.2295464242514428e-05, "loss": 0.5162, "step": 16219 }, { "epoch": 0.4435572084882958, "grad_norm": 1.315271258354187, "learning_rate": 1.2294602179678678e-05, "loss": 0.5129, "step": 16220 }, { "epoch": 0.4435845548020127, "grad_norm": 1.4241251945495605, "learning_rate": 1.2293740098842424e-05, "loss": 0.5464, "step": 16221 }, { "epoch": 0.4436119011157296, "grad_norm": 1.7620604038238525, "learning_rate": 1.2292878000012423e-05, "loss": 0.4412, "step": 16222 }, { "epoch": 0.4436392474294465, "grad_norm": 1.3707736730575562, "learning_rate": 1.2292015883195436e-05, "loss": 0.5165, "step": 16223 }, { "epoch": 0.4436665937431634, "grad_norm": 1.3980785608291626, "learning_rate": 1.229115374839823e-05, "loss": 0.8307, "step": 16224 }, { "epoch": 0.44369394005688034, "grad_norm": 1.407184362411499, "learning_rate": 1.229029159562757e-05, "loss": 0.5286, "step": 16225 }, { "epoch": 0.44372128637059727, "grad_norm": 3.8022289276123047, "learning_rate": 1.2289429424890216e-05, "loss": 0.5028, "step": 16226 }, { "epoch": 0.44374863268431414, "grad_norm": 1.2991622686386108, "learning_rate": 1.2288567236192934e-05, "loss": 0.5137, "step": 16227 }, { "epoch": 0.44377597899803106, "grad_norm": 1.3750433921813965, "learning_rate": 1.2287705029542484e-05, "loss": 0.4961, "step": 16228 }, { "epoch": 0.443803325311748, "grad_norm": 1.4453343152999878, "learning_rate": 1.228684280494563e-05, "loss": 0.5174, "step": 16229 }, { "epoch": 0.4438306716254649, "grad_norm": 1.297042965888977, "learning_rate": 1.2285980562409143e-05, "loss": 0.495, "step": 16230 }, { "epoch": 0.4438580179391818, "grad_norm": 1.287978172302246, "learning_rate": 1.2285118301939778e-05, "loss": 0.5283, "step": 16231 }, { "epoch": 0.4438853642528987, "grad_norm": 1.994225025177002, "learning_rate": 1.2284256023544306e-05, "loss": 0.4588, "step": 16232 }, { "epoch": 0.44391271056661563, "grad_norm": 1.436379075050354, "learning_rate": 1.2283393727229487e-05, "loss": 0.3796, "step": 16233 }, { "epoch": 0.44394005688033256, "grad_norm": 1.5811669826507568, "learning_rate": 1.2282531413002085e-05, "loss": 0.4626, "step": 16234 }, { "epoch": 0.4439674031940494, "grad_norm": 1.4539958238601685, "learning_rate": 1.2281669080868868e-05, "loss": 0.452, "step": 16235 }, { "epoch": 0.44399474950776635, "grad_norm": 1.356154203414917, "learning_rate": 1.2280806730836599e-05, "loss": 0.5124, "step": 16236 }, { "epoch": 0.4440220958214833, "grad_norm": 1.2664189338684082, "learning_rate": 1.2279944362912042e-05, "loss": 0.5384, "step": 16237 }, { "epoch": 0.4440494421352002, "grad_norm": 2.2655766010284424, "learning_rate": 1.2279081977101965e-05, "loss": 0.3825, "step": 16238 }, { "epoch": 0.44407678844891707, "grad_norm": 1.2576078176498413, "learning_rate": 1.2278219573413132e-05, "loss": 0.5236, "step": 16239 }, { "epoch": 0.444104134762634, "grad_norm": 1.4945778846740723, "learning_rate": 1.2277357151852305e-05, "loss": 0.4557, "step": 16240 }, { "epoch": 0.4441314810763509, "grad_norm": 1.2942324876785278, "learning_rate": 1.2276494712426254e-05, "loss": 0.5253, "step": 16241 }, { "epoch": 0.44415882739006785, "grad_norm": 1.2286713123321533, "learning_rate": 1.227563225514174e-05, "loss": 0.5168, "step": 16242 }, { "epoch": 0.4441861737037847, "grad_norm": 1.136353611946106, "learning_rate": 1.2274769780005536e-05, "loss": 0.515, "step": 16243 }, { "epoch": 0.44421352001750164, "grad_norm": 1.315340518951416, "learning_rate": 1.2273907287024397e-05, "loss": 0.419, "step": 16244 }, { "epoch": 0.44424086633121856, "grad_norm": 1.6209388971328735, "learning_rate": 1.2273044776205099e-05, "loss": 0.438, "step": 16245 }, { "epoch": 0.4442682126449355, "grad_norm": 1.2804467678070068, "learning_rate": 1.2272182247554405e-05, "loss": 0.5314, "step": 16246 }, { "epoch": 0.44429555895865236, "grad_norm": 1.2522802352905273, "learning_rate": 1.2271319701079078e-05, "loss": 0.4927, "step": 16247 }, { "epoch": 0.4443229052723693, "grad_norm": 1.2426868677139282, "learning_rate": 1.227045713678589e-05, "loss": 0.4929, "step": 16248 }, { "epoch": 0.4443502515860862, "grad_norm": 1.0951133966445923, "learning_rate": 1.2269594554681603e-05, "loss": 0.5062, "step": 16249 }, { "epoch": 0.44437759789980313, "grad_norm": 1.1146118640899658, "learning_rate": 1.2268731954772985e-05, "loss": 0.5161, "step": 16250 }, { "epoch": 0.44440494421352, "grad_norm": 1.2676112651824951, "learning_rate": 1.2267869337066807e-05, "loss": 0.5122, "step": 16251 }, { "epoch": 0.44443229052723693, "grad_norm": 2.053652048110962, "learning_rate": 1.2267006701569829e-05, "loss": 0.4041, "step": 16252 }, { "epoch": 0.44445963684095385, "grad_norm": 1.8579028844833374, "learning_rate": 1.2266144048288821e-05, "loss": 0.5584, "step": 16253 }, { "epoch": 0.4444869831546707, "grad_norm": 1.8087632656097412, "learning_rate": 1.2265281377230554e-05, "loss": 0.51, "step": 16254 }, { "epoch": 0.44451432946838765, "grad_norm": 1.0907400846481323, "learning_rate": 1.2264418688401787e-05, "loss": 0.5399, "step": 16255 }, { "epoch": 0.4445416757821046, "grad_norm": 1.035519003868103, "learning_rate": 1.2263555981809297e-05, "loss": 0.5135, "step": 16256 }, { "epoch": 0.4445690220958215, "grad_norm": 1.186606764793396, "learning_rate": 1.2262693257459848e-05, "loss": 0.5121, "step": 16257 }, { "epoch": 0.44459636840953837, "grad_norm": 1.5586433410644531, "learning_rate": 1.2261830515360202e-05, "loss": 0.5108, "step": 16258 }, { "epoch": 0.4446237147232553, "grad_norm": 1.108583927154541, "learning_rate": 1.2260967755517134e-05, "loss": 0.4843, "step": 16259 }, { "epoch": 0.4446510610369722, "grad_norm": 1.264655590057373, "learning_rate": 1.2260104977937414e-05, "loss": 0.5337, "step": 16260 }, { "epoch": 0.44467840735068914, "grad_norm": 1.2245676517486572, "learning_rate": 1.2259242182627804e-05, "loss": 0.5044, "step": 16261 }, { "epoch": 0.444705753664406, "grad_norm": 1.8100075721740723, "learning_rate": 1.2258379369595075e-05, "loss": 0.5508, "step": 16262 }, { "epoch": 0.44473309997812294, "grad_norm": 1.2122101783752441, "learning_rate": 1.2257516538845994e-05, "loss": 0.4879, "step": 16263 }, { "epoch": 0.44476044629183986, "grad_norm": 1.3370589017868042, "learning_rate": 1.2256653690387331e-05, "loss": 0.4773, "step": 16264 }, { "epoch": 0.4447877926055568, "grad_norm": 1.477024793624878, "learning_rate": 1.2255790824225856e-05, "loss": 0.5204, "step": 16265 }, { "epoch": 0.44481513891927366, "grad_norm": 1.221234917640686, "learning_rate": 1.2254927940368337e-05, "loss": 0.5267, "step": 16266 }, { "epoch": 0.4448424852329906, "grad_norm": 1.783892035484314, "learning_rate": 1.2254065038821545e-05, "loss": 0.5229, "step": 16267 }, { "epoch": 0.4448698315467075, "grad_norm": 1.270956039428711, "learning_rate": 1.2253202119592245e-05, "loss": 0.5595, "step": 16268 }, { "epoch": 0.44489717786042443, "grad_norm": 1.1711947917938232, "learning_rate": 1.2252339182687205e-05, "loss": 0.5345, "step": 16269 }, { "epoch": 0.4449245241741413, "grad_norm": 1.5966688394546509, "learning_rate": 1.2251476228113204e-05, "loss": 0.5384, "step": 16270 }, { "epoch": 0.4449518704878582, "grad_norm": 1.2290172576904297, "learning_rate": 1.2250613255877001e-05, "loss": 0.5102, "step": 16271 }, { "epoch": 0.44497921680157515, "grad_norm": 1.5183653831481934, "learning_rate": 1.2249750265985372e-05, "loss": 0.553, "step": 16272 }, { "epoch": 0.4450065631152921, "grad_norm": 1.0586588382720947, "learning_rate": 1.2248887258445089e-05, "loss": 0.513, "step": 16273 }, { "epoch": 0.44503390942900894, "grad_norm": 1.3759443759918213, "learning_rate": 1.2248024233262912e-05, "loss": 0.5051, "step": 16274 }, { "epoch": 0.44506125574272587, "grad_norm": 1.3416478633880615, "learning_rate": 1.2247161190445623e-05, "loss": 0.507, "step": 16275 }, { "epoch": 0.4450886020564428, "grad_norm": 1.2015581130981445, "learning_rate": 1.2246298129999985e-05, "loss": 0.5405, "step": 16276 }, { "epoch": 0.4451159483701597, "grad_norm": 1.3824890851974487, "learning_rate": 1.2245435051932768e-05, "loss": 0.4369, "step": 16277 }, { "epoch": 0.4451432946838766, "grad_norm": 1.593613862991333, "learning_rate": 1.2244571956250746e-05, "loss": 0.5373, "step": 16278 }, { "epoch": 0.4451706409975935, "grad_norm": 1.4190466403961182, "learning_rate": 1.224370884296069e-05, "loss": 0.5327, "step": 16279 }, { "epoch": 0.44519798731131044, "grad_norm": 1.6318196058273315, "learning_rate": 1.224284571206937e-05, "loss": 0.5081, "step": 16280 }, { "epoch": 0.44522533362502736, "grad_norm": 1.2597272396087646, "learning_rate": 1.2241982563583556e-05, "loss": 0.5199, "step": 16281 }, { "epoch": 0.44525267993874423, "grad_norm": 2.3624696731567383, "learning_rate": 1.2241119397510017e-05, "loss": 0.5293, "step": 16282 }, { "epoch": 0.44528002625246116, "grad_norm": 1.3909473419189453, "learning_rate": 1.2240256213855531e-05, "loss": 0.5284, "step": 16283 }, { "epoch": 0.4453073725661781, "grad_norm": 1.2087106704711914, "learning_rate": 1.2239393012626863e-05, "loss": 0.5204, "step": 16284 }, { "epoch": 0.445334718879895, "grad_norm": 1.6463369131088257, "learning_rate": 1.2238529793830787e-05, "loss": 0.5012, "step": 16285 }, { "epoch": 0.4453620651936119, "grad_norm": 1.2535444498062134, "learning_rate": 1.2237666557474077e-05, "loss": 0.414, "step": 16286 }, { "epoch": 0.4453894115073288, "grad_norm": 1.2594839334487915, "learning_rate": 1.22368033035635e-05, "loss": 0.5202, "step": 16287 }, { "epoch": 0.44541675782104573, "grad_norm": 1.3725513219833374, "learning_rate": 1.223594003210583e-05, "loss": 0.8359, "step": 16288 }, { "epoch": 0.44544410413476265, "grad_norm": 2.0340077877044678, "learning_rate": 1.2235076743107843e-05, "loss": 0.3771, "step": 16289 }, { "epoch": 0.4454714504484795, "grad_norm": 1.4765623807907104, "learning_rate": 1.2234213436576308e-05, "loss": 0.4843, "step": 16290 }, { "epoch": 0.44549879676219645, "grad_norm": 1.4555379152297974, "learning_rate": 1.2233350112517994e-05, "loss": 0.8111, "step": 16291 }, { "epoch": 0.4455261430759134, "grad_norm": 1.3742856979370117, "learning_rate": 1.2232486770939682e-05, "loss": 0.5498, "step": 16292 }, { "epoch": 0.4455534893896303, "grad_norm": 1.3176536560058594, "learning_rate": 1.2231623411848135e-05, "loss": 0.5384, "step": 16293 }, { "epoch": 0.44558083570334717, "grad_norm": 1.8365769386291504, "learning_rate": 1.223076003525013e-05, "loss": 0.53, "step": 16294 }, { "epoch": 0.4456081820170641, "grad_norm": 1.1144366264343262, "learning_rate": 1.2229896641152443e-05, "loss": 0.5043, "step": 16295 }, { "epoch": 0.445635528330781, "grad_norm": 1.4430533647537231, "learning_rate": 1.2229033229561842e-05, "loss": 0.5162, "step": 16296 }, { "epoch": 0.44566287464449794, "grad_norm": 1.1230807304382324, "learning_rate": 1.2228169800485105e-05, "loss": 0.5399, "step": 16297 }, { "epoch": 0.4456902209582148, "grad_norm": 1.1295890808105469, "learning_rate": 1.2227306353929002e-05, "loss": 0.516, "step": 16298 }, { "epoch": 0.44571756727193174, "grad_norm": 1.4537156820297241, "learning_rate": 1.2226442889900306e-05, "loss": 0.4637, "step": 16299 }, { "epoch": 0.44574491358564866, "grad_norm": 1.1283149719238281, "learning_rate": 1.2225579408405795e-05, "loss": 0.5232, "step": 16300 }, { "epoch": 0.4457722598993656, "grad_norm": 1.9812753200531006, "learning_rate": 1.2224715909452236e-05, "loss": 0.37, "step": 16301 }, { "epoch": 0.44579960621308246, "grad_norm": 1.1361442804336548, "learning_rate": 1.2223852393046408e-05, "loss": 0.5199, "step": 16302 }, { "epoch": 0.4458269525267994, "grad_norm": 1.205457091331482, "learning_rate": 1.2222988859195089e-05, "loss": 0.4942, "step": 16303 }, { "epoch": 0.4458542988405163, "grad_norm": 1.6262277364730835, "learning_rate": 1.2222125307905041e-05, "loss": 0.5298, "step": 16304 }, { "epoch": 0.44588164515423323, "grad_norm": 1.3923100233078003, "learning_rate": 1.222126173918305e-05, "loss": 0.5309, "step": 16305 }, { "epoch": 0.4459089914679501, "grad_norm": 1.2402063608169556, "learning_rate": 1.2220398153035884e-05, "loss": 0.8501, "step": 16306 }, { "epoch": 0.445936337781667, "grad_norm": 1.4569448232650757, "learning_rate": 1.2219534549470317e-05, "loss": 0.5415, "step": 16307 }, { "epoch": 0.44596368409538395, "grad_norm": 1.3534915447235107, "learning_rate": 1.221867092849313e-05, "loss": 0.5191, "step": 16308 }, { "epoch": 0.4459910304091009, "grad_norm": 1.5583863258361816, "learning_rate": 1.2217807290111093e-05, "loss": 0.4652, "step": 16309 }, { "epoch": 0.44601837672281774, "grad_norm": 1.1476534605026245, "learning_rate": 1.2216943634330983e-05, "loss": 0.5457, "step": 16310 }, { "epoch": 0.44604572303653467, "grad_norm": 1.3682085275650024, "learning_rate": 1.2216079961159572e-05, "loss": 0.523, "step": 16311 }, { "epoch": 0.4460730693502516, "grad_norm": 1.5611450672149658, "learning_rate": 1.2215216270603637e-05, "loss": 0.5327, "step": 16312 }, { "epoch": 0.4461004156639685, "grad_norm": 1.3379533290863037, "learning_rate": 1.2214352562669953e-05, "loss": 0.4952, "step": 16313 }, { "epoch": 0.4461277619776854, "grad_norm": 1.115101933479309, "learning_rate": 1.2213488837365299e-05, "loss": 0.5037, "step": 16314 }, { "epoch": 0.4461551082914023, "grad_norm": 1.9407587051391602, "learning_rate": 1.2212625094696446e-05, "loss": 0.8267, "step": 16315 }, { "epoch": 0.44618245460511924, "grad_norm": 1.6165485382080078, "learning_rate": 1.2211761334670174e-05, "loss": 0.5724, "step": 16316 }, { "epoch": 0.44620980091883616, "grad_norm": 1.30227530002594, "learning_rate": 1.2210897557293256e-05, "loss": 0.524, "step": 16317 }, { "epoch": 0.44623714723255303, "grad_norm": 1.1176002025604248, "learning_rate": 1.2210033762572467e-05, "loss": 0.819, "step": 16318 }, { "epoch": 0.44626449354626996, "grad_norm": 1.347270131111145, "learning_rate": 1.2209169950514588e-05, "loss": 0.5087, "step": 16319 }, { "epoch": 0.4462918398599869, "grad_norm": 1.3780686855316162, "learning_rate": 1.220830612112639e-05, "loss": 0.549, "step": 16320 }, { "epoch": 0.4463191861737038, "grad_norm": 1.2681264877319336, "learning_rate": 1.2207442274414654e-05, "loss": 0.5319, "step": 16321 }, { "epoch": 0.4463465324874207, "grad_norm": 1.6394902467727661, "learning_rate": 1.2206578410386153e-05, "loss": 0.4023, "step": 16322 }, { "epoch": 0.4463738788011376, "grad_norm": 1.8077205419540405, "learning_rate": 1.2205714529047664e-05, "loss": 0.3704, "step": 16323 }, { "epoch": 0.4464012251148545, "grad_norm": 1.4563941955566406, "learning_rate": 1.2204850630405969e-05, "loss": 0.5629, "step": 16324 }, { "epoch": 0.44642857142857145, "grad_norm": 1.4086474180221558, "learning_rate": 1.2203986714467838e-05, "loss": 0.5332, "step": 16325 }, { "epoch": 0.4464559177422883, "grad_norm": 1.9564616680145264, "learning_rate": 1.2203122781240051e-05, "loss": 0.5426, "step": 16326 }, { "epoch": 0.44648326405600525, "grad_norm": 1.319070816040039, "learning_rate": 1.220225883072939e-05, "loss": 0.5147, "step": 16327 }, { "epoch": 0.44651061036972217, "grad_norm": 4.994931221008301, "learning_rate": 1.2201394862942627e-05, "loss": 0.542, "step": 16328 }, { "epoch": 0.4465379566834391, "grad_norm": 1.3984017372131348, "learning_rate": 1.2200530877886539e-05, "loss": 0.5504, "step": 16329 }, { "epoch": 0.44656530299715597, "grad_norm": 1.589598298072815, "learning_rate": 1.2199666875567906e-05, "loss": 0.5027, "step": 16330 }, { "epoch": 0.4465926493108729, "grad_norm": 1.0426620244979858, "learning_rate": 1.2198802855993505e-05, "loss": 0.5113, "step": 16331 }, { "epoch": 0.4466199956245898, "grad_norm": 1.49565851688385, "learning_rate": 1.2197938819170116e-05, "loss": 0.4916, "step": 16332 }, { "epoch": 0.44664734193830674, "grad_norm": 1.3894643783569336, "learning_rate": 1.2197074765104513e-05, "loss": 0.4123, "step": 16333 }, { "epoch": 0.4466746882520236, "grad_norm": 1.2808117866516113, "learning_rate": 1.2196210693803478e-05, "loss": 0.532, "step": 16334 }, { "epoch": 0.44670203456574054, "grad_norm": 1.4002645015716553, "learning_rate": 1.219534660527379e-05, "loss": 0.4285, "step": 16335 }, { "epoch": 0.44672938087945746, "grad_norm": 1.4480491876602173, "learning_rate": 1.2194482499522225e-05, "loss": 0.5046, "step": 16336 }, { "epoch": 0.4467567271931744, "grad_norm": 1.5157241821289062, "learning_rate": 1.2193618376555558e-05, "loss": 0.5095, "step": 16337 }, { "epoch": 0.44678407350689125, "grad_norm": 1.4276760816574097, "learning_rate": 1.2192754236380577e-05, "loss": 0.5424, "step": 16338 }, { "epoch": 0.4468114198206082, "grad_norm": 1.4277180433273315, "learning_rate": 1.2191890079004054e-05, "loss": 0.5368, "step": 16339 }, { "epoch": 0.4468387661343251, "grad_norm": 1.9724029302597046, "learning_rate": 1.2191025904432774e-05, "loss": 0.5632, "step": 16340 }, { "epoch": 0.44686611244804203, "grad_norm": 1.1133878231048584, "learning_rate": 1.2190161712673507e-05, "loss": 0.5403, "step": 16341 }, { "epoch": 0.4468934587617589, "grad_norm": 1.3400157690048218, "learning_rate": 1.218929750373304e-05, "loss": 0.5295, "step": 16342 }, { "epoch": 0.4469208050754758, "grad_norm": 1.566226601600647, "learning_rate": 1.2188433277618152e-05, "loss": 0.4998, "step": 16343 }, { "epoch": 0.44694815138919275, "grad_norm": 1.5344350337982178, "learning_rate": 1.2187569034335617e-05, "loss": 0.487, "step": 16344 }, { "epoch": 0.4469754977029097, "grad_norm": 1.7928099632263184, "learning_rate": 1.2186704773892224e-05, "loss": 0.5279, "step": 16345 }, { "epoch": 0.44700284401662654, "grad_norm": 1.4850423336029053, "learning_rate": 1.2185840496294743e-05, "loss": 0.3937, "step": 16346 }, { "epoch": 0.44703019033034347, "grad_norm": 1.5267950296401978, "learning_rate": 1.218497620154996e-05, "loss": 0.549, "step": 16347 }, { "epoch": 0.4470575366440604, "grad_norm": 1.3434687852859497, "learning_rate": 1.2184111889664652e-05, "loss": 0.5115, "step": 16348 }, { "epoch": 0.4470848829577773, "grad_norm": 1.3182326555252075, "learning_rate": 1.2183247560645603e-05, "loss": 0.4987, "step": 16349 }, { "epoch": 0.4471122292714942, "grad_norm": 27.11739730834961, "learning_rate": 1.2182383214499592e-05, "loss": 0.5393, "step": 16350 }, { "epoch": 0.4471395755852111, "grad_norm": 1.4939148426055908, "learning_rate": 1.2181518851233397e-05, "loss": 0.5402, "step": 16351 }, { "epoch": 0.44716692189892804, "grad_norm": 1.1656837463378906, "learning_rate": 1.2180654470853799e-05, "loss": 0.5011, "step": 16352 }, { "epoch": 0.44719426821264496, "grad_norm": 1.5754671096801758, "learning_rate": 1.217979007336758e-05, "loss": 0.5093, "step": 16353 }, { "epoch": 0.44722161452636183, "grad_norm": 1.1513783931732178, "learning_rate": 1.2178925658781526e-05, "loss": 0.5294, "step": 16354 }, { "epoch": 0.44724896084007876, "grad_norm": 1.4963525533676147, "learning_rate": 1.2178061227102411e-05, "loss": 0.4229, "step": 16355 }, { "epoch": 0.4472763071537957, "grad_norm": 1.1891030073165894, "learning_rate": 1.2177196778337017e-05, "loss": 0.5099, "step": 16356 }, { "epoch": 0.44730365346751255, "grad_norm": 1.4631216526031494, "learning_rate": 1.217633231249213e-05, "loss": 0.4962, "step": 16357 }, { "epoch": 0.4473309997812295, "grad_norm": 2.45786190032959, "learning_rate": 1.2175467829574526e-05, "loss": 0.3362, "step": 16358 }, { "epoch": 0.4473583460949464, "grad_norm": 1.1358261108398438, "learning_rate": 1.2174603329590991e-05, "loss": 0.5037, "step": 16359 }, { "epoch": 0.4473856924086633, "grad_norm": 1.3172569274902344, "learning_rate": 1.2173738812548304e-05, "loss": 0.5019, "step": 16360 }, { "epoch": 0.4474130387223802, "grad_norm": 1.2307794094085693, "learning_rate": 1.2172874278453246e-05, "loss": 0.5289, "step": 16361 }, { "epoch": 0.4474403850360971, "grad_norm": 1.635638952255249, "learning_rate": 1.2172009727312603e-05, "loss": 0.4373, "step": 16362 }, { "epoch": 0.44746773134981405, "grad_norm": 1.5523295402526855, "learning_rate": 1.2171145159133158e-05, "loss": 0.5275, "step": 16363 }, { "epoch": 0.44749507766353097, "grad_norm": 1.2428996562957764, "learning_rate": 1.2170280573921684e-05, "loss": 0.5101, "step": 16364 }, { "epoch": 0.44752242397724784, "grad_norm": 1.0559979677200317, "learning_rate": 1.2169415971684974e-05, "loss": 0.5286, "step": 16365 }, { "epoch": 0.44754977029096477, "grad_norm": 1.3912848234176636, "learning_rate": 1.2168551352429804e-05, "loss": 0.504, "step": 16366 }, { "epoch": 0.4475771166046817, "grad_norm": 1.3214938640594482, "learning_rate": 1.2167686716162959e-05, "loss": 0.5248, "step": 16367 }, { "epoch": 0.4476044629183986, "grad_norm": 1.159886121749878, "learning_rate": 1.2166822062891224e-05, "loss": 0.4969, "step": 16368 }, { "epoch": 0.4476318092321155, "grad_norm": 1.2659912109375, "learning_rate": 1.2165957392621376e-05, "loss": 0.5123, "step": 16369 }, { "epoch": 0.4476591555458324, "grad_norm": 1.2414520978927612, "learning_rate": 1.2165092705360207e-05, "loss": 0.5078, "step": 16370 }, { "epoch": 0.44768650185954934, "grad_norm": 1.192136526107788, "learning_rate": 1.2164228001114488e-05, "loss": 0.5288, "step": 16371 }, { "epoch": 0.44771384817326626, "grad_norm": 2.009455442428589, "learning_rate": 1.2163363279891014e-05, "loss": 0.3857, "step": 16372 }, { "epoch": 0.44774119448698313, "grad_norm": 1.136643409729004, "learning_rate": 1.2162498541696564e-05, "loss": 0.5114, "step": 16373 }, { "epoch": 0.44776854080070005, "grad_norm": 1.405428171157837, "learning_rate": 1.2161633786537921e-05, "loss": 0.5159, "step": 16374 }, { "epoch": 0.447795887114417, "grad_norm": 2.3792524337768555, "learning_rate": 1.216076901442187e-05, "loss": 0.3755, "step": 16375 }, { "epoch": 0.4478232334281339, "grad_norm": 2.076077938079834, "learning_rate": 1.2159904225355192e-05, "loss": 0.5037, "step": 16376 }, { "epoch": 0.4478505797418508, "grad_norm": 1.3720512390136719, "learning_rate": 1.2159039419344674e-05, "loss": 0.416, "step": 16377 }, { "epoch": 0.4478779260555677, "grad_norm": 1.4743974208831787, "learning_rate": 1.2158174596397098e-05, "loss": 0.5217, "step": 16378 }, { "epoch": 0.4479052723692846, "grad_norm": 1.592176914215088, "learning_rate": 1.2157309756519253e-05, "loss": 0.569, "step": 16379 }, { "epoch": 0.44793261868300155, "grad_norm": 1.2502210140228271, "learning_rate": 1.2156444899717919e-05, "loss": 0.8189, "step": 16380 }, { "epoch": 0.4479599649967184, "grad_norm": 1.357448935508728, "learning_rate": 1.2155580025999883e-05, "loss": 0.5, "step": 16381 }, { "epoch": 0.44798731131043534, "grad_norm": 1.3832813501358032, "learning_rate": 1.2154715135371925e-05, "loss": 0.5312, "step": 16382 }, { "epoch": 0.44801465762415227, "grad_norm": 1.578026294708252, "learning_rate": 1.2153850227840833e-05, "loss": 0.5283, "step": 16383 }, { "epoch": 0.4480420039378692, "grad_norm": 1.9667785167694092, "learning_rate": 1.2152985303413396e-05, "loss": 0.5385, "step": 16384 }, { "epoch": 0.44806935025158606, "grad_norm": 1.418485403060913, "learning_rate": 1.2152120362096392e-05, "loss": 0.4475, "step": 16385 }, { "epoch": 0.448096696565303, "grad_norm": 1.223842978477478, "learning_rate": 1.215125540389661e-05, "loss": 0.538, "step": 16386 }, { "epoch": 0.4481240428790199, "grad_norm": 1.3264119625091553, "learning_rate": 1.2150390428820835e-05, "loss": 0.5437, "step": 16387 }, { "epoch": 0.44815138919273684, "grad_norm": 1.3423722982406616, "learning_rate": 1.2149525436875851e-05, "loss": 0.5237, "step": 16388 }, { "epoch": 0.4481787355064537, "grad_norm": 1.2940963506698608, "learning_rate": 1.2148660428068449e-05, "loss": 0.8586, "step": 16389 }, { "epoch": 0.44820608182017063, "grad_norm": 2.203855514526367, "learning_rate": 1.2147795402405404e-05, "loss": 0.3871, "step": 16390 }, { "epoch": 0.44823342813388756, "grad_norm": 1.380042314529419, "learning_rate": 1.2146930359893512e-05, "loss": 0.5143, "step": 16391 }, { "epoch": 0.4482607744476045, "grad_norm": 1.5438156127929688, "learning_rate": 1.2146065300539558e-05, "loss": 0.4609, "step": 16392 }, { "epoch": 0.44828812076132135, "grad_norm": 1.395537257194519, "learning_rate": 1.2145200224350322e-05, "loss": 0.5108, "step": 16393 }, { "epoch": 0.4483154670750383, "grad_norm": 1.3888822793960571, "learning_rate": 1.2144335131332596e-05, "loss": 0.7893, "step": 16394 }, { "epoch": 0.4483428133887552, "grad_norm": 1.1921108961105347, "learning_rate": 1.2143470021493164e-05, "loss": 0.4874, "step": 16395 }, { "epoch": 0.4483701597024721, "grad_norm": 1.6067720651626587, "learning_rate": 1.2142604894838811e-05, "loss": 0.446, "step": 16396 }, { "epoch": 0.448397506016189, "grad_norm": 1.3690974712371826, "learning_rate": 1.2141739751376328e-05, "loss": 0.5152, "step": 16397 }, { "epoch": 0.4484248523299059, "grad_norm": 1.2348005771636963, "learning_rate": 1.2140874591112501e-05, "loss": 0.4923, "step": 16398 }, { "epoch": 0.44845219864362285, "grad_norm": 1.0841994285583496, "learning_rate": 1.2140009414054113e-05, "loss": 0.5134, "step": 16399 }, { "epoch": 0.44847954495733977, "grad_norm": 1.2690014839172363, "learning_rate": 1.2139144220207955e-05, "loss": 0.5048, "step": 16400 }, { "epoch": 0.44850689127105664, "grad_norm": 1.7628393173217773, "learning_rate": 1.213827900958081e-05, "loss": 0.3734, "step": 16401 }, { "epoch": 0.44853423758477357, "grad_norm": 1.4177578687667847, "learning_rate": 1.2137413782179469e-05, "loss": 0.5548, "step": 16402 }, { "epoch": 0.4485615838984905, "grad_norm": 1.3428735733032227, "learning_rate": 1.2136548538010718e-05, "loss": 0.8168, "step": 16403 }, { "epoch": 0.4485889302122074, "grad_norm": 1.6336332559585571, "learning_rate": 1.2135683277081345e-05, "loss": 0.54, "step": 16404 }, { "epoch": 0.4486162765259243, "grad_norm": 1.2268918752670288, "learning_rate": 1.2134817999398138e-05, "loss": 0.5215, "step": 16405 }, { "epoch": 0.4486436228396412, "grad_norm": 1.150765061378479, "learning_rate": 1.2133952704967885e-05, "loss": 0.5182, "step": 16406 }, { "epoch": 0.44867096915335813, "grad_norm": 1.4502465724945068, "learning_rate": 1.213308739379737e-05, "loss": 0.5279, "step": 16407 }, { "epoch": 0.44869831546707506, "grad_norm": 1.5032212734222412, "learning_rate": 1.213222206589339e-05, "loss": 0.3939, "step": 16408 }, { "epoch": 0.44872566178079193, "grad_norm": 1.3665400743484497, "learning_rate": 1.2131356721262726e-05, "loss": 0.5365, "step": 16409 }, { "epoch": 0.44875300809450885, "grad_norm": 1.2186803817749023, "learning_rate": 1.2130491359912167e-05, "loss": 0.5155, "step": 16410 }, { "epoch": 0.4487803544082258, "grad_norm": 2.0175395011901855, "learning_rate": 1.2129625981848505e-05, "loss": 0.5209, "step": 16411 }, { "epoch": 0.4488077007219427, "grad_norm": 1.4868299961090088, "learning_rate": 1.2128760587078526e-05, "loss": 0.5307, "step": 16412 }, { "epoch": 0.4488350470356596, "grad_norm": 1.8414850234985352, "learning_rate": 1.212789517560902e-05, "loss": 0.5022, "step": 16413 }, { "epoch": 0.4488623933493765, "grad_norm": 1.2358145713806152, "learning_rate": 1.2127029747446772e-05, "loss": 0.5161, "step": 16414 }, { "epoch": 0.4488897396630934, "grad_norm": 1.0211024284362793, "learning_rate": 1.2126164302598577e-05, "loss": 0.3438, "step": 16415 }, { "epoch": 0.44891708597681035, "grad_norm": 2.1699695587158203, "learning_rate": 1.2125298841071223e-05, "loss": 0.489, "step": 16416 }, { "epoch": 0.4489444322905272, "grad_norm": 1.1772804260253906, "learning_rate": 1.2124433362871495e-05, "loss": 0.5332, "step": 16417 }, { "epoch": 0.44897177860424414, "grad_norm": 1.264223575592041, "learning_rate": 1.2123567868006183e-05, "loss": 0.5517, "step": 16418 }, { "epoch": 0.44899912491796107, "grad_norm": 1.5102815628051758, "learning_rate": 1.2122702356482083e-05, "loss": 0.4074, "step": 16419 }, { "epoch": 0.449026471231678, "grad_norm": 1.26938796043396, "learning_rate": 1.2121836828305977e-05, "loss": 0.4821, "step": 16420 }, { "epoch": 0.44905381754539486, "grad_norm": 1.425081729888916, "learning_rate": 1.212097128348466e-05, "loss": 0.5543, "step": 16421 }, { "epoch": 0.4490811638591118, "grad_norm": 1.4500607252120972, "learning_rate": 1.2120105722024923e-05, "loss": 0.4777, "step": 16422 }, { "epoch": 0.4491085101728287, "grad_norm": 1.3557239770889282, "learning_rate": 1.2119240143933548e-05, "loss": 0.5168, "step": 16423 }, { "epoch": 0.44913585648654564, "grad_norm": 1.7296955585479736, "learning_rate": 1.2118374549217333e-05, "loss": 0.5454, "step": 16424 }, { "epoch": 0.4491632028002625, "grad_norm": 1.6690200567245483, "learning_rate": 1.2117508937883064e-05, "loss": 0.5133, "step": 16425 }, { "epoch": 0.44919054911397943, "grad_norm": 1.4986653327941895, "learning_rate": 1.2116643309937535e-05, "loss": 0.5323, "step": 16426 }, { "epoch": 0.44921789542769636, "grad_norm": 1.2193915843963623, "learning_rate": 1.2115777665387535e-05, "loss": 0.5126, "step": 16427 }, { "epoch": 0.4492452417414133, "grad_norm": 1.244162917137146, "learning_rate": 1.2114912004239853e-05, "loss": 0.5048, "step": 16428 }, { "epoch": 0.44927258805513015, "grad_norm": 1.1157865524291992, "learning_rate": 1.2114046326501286e-05, "loss": 0.517, "step": 16429 }, { "epoch": 0.4492999343688471, "grad_norm": 1.221447467803955, "learning_rate": 1.2113180632178617e-05, "loss": 0.4969, "step": 16430 }, { "epoch": 0.449327280682564, "grad_norm": 1.7460098266601562, "learning_rate": 1.211231492127864e-05, "loss": 0.4124, "step": 16431 }, { "epoch": 0.4493546269962809, "grad_norm": 1.4955233335494995, "learning_rate": 1.211144919380815e-05, "loss": 0.5392, "step": 16432 }, { "epoch": 0.4493819733099978, "grad_norm": 1.3665993213653564, "learning_rate": 1.2110583449773933e-05, "loss": 0.5183, "step": 16433 }, { "epoch": 0.4494093196237147, "grad_norm": 1.440359115600586, "learning_rate": 1.2109717689182782e-05, "loss": 0.5081, "step": 16434 }, { "epoch": 0.44943666593743165, "grad_norm": 1.0907056331634521, "learning_rate": 1.2108851912041492e-05, "loss": 0.495, "step": 16435 }, { "epoch": 0.44946401225114857, "grad_norm": 1.489031195640564, "learning_rate": 1.2107986118356849e-05, "loss": 0.4953, "step": 16436 }, { "epoch": 0.44949135856486544, "grad_norm": 3.488189220428467, "learning_rate": 1.2107120308135649e-05, "loss": 0.4152, "step": 16437 }, { "epoch": 0.44951870487858236, "grad_norm": 1.4449968338012695, "learning_rate": 1.2106254481384688e-05, "loss": 0.5091, "step": 16438 }, { "epoch": 0.4495460511922993, "grad_norm": 1.2910643815994263, "learning_rate": 1.2105388638110746e-05, "loss": 0.4905, "step": 16439 }, { "epoch": 0.4495733975060162, "grad_norm": 1.377712368965149, "learning_rate": 1.210452277832063e-05, "loss": 0.5273, "step": 16440 }, { "epoch": 0.4496007438197331, "grad_norm": 1.3155947923660278, "learning_rate": 1.2103656902021119e-05, "loss": 0.5216, "step": 16441 }, { "epoch": 0.44962809013345, "grad_norm": 1.3579185009002686, "learning_rate": 1.2102791009219013e-05, "loss": 0.5045, "step": 16442 }, { "epoch": 0.44965543644716693, "grad_norm": 1.5451350212097168, "learning_rate": 1.2101925099921104e-05, "loss": 0.481, "step": 16443 }, { "epoch": 0.44968278276088386, "grad_norm": 1.3859223127365112, "learning_rate": 1.2101059174134185e-05, "loss": 0.7941, "step": 16444 }, { "epoch": 0.44971012907460073, "grad_norm": 1.2226386070251465, "learning_rate": 1.2100193231865045e-05, "loss": 0.5436, "step": 16445 }, { "epoch": 0.44973747538831765, "grad_norm": 1.6639909744262695, "learning_rate": 1.2099327273120485e-05, "loss": 0.5041, "step": 16446 }, { "epoch": 0.4497648217020346, "grad_norm": 1.358022689819336, "learning_rate": 1.2098461297907289e-05, "loss": 0.5144, "step": 16447 }, { "epoch": 0.4497921680157515, "grad_norm": 1.2106338739395142, "learning_rate": 1.2097595306232257e-05, "loss": 0.4852, "step": 16448 }, { "epoch": 0.4498195143294684, "grad_norm": 1.1205649375915527, "learning_rate": 1.2096729298102178e-05, "loss": 0.5021, "step": 16449 }, { "epoch": 0.4498468606431853, "grad_norm": 1.3281660079956055, "learning_rate": 1.2095863273523852e-05, "loss": 0.5068, "step": 16450 }, { "epoch": 0.4498742069569022, "grad_norm": 1.1790558099746704, "learning_rate": 1.2094997232504066e-05, "loss": 0.5198, "step": 16451 }, { "epoch": 0.44990155327061915, "grad_norm": 1.4430878162384033, "learning_rate": 1.2094131175049614e-05, "loss": 0.5165, "step": 16452 }, { "epoch": 0.449928899584336, "grad_norm": 2.5000081062316895, "learning_rate": 1.2093265101167295e-05, "loss": 0.5247, "step": 16453 }, { "epoch": 0.44995624589805294, "grad_norm": 1.16724693775177, "learning_rate": 1.20923990108639e-05, "loss": 0.5181, "step": 16454 }, { "epoch": 0.44998359221176987, "grad_norm": 1.5682138204574585, "learning_rate": 1.2091532904146224e-05, "loss": 0.4913, "step": 16455 }, { "epoch": 0.45001093852548674, "grad_norm": 1.1560828685760498, "learning_rate": 1.2090666781021059e-05, "loss": 0.4989, "step": 16456 }, { "epoch": 0.45003828483920366, "grad_norm": 1.6369655132293701, "learning_rate": 1.2089800641495204e-05, "loss": 0.8207, "step": 16457 }, { "epoch": 0.4500656311529206, "grad_norm": 1.5074281692504883, "learning_rate": 1.2088934485575447e-05, "loss": 0.8113, "step": 16458 }, { "epoch": 0.4500929774666375, "grad_norm": 1.5833799839019775, "learning_rate": 1.2088068313268593e-05, "loss": 0.3952, "step": 16459 }, { "epoch": 0.4501203237803544, "grad_norm": 1.3728764057159424, "learning_rate": 1.2087202124581425e-05, "loss": 0.5469, "step": 16460 }, { "epoch": 0.4501476700940713, "grad_norm": 1.3957715034484863, "learning_rate": 1.2086335919520747e-05, "loss": 0.5234, "step": 16461 }, { "epoch": 0.45017501640778823, "grad_norm": 1.1729873418807983, "learning_rate": 1.208546969809335e-05, "loss": 0.5016, "step": 16462 }, { "epoch": 0.45020236272150516, "grad_norm": 1.6355119943618774, "learning_rate": 1.2084603460306031e-05, "loss": 0.5311, "step": 16463 }, { "epoch": 0.450229709035222, "grad_norm": 1.3475397825241089, "learning_rate": 1.208373720616558e-05, "loss": 0.4323, "step": 16464 }, { "epoch": 0.45025705534893895, "grad_norm": 1.510376214981079, "learning_rate": 1.2082870935678803e-05, "loss": 0.4725, "step": 16465 }, { "epoch": 0.4502844016626559, "grad_norm": 1.7428114414215088, "learning_rate": 1.2082004648852486e-05, "loss": 0.5519, "step": 16466 }, { "epoch": 0.4503117479763728, "grad_norm": 1.4210470914840698, "learning_rate": 1.208113834569343e-05, "loss": 0.482, "step": 16467 }, { "epoch": 0.45033909429008967, "grad_norm": 1.2653276920318604, "learning_rate": 1.2080272026208433e-05, "loss": 0.5053, "step": 16468 }, { "epoch": 0.4503664406038066, "grad_norm": 1.2593941688537598, "learning_rate": 1.207940569040428e-05, "loss": 0.5157, "step": 16469 }, { "epoch": 0.4503937869175235, "grad_norm": 1.093523383140564, "learning_rate": 1.2078539338287781e-05, "loss": 0.5231, "step": 16470 }, { "epoch": 0.45042113323124044, "grad_norm": 1.7975541353225708, "learning_rate": 1.2077672969865721e-05, "loss": 0.8252, "step": 16471 }, { "epoch": 0.4504484795449573, "grad_norm": 2.131943464279175, "learning_rate": 1.2076806585144903e-05, "loss": 0.5119, "step": 16472 }, { "epoch": 0.45047582585867424, "grad_norm": 1.3662974834442139, "learning_rate": 1.2075940184132122e-05, "loss": 0.4128, "step": 16473 }, { "epoch": 0.45050317217239116, "grad_norm": 3.3240206241607666, "learning_rate": 1.2075073766834176e-05, "loss": 0.5133, "step": 16474 }, { "epoch": 0.4505305184861081, "grad_norm": 1.332553744316101, "learning_rate": 1.2074207333257857e-05, "loss": 0.5432, "step": 16475 }, { "epoch": 0.45055786479982496, "grad_norm": 1.2233442068099976, "learning_rate": 1.2073340883409968e-05, "loss": 0.3985, "step": 16476 }, { "epoch": 0.4505852111135419, "grad_norm": 1.6329931020736694, "learning_rate": 1.2072474417297299e-05, "loss": 0.5256, "step": 16477 }, { "epoch": 0.4506125574272588, "grad_norm": 1.2245146036148071, "learning_rate": 1.2071607934926657e-05, "loss": 0.5347, "step": 16478 }, { "epoch": 0.45063990374097573, "grad_norm": 1.49604332447052, "learning_rate": 1.2070741436304831e-05, "loss": 0.5309, "step": 16479 }, { "epoch": 0.4506672500546926, "grad_norm": 1.3564125299453735, "learning_rate": 1.206987492143862e-05, "loss": 0.5279, "step": 16480 }, { "epoch": 0.45069459636840953, "grad_norm": 2.7227165699005127, "learning_rate": 1.2069008390334826e-05, "loss": 0.5341, "step": 16481 }, { "epoch": 0.45072194268212645, "grad_norm": 1.1965793371200562, "learning_rate": 1.2068141843000241e-05, "loss": 0.5274, "step": 16482 }, { "epoch": 0.4507492889958434, "grad_norm": 1.247852087020874, "learning_rate": 1.2067275279441669e-05, "loss": 0.544, "step": 16483 }, { "epoch": 0.45077663530956025, "grad_norm": 1.3159794807434082, "learning_rate": 1.20664086996659e-05, "loss": 0.5213, "step": 16484 }, { "epoch": 0.4508039816232772, "grad_norm": 1.3623636960983276, "learning_rate": 1.2065542103679734e-05, "loss": 0.4915, "step": 16485 }, { "epoch": 0.4508313279369941, "grad_norm": 1.2789459228515625, "learning_rate": 1.2064675491489976e-05, "loss": 0.533, "step": 16486 }, { "epoch": 0.450858674250711, "grad_norm": 1.410948395729065, "learning_rate": 1.2063808863103422e-05, "loss": 0.8253, "step": 16487 }, { "epoch": 0.4508860205644279, "grad_norm": 1.4732247591018677, "learning_rate": 1.2062942218526863e-05, "loss": 0.5533, "step": 16488 }, { "epoch": 0.4509133668781448, "grad_norm": 1.2940593957901, "learning_rate": 1.2062075557767107e-05, "loss": 0.4346, "step": 16489 }, { "epoch": 0.45094071319186174, "grad_norm": 1.3384690284729004, "learning_rate": 1.2061208880830945e-05, "loss": 0.515, "step": 16490 }, { "epoch": 0.45096805950557867, "grad_norm": 1.3665263652801514, "learning_rate": 1.206034218772518e-05, "loss": 0.5202, "step": 16491 }, { "epoch": 0.45099540581929554, "grad_norm": 1.3159972429275513, "learning_rate": 1.2059475478456613e-05, "loss": 0.5272, "step": 16492 }, { "epoch": 0.45102275213301246, "grad_norm": 1.406113862991333, "learning_rate": 1.2058608753032038e-05, "loss": 0.5391, "step": 16493 }, { "epoch": 0.4510500984467294, "grad_norm": 1.3849008083343506, "learning_rate": 1.2057742011458258e-05, "loss": 0.5309, "step": 16494 }, { "epoch": 0.4510774447604463, "grad_norm": 1.4739930629730225, "learning_rate": 1.2056875253742072e-05, "loss": 0.5261, "step": 16495 }, { "epoch": 0.4511047910741632, "grad_norm": 1.145010232925415, "learning_rate": 1.2056008479890275e-05, "loss": 0.558, "step": 16496 }, { "epoch": 0.4511321373878801, "grad_norm": 1.1210105419158936, "learning_rate": 1.2055141689909673e-05, "loss": 0.5175, "step": 16497 }, { "epoch": 0.45115948370159703, "grad_norm": 1.4557219743728638, "learning_rate": 1.205427488380706e-05, "loss": 0.4266, "step": 16498 }, { "epoch": 0.45118683001531396, "grad_norm": 1.5521351099014282, "learning_rate": 1.205340806158924e-05, "loss": 0.4939, "step": 16499 }, { "epoch": 0.4512141763290308, "grad_norm": 1.4365853071212769, "learning_rate": 1.2052541223263015e-05, "loss": 0.5132, "step": 16500 }, { "epoch": 0.45124152264274775, "grad_norm": 1.4015909433364868, "learning_rate": 1.2051674368835178e-05, "loss": 0.5026, "step": 16501 }, { "epoch": 0.4512688689564647, "grad_norm": 1.3197073936462402, "learning_rate": 1.2050807498312533e-05, "loss": 0.573, "step": 16502 }, { "epoch": 0.4512962152701816, "grad_norm": 1.3705471754074097, "learning_rate": 1.2049940611701882e-05, "loss": 0.5049, "step": 16503 }, { "epoch": 0.45132356158389847, "grad_norm": 1.2994314432144165, "learning_rate": 1.2049073709010024e-05, "loss": 0.8078, "step": 16504 }, { "epoch": 0.4513509078976154, "grad_norm": 1.3327431678771973, "learning_rate": 1.204820679024376e-05, "loss": 0.5606, "step": 16505 }, { "epoch": 0.4513782542113323, "grad_norm": 1.244179606437683, "learning_rate": 1.2047339855409885e-05, "loss": 0.5103, "step": 16506 }, { "epoch": 0.45140560052504924, "grad_norm": 1.5631033182144165, "learning_rate": 1.204647290451521e-05, "loss": 0.4589, "step": 16507 }, { "epoch": 0.4514329468387661, "grad_norm": 1.2778475284576416, "learning_rate": 1.204560593756653e-05, "loss": 0.5465, "step": 16508 }, { "epoch": 0.45146029315248304, "grad_norm": 1.2813830375671387, "learning_rate": 1.2044738954570645e-05, "loss": 0.5115, "step": 16509 }, { "epoch": 0.45148763946619996, "grad_norm": 1.4205299615859985, "learning_rate": 1.2043871955534358e-05, "loss": 0.5475, "step": 16510 }, { "epoch": 0.4515149857799169, "grad_norm": 1.2863397598266602, "learning_rate": 1.2043004940464475e-05, "loss": 0.5396, "step": 16511 }, { "epoch": 0.45154233209363376, "grad_norm": 1.3541715145111084, "learning_rate": 1.204213790936779e-05, "loss": 0.4415, "step": 16512 }, { "epoch": 0.4515696784073507, "grad_norm": 1.2344157695770264, "learning_rate": 1.2041270862251111e-05, "loss": 0.5561, "step": 16513 }, { "epoch": 0.4515970247210676, "grad_norm": 1.2047122716903687, "learning_rate": 1.2040403799121232e-05, "loss": 0.5066, "step": 16514 }, { "epoch": 0.45162437103478453, "grad_norm": 1.0772572755813599, "learning_rate": 1.2039536719984962e-05, "loss": 0.4725, "step": 16515 }, { "epoch": 0.4516517173485014, "grad_norm": 1.4164243936538696, "learning_rate": 1.2038669624849103e-05, "loss": 0.5301, "step": 16516 }, { "epoch": 0.4516790636622183, "grad_norm": 1.5929359197616577, "learning_rate": 1.2037802513720448e-05, "loss": 0.4525, "step": 16517 }, { "epoch": 0.45170640997593525, "grad_norm": 1.175157070159912, "learning_rate": 1.2036935386605807e-05, "loss": 0.3661, "step": 16518 }, { "epoch": 0.4517337562896522, "grad_norm": 1.244973063468933, "learning_rate": 1.2036068243511986e-05, "loss": 0.4882, "step": 16519 }, { "epoch": 0.45176110260336905, "grad_norm": 1.6912291049957275, "learning_rate": 1.203520108444578e-05, "loss": 0.5106, "step": 16520 }, { "epoch": 0.45178844891708597, "grad_norm": 1.3215450048446655, "learning_rate": 1.2034333909413996e-05, "loss": 0.4992, "step": 16521 }, { "epoch": 0.4518157952308029, "grad_norm": 1.1646733283996582, "learning_rate": 1.2033466718423435e-05, "loss": 0.516, "step": 16522 }, { "epoch": 0.4518431415445198, "grad_norm": 1.8705979585647583, "learning_rate": 1.2032599511480896e-05, "loss": 0.4053, "step": 16523 }, { "epoch": 0.4518704878582367, "grad_norm": 2.2350785732269287, "learning_rate": 1.203173228859319e-05, "loss": 0.5228, "step": 16524 }, { "epoch": 0.4518978341719536, "grad_norm": 1.8265633583068848, "learning_rate": 1.2030865049767114e-05, "loss": 0.5287, "step": 16525 }, { "epoch": 0.45192518048567054, "grad_norm": 1.4419835805892944, "learning_rate": 1.2029997795009471e-05, "loss": 0.5195, "step": 16526 }, { "epoch": 0.45195252679938747, "grad_norm": 1.2289869785308838, "learning_rate": 1.202913052432707e-05, "loss": 0.3675, "step": 16527 }, { "epoch": 0.45197987311310434, "grad_norm": 1.6890813112258911, "learning_rate": 1.202826323772671e-05, "loss": 0.5348, "step": 16528 }, { "epoch": 0.45200721942682126, "grad_norm": 1.2416743040084839, "learning_rate": 1.2027395935215193e-05, "loss": 0.5138, "step": 16529 }, { "epoch": 0.4520345657405382, "grad_norm": 1.4114789962768555, "learning_rate": 1.2026528616799332e-05, "loss": 0.5182, "step": 16530 }, { "epoch": 0.4520619120542551, "grad_norm": 1.5789810419082642, "learning_rate": 1.2025661282485919e-05, "loss": 0.527, "step": 16531 }, { "epoch": 0.452089258367972, "grad_norm": 1.3005506992340088, "learning_rate": 1.2024793932281765e-05, "loss": 0.5087, "step": 16532 }, { "epoch": 0.4521166046816889, "grad_norm": 1.419431447982788, "learning_rate": 1.2023926566193671e-05, "loss": 0.4565, "step": 16533 }, { "epoch": 0.45214395099540583, "grad_norm": 1.3501174449920654, "learning_rate": 1.2023059184228443e-05, "loss": 0.5182, "step": 16534 }, { "epoch": 0.45217129730912275, "grad_norm": 1.3611440658569336, "learning_rate": 1.2022191786392886e-05, "loss": 0.5015, "step": 16535 }, { "epoch": 0.4521986436228396, "grad_norm": 1.2198259830474854, "learning_rate": 1.2021324372693804e-05, "loss": 0.5271, "step": 16536 }, { "epoch": 0.45222598993655655, "grad_norm": 1.4738023281097412, "learning_rate": 1.2020456943138001e-05, "loss": 0.5099, "step": 16537 }, { "epoch": 0.4522533362502735, "grad_norm": 3.7657310962677, "learning_rate": 1.2019589497732283e-05, "loss": 0.4154, "step": 16538 }, { "epoch": 0.4522806825639904, "grad_norm": 1.4890445470809937, "learning_rate": 1.2018722036483453e-05, "loss": 0.4372, "step": 16539 }, { "epoch": 0.45230802887770727, "grad_norm": 1.532995343208313, "learning_rate": 1.2017854559398314e-05, "loss": 0.4716, "step": 16540 }, { "epoch": 0.4523353751914242, "grad_norm": 1.1978299617767334, "learning_rate": 1.2016987066483677e-05, "loss": 0.5031, "step": 16541 }, { "epoch": 0.4523627215051411, "grad_norm": 1.5346620082855225, "learning_rate": 1.2016119557746342e-05, "loss": 0.5027, "step": 16542 }, { "epoch": 0.45239006781885804, "grad_norm": 1.1312282085418701, "learning_rate": 1.2015252033193116e-05, "loss": 0.5035, "step": 16543 }, { "epoch": 0.4524174141325749, "grad_norm": 1.48432195186615, "learning_rate": 1.2014384492830807e-05, "loss": 0.4, "step": 16544 }, { "epoch": 0.45244476044629184, "grad_norm": 1.1904571056365967, "learning_rate": 1.2013516936666216e-05, "loss": 0.5135, "step": 16545 }, { "epoch": 0.45247210676000876, "grad_norm": 1.1858717203140259, "learning_rate": 1.2012649364706154e-05, "loss": 0.5078, "step": 16546 }, { "epoch": 0.4524994530737257, "grad_norm": 1.1477795839309692, "learning_rate": 1.2011781776957422e-05, "loss": 0.5128, "step": 16547 }, { "epoch": 0.45252679938744256, "grad_norm": 1.3370790481567383, "learning_rate": 1.201091417342683e-05, "loss": 0.5123, "step": 16548 }, { "epoch": 0.4525541457011595, "grad_norm": 1.5233795642852783, "learning_rate": 1.2010046554121181e-05, "loss": 0.5146, "step": 16549 }, { "epoch": 0.4525814920148764, "grad_norm": 1.2618311643600464, "learning_rate": 1.2009178919047284e-05, "loss": 0.5562, "step": 16550 }, { "epoch": 0.45260883832859333, "grad_norm": 1.303135633468628, "learning_rate": 1.2008311268211943e-05, "loss": 0.4965, "step": 16551 }, { "epoch": 0.4526361846423102, "grad_norm": 1.4018568992614746, "learning_rate": 1.2007443601621965e-05, "loss": 0.4555, "step": 16552 }, { "epoch": 0.4526635309560271, "grad_norm": 1.1842122077941895, "learning_rate": 1.2006575919284156e-05, "loss": 0.5156, "step": 16553 }, { "epoch": 0.45269087726974405, "grad_norm": 1.2079716920852661, "learning_rate": 1.2005708221205325e-05, "loss": 0.5058, "step": 16554 }, { "epoch": 0.452718223583461, "grad_norm": 1.1940510272979736, "learning_rate": 1.2004840507392277e-05, "loss": 0.5044, "step": 16555 }, { "epoch": 0.45274556989717785, "grad_norm": 1.307474970817566, "learning_rate": 1.2003972777851822e-05, "loss": 0.5174, "step": 16556 }, { "epoch": 0.45277291621089477, "grad_norm": 1.3543972969055176, "learning_rate": 1.2003105032590762e-05, "loss": 0.5391, "step": 16557 }, { "epoch": 0.4528002625246117, "grad_norm": 1.2153116464614868, "learning_rate": 1.2002237271615907e-05, "loss": 0.535, "step": 16558 }, { "epoch": 0.45282760883832857, "grad_norm": 1.230656385421753, "learning_rate": 1.2001369494934064e-05, "loss": 0.5074, "step": 16559 }, { "epoch": 0.4528549551520455, "grad_norm": 1.2328720092773438, "learning_rate": 1.200050170255204e-05, "loss": 0.4934, "step": 16560 }, { "epoch": 0.4528823014657624, "grad_norm": 1.2695658206939697, "learning_rate": 1.1999633894476643e-05, "loss": 0.4426, "step": 16561 }, { "epoch": 0.45290964777947934, "grad_norm": 1.1178518533706665, "learning_rate": 1.1998766070714683e-05, "loss": 0.5071, "step": 16562 }, { "epoch": 0.4529369940931962, "grad_norm": 1.4119336605072021, "learning_rate": 1.1997898231272961e-05, "loss": 0.518, "step": 16563 }, { "epoch": 0.45296434040691314, "grad_norm": 1.2115644216537476, "learning_rate": 1.1997030376158293e-05, "loss": 0.3816, "step": 16564 }, { "epoch": 0.45299168672063006, "grad_norm": 1.3101874589920044, "learning_rate": 1.1996162505377485e-05, "loss": 0.5216, "step": 16565 }, { "epoch": 0.453019033034347, "grad_norm": 1.309091567993164, "learning_rate": 1.1995294618937341e-05, "loss": 0.5178, "step": 16566 }, { "epoch": 0.45304637934806385, "grad_norm": 1.2073490619659424, "learning_rate": 1.1994426716844675e-05, "loss": 0.5415, "step": 16567 }, { "epoch": 0.4530737256617808, "grad_norm": 1.4728161096572876, "learning_rate": 1.199355879910629e-05, "loss": 0.4952, "step": 16568 }, { "epoch": 0.4531010719754977, "grad_norm": 1.1927721500396729, "learning_rate": 1.1992690865728998e-05, "loss": 0.5069, "step": 16569 }, { "epoch": 0.45312841828921463, "grad_norm": 1.5843411684036255, "learning_rate": 1.199182291671961e-05, "loss": 0.5231, "step": 16570 }, { "epoch": 0.4531557646029315, "grad_norm": 1.412574291229248, "learning_rate": 1.1990954952084928e-05, "loss": 0.8247, "step": 16571 }, { "epoch": 0.4531831109166484, "grad_norm": 1.147278070449829, "learning_rate": 1.1990086971831767e-05, "loss": 0.5238, "step": 16572 }, { "epoch": 0.45321045723036535, "grad_norm": 1.3023432493209839, "learning_rate": 1.198921897596693e-05, "loss": 0.5536, "step": 16573 }, { "epoch": 0.4532378035440823, "grad_norm": 1.2324259281158447, "learning_rate": 1.1988350964497235e-05, "loss": 0.7849, "step": 16574 }, { "epoch": 0.45326514985779914, "grad_norm": 1.3112376928329468, "learning_rate": 1.1987482937429482e-05, "loss": 0.5386, "step": 16575 }, { "epoch": 0.45329249617151607, "grad_norm": 1.3892371654510498, "learning_rate": 1.1986614894770487e-05, "loss": 0.5471, "step": 16576 }, { "epoch": 0.453319842485233, "grad_norm": 1.4596586227416992, "learning_rate": 1.1985746836527057e-05, "loss": 0.4489, "step": 16577 }, { "epoch": 0.4533471887989499, "grad_norm": 1.3936941623687744, "learning_rate": 1.1984878762706002e-05, "loss": 0.4958, "step": 16578 }, { "epoch": 0.4533745351126668, "grad_norm": 1.7299282550811768, "learning_rate": 1.1984010673314133e-05, "loss": 0.3838, "step": 16579 }, { "epoch": 0.4534018814263837, "grad_norm": 1.5185821056365967, "learning_rate": 1.1983142568358254e-05, "loss": 0.5595, "step": 16580 }, { "epoch": 0.45342922774010064, "grad_norm": 1.2047393321990967, "learning_rate": 1.1982274447845185e-05, "loss": 0.4127, "step": 16581 }, { "epoch": 0.45345657405381756, "grad_norm": 1.7027080059051514, "learning_rate": 1.1981406311781727e-05, "loss": 0.8172, "step": 16582 }, { "epoch": 0.45348392036753443, "grad_norm": 1.3964793682098389, "learning_rate": 1.1980538160174697e-05, "loss": 0.5574, "step": 16583 }, { "epoch": 0.45351126668125136, "grad_norm": 1.4822732210159302, "learning_rate": 1.19796699930309e-05, "loss": 0.433, "step": 16584 }, { "epoch": 0.4535386129949683, "grad_norm": 1.1744803190231323, "learning_rate": 1.197880181035715e-05, "loss": 0.5332, "step": 16585 }, { "epoch": 0.4535659593086852, "grad_norm": 1.2757624387741089, "learning_rate": 1.1977933612160257e-05, "loss": 0.5082, "step": 16586 }, { "epoch": 0.4535933056224021, "grad_norm": 1.43193519115448, "learning_rate": 1.1977065398447033e-05, "loss": 0.5552, "step": 16587 }, { "epoch": 0.453620651936119, "grad_norm": 1.324851393699646, "learning_rate": 1.1976197169224283e-05, "loss": 0.5402, "step": 16588 }, { "epoch": 0.4536479982498359, "grad_norm": 1.303857445716858, "learning_rate": 1.1975328924498825e-05, "loss": 0.8189, "step": 16589 }, { "epoch": 0.45367534456355285, "grad_norm": 1.435174584388733, "learning_rate": 1.1974460664277467e-05, "loss": 0.3622, "step": 16590 }, { "epoch": 0.4537026908772697, "grad_norm": 1.0949784517288208, "learning_rate": 1.1973592388567023e-05, "loss": 0.4939, "step": 16591 }, { "epoch": 0.45373003719098665, "grad_norm": 1.4603333473205566, "learning_rate": 1.1972724097374302e-05, "loss": 0.5259, "step": 16592 }, { "epoch": 0.45375738350470357, "grad_norm": 1.5783716440200806, "learning_rate": 1.1971855790706113e-05, "loss": 0.5296, "step": 16593 }, { "epoch": 0.4537847298184205, "grad_norm": 1.261460781097412, "learning_rate": 1.197098746856927e-05, "loss": 0.5225, "step": 16594 }, { "epoch": 0.45381207613213737, "grad_norm": 1.6567460298538208, "learning_rate": 1.1970119130970588e-05, "loss": 0.4358, "step": 16595 }, { "epoch": 0.4538394224458543, "grad_norm": 1.354077696800232, "learning_rate": 1.1969250777916872e-05, "loss": 0.5297, "step": 16596 }, { "epoch": 0.4538667687595712, "grad_norm": 1.4580632448196411, "learning_rate": 1.1968382409414943e-05, "loss": 0.5187, "step": 16597 }, { "epoch": 0.45389411507328814, "grad_norm": 1.2860878705978394, "learning_rate": 1.1967514025471602e-05, "loss": 0.4244, "step": 16598 }, { "epoch": 0.453921461387005, "grad_norm": 1.4668666124343872, "learning_rate": 1.196664562609367e-05, "loss": 0.5465, "step": 16599 }, { "epoch": 0.45394880770072193, "grad_norm": 1.3762609958648682, "learning_rate": 1.196577721128796e-05, "loss": 0.5101, "step": 16600 }, { "epoch": 0.45397615401443886, "grad_norm": 1.4132236242294312, "learning_rate": 1.1964908781061274e-05, "loss": 0.5243, "step": 16601 }, { "epoch": 0.4540035003281558, "grad_norm": 1.0836542844772339, "learning_rate": 1.1964040335420436e-05, "loss": 0.5138, "step": 16602 }, { "epoch": 0.45403084664187265, "grad_norm": 1.3261932134628296, "learning_rate": 1.1963171874372255e-05, "loss": 0.4982, "step": 16603 }, { "epoch": 0.4540581929555896, "grad_norm": 1.2755643129348755, "learning_rate": 1.196230339792354e-05, "loss": 0.5207, "step": 16604 }, { "epoch": 0.4540855392693065, "grad_norm": 1.249636173248291, "learning_rate": 1.1961434906081108e-05, "loss": 0.5008, "step": 16605 }, { "epoch": 0.45411288558302343, "grad_norm": 1.1729110479354858, "learning_rate": 1.1960566398851771e-05, "loss": 0.5239, "step": 16606 }, { "epoch": 0.4541402318967403, "grad_norm": 1.7180780172348022, "learning_rate": 1.1959697876242345e-05, "loss": 0.8724, "step": 16607 }, { "epoch": 0.4541675782104572, "grad_norm": 1.7349581718444824, "learning_rate": 1.1958829338259639e-05, "loss": 0.5153, "step": 16608 }, { "epoch": 0.45419492452417415, "grad_norm": 1.599972128868103, "learning_rate": 1.195796078491047e-05, "loss": 0.4655, "step": 16609 }, { "epoch": 0.4542222708378911, "grad_norm": 1.4841550588607788, "learning_rate": 1.1957092216201644e-05, "loss": 0.516, "step": 16610 }, { "epoch": 0.45424961715160794, "grad_norm": 1.2379640340805054, "learning_rate": 1.1956223632139988e-05, "loss": 0.52, "step": 16611 }, { "epoch": 0.45427696346532487, "grad_norm": 1.1318334341049194, "learning_rate": 1.19553550327323e-05, "loss": 0.5075, "step": 16612 }, { "epoch": 0.4543043097790418, "grad_norm": 1.419769287109375, "learning_rate": 1.195448641798541e-05, "loss": 0.5461, "step": 16613 }, { "epoch": 0.4543316560927587, "grad_norm": 1.4144768714904785, "learning_rate": 1.1953617787906117e-05, "loss": 0.5148, "step": 16614 }, { "epoch": 0.4543590024064756, "grad_norm": 1.270670771598816, "learning_rate": 1.1952749142501246e-05, "loss": 0.508, "step": 16615 }, { "epoch": 0.4543863487201925, "grad_norm": 1.1239265203475952, "learning_rate": 1.1951880481777609e-05, "loss": 0.512, "step": 16616 }, { "epoch": 0.45441369503390944, "grad_norm": 1.3806055784225464, "learning_rate": 1.1951011805742017e-05, "loss": 0.821, "step": 16617 }, { "epoch": 0.45444104134762636, "grad_norm": 1.5259093046188354, "learning_rate": 1.1950143114401285e-05, "loss": 0.5361, "step": 16618 }, { "epoch": 0.45446838766134323, "grad_norm": 1.3407187461853027, "learning_rate": 1.194927440776223e-05, "loss": 0.5424, "step": 16619 }, { "epoch": 0.45449573397506016, "grad_norm": 1.2441003322601318, "learning_rate": 1.1948405685831665e-05, "loss": 0.8215, "step": 16620 }, { "epoch": 0.4545230802887771, "grad_norm": 1.2854671478271484, "learning_rate": 1.194753694861641e-05, "loss": 0.5362, "step": 16621 }, { "epoch": 0.454550426602494, "grad_norm": 1.447330117225647, "learning_rate": 1.1946668196123272e-05, "loss": 0.5389, "step": 16622 }, { "epoch": 0.4545777729162109, "grad_norm": 1.1388095617294312, "learning_rate": 1.1945799428359068e-05, "loss": 0.5129, "step": 16623 }, { "epoch": 0.4546051192299278, "grad_norm": 1.384020209312439, "learning_rate": 1.194493064533062e-05, "loss": 0.4335, "step": 16624 }, { "epoch": 0.4546324655436447, "grad_norm": 1.554940104484558, "learning_rate": 1.1944061847044734e-05, "loss": 0.5137, "step": 16625 }, { "epoch": 0.45465981185736165, "grad_norm": 1.4536707401275635, "learning_rate": 1.1943193033508234e-05, "loss": 0.5273, "step": 16626 }, { "epoch": 0.4546871581710785, "grad_norm": 1.7320008277893066, "learning_rate": 1.1942324204727931e-05, "loss": 0.5418, "step": 16627 }, { "epoch": 0.45471450448479545, "grad_norm": 1.071176528930664, "learning_rate": 1.1941455360710638e-05, "loss": 0.479, "step": 16628 }, { "epoch": 0.45474185079851237, "grad_norm": 1.045472264289856, "learning_rate": 1.1940586501463177e-05, "loss": 0.5014, "step": 16629 }, { "epoch": 0.4547691971122293, "grad_norm": 1.3295972347259521, "learning_rate": 1.193971762699236e-05, "loss": 0.7844, "step": 16630 }, { "epoch": 0.45479654342594616, "grad_norm": 1.5141764879226685, "learning_rate": 1.1938848737305002e-05, "loss": 0.8056, "step": 16631 }, { "epoch": 0.4548238897396631, "grad_norm": 1.1031019687652588, "learning_rate": 1.1937979832407925e-05, "loss": 0.4713, "step": 16632 }, { "epoch": 0.45485123605338, "grad_norm": 1.444291353225708, "learning_rate": 1.1937110912307939e-05, "loss": 0.512, "step": 16633 }, { "epoch": 0.45487858236709694, "grad_norm": 1.4648913145065308, "learning_rate": 1.1936241977011864e-05, "loss": 0.5238, "step": 16634 }, { "epoch": 0.4549059286808138, "grad_norm": 1.3863980770111084, "learning_rate": 1.1935373026526516e-05, "loss": 0.5014, "step": 16635 }, { "epoch": 0.45493327499453073, "grad_norm": 1.2371954917907715, "learning_rate": 1.1934504060858709e-05, "loss": 0.4861, "step": 16636 }, { "epoch": 0.45496062130824766, "grad_norm": 1.792298674583435, "learning_rate": 1.1933635080015267e-05, "loss": 0.5019, "step": 16637 }, { "epoch": 0.4549879676219646, "grad_norm": 1.2071411609649658, "learning_rate": 1.1932766084002996e-05, "loss": 0.4947, "step": 16638 }, { "epoch": 0.45501531393568145, "grad_norm": 1.3863695859909058, "learning_rate": 1.1931897072828721e-05, "loss": 0.5413, "step": 16639 }, { "epoch": 0.4550426602493984, "grad_norm": 1.6550296545028687, "learning_rate": 1.193102804649926e-05, "loss": 0.8338, "step": 16640 }, { "epoch": 0.4550700065631153, "grad_norm": 1.1770459413528442, "learning_rate": 1.1930159005021424e-05, "loss": 0.5031, "step": 16641 }, { "epoch": 0.45509735287683223, "grad_norm": 1.3416742086410522, "learning_rate": 1.1929289948402035e-05, "loss": 0.5091, "step": 16642 }, { "epoch": 0.4551246991905491, "grad_norm": 1.142165184020996, "learning_rate": 1.192842087664791e-05, "loss": 0.5028, "step": 16643 }, { "epoch": 0.455152045504266, "grad_norm": 1.279862880706787, "learning_rate": 1.1927551789765868e-05, "loss": 0.5163, "step": 16644 }, { "epoch": 0.45517939181798295, "grad_norm": 1.5465301275253296, "learning_rate": 1.1926682687762721e-05, "loss": 0.4403, "step": 16645 }, { "epoch": 0.4552067381316999, "grad_norm": 1.4102357625961304, "learning_rate": 1.1925813570645293e-05, "loss": 0.3915, "step": 16646 }, { "epoch": 0.45523408444541674, "grad_norm": 1.4579885005950928, "learning_rate": 1.1924944438420396e-05, "loss": 0.5257, "step": 16647 }, { "epoch": 0.45526143075913367, "grad_norm": 1.1943957805633545, "learning_rate": 1.1924075291094854e-05, "loss": 0.8456, "step": 16648 }, { "epoch": 0.4552887770728506, "grad_norm": 1.4075727462768555, "learning_rate": 1.1923206128675481e-05, "loss": 0.5252, "step": 16649 }, { "epoch": 0.4553161233865675, "grad_norm": 1.1447256803512573, "learning_rate": 1.19223369511691e-05, "loss": 0.5231, "step": 16650 }, { "epoch": 0.4553434697002844, "grad_norm": 1.342773675918579, "learning_rate": 1.1921467758582525e-05, "loss": 0.5082, "step": 16651 }, { "epoch": 0.4553708160140013, "grad_norm": 0.9727121591567993, "learning_rate": 1.1920598550922577e-05, "loss": 0.536, "step": 16652 }, { "epoch": 0.45539816232771824, "grad_norm": 1.348997712135315, "learning_rate": 1.1919729328196073e-05, "loss": 0.5236, "step": 16653 }, { "epoch": 0.45542550864143516, "grad_norm": 1.5765248537063599, "learning_rate": 1.1918860090409835e-05, "loss": 0.8348, "step": 16654 }, { "epoch": 0.45545285495515203, "grad_norm": 1.2167479991912842, "learning_rate": 1.1917990837570674e-05, "loss": 0.5263, "step": 16655 }, { "epoch": 0.45548020126886896, "grad_norm": 1.234557032585144, "learning_rate": 1.1917121569685421e-05, "loss": 0.3833, "step": 16656 }, { "epoch": 0.4555075475825859, "grad_norm": 1.1787594556808472, "learning_rate": 1.1916252286760885e-05, "loss": 0.4792, "step": 16657 }, { "epoch": 0.4555348938963028, "grad_norm": 1.682547926902771, "learning_rate": 1.191538298880389e-05, "loss": 0.4277, "step": 16658 }, { "epoch": 0.4555622402100197, "grad_norm": 1.262893557548523, "learning_rate": 1.1914513675821256e-05, "loss": 0.405, "step": 16659 }, { "epoch": 0.4555895865237366, "grad_norm": 1.3488645553588867, "learning_rate": 1.1913644347819796e-05, "loss": 0.4255, "step": 16660 }, { "epoch": 0.4556169328374535, "grad_norm": 10.2304105758667, "learning_rate": 1.1912775004806338e-05, "loss": 0.8047, "step": 16661 }, { "epoch": 0.4556442791511704, "grad_norm": 1.3810372352600098, "learning_rate": 1.1911905646787703e-05, "loss": 0.5247, "step": 16662 }, { "epoch": 0.4556716254648873, "grad_norm": 1.2821528911590576, "learning_rate": 1.1911036273770701e-05, "loss": 0.5351, "step": 16663 }, { "epoch": 0.45569897177860424, "grad_norm": 1.4311994314193726, "learning_rate": 1.1910166885762159e-05, "loss": 0.5424, "step": 16664 }, { "epoch": 0.45572631809232117, "grad_norm": 1.137628436088562, "learning_rate": 1.1909297482768896e-05, "loss": 0.4752, "step": 16665 }, { "epoch": 0.45575366440603804, "grad_norm": 1.3917198181152344, "learning_rate": 1.1908428064797728e-05, "loss": 0.4822, "step": 16666 }, { "epoch": 0.45578101071975496, "grad_norm": 1.3650994300842285, "learning_rate": 1.1907558631855482e-05, "loss": 0.5105, "step": 16667 }, { "epoch": 0.4558083570334719, "grad_norm": 1.3488316535949707, "learning_rate": 1.1906689183948973e-05, "loss": 0.5433, "step": 16668 }, { "epoch": 0.4558357033471888, "grad_norm": 1.2938586473464966, "learning_rate": 1.1905819721085025e-05, "loss": 0.497, "step": 16669 }, { "epoch": 0.4558630496609057, "grad_norm": 1.3762972354888916, "learning_rate": 1.190495024327046e-05, "loss": 0.4146, "step": 16670 }, { "epoch": 0.4558903959746226, "grad_norm": 1.389526605606079, "learning_rate": 1.1904080750512093e-05, "loss": 0.8119, "step": 16671 }, { "epoch": 0.45591774228833953, "grad_norm": 2.1829020977020264, "learning_rate": 1.1903211242816748e-05, "loss": 0.4173, "step": 16672 }, { "epoch": 0.45594508860205646, "grad_norm": 1.1935027837753296, "learning_rate": 1.190234172019125e-05, "loss": 0.5038, "step": 16673 }, { "epoch": 0.45597243491577333, "grad_norm": 1.4349035024642944, "learning_rate": 1.1901472182642412e-05, "loss": 0.54, "step": 16674 }, { "epoch": 0.45599978122949025, "grad_norm": 1.2146753072738647, "learning_rate": 1.1900602630177064e-05, "loss": 0.5095, "step": 16675 }, { "epoch": 0.4560271275432072, "grad_norm": 1.3164162635803223, "learning_rate": 1.1899733062802024e-05, "loss": 0.5016, "step": 16676 }, { "epoch": 0.4560544738569241, "grad_norm": 1.19489586353302, "learning_rate": 1.1898863480524108e-05, "loss": 0.4665, "step": 16677 }, { "epoch": 0.456081820170641, "grad_norm": 1.294175386428833, "learning_rate": 1.1897993883350147e-05, "loss": 0.5069, "step": 16678 }, { "epoch": 0.4561091664843579, "grad_norm": 1.146783471107483, "learning_rate": 1.1897124271286957e-05, "loss": 0.4988, "step": 16679 }, { "epoch": 0.4561365127980748, "grad_norm": 1.4795377254486084, "learning_rate": 1.1896254644341359e-05, "loss": 0.5401, "step": 16680 }, { "epoch": 0.45616385911179175, "grad_norm": 1.2029427289962769, "learning_rate": 1.1895385002520179e-05, "loss": 0.5036, "step": 16681 }, { "epoch": 0.4561912054255086, "grad_norm": 1.4517014026641846, "learning_rate": 1.1894515345830236e-05, "loss": 0.5368, "step": 16682 }, { "epoch": 0.45621855173922554, "grad_norm": 1.6625034809112549, "learning_rate": 1.189364567427835e-05, "loss": 0.4156, "step": 16683 }, { "epoch": 0.45624589805294247, "grad_norm": 1.0897451639175415, "learning_rate": 1.1892775987871352e-05, "loss": 0.527, "step": 16684 }, { "epoch": 0.4562732443666594, "grad_norm": 1.23904287815094, "learning_rate": 1.1891906286616058e-05, "loss": 0.4808, "step": 16685 }, { "epoch": 0.45630059068037626, "grad_norm": 1.2377336025238037, "learning_rate": 1.1891036570519293e-05, "loss": 0.8491, "step": 16686 }, { "epoch": 0.4563279369940932, "grad_norm": 5.38067626953125, "learning_rate": 1.1890166839587874e-05, "loss": 0.3977, "step": 16687 }, { "epoch": 0.4563552833078101, "grad_norm": 1.3399837017059326, "learning_rate": 1.188929709382863e-05, "loss": 0.5139, "step": 16688 }, { "epoch": 0.45638262962152704, "grad_norm": 1.3068848848342896, "learning_rate": 1.1888427333248383e-05, "loss": 0.5032, "step": 16689 }, { "epoch": 0.4564099759352439, "grad_norm": 1.0676426887512207, "learning_rate": 1.1887557557853953e-05, "loss": 0.5152, "step": 16690 }, { "epoch": 0.45643732224896083, "grad_norm": 1.2750686407089233, "learning_rate": 1.1886687767652166e-05, "loss": 0.5199, "step": 16691 }, { "epoch": 0.45646466856267776, "grad_norm": 1.3192261457443237, "learning_rate": 1.1885817962649843e-05, "loss": 0.5401, "step": 16692 }, { "epoch": 0.4564920148763947, "grad_norm": 1.4802252054214478, "learning_rate": 1.1884948142853809e-05, "loss": 0.5119, "step": 16693 }, { "epoch": 0.45651936119011155, "grad_norm": 1.2585206031799316, "learning_rate": 1.1884078308270892e-05, "loss": 0.4854, "step": 16694 }, { "epoch": 0.4565467075038285, "grad_norm": 1.2826182842254639, "learning_rate": 1.1883208458907907e-05, "loss": 0.4876, "step": 16695 }, { "epoch": 0.4565740538175454, "grad_norm": 1.2801449298858643, "learning_rate": 1.188233859477168e-05, "loss": 0.5363, "step": 16696 }, { "epoch": 0.4566014001312623, "grad_norm": 1.4204176664352417, "learning_rate": 1.1881468715869042e-05, "loss": 0.3752, "step": 16697 }, { "epoch": 0.4566287464449792, "grad_norm": 6.799718379974365, "learning_rate": 1.1880598822206809e-05, "loss": 0.4899, "step": 16698 }, { "epoch": 0.4566560927586961, "grad_norm": 1.3616658449172974, "learning_rate": 1.1879728913791804e-05, "loss": 0.5159, "step": 16699 }, { "epoch": 0.45668343907241304, "grad_norm": 1.4253536462783813, "learning_rate": 1.1878858990630859e-05, "loss": 0.4456, "step": 16700 }, { "epoch": 0.45671078538612997, "grad_norm": 1.473824381828308, "learning_rate": 1.1877989052730791e-05, "loss": 0.517, "step": 16701 }, { "epoch": 0.45673813169984684, "grad_norm": 1.3176263570785522, "learning_rate": 1.187711910009843e-05, "loss": 0.4946, "step": 16702 }, { "epoch": 0.45676547801356376, "grad_norm": 1.4155054092407227, "learning_rate": 1.1876249132740595e-05, "loss": 0.4438, "step": 16703 }, { "epoch": 0.4567928243272807, "grad_norm": 1.3720321655273438, "learning_rate": 1.1875379150664114e-05, "loss": 0.5279, "step": 16704 }, { "epoch": 0.4568201706409976, "grad_norm": 1.188640832901001, "learning_rate": 1.1874509153875814e-05, "loss": 0.5614, "step": 16705 }, { "epoch": 0.4568475169547145, "grad_norm": 1.2693347930908203, "learning_rate": 1.1873639142382516e-05, "loss": 0.5432, "step": 16706 }, { "epoch": 0.4568748632684314, "grad_norm": 1.1612221002578735, "learning_rate": 1.1872769116191046e-05, "loss": 0.4967, "step": 16707 }, { "epoch": 0.45690220958214833, "grad_norm": 1.2445017099380493, "learning_rate": 1.1871899075308228e-05, "loss": 0.5186, "step": 16708 }, { "epoch": 0.45692955589586526, "grad_norm": 1.1624575853347778, "learning_rate": 1.187102901974089e-05, "loss": 0.4895, "step": 16709 }, { "epoch": 0.4569569022095821, "grad_norm": 1.395882487297058, "learning_rate": 1.1870158949495859e-05, "loss": 0.5235, "step": 16710 }, { "epoch": 0.45698424852329905, "grad_norm": 1.1383230686187744, "learning_rate": 1.186928886457995e-05, "loss": 0.5216, "step": 16711 }, { "epoch": 0.457011594837016, "grad_norm": 1.048543930053711, "learning_rate": 1.1868418765e-05, "loss": 0.4998, "step": 16712 }, { "epoch": 0.4570389411507329, "grad_norm": 1.2386723756790161, "learning_rate": 1.1867548650762832e-05, "loss": 0.4836, "step": 16713 }, { "epoch": 0.45706628746444977, "grad_norm": 1.1719176769256592, "learning_rate": 1.186667852187527e-05, "loss": 0.5074, "step": 16714 }, { "epoch": 0.4570936337781667, "grad_norm": 1.3637791872024536, "learning_rate": 1.186580837834414e-05, "loss": 0.8607, "step": 16715 }, { "epoch": 0.4571209800918836, "grad_norm": 1.0610781908035278, "learning_rate": 1.1864938220176268e-05, "loss": 0.5387, "step": 16716 }, { "epoch": 0.45714832640560055, "grad_norm": 1.2471762895584106, "learning_rate": 1.186406804737848e-05, "loss": 0.5246, "step": 16717 }, { "epoch": 0.4571756727193174, "grad_norm": 1.5108212232589722, "learning_rate": 1.18631978599576e-05, "loss": 0.5434, "step": 16718 }, { "epoch": 0.45720301903303434, "grad_norm": 1.1978119611740112, "learning_rate": 1.1862327657920462e-05, "loss": 0.5204, "step": 16719 }, { "epoch": 0.45723036534675127, "grad_norm": 1.3423349857330322, "learning_rate": 1.1861457441273885e-05, "loss": 0.4079, "step": 16720 }, { "epoch": 0.4572577116604682, "grad_norm": 1.2574617862701416, "learning_rate": 1.1860587210024702e-05, "loss": 0.5362, "step": 16721 }, { "epoch": 0.45728505797418506, "grad_norm": 1.3513541221618652, "learning_rate": 1.1859716964179731e-05, "loss": 0.8328, "step": 16722 }, { "epoch": 0.457312404287902, "grad_norm": 1.2163708209991455, "learning_rate": 1.1858846703745803e-05, "loss": 0.8407, "step": 16723 }, { "epoch": 0.4573397506016189, "grad_norm": 2.094221353530884, "learning_rate": 1.185797642872975e-05, "loss": 0.8346, "step": 16724 }, { "epoch": 0.45736709691533584, "grad_norm": 1.389883279800415, "learning_rate": 1.1857106139138392e-05, "loss": 0.5419, "step": 16725 }, { "epoch": 0.4573944432290527, "grad_norm": 1.4577268362045288, "learning_rate": 1.185623583497856e-05, "loss": 0.5157, "step": 16726 }, { "epoch": 0.45742178954276963, "grad_norm": 1.3685991764068604, "learning_rate": 1.1855365516257082e-05, "loss": 0.5003, "step": 16727 }, { "epoch": 0.45744913585648656, "grad_norm": 1.3397812843322754, "learning_rate": 1.1854495182980781e-05, "loss": 0.5226, "step": 16728 }, { "epoch": 0.4574764821702035, "grad_norm": 1.1926978826522827, "learning_rate": 1.185362483515649e-05, "loss": 0.5154, "step": 16729 }, { "epoch": 0.45750382848392035, "grad_norm": 1.4658280611038208, "learning_rate": 1.185275447279103e-05, "loss": 0.5107, "step": 16730 }, { "epoch": 0.4575311747976373, "grad_norm": 1.4657983779907227, "learning_rate": 1.1851884095891235e-05, "loss": 0.5266, "step": 16731 }, { "epoch": 0.4575585211113542, "grad_norm": 1.1477991342544556, "learning_rate": 1.1851013704463934e-05, "loss": 0.5497, "step": 16732 }, { "epoch": 0.4575858674250711, "grad_norm": 1.2153161764144897, "learning_rate": 1.1850143298515945e-05, "loss": 0.5229, "step": 16733 }, { "epoch": 0.457613213738788, "grad_norm": 1.200305700302124, "learning_rate": 1.1849272878054106e-05, "loss": 0.4957, "step": 16734 }, { "epoch": 0.4576405600525049, "grad_norm": 1.2120469808578491, "learning_rate": 1.184840244308524e-05, "loss": 0.3874, "step": 16735 }, { "epoch": 0.45766790636622184, "grad_norm": 1.4542452096939087, "learning_rate": 1.1847531993616177e-05, "loss": 0.4302, "step": 16736 }, { "epoch": 0.45769525267993877, "grad_norm": 1.2981282472610474, "learning_rate": 1.1846661529653747e-05, "loss": 0.8207, "step": 16737 }, { "epoch": 0.45772259899365564, "grad_norm": 1.1868886947631836, "learning_rate": 1.1845791051204776e-05, "loss": 0.5282, "step": 16738 }, { "epoch": 0.45774994530737256, "grad_norm": 1.2424933910369873, "learning_rate": 1.1844920558276094e-05, "loss": 0.4926, "step": 16739 }, { "epoch": 0.4577772916210895, "grad_norm": 1.4001425504684448, "learning_rate": 1.1844050050874529e-05, "loss": 0.4446, "step": 16740 }, { "epoch": 0.4578046379348064, "grad_norm": 1.4259952306747437, "learning_rate": 1.184317952900691e-05, "loss": 0.5159, "step": 16741 }, { "epoch": 0.4578319842485233, "grad_norm": 1.0670180320739746, "learning_rate": 1.1842308992680066e-05, "loss": 0.5005, "step": 16742 }, { "epoch": 0.4578593305622402, "grad_norm": 1.800400972366333, "learning_rate": 1.184143844190083e-05, "loss": 0.8475, "step": 16743 }, { "epoch": 0.45788667687595713, "grad_norm": 1.6380642652511597, "learning_rate": 1.1840567876676023e-05, "loss": 0.5411, "step": 16744 }, { "epoch": 0.45791402318967406, "grad_norm": 1.1084285974502563, "learning_rate": 1.1839697297012482e-05, "loss": 0.5147, "step": 16745 }, { "epoch": 0.4579413695033909, "grad_norm": 1.215376615524292, "learning_rate": 1.1838826702917032e-05, "loss": 0.5508, "step": 16746 }, { "epoch": 0.45796871581710785, "grad_norm": 1.5814940929412842, "learning_rate": 1.1837956094396503e-05, "loss": 0.5205, "step": 16747 }, { "epoch": 0.4579960621308248, "grad_norm": 1.2950536012649536, "learning_rate": 1.1837085471457728e-05, "loss": 0.4921, "step": 16748 }, { "epoch": 0.4580234084445417, "grad_norm": 1.3040415048599243, "learning_rate": 1.1836214834107533e-05, "loss": 0.5199, "step": 16749 }, { "epoch": 0.45805075475825857, "grad_norm": 1.3253730535507202, "learning_rate": 1.1835344182352753e-05, "loss": 0.5053, "step": 16750 }, { "epoch": 0.4580781010719755, "grad_norm": 1.7228517532348633, "learning_rate": 1.183447351620021e-05, "loss": 0.851, "step": 16751 }, { "epoch": 0.4581054473856924, "grad_norm": 1.6384950876235962, "learning_rate": 1.1833602835656741e-05, "loss": 0.4432, "step": 16752 }, { "epoch": 0.45813279369940935, "grad_norm": 1.1921203136444092, "learning_rate": 1.1832732140729171e-05, "loss": 0.4992, "step": 16753 }, { "epoch": 0.4581601400131262, "grad_norm": 1.5010653734207153, "learning_rate": 1.1831861431424337e-05, "loss": 0.4267, "step": 16754 }, { "epoch": 0.45818748632684314, "grad_norm": 1.2615234851837158, "learning_rate": 1.1830990707749062e-05, "loss": 0.5426, "step": 16755 }, { "epoch": 0.45821483264056007, "grad_norm": 1.3029590845108032, "learning_rate": 1.1830119969710183e-05, "loss": 0.5517, "step": 16756 }, { "epoch": 0.458242178954277, "grad_norm": 1.294729232788086, "learning_rate": 1.1829249217314525e-05, "loss": 0.4936, "step": 16757 }, { "epoch": 0.45826952526799386, "grad_norm": 1.5349512100219727, "learning_rate": 1.1828378450568922e-05, "loss": 0.4366, "step": 16758 }, { "epoch": 0.4582968715817108, "grad_norm": 1.454489827156067, "learning_rate": 1.1827507669480207e-05, "loss": 0.4454, "step": 16759 }, { "epoch": 0.4583242178954277, "grad_norm": 1.1744447946548462, "learning_rate": 1.1826636874055205e-05, "loss": 0.4498, "step": 16760 }, { "epoch": 0.4583515642091446, "grad_norm": 1.3310290575027466, "learning_rate": 1.1825766064300754e-05, "loss": 0.4928, "step": 16761 }, { "epoch": 0.4583789105228615, "grad_norm": 1.843639850616455, "learning_rate": 1.182489524022368e-05, "loss": 0.4413, "step": 16762 }, { "epoch": 0.45840625683657843, "grad_norm": 1.0807900428771973, "learning_rate": 1.1824024401830819e-05, "loss": 0.5294, "step": 16763 }, { "epoch": 0.45843360315029535, "grad_norm": 1.7001266479492188, "learning_rate": 1.1823153549129e-05, "loss": 0.8243, "step": 16764 }, { "epoch": 0.4584609494640122, "grad_norm": 1.2998595237731934, "learning_rate": 1.182228268212505e-05, "loss": 0.5191, "step": 16765 }, { "epoch": 0.45848829577772915, "grad_norm": 1.2080278396606445, "learning_rate": 1.1821411800825808e-05, "loss": 0.4951, "step": 16766 }, { "epoch": 0.4585156420914461, "grad_norm": 1.3628056049346924, "learning_rate": 1.1820540905238106e-05, "loss": 0.5365, "step": 16767 }, { "epoch": 0.458542988405163, "grad_norm": 1.2104840278625488, "learning_rate": 1.181966999536877e-05, "loss": 0.5133, "step": 16768 }, { "epoch": 0.45857033471887987, "grad_norm": 1.4595187902450562, "learning_rate": 1.1818799071224634e-05, "loss": 0.4899, "step": 16769 }, { "epoch": 0.4585976810325968, "grad_norm": 1.3385800123214722, "learning_rate": 1.1817928132812536e-05, "loss": 0.5304, "step": 16770 }, { "epoch": 0.4586250273463137, "grad_norm": 1.3095375299453735, "learning_rate": 1.1817057180139298e-05, "loss": 0.4183, "step": 16771 }, { "epoch": 0.45865237366003064, "grad_norm": 1.2930189371109009, "learning_rate": 1.1816186213211758e-05, "loss": 0.8334, "step": 16772 }, { "epoch": 0.4586797199737475, "grad_norm": 1.4420433044433594, "learning_rate": 1.181531523203675e-05, "loss": 0.497, "step": 16773 }, { "epoch": 0.45870706628746444, "grad_norm": 1.4667352437973022, "learning_rate": 1.1814444236621104e-05, "loss": 0.4836, "step": 16774 }, { "epoch": 0.45873441260118136, "grad_norm": 1.1541062593460083, "learning_rate": 1.1813573226971656e-05, "loss": 0.3634, "step": 16775 }, { "epoch": 0.4587617589148983, "grad_norm": 1.4712902307510376, "learning_rate": 1.1812702203095235e-05, "loss": 0.4273, "step": 16776 }, { "epoch": 0.45878910522861516, "grad_norm": 1.2765116691589355, "learning_rate": 1.1811831164998673e-05, "loss": 0.5085, "step": 16777 }, { "epoch": 0.4588164515423321, "grad_norm": 1.2728900909423828, "learning_rate": 1.1810960112688807e-05, "loss": 0.4923, "step": 16778 }, { "epoch": 0.458843797856049, "grad_norm": 1.0575593709945679, "learning_rate": 1.1810089046172468e-05, "loss": 0.4934, "step": 16779 }, { "epoch": 0.45887114416976593, "grad_norm": 1.2767503261566162, "learning_rate": 1.1809217965456492e-05, "loss": 0.4972, "step": 16780 }, { "epoch": 0.4588984904834828, "grad_norm": 1.3652383089065552, "learning_rate": 1.180834687054771e-05, "loss": 0.5274, "step": 16781 }, { "epoch": 0.4589258367971997, "grad_norm": 1.5234636068344116, "learning_rate": 1.1807475761452953e-05, "loss": 0.531, "step": 16782 }, { "epoch": 0.45895318311091665, "grad_norm": 1.2215101718902588, "learning_rate": 1.180660463817906e-05, "loss": 0.527, "step": 16783 }, { "epoch": 0.4589805294246336, "grad_norm": 1.2859731912612915, "learning_rate": 1.180573350073286e-05, "loss": 0.5377, "step": 16784 }, { "epoch": 0.45900787573835045, "grad_norm": 1.3418116569519043, "learning_rate": 1.1804862349121191e-05, "loss": 0.5045, "step": 16785 }, { "epoch": 0.45903522205206737, "grad_norm": 1.368054747581482, "learning_rate": 1.1803991183350884e-05, "loss": 0.5137, "step": 16786 }, { "epoch": 0.4590625683657843, "grad_norm": 1.5673816204071045, "learning_rate": 1.1803120003428774e-05, "loss": 0.5386, "step": 16787 }, { "epoch": 0.4590899146795012, "grad_norm": 1.5045199394226074, "learning_rate": 1.1802248809361694e-05, "loss": 0.5187, "step": 16788 }, { "epoch": 0.4591172609932181, "grad_norm": 1.3828372955322266, "learning_rate": 1.1801377601156481e-05, "loss": 0.5023, "step": 16789 }, { "epoch": 0.459144607306935, "grad_norm": 1.2631900310516357, "learning_rate": 1.1800506378819968e-05, "loss": 0.4915, "step": 16790 }, { "epoch": 0.45917195362065194, "grad_norm": 1.252397060394287, "learning_rate": 1.1799635142358985e-05, "loss": 0.4281, "step": 16791 }, { "epoch": 0.45919929993436887, "grad_norm": 1.2611808776855469, "learning_rate": 1.1798763891780373e-05, "loss": 0.5355, "step": 16792 }, { "epoch": 0.45922664624808573, "grad_norm": 1.2761986255645752, "learning_rate": 1.1797892627090965e-05, "loss": 0.5315, "step": 16793 }, { "epoch": 0.45925399256180266, "grad_norm": 1.3529294729232788, "learning_rate": 1.1797021348297596e-05, "loss": 0.4623, "step": 16794 }, { "epoch": 0.4592813388755196, "grad_norm": 1.4603191614151, "learning_rate": 1.1796150055407099e-05, "loss": 0.4237, "step": 16795 }, { "epoch": 0.4593086851892365, "grad_norm": 2.0490591526031494, "learning_rate": 1.1795278748426312e-05, "loss": 0.5026, "step": 16796 }, { "epoch": 0.4593360315029534, "grad_norm": 1.8062671422958374, "learning_rate": 1.1794407427362067e-05, "loss": 0.5219, "step": 16797 }, { "epoch": 0.4593633778166703, "grad_norm": 1.3644295930862427, "learning_rate": 1.17935360922212e-05, "loss": 0.4969, "step": 16798 }, { "epoch": 0.45939072413038723, "grad_norm": 1.3948814868927002, "learning_rate": 1.1792664743010548e-05, "loss": 0.5038, "step": 16799 }, { "epoch": 0.45941807044410415, "grad_norm": 1.4874649047851562, "learning_rate": 1.1791793379736946e-05, "loss": 0.8319, "step": 16800 }, { "epoch": 0.459445416757821, "grad_norm": 1.995910882949829, "learning_rate": 1.1790922002407228e-05, "loss": 0.4271, "step": 16801 }, { "epoch": 0.45947276307153795, "grad_norm": 1.284091830253601, "learning_rate": 1.1790050611028234e-05, "loss": 0.5235, "step": 16802 }, { "epoch": 0.4595001093852549, "grad_norm": 1.3982954025268555, "learning_rate": 1.1789179205606795e-05, "loss": 0.5765, "step": 16803 }, { "epoch": 0.4595274556989718, "grad_norm": 1.2821003198623657, "learning_rate": 1.1788307786149748e-05, "loss": 0.519, "step": 16804 }, { "epoch": 0.45955480201268867, "grad_norm": 1.3692317008972168, "learning_rate": 1.178743635266393e-05, "loss": 0.4962, "step": 16805 }, { "epoch": 0.4595821483264056, "grad_norm": 1.2046879529953003, "learning_rate": 1.1786564905156177e-05, "loss": 0.5318, "step": 16806 }, { "epoch": 0.4596094946401225, "grad_norm": 1.871552586555481, "learning_rate": 1.1785693443633324e-05, "loss": 0.4543, "step": 16807 }, { "epoch": 0.45963684095383944, "grad_norm": 1.6961965560913086, "learning_rate": 1.178482196810221e-05, "loss": 0.445, "step": 16808 }, { "epoch": 0.4596641872675563, "grad_norm": 1.5395485162734985, "learning_rate": 1.1783950478569667e-05, "loss": 0.4854, "step": 16809 }, { "epoch": 0.45969153358127324, "grad_norm": 1.3350858688354492, "learning_rate": 1.1783078975042538e-05, "loss": 0.5423, "step": 16810 }, { "epoch": 0.45971887989499016, "grad_norm": 1.0540648698806763, "learning_rate": 1.1782207457527655e-05, "loss": 0.5147, "step": 16811 }, { "epoch": 0.4597462262087071, "grad_norm": 1.3854910135269165, "learning_rate": 1.1781335926031855e-05, "loss": 0.3976, "step": 16812 }, { "epoch": 0.45977357252242396, "grad_norm": 1.2362557649612427, "learning_rate": 1.1780464380561978e-05, "loss": 0.5146, "step": 16813 }, { "epoch": 0.4598009188361409, "grad_norm": 1.2082555294036865, "learning_rate": 1.1779592821124859e-05, "loss": 0.5305, "step": 16814 }, { "epoch": 0.4598282651498578, "grad_norm": 1.38323175907135, "learning_rate": 1.1778721247727332e-05, "loss": 0.5141, "step": 16815 }, { "epoch": 0.45985561146357473, "grad_norm": 2.4192309379577637, "learning_rate": 1.1777849660376242e-05, "loss": 0.836, "step": 16816 }, { "epoch": 0.4598829577772916, "grad_norm": 1.3722397089004517, "learning_rate": 1.1776978059078416e-05, "loss": 0.5142, "step": 16817 }, { "epoch": 0.4599103040910085, "grad_norm": 1.1948140859603882, "learning_rate": 1.1776106443840703e-05, "loss": 0.5216, "step": 16818 }, { "epoch": 0.45993765040472545, "grad_norm": 1.282876968383789, "learning_rate": 1.1775234814669931e-05, "loss": 0.8071, "step": 16819 }, { "epoch": 0.4599649967184424, "grad_norm": 1.1024729013442993, "learning_rate": 1.1774363171572945e-05, "loss": 0.5014, "step": 16820 }, { "epoch": 0.45999234303215925, "grad_norm": 1.5559849739074707, "learning_rate": 1.1773491514556578e-05, "loss": 0.7531, "step": 16821 }, { "epoch": 0.46001968934587617, "grad_norm": 1.4230966567993164, "learning_rate": 1.1772619843627666e-05, "loss": 0.807, "step": 16822 }, { "epoch": 0.4600470356595931, "grad_norm": 1.4741543531417847, "learning_rate": 1.1771748158793052e-05, "loss": 0.531, "step": 16823 }, { "epoch": 0.46007438197331, "grad_norm": 1.2846250534057617, "learning_rate": 1.1770876460059574e-05, "loss": 0.4966, "step": 16824 }, { "epoch": 0.4601017282870269, "grad_norm": 1.329392671585083, "learning_rate": 1.1770004747434067e-05, "loss": 0.506, "step": 16825 }, { "epoch": 0.4601290746007438, "grad_norm": 2.9053659439086914, "learning_rate": 1.176913302092337e-05, "loss": 0.3951, "step": 16826 }, { "epoch": 0.46015642091446074, "grad_norm": 1.3964765071868896, "learning_rate": 1.1768261280534326e-05, "loss": 0.8277, "step": 16827 }, { "epoch": 0.46018376722817766, "grad_norm": 1.1507703065872192, "learning_rate": 1.1767389526273764e-05, "loss": 0.5298, "step": 16828 }, { "epoch": 0.46021111354189453, "grad_norm": 1.2635972499847412, "learning_rate": 1.1766517758148535e-05, "loss": 0.5103, "step": 16829 }, { "epoch": 0.46023845985561146, "grad_norm": 1.2022801637649536, "learning_rate": 1.1765645976165467e-05, "loss": 0.5249, "step": 16830 }, { "epoch": 0.4602658061693284, "grad_norm": 1.5067062377929688, "learning_rate": 1.1764774180331404e-05, "loss": 0.4892, "step": 16831 }, { "epoch": 0.4602931524830453, "grad_norm": 1.1722480058670044, "learning_rate": 1.1763902370653184e-05, "loss": 0.4802, "step": 16832 }, { "epoch": 0.4603204987967622, "grad_norm": 1.4477275609970093, "learning_rate": 1.1763030547137645e-05, "loss": 0.5377, "step": 16833 }, { "epoch": 0.4603478451104791, "grad_norm": 1.5378283262252808, "learning_rate": 1.1762158709791633e-05, "loss": 0.8218, "step": 16834 }, { "epoch": 0.46037519142419603, "grad_norm": 1.1040140390396118, "learning_rate": 1.1761286858621976e-05, "loss": 0.5124, "step": 16835 }, { "epoch": 0.46040253773791295, "grad_norm": 1.332701325416565, "learning_rate": 1.1760414993635521e-05, "loss": 0.5543, "step": 16836 }, { "epoch": 0.4604298840516298, "grad_norm": 1.4827860593795776, "learning_rate": 1.175954311483911e-05, "loss": 0.4085, "step": 16837 }, { "epoch": 0.46045723036534675, "grad_norm": 1.2760756015777588, "learning_rate": 1.1758671222239575e-05, "loss": 0.4786, "step": 16838 }, { "epoch": 0.4604845766790637, "grad_norm": 1.856080412864685, "learning_rate": 1.1757799315843758e-05, "loss": 0.3752, "step": 16839 }, { "epoch": 0.4605119229927806, "grad_norm": 4.0448408126831055, "learning_rate": 1.1756927395658502e-05, "loss": 0.5185, "step": 16840 }, { "epoch": 0.46053926930649747, "grad_norm": 1.9391469955444336, "learning_rate": 1.1756055461690644e-05, "loss": 0.7667, "step": 16841 }, { "epoch": 0.4605666156202144, "grad_norm": 1.1618196964263916, "learning_rate": 1.1755183513947023e-05, "loss": 0.53, "step": 16842 }, { "epoch": 0.4605939619339313, "grad_norm": 1.3828843832015991, "learning_rate": 1.1754311552434485e-05, "loss": 0.5245, "step": 16843 }, { "epoch": 0.46062130824764824, "grad_norm": 1.3319746255874634, "learning_rate": 1.1753439577159865e-05, "loss": 0.5212, "step": 16844 }, { "epoch": 0.4606486545613651, "grad_norm": 1.6930017471313477, "learning_rate": 1.1752567588130003e-05, "loss": 0.5331, "step": 16845 }, { "epoch": 0.46067600087508204, "grad_norm": 1.3764448165893555, "learning_rate": 1.1751695585351745e-05, "loss": 0.5477, "step": 16846 }, { "epoch": 0.46070334718879896, "grad_norm": 1.1722536087036133, "learning_rate": 1.1750823568831925e-05, "loss": 0.5072, "step": 16847 }, { "epoch": 0.4607306935025159, "grad_norm": 1.1808412075042725, "learning_rate": 1.174995153857739e-05, "loss": 0.534, "step": 16848 }, { "epoch": 0.46075803981623276, "grad_norm": 1.3143495321273804, "learning_rate": 1.1749079494594976e-05, "loss": 0.5186, "step": 16849 }, { "epoch": 0.4607853861299497, "grad_norm": 1.4273279905319214, "learning_rate": 1.1748207436891523e-05, "loss": 0.4992, "step": 16850 }, { "epoch": 0.4608127324436666, "grad_norm": 2.7193603515625, "learning_rate": 1.1747335365473879e-05, "loss": 0.5623, "step": 16851 }, { "epoch": 0.46084007875738353, "grad_norm": 1.2738680839538574, "learning_rate": 1.1746463280348878e-05, "loss": 0.5142, "step": 16852 }, { "epoch": 0.4608674250711004, "grad_norm": 1.1691632270812988, "learning_rate": 1.1745591181523369e-05, "loss": 0.5196, "step": 16853 }, { "epoch": 0.4608947713848173, "grad_norm": 1.1967421770095825, "learning_rate": 1.1744719069004183e-05, "loss": 0.5158, "step": 16854 }, { "epoch": 0.46092211769853425, "grad_norm": 1.3003654479980469, "learning_rate": 1.1743846942798169e-05, "loss": 0.4975, "step": 16855 }, { "epoch": 0.4609494640122512, "grad_norm": 1.2383192777633667, "learning_rate": 1.1742974802912166e-05, "loss": 0.5293, "step": 16856 }, { "epoch": 0.46097681032596804, "grad_norm": 1.8354557752609253, "learning_rate": 1.1742102649353017e-05, "loss": 0.5006, "step": 16857 }, { "epoch": 0.46100415663968497, "grad_norm": 1.1059417724609375, "learning_rate": 1.1741230482127561e-05, "loss": 0.5078, "step": 16858 }, { "epoch": 0.4610315029534019, "grad_norm": 1.2445749044418335, "learning_rate": 1.1740358301242645e-05, "loss": 0.5395, "step": 16859 }, { "epoch": 0.4610588492671188, "grad_norm": 1.4220062494277954, "learning_rate": 1.1739486106705104e-05, "loss": 0.5209, "step": 16860 }, { "epoch": 0.4610861955808357, "grad_norm": 1.3532958030700684, "learning_rate": 1.1738613898521785e-05, "loss": 0.4217, "step": 16861 }, { "epoch": 0.4611135418945526, "grad_norm": 1.636583685874939, "learning_rate": 1.1737741676699533e-05, "loss": 0.8519, "step": 16862 }, { "epoch": 0.46114088820826954, "grad_norm": 1.503056287765503, "learning_rate": 1.1736869441245183e-05, "loss": 0.4798, "step": 16863 }, { "epoch": 0.4611682345219864, "grad_norm": 1.4400311708450317, "learning_rate": 1.1735997192165583e-05, "loss": 0.4284, "step": 16864 }, { "epoch": 0.46119558083570333, "grad_norm": 1.3787513971328735, "learning_rate": 1.1735124929467569e-05, "loss": 0.5121, "step": 16865 }, { "epoch": 0.46122292714942026, "grad_norm": 1.1268965005874634, "learning_rate": 1.173425265315799e-05, "loss": 0.5002, "step": 16866 }, { "epoch": 0.4612502734631372, "grad_norm": 1.26641047000885, "learning_rate": 1.1733380363243692e-05, "loss": 0.4913, "step": 16867 }, { "epoch": 0.46127761977685405, "grad_norm": 1.4657820463180542, "learning_rate": 1.1732508059731508e-05, "loss": 0.5253, "step": 16868 }, { "epoch": 0.461304966090571, "grad_norm": 1.3360662460327148, "learning_rate": 1.1731635742628287e-05, "loss": 0.5136, "step": 16869 }, { "epoch": 0.4613323124042879, "grad_norm": 1.778243899345398, "learning_rate": 1.1730763411940872e-05, "loss": 0.4035, "step": 16870 }, { "epoch": 0.46135965871800483, "grad_norm": 1.748113989830017, "learning_rate": 1.1729891067676104e-05, "loss": 0.4868, "step": 16871 }, { "epoch": 0.4613870050317217, "grad_norm": 1.3756284713745117, "learning_rate": 1.172901870984083e-05, "loss": 0.5189, "step": 16872 }, { "epoch": 0.4614143513454386, "grad_norm": 1.2294385433197021, "learning_rate": 1.172814633844189e-05, "loss": 0.5202, "step": 16873 }, { "epoch": 0.46144169765915555, "grad_norm": 1.5508171319961548, "learning_rate": 1.1727273953486127e-05, "loss": 0.524, "step": 16874 }, { "epoch": 0.4614690439728725, "grad_norm": 1.4746664762496948, "learning_rate": 1.1726401554980386e-05, "loss": 0.5057, "step": 16875 }, { "epoch": 0.46149639028658934, "grad_norm": 1.5439300537109375, "learning_rate": 1.1725529142931512e-05, "loss": 0.5336, "step": 16876 }, { "epoch": 0.46152373660030627, "grad_norm": 1.5392659902572632, "learning_rate": 1.1724656717346343e-05, "loss": 0.5283, "step": 16877 }, { "epoch": 0.4615510829140232, "grad_norm": 1.3944028615951538, "learning_rate": 1.1723784278231732e-05, "loss": 0.5207, "step": 16878 }, { "epoch": 0.4615784292277401, "grad_norm": 1.343305230140686, "learning_rate": 1.1722911825594516e-05, "loss": 0.8261, "step": 16879 }, { "epoch": 0.461605775541457, "grad_norm": 1.2405202388763428, "learning_rate": 1.1722039359441542e-05, "loss": 0.5029, "step": 16880 }, { "epoch": 0.4616331218551739, "grad_norm": 1.5678770542144775, "learning_rate": 1.1721166879779655e-05, "loss": 0.5107, "step": 16881 }, { "epoch": 0.46166046816889084, "grad_norm": 1.387839674949646, "learning_rate": 1.1720294386615696e-05, "loss": 0.5182, "step": 16882 }, { "epoch": 0.46168781448260776, "grad_norm": 1.6865959167480469, "learning_rate": 1.1719421879956516e-05, "loss": 0.4581, "step": 16883 }, { "epoch": 0.46171516079632463, "grad_norm": 1.9391430616378784, "learning_rate": 1.1718549359808951e-05, "loss": 0.5007, "step": 16884 }, { "epoch": 0.46174250711004156, "grad_norm": 1.1747790575027466, "learning_rate": 1.1717676826179849e-05, "loss": 0.4943, "step": 16885 }, { "epoch": 0.4617698534237585, "grad_norm": 1.389562726020813, "learning_rate": 1.1716804279076062e-05, "loss": 0.5052, "step": 16886 }, { "epoch": 0.4617971997374754, "grad_norm": 1.4295616149902344, "learning_rate": 1.1715931718504423e-05, "loss": 0.5009, "step": 16887 }, { "epoch": 0.4618245460511923, "grad_norm": 2.1784822940826416, "learning_rate": 1.1715059144471786e-05, "loss": 0.3908, "step": 16888 }, { "epoch": 0.4618518923649092, "grad_norm": 1.4733980894088745, "learning_rate": 1.1714186556984987e-05, "loss": 0.5437, "step": 16889 }, { "epoch": 0.4618792386786261, "grad_norm": 1.530906081199646, "learning_rate": 1.1713313956050884e-05, "loss": 0.5582, "step": 16890 }, { "epoch": 0.46190658499234305, "grad_norm": 1.1962416172027588, "learning_rate": 1.171244134167631e-05, "loss": 0.5094, "step": 16891 }, { "epoch": 0.4619339313060599, "grad_norm": 1.2634356021881104, "learning_rate": 1.171156871386812e-05, "loss": 0.5096, "step": 16892 }, { "epoch": 0.46196127761977684, "grad_norm": 1.681238055229187, "learning_rate": 1.1710696072633152e-05, "loss": 0.3768, "step": 16893 }, { "epoch": 0.46198862393349377, "grad_norm": 1.394775152206421, "learning_rate": 1.1709823417978256e-05, "loss": 0.4401, "step": 16894 }, { "epoch": 0.4620159702472107, "grad_norm": 1.4783403873443604, "learning_rate": 1.1708950749910275e-05, "loss": 0.787, "step": 16895 }, { "epoch": 0.46204331656092756, "grad_norm": 1.6213419437408447, "learning_rate": 1.1708078068436058e-05, "loss": 0.542, "step": 16896 }, { "epoch": 0.4620706628746445, "grad_norm": 1.2826545238494873, "learning_rate": 1.1707205373562449e-05, "loss": 0.5158, "step": 16897 }, { "epoch": 0.4620980091883614, "grad_norm": 1.4838486909866333, "learning_rate": 1.1706332665296292e-05, "loss": 0.5138, "step": 16898 }, { "epoch": 0.46212535550207834, "grad_norm": 1.2117043733596802, "learning_rate": 1.1705459943644439e-05, "loss": 0.5089, "step": 16899 }, { "epoch": 0.4621527018157952, "grad_norm": 1.5723627805709839, "learning_rate": 1.170458720861373e-05, "loss": 0.4188, "step": 16900 }, { "epoch": 0.46218004812951213, "grad_norm": 1.3322241306304932, "learning_rate": 1.1703714460211013e-05, "loss": 0.5396, "step": 16901 }, { "epoch": 0.46220739444322906, "grad_norm": 1.2770053148269653, "learning_rate": 1.1702841698443139e-05, "loss": 0.5107, "step": 16902 }, { "epoch": 0.462234740756946, "grad_norm": 1.3247095346450806, "learning_rate": 1.1701968923316948e-05, "loss": 0.7792, "step": 16903 }, { "epoch": 0.46226208707066285, "grad_norm": 2.560357093811035, "learning_rate": 1.1701096134839291e-05, "loss": 0.5168, "step": 16904 }, { "epoch": 0.4622894333843798, "grad_norm": 1.2937313318252563, "learning_rate": 1.1700223333017015e-05, "loss": 0.519, "step": 16905 }, { "epoch": 0.4623167796980967, "grad_norm": 1.500274896621704, "learning_rate": 1.1699350517856964e-05, "loss": 0.8039, "step": 16906 }, { "epoch": 0.4623441260118136, "grad_norm": 1.3014713525772095, "learning_rate": 1.1698477689365987e-05, "loss": 0.4847, "step": 16907 }, { "epoch": 0.4623714723255305, "grad_norm": 1.4311710596084595, "learning_rate": 1.1697604847550932e-05, "loss": 0.4978, "step": 16908 }, { "epoch": 0.4623988186392474, "grad_norm": 1.8691400289535522, "learning_rate": 1.1696731992418642e-05, "loss": 0.5221, "step": 16909 }, { "epoch": 0.46242616495296435, "grad_norm": 1.6025174856185913, "learning_rate": 1.1695859123975968e-05, "loss": 0.5175, "step": 16910 }, { "epoch": 0.46245351126668127, "grad_norm": 1.1170375347137451, "learning_rate": 1.1694986242229758e-05, "loss": 0.5142, "step": 16911 }, { "epoch": 0.46248085758039814, "grad_norm": 1.3823493719100952, "learning_rate": 1.1694113347186854e-05, "loss": 0.5198, "step": 16912 }, { "epoch": 0.46250820389411507, "grad_norm": 1.8437402248382568, "learning_rate": 1.1693240438854112e-05, "loss": 0.5309, "step": 16913 }, { "epoch": 0.462535550207832, "grad_norm": 1.4336587190628052, "learning_rate": 1.1692367517238372e-05, "loss": 0.5416, "step": 16914 }, { "epoch": 0.4625628965215489, "grad_norm": 2.061476230621338, "learning_rate": 1.1691494582346485e-05, "loss": 0.4911, "step": 16915 }, { "epoch": 0.4625902428352658, "grad_norm": 1.5940403938293457, "learning_rate": 1.1690621634185301e-05, "loss": 0.5222, "step": 16916 }, { "epoch": 0.4626175891489827, "grad_norm": 1.6437703371047974, "learning_rate": 1.1689748672761665e-05, "loss": 0.4422, "step": 16917 }, { "epoch": 0.46264493546269964, "grad_norm": 1.2290973663330078, "learning_rate": 1.168887569808243e-05, "loss": 0.5133, "step": 16918 }, { "epoch": 0.46267228177641656, "grad_norm": 1.7364840507507324, "learning_rate": 1.1688002710154437e-05, "loss": 0.5075, "step": 16919 }, { "epoch": 0.46269962809013343, "grad_norm": 1.1390931606292725, "learning_rate": 1.1687129708984535e-05, "loss": 0.4825, "step": 16920 }, { "epoch": 0.46272697440385036, "grad_norm": 1.8239635229110718, "learning_rate": 1.1686256694579578e-05, "loss": 0.5167, "step": 16921 }, { "epoch": 0.4627543207175673, "grad_norm": 1.608590006828308, "learning_rate": 1.1685383666946413e-05, "loss": 0.5357, "step": 16922 }, { "epoch": 0.4627816670312842, "grad_norm": 1.3907967805862427, "learning_rate": 1.1684510626091884e-05, "loss": 0.5055, "step": 16923 }, { "epoch": 0.4628090133450011, "grad_norm": 1.332546591758728, "learning_rate": 1.1683637572022848e-05, "loss": 0.5108, "step": 16924 }, { "epoch": 0.462836359658718, "grad_norm": 1.3728666305541992, "learning_rate": 1.1682764504746145e-05, "loss": 0.5347, "step": 16925 }, { "epoch": 0.4628637059724349, "grad_norm": 1.4041789770126343, "learning_rate": 1.1681891424268632e-05, "loss": 0.5283, "step": 16926 }, { "epoch": 0.46289105228615185, "grad_norm": 1.4308578968048096, "learning_rate": 1.1681018330597152e-05, "loss": 0.5149, "step": 16927 }, { "epoch": 0.4629183985998687, "grad_norm": 1.7063404321670532, "learning_rate": 1.1680145223738556e-05, "loss": 0.3859, "step": 16928 }, { "epoch": 0.46294574491358564, "grad_norm": 1.359791874885559, "learning_rate": 1.1679272103699694e-05, "loss": 0.5093, "step": 16929 }, { "epoch": 0.46297309122730257, "grad_norm": 1.460530400276184, "learning_rate": 1.1678398970487414e-05, "loss": 0.527, "step": 16930 }, { "epoch": 0.4630004375410195, "grad_norm": 1.2820850610733032, "learning_rate": 1.1677525824108566e-05, "loss": 0.4791, "step": 16931 }, { "epoch": 0.46302778385473636, "grad_norm": 1.4962830543518066, "learning_rate": 1.167665266457e-05, "loss": 0.5333, "step": 16932 }, { "epoch": 0.4630551301684533, "grad_norm": 1.3331011533737183, "learning_rate": 1.1675779491878567e-05, "loss": 0.4973, "step": 16933 }, { "epoch": 0.4630824764821702, "grad_norm": 3.748387336730957, "learning_rate": 1.1674906306041114e-05, "loss": 0.5174, "step": 16934 }, { "epoch": 0.46310982279588714, "grad_norm": 1.5893689393997192, "learning_rate": 1.1674033107064496e-05, "loss": 0.4466, "step": 16935 }, { "epoch": 0.463137169109604, "grad_norm": 1.6897752285003662, "learning_rate": 1.1673159894955554e-05, "loss": 0.538, "step": 16936 }, { "epoch": 0.46316451542332093, "grad_norm": 1.6208895444869995, "learning_rate": 1.1672286669721148e-05, "loss": 0.5283, "step": 16937 }, { "epoch": 0.46319186173703786, "grad_norm": 1.45995032787323, "learning_rate": 1.1671413431368122e-05, "loss": 0.4597, "step": 16938 }, { "epoch": 0.4632192080507548, "grad_norm": 1.6488895416259766, "learning_rate": 1.1670540179903327e-05, "loss": 0.3652, "step": 16939 }, { "epoch": 0.46324655436447165, "grad_norm": 1.5413320064544678, "learning_rate": 1.1669666915333618e-05, "loss": 0.447, "step": 16940 }, { "epoch": 0.4632739006781886, "grad_norm": 2.868661642074585, "learning_rate": 1.1668793637665835e-05, "loss": 0.5319, "step": 16941 }, { "epoch": 0.4633012469919055, "grad_norm": 1.4657564163208008, "learning_rate": 1.1667920346906843e-05, "loss": 0.7995, "step": 16942 }, { "epoch": 0.4633285933056224, "grad_norm": 1.369184970855713, "learning_rate": 1.1667047043063484e-05, "loss": 0.4374, "step": 16943 }, { "epoch": 0.4633559396193393, "grad_norm": 1.3892557621002197, "learning_rate": 1.1666173726142607e-05, "loss": 0.5236, "step": 16944 }, { "epoch": 0.4633832859330562, "grad_norm": 1.4745782613754272, "learning_rate": 1.1665300396151067e-05, "loss": 0.5477, "step": 16945 }, { "epoch": 0.46341063224677315, "grad_norm": 1.3399951457977295, "learning_rate": 1.1664427053095715e-05, "loss": 0.5199, "step": 16946 }, { "epoch": 0.46343797856049007, "grad_norm": 1.4334638118743896, "learning_rate": 1.1663553696983401e-05, "loss": 0.5295, "step": 16947 }, { "epoch": 0.46346532487420694, "grad_norm": 1.6863032579421997, "learning_rate": 1.1662680327820976e-05, "loss": 0.8282, "step": 16948 }, { "epoch": 0.46349267118792387, "grad_norm": 1.7551065683364868, "learning_rate": 1.166180694561529e-05, "loss": 0.5263, "step": 16949 }, { "epoch": 0.4635200175016408, "grad_norm": 1.4495608806610107, "learning_rate": 1.1660933550373197e-05, "loss": 0.5073, "step": 16950 }, { "epoch": 0.4635473638153577, "grad_norm": 1.9785816669464111, "learning_rate": 1.166006014210155e-05, "loss": 0.421, "step": 16951 }, { "epoch": 0.4635747101290746, "grad_norm": 1.5886383056640625, "learning_rate": 1.1659186720807196e-05, "loss": 0.5522, "step": 16952 }, { "epoch": 0.4636020564427915, "grad_norm": 1.078718900680542, "learning_rate": 1.1658313286496992e-05, "loss": 0.5089, "step": 16953 }, { "epoch": 0.46362940275650844, "grad_norm": 1.708075761795044, "learning_rate": 1.1657439839177784e-05, "loss": 0.4498, "step": 16954 }, { "epoch": 0.46365674907022536, "grad_norm": 1.137406587600708, "learning_rate": 1.165656637885643e-05, "loss": 0.4539, "step": 16955 }, { "epoch": 0.46368409538394223, "grad_norm": 2.2498817443847656, "learning_rate": 1.1655692905539775e-05, "loss": 0.5214, "step": 16956 }, { "epoch": 0.46371144169765915, "grad_norm": 1.3151689767837524, "learning_rate": 1.1654819419234677e-05, "loss": 0.4989, "step": 16957 }, { "epoch": 0.4637387880113761, "grad_norm": 1.3028552532196045, "learning_rate": 1.1653945919947986e-05, "loss": 0.4973, "step": 16958 }, { "epoch": 0.463766134325093, "grad_norm": 1.3300219774246216, "learning_rate": 1.1653072407686556e-05, "loss": 0.513, "step": 16959 }, { "epoch": 0.4637934806388099, "grad_norm": 1.4949311017990112, "learning_rate": 1.1652198882457235e-05, "loss": 0.5065, "step": 16960 }, { "epoch": 0.4638208269525268, "grad_norm": 1.2127416133880615, "learning_rate": 1.1651325344266883e-05, "loss": 0.4843, "step": 16961 }, { "epoch": 0.4638481732662437, "grad_norm": 1.1924928426742554, "learning_rate": 1.1650451793122348e-05, "loss": 0.5054, "step": 16962 }, { "epoch": 0.46387551957996065, "grad_norm": 1.3100148439407349, "learning_rate": 1.1649578229030481e-05, "loss": 0.3486, "step": 16963 }, { "epoch": 0.4639028658936775, "grad_norm": 2.365642786026001, "learning_rate": 1.164870465199814e-05, "loss": 0.516, "step": 16964 }, { "epoch": 0.46393021220739444, "grad_norm": 1.1713560819625854, "learning_rate": 1.164783106203217e-05, "loss": 0.512, "step": 16965 }, { "epoch": 0.46395755852111137, "grad_norm": 2.08107590675354, "learning_rate": 1.1646957459139431e-05, "loss": 0.5179, "step": 16966 }, { "epoch": 0.46398490483482824, "grad_norm": 1.4463164806365967, "learning_rate": 1.1646083843326777e-05, "loss": 0.4188, "step": 16967 }, { "epoch": 0.46401225114854516, "grad_norm": 1.8428391218185425, "learning_rate": 1.1645210214601054e-05, "loss": 0.515, "step": 16968 }, { "epoch": 0.4640395974622621, "grad_norm": 1.4316729307174683, "learning_rate": 1.1644336572969121e-05, "loss": 0.5456, "step": 16969 }, { "epoch": 0.464066943775979, "grad_norm": 1.3141024112701416, "learning_rate": 1.1643462918437834e-05, "loss": 0.4622, "step": 16970 }, { "epoch": 0.4640942900896959, "grad_norm": 1.2337534427642822, "learning_rate": 1.164258925101404e-05, "loss": 0.5136, "step": 16971 }, { "epoch": 0.4641216364034128, "grad_norm": 1.215405821800232, "learning_rate": 1.1641715570704598e-05, "loss": 0.5284, "step": 16972 }, { "epoch": 0.46414898271712973, "grad_norm": 1.1437724828720093, "learning_rate": 1.1640841877516356e-05, "loss": 0.4975, "step": 16973 }, { "epoch": 0.46417632903084666, "grad_norm": 1.2246274948120117, "learning_rate": 1.1639968171456171e-05, "loss": 0.5236, "step": 16974 }, { "epoch": 0.4642036753445635, "grad_norm": 1.2998257875442505, "learning_rate": 1.1639094452530901e-05, "loss": 0.3981, "step": 16975 }, { "epoch": 0.46423102165828045, "grad_norm": 1.326836109161377, "learning_rate": 1.1638220720747394e-05, "loss": 0.8053, "step": 16976 }, { "epoch": 0.4642583679719974, "grad_norm": 1.3718301057815552, "learning_rate": 1.1637346976112505e-05, "loss": 0.5095, "step": 16977 }, { "epoch": 0.4642857142857143, "grad_norm": 1.4070513248443604, "learning_rate": 1.1636473218633096e-05, "loss": 0.532, "step": 16978 }, { "epoch": 0.46431306059943117, "grad_norm": 1.5832159519195557, "learning_rate": 1.1635599448316009e-05, "loss": 0.4524, "step": 16979 }, { "epoch": 0.4643404069131481, "grad_norm": 1.2010177373886108, "learning_rate": 1.1634725665168105e-05, "loss": 0.5206, "step": 16980 }, { "epoch": 0.464367753226865, "grad_norm": 1.5859488248825073, "learning_rate": 1.163385186919624e-05, "loss": 0.5288, "step": 16981 }, { "epoch": 0.46439509954058195, "grad_norm": 1.3701997995376587, "learning_rate": 1.1632978060407264e-05, "loss": 0.7998, "step": 16982 }, { "epoch": 0.4644224458542988, "grad_norm": 1.1573013067245483, "learning_rate": 1.163210423880804e-05, "loss": 0.5114, "step": 16983 }, { "epoch": 0.46444979216801574, "grad_norm": 1.4271390438079834, "learning_rate": 1.1631230404405413e-05, "loss": 0.5029, "step": 16984 }, { "epoch": 0.46447713848173267, "grad_norm": 1.302033543586731, "learning_rate": 1.1630356557206244e-05, "loss": 0.4842, "step": 16985 }, { "epoch": 0.4645044847954496, "grad_norm": 1.2171367406845093, "learning_rate": 1.162948269721739e-05, "loss": 0.8, "step": 16986 }, { "epoch": 0.46453183110916646, "grad_norm": 1.1241215467453003, "learning_rate": 1.1628608824445697e-05, "loss": 0.513, "step": 16987 }, { "epoch": 0.4645591774228834, "grad_norm": 1.2716201543807983, "learning_rate": 1.1627734938898028e-05, "loss": 0.5151, "step": 16988 }, { "epoch": 0.4645865237366003, "grad_norm": 1.187647819519043, "learning_rate": 1.1626861040581239e-05, "loss": 0.5234, "step": 16989 }, { "epoch": 0.46461387005031723, "grad_norm": 1.3952559232711792, "learning_rate": 1.1625987129502179e-05, "loss": 0.4078, "step": 16990 }, { "epoch": 0.4646412163640341, "grad_norm": 1.4989837408065796, "learning_rate": 1.1625113205667711e-05, "loss": 0.5309, "step": 16991 }, { "epoch": 0.46466856267775103, "grad_norm": 1.3576244115829468, "learning_rate": 1.1624239269084684e-05, "loss": 0.5108, "step": 16992 }, { "epoch": 0.46469590899146795, "grad_norm": 1.7577850818634033, "learning_rate": 1.1623365319759958e-05, "loss": 0.5375, "step": 16993 }, { "epoch": 0.4647232553051849, "grad_norm": 1.4389069080352783, "learning_rate": 1.1622491357700392e-05, "loss": 0.5138, "step": 16994 }, { "epoch": 0.46475060161890175, "grad_norm": 1.3175735473632812, "learning_rate": 1.1621617382912833e-05, "loss": 0.5129, "step": 16995 }, { "epoch": 0.4647779479326187, "grad_norm": 1.3046544790267944, "learning_rate": 1.1620743395404146e-05, "loss": 0.5342, "step": 16996 }, { "epoch": 0.4648052942463356, "grad_norm": 1.372877597808838, "learning_rate": 1.1619869395181182e-05, "loss": 0.5286, "step": 16997 }, { "epoch": 0.4648326405600525, "grad_norm": 1.5611414909362793, "learning_rate": 1.1618995382250797e-05, "loss": 0.5005, "step": 16998 }, { "epoch": 0.4648599868737694, "grad_norm": 1.2746882438659668, "learning_rate": 1.1618121356619847e-05, "loss": 0.8113, "step": 16999 }, { "epoch": 0.4648873331874863, "grad_norm": 1.3175727128982544, "learning_rate": 1.1617247318295195e-05, "loss": 0.5432, "step": 17000 }, { "epoch": 0.46491467950120324, "grad_norm": 1.1487334966659546, "learning_rate": 1.161637326728369e-05, "loss": 0.5308, "step": 17001 }, { "epoch": 0.46494202581492017, "grad_norm": 1.375869870185852, "learning_rate": 1.1615499203592194e-05, "loss": 0.525, "step": 17002 }, { "epoch": 0.46496937212863704, "grad_norm": 1.3913373947143555, "learning_rate": 1.1614625127227556e-05, "loss": 0.5113, "step": 17003 }, { "epoch": 0.46499671844235396, "grad_norm": 1.4700658321380615, "learning_rate": 1.161375103819664e-05, "loss": 0.5222, "step": 17004 }, { "epoch": 0.4650240647560709, "grad_norm": 1.0651745796203613, "learning_rate": 1.1612876936506306e-05, "loss": 0.5245, "step": 17005 }, { "epoch": 0.4650514110697878, "grad_norm": 1.101228952407837, "learning_rate": 1.1612002822163401e-05, "loss": 0.4936, "step": 17006 }, { "epoch": 0.4650787573835047, "grad_norm": 1.5026663541793823, "learning_rate": 1.1611128695174793e-05, "loss": 0.4975, "step": 17007 }, { "epoch": 0.4651061036972216, "grad_norm": 1.5848695039749146, "learning_rate": 1.1610254555547332e-05, "loss": 0.5061, "step": 17008 }, { "epoch": 0.46513345001093853, "grad_norm": 1.2532124519348145, "learning_rate": 1.1609380403287873e-05, "loss": 0.5347, "step": 17009 }, { "epoch": 0.46516079632465546, "grad_norm": 1.3921023607254028, "learning_rate": 1.1608506238403282e-05, "loss": 0.5092, "step": 17010 }, { "epoch": 0.4651881426383723, "grad_norm": 1.1281658411026, "learning_rate": 1.1607632060900408e-05, "loss": 0.5339, "step": 17011 }, { "epoch": 0.46521548895208925, "grad_norm": 1.2659783363342285, "learning_rate": 1.1606757870786115e-05, "loss": 0.3716, "step": 17012 }, { "epoch": 0.4652428352658062, "grad_norm": 1.2674560546875, "learning_rate": 1.1605883668067264e-05, "loss": 0.3957, "step": 17013 }, { "epoch": 0.4652701815795231, "grad_norm": 1.2428339719772339, "learning_rate": 1.1605009452750702e-05, "loss": 0.5275, "step": 17014 }, { "epoch": 0.46529752789323997, "grad_norm": 1.2471803426742554, "learning_rate": 1.1604135224843294e-05, "loss": 0.5384, "step": 17015 }, { "epoch": 0.4653248742069569, "grad_norm": 1.882411241531372, "learning_rate": 1.1603260984351896e-05, "loss": 0.3637, "step": 17016 }, { "epoch": 0.4653522205206738, "grad_norm": 1.33650541305542, "learning_rate": 1.1602386731283367e-05, "loss": 0.4161, "step": 17017 }, { "epoch": 0.46537956683439075, "grad_norm": 1.1858097314834595, "learning_rate": 1.1601512465644564e-05, "loss": 0.4998, "step": 17018 }, { "epoch": 0.4654069131481076, "grad_norm": 1.5478562116622925, "learning_rate": 1.1600638187442348e-05, "loss": 0.3617, "step": 17019 }, { "epoch": 0.46543425946182454, "grad_norm": 1.1661487817764282, "learning_rate": 1.1599763896683573e-05, "loss": 0.5007, "step": 17020 }, { "epoch": 0.46546160577554146, "grad_norm": 1.4866834878921509, "learning_rate": 1.1598889593375104e-05, "loss": 0.4411, "step": 17021 }, { "epoch": 0.4654889520892584, "grad_norm": 1.4460080862045288, "learning_rate": 1.1598015277523795e-05, "loss": 0.5223, "step": 17022 }, { "epoch": 0.46551629840297526, "grad_norm": 1.3090167045593262, "learning_rate": 1.1597140949136505e-05, "loss": 0.4896, "step": 17023 }, { "epoch": 0.4655436447166922, "grad_norm": 1.5594128370285034, "learning_rate": 1.1596266608220095e-05, "loss": 0.5497, "step": 17024 }, { "epoch": 0.4655709910304091, "grad_norm": 1.276896357536316, "learning_rate": 1.1595392254781421e-05, "loss": 0.4893, "step": 17025 }, { "epoch": 0.46559833734412603, "grad_norm": 1.450087308883667, "learning_rate": 1.1594517888827346e-05, "loss": 0.5065, "step": 17026 }, { "epoch": 0.4656256836578429, "grad_norm": 1.6169328689575195, "learning_rate": 1.1593643510364725e-05, "loss": 0.5387, "step": 17027 }, { "epoch": 0.46565302997155983, "grad_norm": 1.4536365270614624, "learning_rate": 1.159276911940042e-05, "loss": 0.5239, "step": 17028 }, { "epoch": 0.46568037628527675, "grad_norm": 1.4492439031600952, "learning_rate": 1.159189471594129e-05, "loss": 0.5109, "step": 17029 }, { "epoch": 0.4657077225989937, "grad_norm": 1.6104350090026855, "learning_rate": 1.1591020299994192e-05, "loss": 0.3425, "step": 17030 }, { "epoch": 0.46573506891271055, "grad_norm": 1.2983399629592896, "learning_rate": 1.159014587156599e-05, "loss": 0.5019, "step": 17031 }, { "epoch": 0.4657624152264275, "grad_norm": 1.2108997106552124, "learning_rate": 1.1589271430663543e-05, "loss": 0.8165, "step": 17032 }, { "epoch": 0.4657897615401444, "grad_norm": 1.2250800132751465, "learning_rate": 1.1588396977293704e-05, "loss": 0.5093, "step": 17033 }, { "epoch": 0.4658171078538613, "grad_norm": 1.38322913646698, "learning_rate": 1.1587522511463338e-05, "loss": 0.53, "step": 17034 }, { "epoch": 0.4658444541675782, "grad_norm": 1.3683655261993408, "learning_rate": 1.1586648033179307e-05, "loss": 0.5314, "step": 17035 }, { "epoch": 0.4658718004812951, "grad_norm": 1.34495210647583, "learning_rate": 1.1585773542448466e-05, "loss": 0.5422, "step": 17036 }, { "epoch": 0.46589914679501204, "grad_norm": 1.2759968042373657, "learning_rate": 1.1584899039277682e-05, "loss": 0.541, "step": 17037 }, { "epoch": 0.46592649310872897, "grad_norm": 1.620141625404358, "learning_rate": 1.1584024523673807e-05, "loss": 0.5307, "step": 17038 }, { "epoch": 0.46595383942244584, "grad_norm": 1.2972116470336914, "learning_rate": 1.1583149995643707e-05, "loss": 0.5304, "step": 17039 }, { "epoch": 0.46598118573616276, "grad_norm": 1.1894636154174805, "learning_rate": 1.1582275455194243e-05, "loss": 0.4889, "step": 17040 }, { "epoch": 0.4660085320498797, "grad_norm": 1.2978289127349854, "learning_rate": 1.158140090233227e-05, "loss": 0.4987, "step": 17041 }, { "epoch": 0.4660358783635966, "grad_norm": 1.2895336151123047, "learning_rate": 1.1580526337064652e-05, "loss": 0.5157, "step": 17042 }, { "epoch": 0.4660632246773135, "grad_norm": 1.0833609104156494, "learning_rate": 1.1579651759398254e-05, "loss": 0.5077, "step": 17043 }, { "epoch": 0.4660905709910304, "grad_norm": 3.006871461868286, "learning_rate": 1.1578777169339928e-05, "loss": 0.772, "step": 17044 }, { "epoch": 0.46611791730474733, "grad_norm": 1.1928085088729858, "learning_rate": 1.1577902566896541e-05, "loss": 0.5122, "step": 17045 }, { "epoch": 0.46614526361846426, "grad_norm": 1.2552790641784668, "learning_rate": 1.1577027952074951e-05, "loss": 0.5155, "step": 17046 }, { "epoch": 0.4661726099321811, "grad_norm": 1.1575753688812256, "learning_rate": 1.1576153324882022e-05, "loss": 0.5099, "step": 17047 }, { "epoch": 0.46619995624589805, "grad_norm": 1.1974257230758667, "learning_rate": 1.1575278685324616e-05, "loss": 0.5254, "step": 17048 }, { "epoch": 0.466227302559615, "grad_norm": 1.320753574371338, "learning_rate": 1.157440403340959e-05, "loss": 0.5116, "step": 17049 }, { "epoch": 0.4662546488733319, "grad_norm": 1.1193691492080688, "learning_rate": 1.1573529369143805e-05, "loss": 0.5053, "step": 17050 }, { "epoch": 0.46628199518704877, "grad_norm": 1.4115431308746338, "learning_rate": 1.1572654692534129e-05, "loss": 0.4989, "step": 17051 }, { "epoch": 0.4663093415007657, "grad_norm": 1.4128475189208984, "learning_rate": 1.1571780003587417e-05, "loss": 0.8129, "step": 17052 }, { "epoch": 0.4663366878144826, "grad_norm": 2.021819829940796, "learning_rate": 1.1570905302310533e-05, "loss": 0.3983, "step": 17053 }, { "epoch": 0.46636403412819954, "grad_norm": 1.5675745010375977, "learning_rate": 1.157003058871034e-05, "loss": 0.3497, "step": 17054 }, { "epoch": 0.4663913804419164, "grad_norm": 2.1289703845977783, "learning_rate": 1.1569155862793697e-05, "loss": 0.5229, "step": 17055 }, { "epoch": 0.46641872675563334, "grad_norm": 1.4414044618606567, "learning_rate": 1.1568281124567471e-05, "loss": 0.535, "step": 17056 }, { "epoch": 0.46644607306935026, "grad_norm": 1.907256007194519, "learning_rate": 1.156740637403852e-05, "loss": 0.8115, "step": 17057 }, { "epoch": 0.4664734193830672, "grad_norm": 1.4815311431884766, "learning_rate": 1.1566531611213704e-05, "loss": 0.4945, "step": 17058 }, { "epoch": 0.46650076569678406, "grad_norm": 1.4292844533920288, "learning_rate": 1.1565656836099893e-05, "loss": 0.5229, "step": 17059 }, { "epoch": 0.466528112010501, "grad_norm": 3.8138909339904785, "learning_rate": 1.1564782048703943e-05, "loss": 0.4919, "step": 17060 }, { "epoch": 0.4665554583242179, "grad_norm": 1.6945549249649048, "learning_rate": 1.1563907249032718e-05, "loss": 0.8238, "step": 17061 }, { "epoch": 0.46658280463793483, "grad_norm": 1.6020333766937256, "learning_rate": 1.156303243709308e-05, "loss": 0.4713, "step": 17062 }, { "epoch": 0.4666101509516517, "grad_norm": 1.375400185585022, "learning_rate": 1.1562157612891892e-05, "loss": 0.4966, "step": 17063 }, { "epoch": 0.46663749726536863, "grad_norm": 1.3810511827468872, "learning_rate": 1.1561282776436019e-05, "loss": 0.5357, "step": 17064 }, { "epoch": 0.46666484357908555, "grad_norm": 2.9382917881011963, "learning_rate": 1.156040792773232e-05, "loss": 0.4928, "step": 17065 }, { "epoch": 0.4666921898928024, "grad_norm": 1.1523679494857788, "learning_rate": 1.1559533066787662e-05, "loss": 0.4861, "step": 17066 }, { "epoch": 0.46671953620651935, "grad_norm": 2.1608262062072754, "learning_rate": 1.1558658193608908e-05, "loss": 0.513, "step": 17067 }, { "epoch": 0.4667468825202363, "grad_norm": 1.5810999870300293, "learning_rate": 1.1557783308202914e-05, "loss": 0.5099, "step": 17068 }, { "epoch": 0.4667742288339532, "grad_norm": 1.5033055543899536, "learning_rate": 1.155690841057655e-05, "loss": 0.4027, "step": 17069 }, { "epoch": 0.46680157514767007, "grad_norm": 2.207404136657715, "learning_rate": 1.1556033500736679e-05, "loss": 0.527, "step": 17070 }, { "epoch": 0.466828921461387, "grad_norm": 1.6870633363723755, "learning_rate": 1.1555158578690163e-05, "loss": 0.5157, "step": 17071 }, { "epoch": 0.4668562677751039, "grad_norm": 1.6294423341751099, "learning_rate": 1.1554283644443865e-05, "loss": 0.5194, "step": 17072 }, { "epoch": 0.46688361408882084, "grad_norm": 1.371145248413086, "learning_rate": 1.1553408698004646e-05, "loss": 0.4816, "step": 17073 }, { "epoch": 0.4669109604025377, "grad_norm": 1.3005151748657227, "learning_rate": 1.1552533739379376e-05, "loss": 0.5491, "step": 17074 }, { "epoch": 0.46693830671625464, "grad_norm": 1.6327821016311646, "learning_rate": 1.1551658768574918e-05, "loss": 0.4112, "step": 17075 }, { "epoch": 0.46696565302997156, "grad_norm": 1.2138105630874634, "learning_rate": 1.155078378559813e-05, "loss": 0.8029, "step": 17076 }, { "epoch": 0.4669929993436885, "grad_norm": 1.489420771598816, "learning_rate": 1.1549908790455879e-05, "loss": 0.535, "step": 17077 }, { "epoch": 0.46702034565740536, "grad_norm": 3.720647096633911, "learning_rate": 1.1549033783155034e-05, "loss": 0.4085, "step": 17078 }, { "epoch": 0.4670476919711223, "grad_norm": 1.5196770429611206, "learning_rate": 1.1548158763702453e-05, "loss": 0.5183, "step": 17079 }, { "epoch": 0.4670750382848392, "grad_norm": 1.4537209272384644, "learning_rate": 1.1547283732105002e-05, "loss": 0.4656, "step": 17080 }, { "epoch": 0.46710238459855613, "grad_norm": 1.528106927871704, "learning_rate": 1.1546408688369543e-05, "loss": 0.5176, "step": 17081 }, { "epoch": 0.467129730912273, "grad_norm": 1.3620529174804688, "learning_rate": 1.1545533632502946e-05, "loss": 0.495, "step": 17082 }, { "epoch": 0.4671570772259899, "grad_norm": 1.2340832948684692, "learning_rate": 1.1544658564512075e-05, "loss": 0.4897, "step": 17083 }, { "epoch": 0.46718442353970685, "grad_norm": 1.416021704673767, "learning_rate": 1.154378348440379e-05, "loss": 0.5073, "step": 17084 }, { "epoch": 0.4672117698534238, "grad_norm": 1.8129562139511108, "learning_rate": 1.1542908392184955e-05, "loss": 0.5208, "step": 17085 }, { "epoch": 0.46723911616714064, "grad_norm": 1.3387048244476318, "learning_rate": 1.1542033287862442e-05, "loss": 0.5144, "step": 17086 }, { "epoch": 0.46726646248085757, "grad_norm": 2.7829887866973877, "learning_rate": 1.154115817144311e-05, "loss": 0.7925, "step": 17087 }, { "epoch": 0.4672938087945745, "grad_norm": 1.2864607572555542, "learning_rate": 1.1540283042933824e-05, "loss": 0.5013, "step": 17088 }, { "epoch": 0.4673211551082914, "grad_norm": 1.2118268013000488, "learning_rate": 1.1539407902341455e-05, "loss": 0.7991, "step": 17089 }, { "epoch": 0.4673485014220083, "grad_norm": 1.5622988939285278, "learning_rate": 1.1538532749672862e-05, "loss": 0.4204, "step": 17090 }, { "epoch": 0.4673758477357252, "grad_norm": 1.7146133184432983, "learning_rate": 1.1537657584934911e-05, "loss": 0.5294, "step": 17091 }, { "epoch": 0.46740319404944214, "grad_norm": 1.6857540607452393, "learning_rate": 1.153678240813447e-05, "loss": 0.5083, "step": 17092 }, { "epoch": 0.46743054036315906, "grad_norm": 1.3419528007507324, "learning_rate": 1.1535907219278404e-05, "loss": 0.4837, "step": 17093 }, { "epoch": 0.46745788667687593, "grad_norm": 1.657960057258606, "learning_rate": 1.153503201837358e-05, "loss": 0.5361, "step": 17094 }, { "epoch": 0.46748523299059286, "grad_norm": 2.756058931350708, "learning_rate": 1.153415680542686e-05, "loss": 0.404, "step": 17095 }, { "epoch": 0.4675125793043098, "grad_norm": 7.73193883895874, "learning_rate": 1.153328158044511e-05, "loss": 0.7911, "step": 17096 }, { "epoch": 0.4675399256180267, "grad_norm": 1.8842830657958984, "learning_rate": 1.15324063434352e-05, "loss": 0.5146, "step": 17097 }, { "epoch": 0.4675672719317436, "grad_norm": 1.2541378736495972, "learning_rate": 1.1531531094403991e-05, "loss": 0.8022, "step": 17098 }, { "epoch": 0.4675946182454605, "grad_norm": 2.019841194152832, "learning_rate": 1.1530655833358356e-05, "loss": 0.523, "step": 17099 }, { "epoch": 0.4676219645591774, "grad_norm": 1.5345280170440674, "learning_rate": 1.1529780560305152e-05, "loss": 0.5401, "step": 17100 }, { "epoch": 0.46764931087289435, "grad_norm": 2.2522175312042236, "learning_rate": 1.1528905275251255e-05, "loss": 0.7833, "step": 17101 }, { "epoch": 0.4676766571866112, "grad_norm": 1.3250137567520142, "learning_rate": 1.1528029978203524e-05, "loss": 0.8602, "step": 17102 }, { "epoch": 0.46770400350032815, "grad_norm": 1.2515944242477417, "learning_rate": 1.1527154669168825e-05, "loss": 0.5242, "step": 17103 }, { "epoch": 0.46773134981404507, "grad_norm": 1.3003300428390503, "learning_rate": 1.152627934815403e-05, "loss": 0.5326, "step": 17104 }, { "epoch": 0.467758696127762, "grad_norm": 1.390653371810913, "learning_rate": 1.1525404015166004e-05, "loss": 0.5064, "step": 17105 }, { "epoch": 0.46778604244147887, "grad_norm": 1.2872905731201172, "learning_rate": 1.152452867021161e-05, "loss": 0.8189, "step": 17106 }, { "epoch": 0.4678133887551958, "grad_norm": 1.2504596710205078, "learning_rate": 1.1523653313297718e-05, "loss": 0.5136, "step": 17107 }, { "epoch": 0.4678407350689127, "grad_norm": 1.3680682182312012, "learning_rate": 1.1522777944431199e-05, "loss": 0.5194, "step": 17108 }, { "epoch": 0.46786808138262964, "grad_norm": 1.6486366987228394, "learning_rate": 1.1521902563618909e-05, "loss": 0.5068, "step": 17109 }, { "epoch": 0.4678954276963465, "grad_norm": 1.4334602355957031, "learning_rate": 1.1521027170867727e-05, "loss": 0.5197, "step": 17110 }, { "epoch": 0.46792277401006344, "grad_norm": 2.341966152191162, "learning_rate": 1.1520151766184511e-05, "loss": 0.5478, "step": 17111 }, { "epoch": 0.46795012032378036, "grad_norm": 1.1981743574142456, "learning_rate": 1.1519276349576134e-05, "loss": 0.386, "step": 17112 }, { "epoch": 0.4679774666374973, "grad_norm": 1.8690085411071777, "learning_rate": 1.1518400921049464e-05, "loss": 0.493, "step": 17113 }, { "epoch": 0.46800481295121416, "grad_norm": 1.4847981929779053, "learning_rate": 1.1517525480611362e-05, "loss": 0.8221, "step": 17114 }, { "epoch": 0.4680321592649311, "grad_norm": 1.293759822845459, "learning_rate": 1.1516650028268704e-05, "loss": 0.4949, "step": 17115 }, { "epoch": 0.468059505578648, "grad_norm": 1.3929569721221924, "learning_rate": 1.1515774564028348e-05, "loss": 0.5221, "step": 17116 }, { "epoch": 0.46808685189236493, "grad_norm": 1.6283080577850342, "learning_rate": 1.1514899087897169e-05, "loss": 0.5009, "step": 17117 }, { "epoch": 0.4681141982060818, "grad_norm": 1.228959083557129, "learning_rate": 1.1514023599882037e-05, "loss": 0.5054, "step": 17118 }, { "epoch": 0.4681415445197987, "grad_norm": 1.2024708986282349, "learning_rate": 1.1513148099989813e-05, "loss": 0.4661, "step": 17119 }, { "epoch": 0.46816889083351565, "grad_norm": 1.3447602987289429, "learning_rate": 1.1512272588227366e-05, "loss": 0.4806, "step": 17120 }, { "epoch": 0.4681962371472326, "grad_norm": 1.8005751371383667, "learning_rate": 1.1511397064601571e-05, "loss": 0.5143, "step": 17121 }, { "epoch": 0.46822358346094944, "grad_norm": 1.3668861389160156, "learning_rate": 1.1510521529119285e-05, "loss": 0.5316, "step": 17122 }, { "epoch": 0.46825092977466637, "grad_norm": 1.239786148071289, "learning_rate": 1.1509645981787385e-05, "loss": 0.5096, "step": 17123 }, { "epoch": 0.4682782760883833, "grad_norm": 1.939420461654663, "learning_rate": 1.150877042261274e-05, "loss": 0.5128, "step": 17124 }, { "epoch": 0.4683056224021002, "grad_norm": 1.3072786331176758, "learning_rate": 1.1507894851602213e-05, "loss": 0.4999, "step": 17125 }, { "epoch": 0.4683329687158171, "grad_norm": 1.3692140579223633, "learning_rate": 1.1507019268762679e-05, "loss": 0.516, "step": 17126 }, { "epoch": 0.468360315029534, "grad_norm": 1.6801915168762207, "learning_rate": 1.1506143674100997e-05, "loss": 0.4293, "step": 17127 }, { "epoch": 0.46838766134325094, "grad_norm": 1.5566304922103882, "learning_rate": 1.1505268067624045e-05, "loss": 0.5231, "step": 17128 }, { "epoch": 0.46841500765696786, "grad_norm": 1.532023310661316, "learning_rate": 1.1504392449338689e-05, "loss": 0.5212, "step": 17129 }, { "epoch": 0.46844235397068473, "grad_norm": 1.3935925960540771, "learning_rate": 1.1503516819251796e-05, "loss": 0.4246, "step": 17130 }, { "epoch": 0.46846970028440166, "grad_norm": 1.4718291759490967, "learning_rate": 1.1502641177370237e-05, "loss": 0.5083, "step": 17131 }, { "epoch": 0.4684970465981186, "grad_norm": 1.13735830783844, "learning_rate": 1.1501765523700883e-05, "loss": 0.4909, "step": 17132 }, { "epoch": 0.4685243929118355, "grad_norm": 2.5248286724090576, "learning_rate": 1.15008898582506e-05, "loss": 0.4939, "step": 17133 }, { "epoch": 0.4685517392255524, "grad_norm": 1.449758529663086, "learning_rate": 1.1500014181026259e-05, "loss": 0.4116, "step": 17134 }, { "epoch": 0.4685790855392693, "grad_norm": 1.191571593284607, "learning_rate": 1.1499138492034724e-05, "loss": 0.516, "step": 17135 }, { "epoch": 0.4686064318529862, "grad_norm": 1.2444044351577759, "learning_rate": 1.1498262791282872e-05, "loss": 0.5183, "step": 17136 }, { "epoch": 0.46863377816670315, "grad_norm": 1.2618341445922852, "learning_rate": 1.1497387078777575e-05, "loss": 0.5155, "step": 17137 }, { "epoch": 0.46866112448042, "grad_norm": 1.4567822217941284, "learning_rate": 1.1496511354525695e-05, "loss": 0.5146, "step": 17138 }, { "epoch": 0.46868847079413695, "grad_norm": 1.682647466659546, "learning_rate": 1.1495635618534102e-05, "loss": 0.5182, "step": 17139 }, { "epoch": 0.46871581710785387, "grad_norm": 1.376654028892517, "learning_rate": 1.1494759870809674e-05, "loss": 0.5268, "step": 17140 }, { "epoch": 0.4687431634215708, "grad_norm": 1.3153364658355713, "learning_rate": 1.1493884111359269e-05, "loss": 0.5179, "step": 17141 }, { "epoch": 0.46877050973528767, "grad_norm": 1.427891731262207, "learning_rate": 1.1493008340189763e-05, "loss": 0.5168, "step": 17142 }, { "epoch": 0.4687978560490046, "grad_norm": 1.2106026411056519, "learning_rate": 1.1492132557308034e-05, "loss": 0.5064, "step": 17143 }, { "epoch": 0.4688252023627215, "grad_norm": 1.37107253074646, "learning_rate": 1.1491256762720939e-05, "loss": 0.8545, "step": 17144 }, { "epoch": 0.46885254867643844, "grad_norm": 1.2690218687057495, "learning_rate": 1.1490380956435358e-05, "loss": 0.522, "step": 17145 }, { "epoch": 0.4688798949901553, "grad_norm": 1.2684502601623535, "learning_rate": 1.1489505138458155e-05, "loss": 0.4837, "step": 17146 }, { "epoch": 0.46890724130387224, "grad_norm": 1.37716805934906, "learning_rate": 1.1488629308796205e-05, "loss": 0.499, "step": 17147 }, { "epoch": 0.46893458761758916, "grad_norm": 1.3421978950500488, "learning_rate": 1.1487753467456377e-05, "loss": 0.47, "step": 17148 }, { "epoch": 0.4689619339313061, "grad_norm": 1.276577353477478, "learning_rate": 1.1486877614445539e-05, "loss": 0.5075, "step": 17149 }, { "epoch": 0.46898928024502295, "grad_norm": 2.192026138305664, "learning_rate": 1.148600174977057e-05, "loss": 0.5012, "step": 17150 }, { "epoch": 0.4690166265587399, "grad_norm": 1.1765525341033936, "learning_rate": 1.148512587343833e-05, "loss": 0.529, "step": 17151 }, { "epoch": 0.4690439728724568, "grad_norm": 1.1785950660705566, "learning_rate": 1.1484249985455698e-05, "loss": 0.4987, "step": 17152 }, { "epoch": 0.46907131918617373, "grad_norm": 1.4693034887313843, "learning_rate": 1.1483374085829543e-05, "loss": 0.497, "step": 17153 }, { "epoch": 0.4690986654998906, "grad_norm": 1.220751404762268, "learning_rate": 1.1482498174566736e-05, "loss": 0.4727, "step": 17154 }, { "epoch": 0.4691260118136075, "grad_norm": 1.2178373336791992, "learning_rate": 1.1481622251674147e-05, "loss": 0.4872, "step": 17155 }, { "epoch": 0.46915335812732445, "grad_norm": 1.169869303703308, "learning_rate": 1.1480746317158649e-05, "loss": 0.3886, "step": 17156 }, { "epoch": 0.4691807044410414, "grad_norm": 1.145033836364746, "learning_rate": 1.1479870371027114e-05, "loss": 0.4891, "step": 17157 }, { "epoch": 0.46920805075475824, "grad_norm": 1.5869675874710083, "learning_rate": 1.1478994413286408e-05, "loss": 0.4151, "step": 17158 }, { "epoch": 0.46923539706847517, "grad_norm": 1.2767932415008545, "learning_rate": 1.147811844394341e-05, "loss": 0.8278, "step": 17159 }, { "epoch": 0.4692627433821921, "grad_norm": 1.64867103099823, "learning_rate": 1.147724246300499e-05, "loss": 0.422, "step": 17160 }, { "epoch": 0.469290089695909, "grad_norm": 1.5198560953140259, "learning_rate": 1.1476366470478016e-05, "loss": 0.51, "step": 17161 }, { "epoch": 0.4693174360096259, "grad_norm": 1.191476821899414, "learning_rate": 1.1475490466369366e-05, "loss": 0.53, "step": 17162 }, { "epoch": 0.4693447823233428, "grad_norm": 1.3735597133636475, "learning_rate": 1.1474614450685904e-05, "loss": 0.5351, "step": 17163 }, { "epoch": 0.46937212863705974, "grad_norm": 1.2844793796539307, "learning_rate": 1.147373842343451e-05, "loss": 0.5131, "step": 17164 }, { "epoch": 0.46939947495077666, "grad_norm": 1.1907660961151123, "learning_rate": 1.147286238462205e-05, "loss": 0.5137, "step": 17165 }, { "epoch": 0.46942682126449353, "grad_norm": 1.2300045490264893, "learning_rate": 1.1471986334255401e-05, "loss": 0.5003, "step": 17166 }, { "epoch": 0.46945416757821046, "grad_norm": 1.0856304168701172, "learning_rate": 1.1471110272341435e-05, "loss": 0.5152, "step": 17167 }, { "epoch": 0.4694815138919274, "grad_norm": 1.3111501932144165, "learning_rate": 1.1470234198887021e-05, "loss": 0.5134, "step": 17168 }, { "epoch": 0.46950886020564425, "grad_norm": 1.2080011367797852, "learning_rate": 1.1469358113899034e-05, "loss": 0.487, "step": 17169 }, { "epoch": 0.4695362065193612, "grad_norm": 1.6400359869003296, "learning_rate": 1.1468482017384346e-05, "loss": 0.5251, "step": 17170 }, { "epoch": 0.4695635528330781, "grad_norm": 1.937477707862854, "learning_rate": 1.1467605909349827e-05, "loss": 0.398, "step": 17171 }, { "epoch": 0.469590899146795, "grad_norm": 1.3620219230651855, "learning_rate": 1.1466729789802357e-05, "loss": 0.403, "step": 17172 }, { "epoch": 0.4696182454605119, "grad_norm": 1.2436367273330688, "learning_rate": 1.1465853658748805e-05, "loss": 0.521, "step": 17173 }, { "epoch": 0.4696455917742288, "grad_norm": 1.2221033573150635, "learning_rate": 1.146497751619604e-05, "loss": 0.5135, "step": 17174 }, { "epoch": 0.46967293808794575, "grad_norm": 1.5818349123001099, "learning_rate": 1.1464101362150941e-05, "loss": 0.5007, "step": 17175 }, { "epoch": 0.46970028440166267, "grad_norm": 1.3573027849197388, "learning_rate": 1.1463225196620378e-05, "loss": 0.537, "step": 17176 }, { "epoch": 0.46972763071537954, "grad_norm": 1.1967829465866089, "learning_rate": 1.1462349019611223e-05, "loss": 0.5182, "step": 17177 }, { "epoch": 0.46975497702909647, "grad_norm": 1.2340545654296875, "learning_rate": 1.1461472831130356e-05, "loss": 0.5151, "step": 17178 }, { "epoch": 0.4697823233428134, "grad_norm": 1.1549333333969116, "learning_rate": 1.1460596631184642e-05, "loss": 0.4803, "step": 17179 }, { "epoch": 0.4698096696565303, "grad_norm": 2.2633256912231445, "learning_rate": 1.1459720419780961e-05, "loss": 0.3874, "step": 17180 }, { "epoch": 0.4698370159702472, "grad_norm": 1.884138822555542, "learning_rate": 1.1458844196926182e-05, "loss": 0.4213, "step": 17181 }, { "epoch": 0.4698643622839641, "grad_norm": 1.3497517108917236, "learning_rate": 1.145796796262718e-05, "loss": 0.518, "step": 17182 }, { "epoch": 0.46989170859768103, "grad_norm": 2.054201602935791, "learning_rate": 1.1457091716890834e-05, "loss": 0.467, "step": 17183 }, { "epoch": 0.46991905491139796, "grad_norm": 1.2042843103408813, "learning_rate": 1.1456215459724012e-05, "loss": 0.5063, "step": 17184 }, { "epoch": 0.46994640122511483, "grad_norm": 1.41567063331604, "learning_rate": 1.1455339191133588e-05, "loss": 0.4133, "step": 17185 }, { "epoch": 0.46997374753883175, "grad_norm": 1.4100029468536377, "learning_rate": 1.1454462911126437e-05, "loss": 0.4977, "step": 17186 }, { "epoch": 0.4700010938525487, "grad_norm": 1.517450213432312, "learning_rate": 1.1453586619709436e-05, "loss": 0.5218, "step": 17187 }, { "epoch": 0.4700284401662656, "grad_norm": 1.4216593503952026, "learning_rate": 1.1452710316889458e-05, "loss": 0.5101, "step": 17188 }, { "epoch": 0.4700557864799825, "grad_norm": 3.2345354557037354, "learning_rate": 1.1451834002673374e-05, "loss": 0.4953, "step": 17189 }, { "epoch": 0.4700831327936994, "grad_norm": 1.3143868446350098, "learning_rate": 1.145095767706806e-05, "loss": 0.5058, "step": 17190 }, { "epoch": 0.4701104791074163, "grad_norm": 1.291373372077942, "learning_rate": 1.1450081340080395e-05, "loss": 0.513, "step": 17191 }, { "epoch": 0.47013782542113325, "grad_norm": 1.237820029258728, "learning_rate": 1.1449204991717247e-05, "loss": 0.501, "step": 17192 }, { "epoch": 0.4701651717348501, "grad_norm": 1.2569146156311035, "learning_rate": 1.1448328631985492e-05, "loss": 0.5165, "step": 17193 }, { "epoch": 0.47019251804856704, "grad_norm": 1.5661060810089111, "learning_rate": 1.1447452260892011e-05, "loss": 0.517, "step": 17194 }, { "epoch": 0.47021986436228397, "grad_norm": 1.5462932586669922, "learning_rate": 1.1446575878443671e-05, "loss": 0.5296, "step": 17195 }, { "epoch": 0.4702472106760009, "grad_norm": 1.3153111934661865, "learning_rate": 1.144569948464735e-05, "loss": 0.4964, "step": 17196 }, { "epoch": 0.47027455698971776, "grad_norm": 1.4874284267425537, "learning_rate": 1.1444823079509928e-05, "loss": 0.8196, "step": 17197 }, { "epoch": 0.4703019033034347, "grad_norm": 1.4935522079467773, "learning_rate": 1.144394666303827e-05, "loss": 0.4633, "step": 17198 }, { "epoch": 0.4703292496171516, "grad_norm": 7.393669605255127, "learning_rate": 1.144307023523926e-05, "loss": 0.5091, "step": 17199 }, { "epoch": 0.47035659593086854, "grad_norm": 1.6214560270309448, "learning_rate": 1.144219379611977e-05, "loss": 0.4676, "step": 17200 }, { "epoch": 0.4703839422445854, "grad_norm": 1.4876445531845093, "learning_rate": 1.1441317345686673e-05, "loss": 0.4969, "step": 17201 }, { "epoch": 0.47041128855830233, "grad_norm": 1.4643733501434326, "learning_rate": 1.144044088394685e-05, "loss": 0.486, "step": 17202 }, { "epoch": 0.47043863487201926, "grad_norm": 1.449877142906189, "learning_rate": 1.1439564410907171e-05, "loss": 0.5315, "step": 17203 }, { "epoch": 0.4704659811857362, "grad_norm": 1.4882804155349731, "learning_rate": 1.1438687926574519e-05, "loss": 0.4893, "step": 17204 }, { "epoch": 0.47049332749945305, "grad_norm": 1.0178096294403076, "learning_rate": 1.1437811430955762e-05, "loss": 0.544, "step": 17205 }, { "epoch": 0.47052067381317, "grad_norm": 1.4454219341278076, "learning_rate": 1.1436934924057777e-05, "loss": 0.4929, "step": 17206 }, { "epoch": 0.4705480201268869, "grad_norm": 1.3415749073028564, "learning_rate": 1.1436058405887445e-05, "loss": 0.8221, "step": 17207 }, { "epoch": 0.4705753664406038, "grad_norm": 1.5050796270370483, "learning_rate": 1.1435181876451638e-05, "loss": 0.5268, "step": 17208 }, { "epoch": 0.4706027127543207, "grad_norm": 1.045532464981079, "learning_rate": 1.1434305335757232e-05, "loss": 0.4963, "step": 17209 }, { "epoch": 0.4706300590680376, "grad_norm": 1.7551120519638062, "learning_rate": 1.1433428783811107e-05, "loss": 0.5466, "step": 17210 }, { "epoch": 0.47065740538175455, "grad_norm": 1.1779690980911255, "learning_rate": 1.1432552220620134e-05, "loss": 0.3701, "step": 17211 }, { "epoch": 0.47068475169547147, "grad_norm": 1.6260744333267212, "learning_rate": 1.143167564619119e-05, "loss": 0.5776, "step": 17212 }, { "epoch": 0.47071209800918834, "grad_norm": 3.6252448558807373, "learning_rate": 1.143079906053116e-05, "loss": 0.385, "step": 17213 }, { "epoch": 0.47073944432290526, "grad_norm": 1.2384467124938965, "learning_rate": 1.142992246364691e-05, "loss": 0.5032, "step": 17214 }, { "epoch": 0.4707667906366222, "grad_norm": 2.566441535949707, "learning_rate": 1.1429045855545322e-05, "loss": 0.3615, "step": 17215 }, { "epoch": 0.4707941369503391, "grad_norm": 1.7329827547073364, "learning_rate": 1.142816923623327e-05, "loss": 0.5142, "step": 17216 }, { "epoch": 0.470821483264056, "grad_norm": 1.433109164237976, "learning_rate": 1.1427292605717631e-05, "loss": 0.5216, "step": 17217 }, { "epoch": 0.4708488295777729, "grad_norm": 2.722233772277832, "learning_rate": 1.1426415964005284e-05, "loss": 0.5249, "step": 17218 }, { "epoch": 0.47087617589148983, "grad_norm": 1.2567023038864136, "learning_rate": 1.1425539311103107e-05, "loss": 0.5448, "step": 17219 }, { "epoch": 0.47090352220520676, "grad_norm": 1.305912971496582, "learning_rate": 1.1424662647017972e-05, "loss": 0.5342, "step": 17220 }, { "epoch": 0.47093086851892363, "grad_norm": 1.134479284286499, "learning_rate": 1.1423785971756764e-05, "loss": 0.5236, "step": 17221 }, { "epoch": 0.47095821483264055, "grad_norm": 1.2734222412109375, "learning_rate": 1.1422909285326353e-05, "loss": 0.5323, "step": 17222 }, { "epoch": 0.4709855611463575, "grad_norm": 1.4307280778884888, "learning_rate": 1.142203258773362e-05, "loss": 0.3612, "step": 17223 }, { "epoch": 0.4710129074600744, "grad_norm": 1.5643800497055054, "learning_rate": 1.1421155878985441e-05, "loss": 0.3595, "step": 17224 }, { "epoch": 0.4710402537737913, "grad_norm": 1.2074675559997559, "learning_rate": 1.1420279159088694e-05, "loss": 0.3901, "step": 17225 }, { "epoch": 0.4710676000875082, "grad_norm": 1.1836532354354858, "learning_rate": 1.1419402428050256e-05, "loss": 0.8453, "step": 17226 }, { "epoch": 0.4710949464012251, "grad_norm": 1.727925181388855, "learning_rate": 1.1418525685877009e-05, "loss": 0.5161, "step": 17227 }, { "epoch": 0.47112229271494205, "grad_norm": 1.4132014513015747, "learning_rate": 1.1417648932575825e-05, "loss": 0.5295, "step": 17228 }, { "epoch": 0.4711496390286589, "grad_norm": 1.4406650066375732, "learning_rate": 1.1416772168153583e-05, "loss": 0.5135, "step": 17229 }, { "epoch": 0.47117698534237584, "grad_norm": 1.3074135780334473, "learning_rate": 1.1415895392617162e-05, "loss": 0.4896, "step": 17230 }, { "epoch": 0.47120433165609277, "grad_norm": 1.260682225227356, "learning_rate": 1.141501860597344e-05, "loss": 0.5159, "step": 17231 }, { "epoch": 0.4712316779698097, "grad_norm": 1.199932336807251, "learning_rate": 1.1414141808229297e-05, "loss": 0.5096, "step": 17232 }, { "epoch": 0.47125902428352656, "grad_norm": 1.1735903024673462, "learning_rate": 1.1413264999391607e-05, "loss": 0.522, "step": 17233 }, { "epoch": 0.4712863705972435, "grad_norm": 1.188634991645813, "learning_rate": 1.1412388179467255e-05, "loss": 0.5147, "step": 17234 }, { "epoch": 0.4713137169109604, "grad_norm": 1.2564069032669067, "learning_rate": 1.1411511348463111e-05, "loss": 0.3777, "step": 17235 }, { "epoch": 0.47134106322467734, "grad_norm": 1.4247936010360718, "learning_rate": 1.1410634506386057e-05, "loss": 0.5254, "step": 17236 }, { "epoch": 0.4713684095383942, "grad_norm": 1.371614933013916, "learning_rate": 1.1409757653242977e-05, "loss": 0.5401, "step": 17237 }, { "epoch": 0.47139575585211113, "grad_norm": 1.1695176362991333, "learning_rate": 1.1408880789040742e-05, "loss": 0.4014, "step": 17238 }, { "epoch": 0.47142310216582806, "grad_norm": 1.2818598747253418, "learning_rate": 1.1408003913786235e-05, "loss": 0.5379, "step": 17239 }, { "epoch": 0.471450448479545, "grad_norm": 1.8300628662109375, "learning_rate": 1.1407127027486331e-05, "loss": 0.4385, "step": 17240 }, { "epoch": 0.47147779479326185, "grad_norm": 1.27519953250885, "learning_rate": 1.1406250130147913e-05, "loss": 0.7853, "step": 17241 }, { "epoch": 0.4715051411069788, "grad_norm": 1.4937524795532227, "learning_rate": 1.140537322177786e-05, "loss": 0.5051, "step": 17242 }, { "epoch": 0.4715324874206957, "grad_norm": 1.5785068273544312, "learning_rate": 1.140449630238305e-05, "loss": 0.5399, "step": 17243 }, { "epoch": 0.4715598337344126, "grad_norm": 1.3323229551315308, "learning_rate": 1.1403619371970359e-05, "loss": 0.5407, "step": 17244 }, { "epoch": 0.4715871800481295, "grad_norm": 1.4411593675613403, "learning_rate": 1.1402742430546671e-05, "loss": 0.4908, "step": 17245 }, { "epoch": 0.4716145263618464, "grad_norm": 1.2828691005706787, "learning_rate": 1.1401865478118861e-05, "loss": 0.4792, "step": 17246 }, { "epoch": 0.47164187267556335, "grad_norm": 2.1445775032043457, "learning_rate": 1.1400988514693811e-05, "loss": 0.5269, "step": 17247 }, { "epoch": 0.47166921898928027, "grad_norm": 1.1734726428985596, "learning_rate": 1.1400111540278405e-05, "loss": 0.531, "step": 17248 }, { "epoch": 0.47169656530299714, "grad_norm": 1.4456567764282227, "learning_rate": 1.1399234554879512e-05, "loss": 0.5029, "step": 17249 }, { "epoch": 0.47172391161671406, "grad_norm": 1.2903993129730225, "learning_rate": 1.1398357558504021e-05, "loss": 0.3667, "step": 17250 }, { "epoch": 0.471751257930431, "grad_norm": 1.3148701190948486, "learning_rate": 1.1397480551158809e-05, "loss": 0.4743, "step": 17251 }, { "epoch": 0.4717786042441479, "grad_norm": 1.668138027191162, "learning_rate": 1.1396603532850751e-05, "loss": 0.5183, "step": 17252 }, { "epoch": 0.4718059505578648, "grad_norm": 5.320466995239258, "learning_rate": 1.1395726503586736e-05, "loss": 0.4894, "step": 17253 }, { "epoch": 0.4718332968715817, "grad_norm": 1.4640663862228394, "learning_rate": 1.1394849463373638e-05, "loss": 0.4923, "step": 17254 }, { "epoch": 0.47186064318529863, "grad_norm": 1.240109920501709, "learning_rate": 1.139397241221834e-05, "loss": 0.4901, "step": 17255 }, { "epoch": 0.47188798949901556, "grad_norm": 1.4092885255813599, "learning_rate": 1.1393095350127719e-05, "loss": 0.551, "step": 17256 }, { "epoch": 0.47191533581273243, "grad_norm": 1.2282800674438477, "learning_rate": 1.1392218277108655e-05, "loss": 0.5204, "step": 17257 }, { "epoch": 0.47194268212644935, "grad_norm": 1.2656092643737793, "learning_rate": 1.1391341193168035e-05, "loss": 0.523, "step": 17258 }, { "epoch": 0.4719700284401663, "grad_norm": 1.1654587984085083, "learning_rate": 1.1390464098312733e-05, "loss": 0.478, "step": 17259 }, { "epoch": 0.4719973747538832, "grad_norm": 1.2821770906448364, "learning_rate": 1.138958699254963e-05, "loss": 0.4929, "step": 17260 }, { "epoch": 0.4720247210676001, "grad_norm": 1.35043203830719, "learning_rate": 1.1388709875885608e-05, "loss": 0.4226, "step": 17261 }, { "epoch": 0.472052067381317, "grad_norm": 1.4731333255767822, "learning_rate": 1.1387832748327551e-05, "loss": 0.7723, "step": 17262 }, { "epoch": 0.4720794136950339, "grad_norm": 1.3408328294754028, "learning_rate": 1.1386955609882333e-05, "loss": 0.4984, "step": 17263 }, { "epoch": 0.47210676000875085, "grad_norm": 1.3702524900436401, "learning_rate": 1.1386078460556842e-05, "loss": 0.7961, "step": 17264 }, { "epoch": 0.4721341063224677, "grad_norm": 1.6134037971496582, "learning_rate": 1.1385201300357953e-05, "loss": 0.5278, "step": 17265 }, { "epoch": 0.47216145263618464, "grad_norm": 1.3242350816726685, "learning_rate": 1.138432412929255e-05, "loss": 0.4, "step": 17266 }, { "epoch": 0.47218879894990157, "grad_norm": 1.4895604848861694, "learning_rate": 1.1383446947367514e-05, "loss": 0.3965, "step": 17267 }, { "epoch": 0.4722161452636185, "grad_norm": 1.1973066329956055, "learning_rate": 1.1382569754589725e-05, "loss": 0.538, "step": 17268 }, { "epoch": 0.47224349157733536, "grad_norm": 1.2703837156295776, "learning_rate": 1.1381692550966068e-05, "loss": 0.529, "step": 17269 }, { "epoch": 0.4722708378910523, "grad_norm": 1.2753124237060547, "learning_rate": 1.1380815336503419e-05, "loss": 0.5271, "step": 17270 }, { "epoch": 0.4722981842047692, "grad_norm": 1.154247522354126, "learning_rate": 1.1379938111208663e-05, "loss": 0.4925, "step": 17271 }, { "epoch": 0.4723255305184861, "grad_norm": 1.346386432647705, "learning_rate": 1.1379060875088684e-05, "loss": 0.4805, "step": 17272 }, { "epoch": 0.472352876832203, "grad_norm": 1.8569878339767456, "learning_rate": 1.1378183628150356e-05, "loss": 0.5111, "step": 17273 }, { "epoch": 0.47238022314591993, "grad_norm": 1.2534900903701782, "learning_rate": 1.1377306370400567e-05, "loss": 0.501, "step": 17274 }, { "epoch": 0.47240756945963686, "grad_norm": 1.1598515510559082, "learning_rate": 1.1376429101846197e-05, "loss": 0.5167, "step": 17275 }, { "epoch": 0.4724349157733537, "grad_norm": 1.2625538110733032, "learning_rate": 1.137555182249413e-05, "loss": 0.4925, "step": 17276 }, { "epoch": 0.47246226208707065, "grad_norm": 1.5828511714935303, "learning_rate": 1.1374674532351244e-05, "loss": 0.5018, "step": 17277 }, { "epoch": 0.4724896084007876, "grad_norm": 1.692527174949646, "learning_rate": 1.1373797231424425e-05, "loss": 0.4823, "step": 17278 }, { "epoch": 0.4725169547145045, "grad_norm": 1.376166820526123, "learning_rate": 1.137291991972055e-05, "loss": 0.5202, "step": 17279 }, { "epoch": 0.47254430102822137, "grad_norm": 1.175843596458435, "learning_rate": 1.137204259724651e-05, "loss": 0.4972, "step": 17280 }, { "epoch": 0.4725716473419383, "grad_norm": 1.3919997215270996, "learning_rate": 1.1371165264009177e-05, "loss": 0.5105, "step": 17281 }, { "epoch": 0.4725989936556552, "grad_norm": 1.3187330961227417, "learning_rate": 1.1370287920015438e-05, "loss": 0.4699, "step": 17282 }, { "epoch": 0.47262633996937214, "grad_norm": 1.2296624183654785, "learning_rate": 1.1369410565272181e-05, "loss": 0.5103, "step": 17283 }, { "epoch": 0.472653686283089, "grad_norm": 1.1885515451431274, "learning_rate": 1.1368533199786278e-05, "loss": 0.4933, "step": 17284 }, { "epoch": 0.47268103259680594, "grad_norm": 1.4627621173858643, "learning_rate": 1.1367655823564619e-05, "loss": 0.4274, "step": 17285 }, { "epoch": 0.47270837891052286, "grad_norm": 1.42328941822052, "learning_rate": 1.1366778436614087e-05, "loss": 0.5336, "step": 17286 }, { "epoch": 0.4727357252242398, "grad_norm": 1.4205875396728516, "learning_rate": 1.136590103894156e-05, "loss": 0.4339, "step": 17287 }, { "epoch": 0.47276307153795666, "grad_norm": 1.5152851343154907, "learning_rate": 1.1365023630553926e-05, "loss": 0.8422, "step": 17288 }, { "epoch": 0.4727904178516736, "grad_norm": 1.2063955068588257, "learning_rate": 1.1364146211458065e-05, "loss": 0.5312, "step": 17289 }, { "epoch": 0.4728177641653905, "grad_norm": 1.195529580116272, "learning_rate": 1.136326878166086e-05, "loss": 0.4937, "step": 17290 }, { "epoch": 0.47284511047910743, "grad_norm": 1.3308610916137695, "learning_rate": 1.1362391341169196e-05, "loss": 0.5027, "step": 17291 }, { "epoch": 0.4728724567928243, "grad_norm": 1.300803542137146, "learning_rate": 1.1361513889989954e-05, "loss": 0.5311, "step": 17292 }, { "epoch": 0.4728998031065412, "grad_norm": 1.189389705657959, "learning_rate": 1.136063642813002e-05, "loss": 0.8103, "step": 17293 }, { "epoch": 0.47292714942025815, "grad_norm": 1.3422045707702637, "learning_rate": 1.1359758955596277e-05, "loss": 0.5412, "step": 17294 }, { "epoch": 0.4729544957339751, "grad_norm": 1.4960869550704956, "learning_rate": 1.1358881472395606e-05, "loss": 0.5368, "step": 17295 }, { "epoch": 0.47298184204769195, "grad_norm": 1.2128366231918335, "learning_rate": 1.1358003978534892e-05, "loss": 0.5103, "step": 17296 }, { "epoch": 0.47300918836140887, "grad_norm": 1.2368086576461792, "learning_rate": 1.1357126474021023e-05, "loss": 0.4652, "step": 17297 }, { "epoch": 0.4730365346751258, "grad_norm": 1.3788961172103882, "learning_rate": 1.1356248958860874e-05, "loss": 0.4524, "step": 17298 }, { "epoch": 0.4730638809888427, "grad_norm": 1.3566887378692627, "learning_rate": 1.1355371433061338e-05, "loss": 0.5003, "step": 17299 }, { "epoch": 0.4730912273025596, "grad_norm": 1.259838581085205, "learning_rate": 1.135449389662929e-05, "loss": 0.52, "step": 17300 }, { "epoch": 0.4731185736162765, "grad_norm": 1.5079779624938965, "learning_rate": 1.1353616349571618e-05, "loss": 0.5126, "step": 17301 }, { "epoch": 0.47314591992999344, "grad_norm": 1.2246235609054565, "learning_rate": 1.135273879189521e-05, "loss": 0.4954, "step": 17302 }, { "epoch": 0.47317326624371037, "grad_norm": 1.3020694255828857, "learning_rate": 1.1351861223606946e-05, "loss": 0.4365, "step": 17303 }, { "epoch": 0.47320061255742724, "grad_norm": 1.5542423725128174, "learning_rate": 1.1350983644713709e-05, "loss": 0.5183, "step": 17304 }, { "epoch": 0.47322795887114416, "grad_norm": 1.7212063074111938, "learning_rate": 1.1350106055222389e-05, "loss": 0.4969, "step": 17305 }, { "epoch": 0.4732553051848611, "grad_norm": 1.2614630460739136, "learning_rate": 1.1349228455139864e-05, "loss": 0.5222, "step": 17306 }, { "epoch": 0.473282651498578, "grad_norm": 3.410417079925537, "learning_rate": 1.1348350844473027e-05, "loss": 0.8025, "step": 17307 }, { "epoch": 0.4733099978122949, "grad_norm": 1.6094170808792114, "learning_rate": 1.134747322322875e-05, "loss": 0.483, "step": 17308 }, { "epoch": 0.4733373441260118, "grad_norm": 1.4503579139709473, "learning_rate": 1.1346595591413927e-05, "loss": 0.5045, "step": 17309 }, { "epoch": 0.47336469043972873, "grad_norm": 1.2917112112045288, "learning_rate": 1.1345717949035442e-05, "loss": 0.5392, "step": 17310 }, { "epoch": 0.47339203675344566, "grad_norm": 1.562514066696167, "learning_rate": 1.1344840296100177e-05, "loss": 0.5411, "step": 17311 }, { "epoch": 0.4734193830671625, "grad_norm": 1.2369861602783203, "learning_rate": 1.1343962632615021e-05, "loss": 0.5037, "step": 17312 }, { "epoch": 0.47344672938087945, "grad_norm": 1.1641623973846436, "learning_rate": 1.1343084958586855e-05, "loss": 0.4885, "step": 17313 }, { "epoch": 0.4734740756945964, "grad_norm": 1.3900120258331299, "learning_rate": 1.1342207274022565e-05, "loss": 0.4319, "step": 17314 }, { "epoch": 0.4735014220083133, "grad_norm": 1.8521603345870972, "learning_rate": 1.1341329578929034e-05, "loss": 0.4542, "step": 17315 }, { "epoch": 0.47352876832203017, "grad_norm": 1.3606150150299072, "learning_rate": 1.1340451873313154e-05, "loss": 0.4319, "step": 17316 }, { "epoch": 0.4735561146357471, "grad_norm": 1.159483790397644, "learning_rate": 1.1339574157181805e-05, "loss": 0.5193, "step": 17317 }, { "epoch": 0.473583460949464, "grad_norm": 1.4090971946716309, "learning_rate": 1.1338696430541876e-05, "loss": 0.5008, "step": 17318 }, { "epoch": 0.47361080726318094, "grad_norm": 1.361022710800171, "learning_rate": 1.1337818693400247e-05, "loss": 0.5314, "step": 17319 }, { "epoch": 0.4736381535768978, "grad_norm": 1.2287038564682007, "learning_rate": 1.1336940945763806e-05, "loss": 0.5458, "step": 17320 }, { "epoch": 0.47366549989061474, "grad_norm": 1.291581392288208, "learning_rate": 1.1336063187639441e-05, "loss": 0.5213, "step": 17321 }, { "epoch": 0.47369284620433166, "grad_norm": 1.3897154331207275, "learning_rate": 1.1335185419034036e-05, "loss": 0.509, "step": 17322 }, { "epoch": 0.4737201925180486, "grad_norm": 1.4389090538024902, "learning_rate": 1.1334307639954478e-05, "loss": 0.8095, "step": 17323 }, { "epoch": 0.47374753883176546, "grad_norm": 1.7865673303604126, "learning_rate": 1.1333429850407652e-05, "loss": 0.7921, "step": 17324 }, { "epoch": 0.4737748851454824, "grad_norm": 1.4838861227035522, "learning_rate": 1.1332552050400443e-05, "loss": 0.4133, "step": 17325 }, { "epoch": 0.4738022314591993, "grad_norm": 2.273218870162964, "learning_rate": 1.133167423993974e-05, "loss": 0.4154, "step": 17326 }, { "epoch": 0.47382957777291623, "grad_norm": 2.099118709564209, "learning_rate": 1.1330796419032426e-05, "loss": 0.8367, "step": 17327 }, { "epoch": 0.4738569240866331, "grad_norm": 1.2089800834655762, "learning_rate": 1.1329918587685386e-05, "loss": 0.5174, "step": 17328 }, { "epoch": 0.47388427040035, "grad_norm": 1.3665457963943481, "learning_rate": 1.1329040745905515e-05, "loss": 0.5158, "step": 17329 }, { "epoch": 0.47391161671406695, "grad_norm": 1.2657095193862915, "learning_rate": 1.1328162893699691e-05, "loss": 0.5012, "step": 17330 }, { "epoch": 0.4739389630277839, "grad_norm": 1.2842416763305664, "learning_rate": 1.1327285031074801e-05, "loss": 0.4943, "step": 17331 }, { "epoch": 0.47396630934150075, "grad_norm": 1.3648133277893066, "learning_rate": 1.1326407158037735e-05, "loss": 0.5093, "step": 17332 }, { "epoch": 0.47399365565521767, "grad_norm": 1.442249059677124, "learning_rate": 1.1325529274595377e-05, "loss": 0.5363, "step": 17333 }, { "epoch": 0.4740210019689346, "grad_norm": 1.647420048713684, "learning_rate": 1.1324651380754616e-05, "loss": 0.5243, "step": 17334 }, { "epoch": 0.4740483482826515, "grad_norm": 1.8194814920425415, "learning_rate": 1.1323773476522338e-05, "loss": 0.4631, "step": 17335 }, { "epoch": 0.4740756945963684, "grad_norm": 1.2833527326583862, "learning_rate": 1.1322895561905427e-05, "loss": 0.5181, "step": 17336 }, { "epoch": 0.4741030409100853, "grad_norm": 1.3837515115737915, "learning_rate": 1.1322017636910776e-05, "loss": 0.8112, "step": 17337 }, { "epoch": 0.47413038722380224, "grad_norm": 1.405864953994751, "learning_rate": 1.1321139701545268e-05, "loss": 0.4972, "step": 17338 }, { "epoch": 0.47415773353751917, "grad_norm": 1.4542680978775024, "learning_rate": 1.1320261755815787e-05, "loss": 0.5364, "step": 17339 }, { "epoch": 0.47418507985123604, "grad_norm": 1.4002892971038818, "learning_rate": 1.1319383799729229e-05, "loss": 0.5269, "step": 17340 }, { "epoch": 0.47421242616495296, "grad_norm": 1.2206156253814697, "learning_rate": 1.1318505833292472e-05, "loss": 0.4957, "step": 17341 }, { "epoch": 0.4742397724786699, "grad_norm": 1.4058516025543213, "learning_rate": 1.1317627856512412e-05, "loss": 0.4773, "step": 17342 }, { "epoch": 0.4742671187923868, "grad_norm": 1.2750805616378784, "learning_rate": 1.131674986939593e-05, "loss": 0.5014, "step": 17343 }, { "epoch": 0.4742944651061037, "grad_norm": 1.5114178657531738, "learning_rate": 1.1315871871949917e-05, "loss": 0.4517, "step": 17344 }, { "epoch": 0.4743218114198206, "grad_norm": 1.5734295845031738, "learning_rate": 1.1314993864181258e-05, "loss": 0.4052, "step": 17345 }, { "epoch": 0.47434915773353753, "grad_norm": 1.4455114603042603, "learning_rate": 1.1314115846096842e-05, "loss": 0.5466, "step": 17346 }, { "epoch": 0.47437650404725445, "grad_norm": 1.278001070022583, "learning_rate": 1.1313237817703559e-05, "loss": 0.5158, "step": 17347 }, { "epoch": 0.4744038503609713, "grad_norm": 1.4916090965270996, "learning_rate": 1.1312359779008297e-05, "loss": 0.5213, "step": 17348 }, { "epoch": 0.47443119667468825, "grad_norm": 1.2334574460983276, "learning_rate": 1.1311481730017939e-05, "loss": 0.498, "step": 17349 }, { "epoch": 0.4744585429884052, "grad_norm": 1.3162940740585327, "learning_rate": 1.1310603670739373e-05, "loss": 0.5008, "step": 17350 }, { "epoch": 0.4744858893021221, "grad_norm": 1.5955368280410767, "learning_rate": 1.1309725601179496e-05, "loss": 0.5235, "step": 17351 }, { "epoch": 0.47451323561583897, "grad_norm": 1.3849520683288574, "learning_rate": 1.1308847521345185e-05, "loss": 0.5139, "step": 17352 }, { "epoch": 0.4745405819295559, "grad_norm": 1.3186886310577393, "learning_rate": 1.1307969431243338e-05, "loss": 0.5256, "step": 17353 }, { "epoch": 0.4745679282432728, "grad_norm": 1.2415199279785156, "learning_rate": 1.1307091330880835e-05, "loss": 0.5209, "step": 17354 }, { "epoch": 0.47459527455698974, "grad_norm": 1.5105303525924683, "learning_rate": 1.1306213220264568e-05, "loss": 0.4329, "step": 17355 }, { "epoch": 0.4746226208707066, "grad_norm": 1.2955434322357178, "learning_rate": 1.130533509940143e-05, "loss": 0.7987, "step": 17356 }, { "epoch": 0.47464996718442354, "grad_norm": 11.144209861755371, "learning_rate": 1.1304456968298304e-05, "loss": 0.5189, "step": 17357 }, { "epoch": 0.47467731349814046, "grad_norm": 1.4261534214019775, "learning_rate": 1.1303578826962076e-05, "loss": 0.536, "step": 17358 }, { "epoch": 0.4747046598118574, "grad_norm": 15.73483657836914, "learning_rate": 1.1302700675399645e-05, "loss": 0.8049, "step": 17359 }, { "epoch": 0.47473200612557426, "grad_norm": 2.4334921836853027, "learning_rate": 1.1301822513617888e-05, "loss": 0.5155, "step": 17360 }, { "epoch": 0.4747593524392912, "grad_norm": 1.3859153985977173, "learning_rate": 1.1300944341623705e-05, "loss": 0.5169, "step": 17361 }, { "epoch": 0.4747866987530081, "grad_norm": 13.791954040527344, "learning_rate": 1.1300066159423977e-05, "loss": 0.5586, "step": 17362 }, { "epoch": 0.47481404506672503, "grad_norm": 1.6707252264022827, "learning_rate": 1.1299187967025596e-05, "loss": 0.5063, "step": 17363 }, { "epoch": 0.4748413913804419, "grad_norm": 1.5197042226791382, "learning_rate": 1.1298309764435454e-05, "loss": 0.5216, "step": 17364 }, { "epoch": 0.4748687376941588, "grad_norm": 1.7846930027008057, "learning_rate": 1.1297431551660435e-05, "loss": 0.5111, "step": 17365 }, { "epoch": 0.47489608400787575, "grad_norm": 1.3436813354492188, "learning_rate": 1.129655332870743e-05, "loss": 0.509, "step": 17366 }, { "epoch": 0.4749234303215927, "grad_norm": 1.462105631828308, "learning_rate": 1.1295675095583332e-05, "loss": 0.4347, "step": 17367 }, { "epoch": 0.47495077663530955, "grad_norm": 1.6859712600708008, "learning_rate": 1.1294796852295022e-05, "loss": 0.4743, "step": 17368 }, { "epoch": 0.47497812294902647, "grad_norm": 1.7098538875579834, "learning_rate": 1.1293918598849398e-05, "loss": 0.4398, "step": 17369 }, { "epoch": 0.4750054692627434, "grad_norm": 1.5036416053771973, "learning_rate": 1.1293040335253346e-05, "loss": 0.4129, "step": 17370 }, { "epoch": 0.47503281557646027, "grad_norm": 1.6317009925842285, "learning_rate": 1.1292162061513758e-05, "loss": 0.4892, "step": 17371 }, { "epoch": 0.4750601618901772, "grad_norm": 1.6925368309020996, "learning_rate": 1.1291283777637523e-05, "loss": 0.5122, "step": 17372 }, { "epoch": 0.4750875082038941, "grad_norm": 1.297267198562622, "learning_rate": 1.1290405483631528e-05, "loss": 0.4654, "step": 17373 }, { "epoch": 0.47511485451761104, "grad_norm": 1.5935767889022827, "learning_rate": 1.1289527179502665e-05, "loss": 0.849, "step": 17374 }, { "epoch": 0.4751422008313279, "grad_norm": 2.0761613845825195, "learning_rate": 1.1288648865257825e-05, "loss": 0.5198, "step": 17375 }, { "epoch": 0.47516954714504483, "grad_norm": 1.4683573246002197, "learning_rate": 1.1287770540903897e-05, "loss": 0.8235, "step": 17376 }, { "epoch": 0.47519689345876176, "grad_norm": 1.3614438772201538, "learning_rate": 1.1286892206447772e-05, "loss": 0.5036, "step": 17377 }, { "epoch": 0.4752242397724787, "grad_norm": 1.3964002132415771, "learning_rate": 1.1286013861896342e-05, "loss": 0.8351, "step": 17378 }, { "epoch": 0.47525158608619555, "grad_norm": 1.3662141561508179, "learning_rate": 1.128513550725649e-05, "loss": 0.8313, "step": 17379 }, { "epoch": 0.4752789323999125, "grad_norm": 1.585513949394226, "learning_rate": 1.1284257142535115e-05, "loss": 0.4001, "step": 17380 }, { "epoch": 0.4753062787136294, "grad_norm": 1.4031516313552856, "learning_rate": 1.1283378767739104e-05, "loss": 0.534, "step": 17381 }, { "epoch": 0.47533362502734633, "grad_norm": 1.424047827720642, "learning_rate": 1.1282500382875346e-05, "loss": 0.805, "step": 17382 }, { "epoch": 0.4753609713410632, "grad_norm": 1.2018998861312866, "learning_rate": 1.1281621987950737e-05, "loss": 0.5258, "step": 17383 }, { "epoch": 0.4753883176547801, "grad_norm": 1.7014148235321045, "learning_rate": 1.1280743582972162e-05, "loss": 0.3957, "step": 17384 }, { "epoch": 0.47541566396849705, "grad_norm": 1.4227466583251953, "learning_rate": 1.1279865167946512e-05, "loss": 0.5088, "step": 17385 }, { "epoch": 0.475443010282214, "grad_norm": 1.5121384859085083, "learning_rate": 1.1278986742880685e-05, "loss": 0.5695, "step": 17386 }, { "epoch": 0.47547035659593084, "grad_norm": 1.517795443534851, "learning_rate": 1.1278108307781561e-05, "loss": 0.5402, "step": 17387 }, { "epoch": 0.47549770290964777, "grad_norm": 1.2394593954086304, "learning_rate": 1.1277229862656043e-05, "loss": 0.525, "step": 17388 }, { "epoch": 0.4755250492233647, "grad_norm": 1.4286125898361206, "learning_rate": 1.1276351407511012e-05, "loss": 0.5159, "step": 17389 }, { "epoch": 0.4755523955370816, "grad_norm": 1.044224739074707, "learning_rate": 1.1275472942353362e-05, "loss": 0.5234, "step": 17390 }, { "epoch": 0.4755797418507985, "grad_norm": 1.3989876508712769, "learning_rate": 1.127459446718999e-05, "loss": 0.5066, "step": 17391 }, { "epoch": 0.4756070881645154, "grad_norm": 1.6114935874938965, "learning_rate": 1.127371598202778e-05, "loss": 0.4362, "step": 17392 }, { "epoch": 0.47563443447823234, "grad_norm": 1.151387095451355, "learning_rate": 1.1272837486873626e-05, "loss": 0.4894, "step": 17393 }, { "epoch": 0.47566178079194926, "grad_norm": 1.3769034147262573, "learning_rate": 1.1271958981734425e-05, "loss": 0.4905, "step": 17394 }, { "epoch": 0.47568912710566613, "grad_norm": 1.24527907371521, "learning_rate": 1.1271080466617059e-05, "loss": 0.5451, "step": 17395 }, { "epoch": 0.47571647341938306, "grad_norm": 1.4035751819610596, "learning_rate": 1.1270201941528428e-05, "loss": 0.5001, "step": 17396 }, { "epoch": 0.4757438197331, "grad_norm": 1.4892408847808838, "learning_rate": 1.1269323406475416e-05, "loss": 0.4862, "step": 17397 }, { "epoch": 0.4757711660468169, "grad_norm": 1.3242824077606201, "learning_rate": 1.1268444861464922e-05, "loss": 0.5225, "step": 17398 }, { "epoch": 0.4757985123605338, "grad_norm": 1.1776272058486938, "learning_rate": 1.1267566306503836e-05, "loss": 0.5125, "step": 17399 }, { "epoch": 0.4758258586742507, "grad_norm": 1.2181719541549683, "learning_rate": 1.1266687741599047e-05, "loss": 0.5318, "step": 17400 }, { "epoch": 0.4758532049879676, "grad_norm": 1.1231838464736938, "learning_rate": 1.1265809166757451e-05, "loss": 0.5239, "step": 17401 }, { "epoch": 0.47588055130168455, "grad_norm": 1.5910003185272217, "learning_rate": 1.1264930581985938e-05, "loss": 0.5124, "step": 17402 }, { "epoch": 0.4759078976154014, "grad_norm": 1.5023926496505737, "learning_rate": 1.1264051987291397e-05, "loss": 0.5387, "step": 17403 }, { "epoch": 0.47593524392911835, "grad_norm": 1.9029841423034668, "learning_rate": 1.1263173382680728e-05, "loss": 0.5512, "step": 17404 }, { "epoch": 0.47596259024283527, "grad_norm": 1.5654577016830444, "learning_rate": 1.1262294768160818e-05, "loss": 0.4866, "step": 17405 }, { "epoch": 0.4759899365565522, "grad_norm": 1.2481367588043213, "learning_rate": 1.126141614373856e-05, "loss": 0.5438, "step": 17406 }, { "epoch": 0.47601728287026907, "grad_norm": 1.3883260488510132, "learning_rate": 1.1260537509420849e-05, "loss": 0.523, "step": 17407 }, { "epoch": 0.476044629183986, "grad_norm": 1.2188609838485718, "learning_rate": 1.1259658865214574e-05, "loss": 0.5304, "step": 17408 }, { "epoch": 0.4760719754977029, "grad_norm": 1.3565036058425903, "learning_rate": 1.125878021112663e-05, "loss": 0.5053, "step": 17409 }, { "epoch": 0.47609932181141984, "grad_norm": 1.4190469980239868, "learning_rate": 1.125790154716391e-05, "loss": 0.3576, "step": 17410 }, { "epoch": 0.4761266681251367, "grad_norm": 1.7358839511871338, "learning_rate": 1.1257022873333306e-05, "loss": 0.5315, "step": 17411 }, { "epoch": 0.47615401443885363, "grad_norm": 1.516177773475647, "learning_rate": 1.1256144189641711e-05, "loss": 0.5217, "step": 17412 }, { "epoch": 0.47618136075257056, "grad_norm": 1.9363256692886353, "learning_rate": 1.125526549609602e-05, "loss": 0.5155, "step": 17413 }, { "epoch": 0.4762087070662875, "grad_norm": 1.3734630346298218, "learning_rate": 1.1254386792703123e-05, "loss": 0.5334, "step": 17414 }, { "epoch": 0.47623605338000435, "grad_norm": 1.3888050317764282, "learning_rate": 1.1253508079469917e-05, "loss": 0.4829, "step": 17415 }, { "epoch": 0.4762633996937213, "grad_norm": 1.3737068176269531, "learning_rate": 1.125262935640329e-05, "loss": 0.501, "step": 17416 }, { "epoch": 0.4762907460074382, "grad_norm": 1.2149211168289185, "learning_rate": 1.1251750623510139e-05, "loss": 0.5034, "step": 17417 }, { "epoch": 0.47631809232115513, "grad_norm": 1.3298054933547974, "learning_rate": 1.1250871880797361e-05, "loss": 0.3789, "step": 17418 }, { "epoch": 0.476345438634872, "grad_norm": 1.345221996307373, "learning_rate": 1.1249993128271839e-05, "loss": 0.5045, "step": 17419 }, { "epoch": 0.4763727849485889, "grad_norm": 1.4575906991958618, "learning_rate": 1.1249114365940472e-05, "loss": 0.5496, "step": 17420 }, { "epoch": 0.47640013126230585, "grad_norm": 1.463438868522644, "learning_rate": 1.124823559381016e-05, "loss": 0.5572, "step": 17421 }, { "epoch": 0.4764274775760228, "grad_norm": 1.378043293952942, "learning_rate": 1.1247356811887786e-05, "loss": 0.4768, "step": 17422 }, { "epoch": 0.47645482388973964, "grad_norm": 1.352830171585083, "learning_rate": 1.1246478020180251e-05, "loss": 0.4765, "step": 17423 }, { "epoch": 0.47648217020345657, "grad_norm": 1.1477185487747192, "learning_rate": 1.1245599218694447e-05, "loss": 0.474, "step": 17424 }, { "epoch": 0.4765095165171735, "grad_norm": 1.3670984506607056, "learning_rate": 1.1244720407437266e-05, "loss": 0.5002, "step": 17425 }, { "epoch": 0.4765368628308904, "grad_norm": 1.5206865072250366, "learning_rate": 1.1243841586415607e-05, "loss": 0.507, "step": 17426 }, { "epoch": 0.4765642091446073, "grad_norm": 1.428849458694458, "learning_rate": 1.1242962755636358e-05, "loss": 0.5104, "step": 17427 }, { "epoch": 0.4765915554583242, "grad_norm": 1.1776386499404907, "learning_rate": 1.1242083915106416e-05, "loss": 0.4991, "step": 17428 }, { "epoch": 0.47661890177204114, "grad_norm": 1.1633110046386719, "learning_rate": 1.1241205064832678e-05, "loss": 0.5557, "step": 17429 }, { "epoch": 0.47664624808575806, "grad_norm": 1.5101763010025024, "learning_rate": 1.1240326204822032e-05, "loss": 0.3917, "step": 17430 }, { "epoch": 0.47667359439947493, "grad_norm": 1.3454170227050781, "learning_rate": 1.1239447335081378e-05, "loss": 0.5157, "step": 17431 }, { "epoch": 0.47670094071319186, "grad_norm": 1.287670373916626, "learning_rate": 1.123856845561761e-05, "loss": 0.5668, "step": 17432 }, { "epoch": 0.4767282870269088, "grad_norm": 2.0237865447998047, "learning_rate": 1.1237689566437618e-05, "loss": 0.5381, "step": 17433 }, { "epoch": 0.4767556333406257, "grad_norm": 1.3228312730789185, "learning_rate": 1.1236810667548302e-05, "loss": 0.5263, "step": 17434 }, { "epoch": 0.4767829796543426, "grad_norm": 1.281046748161316, "learning_rate": 1.123593175895655e-05, "loss": 0.5433, "step": 17435 }, { "epoch": 0.4768103259680595, "grad_norm": 1.2342710494995117, "learning_rate": 1.1235052840669267e-05, "loss": 0.5019, "step": 17436 }, { "epoch": 0.4768376722817764, "grad_norm": 1.6869254112243652, "learning_rate": 1.123417391269334e-05, "loss": 0.515, "step": 17437 }, { "epoch": 0.47686501859549335, "grad_norm": 1.4774068593978882, "learning_rate": 1.1233294975035665e-05, "loss": 0.5118, "step": 17438 }, { "epoch": 0.4768923649092102, "grad_norm": 1.4467889070510864, "learning_rate": 1.1232416027703137e-05, "loss": 0.4741, "step": 17439 }, { "epoch": 0.47691971122292715, "grad_norm": 1.3199225664138794, "learning_rate": 1.1231537070702654e-05, "loss": 0.5454, "step": 17440 }, { "epoch": 0.47694705753664407, "grad_norm": 1.2871679067611694, "learning_rate": 1.1230658104041109e-05, "loss": 0.5114, "step": 17441 }, { "epoch": 0.476974403850361, "grad_norm": 1.502044677734375, "learning_rate": 1.1229779127725397e-05, "loss": 0.4261, "step": 17442 }, { "epoch": 0.47700175016407786, "grad_norm": 1.10171639919281, "learning_rate": 1.1228900141762414e-05, "loss": 0.4861, "step": 17443 }, { "epoch": 0.4770290964777948, "grad_norm": 1.2910674810409546, "learning_rate": 1.1228021146159054e-05, "loss": 0.5035, "step": 17444 }, { "epoch": 0.4770564427915117, "grad_norm": 1.612070083618164, "learning_rate": 1.1227142140922216e-05, "loss": 0.4304, "step": 17445 }, { "epoch": 0.47708378910522864, "grad_norm": 1.4684644937515259, "learning_rate": 1.1226263126058792e-05, "loss": 0.4294, "step": 17446 }, { "epoch": 0.4771111354189455, "grad_norm": 1.5982615947723389, "learning_rate": 1.1225384101575678e-05, "loss": 0.7723, "step": 17447 }, { "epoch": 0.47713848173266243, "grad_norm": 1.3370946645736694, "learning_rate": 1.1224505067479774e-05, "loss": 0.506, "step": 17448 }, { "epoch": 0.47716582804637936, "grad_norm": 1.6078929901123047, "learning_rate": 1.122362602377797e-05, "loss": 0.5299, "step": 17449 }, { "epoch": 0.4771931743600963, "grad_norm": 1.5695931911468506, "learning_rate": 1.1222746970477163e-05, "loss": 0.5024, "step": 17450 }, { "epoch": 0.47722052067381315, "grad_norm": 1.14433753490448, "learning_rate": 1.1221867907584253e-05, "loss": 0.5023, "step": 17451 }, { "epoch": 0.4772478669875301, "grad_norm": 1.246760606765747, "learning_rate": 1.1220988835106127e-05, "loss": 0.5042, "step": 17452 }, { "epoch": 0.477275213301247, "grad_norm": 1.3947175741195679, "learning_rate": 1.1220109753049698e-05, "loss": 0.5016, "step": 17453 }, { "epoch": 0.47730255961496393, "grad_norm": 1.5008888244628906, "learning_rate": 1.1219230661421843e-05, "loss": 0.4083, "step": 17454 }, { "epoch": 0.4773299059286808, "grad_norm": 1.355137586593628, "learning_rate": 1.1218351560229468e-05, "loss": 0.5079, "step": 17455 }, { "epoch": 0.4773572522423977, "grad_norm": 1.085953712463379, "learning_rate": 1.1217472449479469e-05, "loss": 0.4945, "step": 17456 }, { "epoch": 0.47738459855611465, "grad_norm": 3.0438930988311768, "learning_rate": 1.121659332917874e-05, "loss": 0.3594, "step": 17457 }, { "epoch": 0.4774119448698316, "grad_norm": 1.7220690250396729, "learning_rate": 1.121571419933418e-05, "loss": 0.5362, "step": 17458 }, { "epoch": 0.47743929118354844, "grad_norm": 1.3461594581604004, "learning_rate": 1.1214835059952684e-05, "loss": 0.4754, "step": 17459 }, { "epoch": 0.47746663749726537, "grad_norm": 1.5695860385894775, "learning_rate": 1.1213955911041148e-05, "loss": 0.4946, "step": 17460 }, { "epoch": 0.4774939838109823, "grad_norm": 1.253424882888794, "learning_rate": 1.121307675260647e-05, "loss": 0.5129, "step": 17461 }, { "epoch": 0.4775213301246992, "grad_norm": 1.6215901374816895, "learning_rate": 1.1212197584655546e-05, "loss": 0.838, "step": 17462 }, { "epoch": 0.4775486764384161, "grad_norm": 1.3032281398773193, "learning_rate": 1.1211318407195273e-05, "loss": 0.4915, "step": 17463 }, { "epoch": 0.477576022752133, "grad_norm": 1.384675145149231, "learning_rate": 1.121043922023255e-05, "loss": 0.4173, "step": 17464 }, { "epoch": 0.47760336906584994, "grad_norm": 1.146288514137268, "learning_rate": 1.1209560023774269e-05, "loss": 0.4735, "step": 17465 }, { "epoch": 0.47763071537956686, "grad_norm": 1.278994083404541, "learning_rate": 1.1208680817827333e-05, "loss": 0.523, "step": 17466 }, { "epoch": 0.47765806169328373, "grad_norm": 1.2429146766662598, "learning_rate": 1.1207801602398633e-05, "loss": 0.5049, "step": 17467 }, { "epoch": 0.47768540800700066, "grad_norm": 1.2901304960250854, "learning_rate": 1.120692237749507e-05, "loss": 0.5134, "step": 17468 }, { "epoch": 0.4777127543207176, "grad_norm": 1.1441129446029663, "learning_rate": 1.1206043143123543e-05, "loss": 0.8258, "step": 17469 }, { "epoch": 0.4777401006344345, "grad_norm": 1.2698391675949097, "learning_rate": 1.1205163899290944e-05, "loss": 0.5026, "step": 17470 }, { "epoch": 0.4777674469481514, "grad_norm": 1.1000686883926392, "learning_rate": 1.1204284646004178e-05, "loss": 0.4824, "step": 17471 }, { "epoch": 0.4777947932618683, "grad_norm": 1.335263967514038, "learning_rate": 1.1203405383270136e-05, "loss": 0.8172, "step": 17472 }, { "epoch": 0.4778221395755852, "grad_norm": 1.4823063611984253, "learning_rate": 1.1202526111095716e-05, "loss": 0.5176, "step": 17473 }, { "epoch": 0.4778494858893021, "grad_norm": 1.1601969003677368, "learning_rate": 1.1201646829487816e-05, "loss": 0.4917, "step": 17474 }, { "epoch": 0.477876832203019, "grad_norm": 1.4543579816818237, "learning_rate": 1.1200767538453336e-05, "loss": 0.8058, "step": 17475 }, { "epoch": 0.47790417851673594, "grad_norm": 1.2481664419174194, "learning_rate": 1.1199888237999172e-05, "loss": 0.5108, "step": 17476 }, { "epoch": 0.47793152483045287, "grad_norm": 1.0842828750610352, "learning_rate": 1.1199008928132223e-05, "loss": 0.5068, "step": 17477 }, { "epoch": 0.47795887114416974, "grad_norm": 1.317456603050232, "learning_rate": 1.1198129608859386e-05, "loss": 0.539, "step": 17478 }, { "epoch": 0.47798621745788666, "grad_norm": 1.5316134691238403, "learning_rate": 1.1197250280187559e-05, "loss": 0.4914, "step": 17479 }, { "epoch": 0.4780135637716036, "grad_norm": 1.3319509029388428, "learning_rate": 1.1196370942123642e-05, "loss": 0.5024, "step": 17480 }, { "epoch": 0.4780409100853205, "grad_norm": 1.2729722261428833, "learning_rate": 1.1195491594674531e-05, "loss": 0.4965, "step": 17481 }, { "epoch": 0.4780682563990374, "grad_norm": 1.2014565467834473, "learning_rate": 1.1194612237847125e-05, "loss": 0.5106, "step": 17482 }, { "epoch": 0.4780956027127543, "grad_norm": 1.0875650644302368, "learning_rate": 1.1193732871648321e-05, "loss": 0.495, "step": 17483 }, { "epoch": 0.47812294902647123, "grad_norm": 42.20003128051758, "learning_rate": 1.1192853496085019e-05, "loss": 0.9023, "step": 17484 }, { "epoch": 0.47815029534018816, "grad_norm": 2.772583246231079, "learning_rate": 1.1191974111164119e-05, "loss": 0.5166, "step": 17485 }, { "epoch": 0.47817764165390503, "grad_norm": 2.4212987422943115, "learning_rate": 1.1191094716892517e-05, "loss": 0.3733, "step": 17486 }, { "epoch": 0.47820498796762195, "grad_norm": 1.5324394702911377, "learning_rate": 1.1190215313277109e-05, "loss": 0.509, "step": 17487 }, { "epoch": 0.4782323342813389, "grad_norm": 1.7639973163604736, "learning_rate": 1.11893359003248e-05, "loss": 0.7778, "step": 17488 }, { "epoch": 0.4782596805950558, "grad_norm": 1.2556875944137573, "learning_rate": 1.1188456478042486e-05, "loss": 0.501, "step": 17489 }, { "epoch": 0.47828702690877267, "grad_norm": 1.3965399265289307, "learning_rate": 1.1187577046437063e-05, "loss": 0.4245, "step": 17490 }, { "epoch": 0.4783143732224896, "grad_norm": 1.4656002521514893, "learning_rate": 1.1186697605515435e-05, "loss": 0.4608, "step": 17491 }, { "epoch": 0.4783417195362065, "grad_norm": 1.4658654928207397, "learning_rate": 1.1185818155284495e-05, "loss": 0.4325, "step": 17492 }, { "epoch": 0.47836906584992345, "grad_norm": 1.187113642692566, "learning_rate": 1.1184938695751146e-05, "loss": 0.4994, "step": 17493 }, { "epoch": 0.4783964121636403, "grad_norm": 1.3179903030395508, "learning_rate": 1.118405922692229e-05, "loss": 0.5059, "step": 17494 }, { "epoch": 0.47842375847735724, "grad_norm": 1.3394336700439453, "learning_rate": 1.1183179748804819e-05, "loss": 0.532, "step": 17495 }, { "epoch": 0.47845110479107417, "grad_norm": 1.1007486581802368, "learning_rate": 1.1182300261405638e-05, "loss": 0.5219, "step": 17496 }, { "epoch": 0.4784784511047911, "grad_norm": 1.3249939680099487, "learning_rate": 1.118142076473164e-05, "loss": 0.4617, "step": 17497 }, { "epoch": 0.47850579741850796, "grad_norm": 1.2529871463775635, "learning_rate": 1.1180541258789732e-05, "loss": 0.4889, "step": 17498 }, { "epoch": 0.4785331437322249, "grad_norm": 1.638570785522461, "learning_rate": 1.1179661743586811e-05, "loss": 0.4837, "step": 17499 }, { "epoch": 0.4785604900459418, "grad_norm": 1.398327112197876, "learning_rate": 1.1178782219129773e-05, "loss": 0.4221, "step": 17500 }, { "epoch": 0.47858783635965874, "grad_norm": 1.4989771842956543, "learning_rate": 1.1177902685425521e-05, "loss": 0.4201, "step": 17501 }, { "epoch": 0.4786151826733756, "grad_norm": 1.6649882793426514, "learning_rate": 1.1177023142480957e-05, "loss": 0.5412, "step": 17502 }, { "epoch": 0.47864252898709253, "grad_norm": 1.466186285018921, "learning_rate": 1.1176143590302972e-05, "loss": 0.5367, "step": 17503 }, { "epoch": 0.47866987530080946, "grad_norm": 1.5826544761657715, "learning_rate": 1.1175264028898477e-05, "loss": 0.5014, "step": 17504 }, { "epoch": 0.4786972216145264, "grad_norm": 1.2361522912979126, "learning_rate": 1.1174384458274363e-05, "loss": 0.506, "step": 17505 }, { "epoch": 0.47872456792824325, "grad_norm": 1.4208942651748657, "learning_rate": 1.1173504878437536e-05, "loss": 0.5285, "step": 17506 }, { "epoch": 0.4787519142419602, "grad_norm": 5.793174743652344, "learning_rate": 1.1172625289394892e-05, "loss": 0.3871, "step": 17507 }, { "epoch": 0.4787792605556771, "grad_norm": 2.0861759185791016, "learning_rate": 1.1171745691153333e-05, "loss": 0.5025, "step": 17508 }, { "epoch": 0.478806606869394, "grad_norm": 2.935572624206543, "learning_rate": 1.1170866083719756e-05, "loss": 0.4602, "step": 17509 }, { "epoch": 0.4788339531831109, "grad_norm": 1.185817837715149, "learning_rate": 1.1169986467101069e-05, "loss": 0.4952, "step": 17510 }, { "epoch": 0.4788612994968278, "grad_norm": 1.1476713418960571, "learning_rate": 1.1169106841304163e-05, "loss": 0.479, "step": 17511 }, { "epoch": 0.47888864581054474, "grad_norm": 1.1930396556854248, "learning_rate": 1.1168227206335945e-05, "loss": 0.5212, "step": 17512 }, { "epoch": 0.47891599212426167, "grad_norm": 1.4788830280303955, "learning_rate": 1.1167347562203312e-05, "loss": 0.4976, "step": 17513 }, { "epoch": 0.47894333843797854, "grad_norm": 1.2471568584442139, "learning_rate": 1.1166467908913168e-05, "loss": 0.5185, "step": 17514 }, { "epoch": 0.47897068475169546, "grad_norm": 1.0989532470703125, "learning_rate": 1.1165588246472413e-05, "loss": 0.5126, "step": 17515 }, { "epoch": 0.4789980310654124, "grad_norm": 1.4740252494812012, "learning_rate": 1.1164708574887941e-05, "loss": 0.5243, "step": 17516 }, { "epoch": 0.4790253773791293, "grad_norm": 1.2951215505599976, "learning_rate": 1.116382889416666e-05, "loss": 0.5334, "step": 17517 }, { "epoch": 0.4790527236928462, "grad_norm": 1.3983407020568848, "learning_rate": 1.116294920431547e-05, "loss": 0.5004, "step": 17518 }, { "epoch": 0.4790800700065631, "grad_norm": 1.284245252609253, "learning_rate": 1.1162069505341269e-05, "loss": 0.5077, "step": 17519 }, { "epoch": 0.47910741632028003, "grad_norm": 1.1055347919464111, "learning_rate": 1.1161189797250962e-05, "loss": 0.5269, "step": 17520 }, { "epoch": 0.47913476263399696, "grad_norm": 1.4576879739761353, "learning_rate": 1.1160310080051447e-05, "loss": 0.3987, "step": 17521 }, { "epoch": 0.4791621089477138, "grad_norm": 1.2238904237747192, "learning_rate": 1.1159430353749625e-05, "loss": 0.3929, "step": 17522 }, { "epoch": 0.47918945526143075, "grad_norm": 1.1386871337890625, "learning_rate": 1.11585506183524e-05, "loss": 0.5057, "step": 17523 }, { "epoch": 0.4792168015751477, "grad_norm": 1.6151078939437866, "learning_rate": 1.1157670873866673e-05, "loss": 0.5171, "step": 17524 }, { "epoch": 0.4792441478888646, "grad_norm": 1.5889285802841187, "learning_rate": 1.115679112029934e-05, "loss": 0.4113, "step": 17525 }, { "epoch": 0.47927149420258147, "grad_norm": 1.2737953662872314, "learning_rate": 1.1155911357657307e-05, "loss": 0.5161, "step": 17526 }, { "epoch": 0.4792988405162984, "grad_norm": 1.3810865879058838, "learning_rate": 1.1155031585947476e-05, "loss": 0.5331, "step": 17527 }, { "epoch": 0.4793261868300153, "grad_norm": 1.1721463203430176, "learning_rate": 1.1154151805176745e-05, "loss": 0.4828, "step": 17528 }, { "epoch": 0.47935353314373225, "grad_norm": 1.2967939376831055, "learning_rate": 1.1153272015352022e-05, "loss": 0.5036, "step": 17529 }, { "epoch": 0.4793808794574491, "grad_norm": 1.2875151634216309, "learning_rate": 1.11523922164802e-05, "loss": 0.4952, "step": 17530 }, { "epoch": 0.47940822577116604, "grad_norm": 1.4149610996246338, "learning_rate": 1.1151512408568188e-05, "loss": 0.4994, "step": 17531 }, { "epoch": 0.47943557208488297, "grad_norm": 1.7653770446777344, "learning_rate": 1.1150632591622883e-05, "loss": 0.4982, "step": 17532 }, { "epoch": 0.4794629183985999, "grad_norm": 1.4112823009490967, "learning_rate": 1.1149752765651189e-05, "loss": 0.4713, "step": 17533 }, { "epoch": 0.47949026471231676, "grad_norm": 1.16939115524292, "learning_rate": 1.1148872930660013e-05, "loss": 0.4808, "step": 17534 }, { "epoch": 0.4795176110260337, "grad_norm": 1.196702480316162, "learning_rate": 1.1147993086656248e-05, "loss": 0.4612, "step": 17535 }, { "epoch": 0.4795449573397506, "grad_norm": 1.467785120010376, "learning_rate": 1.11471132336468e-05, "loss": 0.5181, "step": 17536 }, { "epoch": 0.47957230365346754, "grad_norm": 5.137241363525391, "learning_rate": 1.1146233371638572e-05, "loss": 0.3662, "step": 17537 }, { "epoch": 0.4795996499671844, "grad_norm": 1.2292982339859009, "learning_rate": 1.1145353500638468e-05, "loss": 0.8279, "step": 17538 }, { "epoch": 0.47962699628090133, "grad_norm": 1.174599528312683, "learning_rate": 1.1144473620653386e-05, "loss": 0.5118, "step": 17539 }, { "epoch": 0.47965434259461825, "grad_norm": 2.283369302749634, "learning_rate": 1.1143593731690232e-05, "loss": 0.5096, "step": 17540 }, { "epoch": 0.4796816889083352, "grad_norm": 1.4365719556808472, "learning_rate": 1.1142713833755907e-05, "loss": 0.5326, "step": 17541 }, { "epoch": 0.47970903522205205, "grad_norm": 1.4817090034484863, "learning_rate": 1.1141833926857313e-05, "loss": 0.3846, "step": 17542 }, { "epoch": 0.479736381535769, "grad_norm": 1.592729091644287, "learning_rate": 1.1140954011001352e-05, "loss": 0.3939, "step": 17543 }, { "epoch": 0.4797637278494859, "grad_norm": 1.4552221298217773, "learning_rate": 1.1140074086194929e-05, "loss": 0.5325, "step": 17544 }, { "epoch": 0.4797910741632028, "grad_norm": 1.2505546808242798, "learning_rate": 1.1139194152444948e-05, "loss": 0.52, "step": 17545 }, { "epoch": 0.4798184204769197, "grad_norm": 1.1450896263122559, "learning_rate": 1.1138314209758305e-05, "loss": 0.8303, "step": 17546 }, { "epoch": 0.4798457667906366, "grad_norm": 2.3711602687835693, "learning_rate": 1.1137434258141908e-05, "loss": 0.3807, "step": 17547 }, { "epoch": 0.47987311310435354, "grad_norm": 1.2367949485778809, "learning_rate": 1.1136554297602661e-05, "loss": 0.5119, "step": 17548 }, { "epoch": 0.47990045941807047, "grad_norm": 1.2908276319503784, "learning_rate": 1.1135674328147465e-05, "loss": 0.4931, "step": 17549 }, { "epoch": 0.47992780573178734, "grad_norm": 1.310813307762146, "learning_rate": 1.1134794349783224e-05, "loss": 0.7881, "step": 17550 }, { "epoch": 0.47995515204550426, "grad_norm": 1.3083220720291138, "learning_rate": 1.113391436251684e-05, "loss": 0.5161, "step": 17551 }, { "epoch": 0.4799824983592212, "grad_norm": 1.4615378379821777, "learning_rate": 1.1133034366355217e-05, "loss": 0.492, "step": 17552 }, { "epoch": 0.4800098446729381, "grad_norm": 1.4072855710983276, "learning_rate": 1.1132154361305259e-05, "loss": 0.5188, "step": 17553 }, { "epoch": 0.480037190986655, "grad_norm": 1.180614948272705, "learning_rate": 1.1131274347373868e-05, "loss": 0.5069, "step": 17554 }, { "epoch": 0.4800645373003719, "grad_norm": 1.3107174634933472, "learning_rate": 1.1130394324567946e-05, "loss": 0.505, "step": 17555 }, { "epoch": 0.48009188361408883, "grad_norm": 1.431333303451538, "learning_rate": 1.1129514292894404e-05, "loss": 0.4894, "step": 17556 }, { "epoch": 0.48011922992780576, "grad_norm": 1.3604726791381836, "learning_rate": 1.1128634252360136e-05, "loss": 0.5124, "step": 17557 }, { "epoch": 0.4801465762415226, "grad_norm": 1.3245818614959717, "learning_rate": 1.1127754202972052e-05, "loss": 0.5352, "step": 17558 }, { "epoch": 0.48017392255523955, "grad_norm": 1.114867091178894, "learning_rate": 1.1126874144737052e-05, "loss": 0.4977, "step": 17559 }, { "epoch": 0.4802012688689565, "grad_norm": 1.506279706954956, "learning_rate": 1.112599407766204e-05, "loss": 0.5218, "step": 17560 }, { "epoch": 0.4802286151826734, "grad_norm": 1.4227831363677979, "learning_rate": 1.1125114001753927e-05, "loss": 0.5176, "step": 17561 }, { "epoch": 0.48025596149639027, "grad_norm": 1.3323686122894287, "learning_rate": 1.1124233917019607e-05, "loss": 0.4861, "step": 17562 }, { "epoch": 0.4802833078101072, "grad_norm": 4.243764877319336, "learning_rate": 1.1123353823465988e-05, "loss": 0.8418, "step": 17563 }, { "epoch": 0.4803106541238241, "grad_norm": 1.360692024230957, "learning_rate": 1.1122473721099975e-05, "loss": 0.5175, "step": 17564 }, { "epoch": 0.48033800043754105, "grad_norm": 1.2803428173065186, "learning_rate": 1.1121593609928474e-05, "loss": 0.4742, "step": 17565 }, { "epoch": 0.4803653467512579, "grad_norm": 1.3211064338684082, "learning_rate": 1.1120713489958384e-05, "loss": 0.5102, "step": 17566 }, { "epoch": 0.48039269306497484, "grad_norm": 1.4256658554077148, "learning_rate": 1.1119833361196612e-05, "loss": 0.4329, "step": 17567 }, { "epoch": 0.48042003937869177, "grad_norm": 1.3572971820831299, "learning_rate": 1.1118953223650064e-05, "loss": 0.421, "step": 17568 }, { "epoch": 0.4804473856924087, "grad_norm": 1.5176408290863037, "learning_rate": 1.1118073077325642e-05, "loss": 0.5175, "step": 17569 }, { "epoch": 0.48047473200612556, "grad_norm": 1.3932301998138428, "learning_rate": 1.1117192922230251e-05, "loss": 0.5004, "step": 17570 }, { "epoch": 0.4805020783198425, "grad_norm": 1.112655520439148, "learning_rate": 1.1116312758370795e-05, "loss": 0.5113, "step": 17571 }, { "epoch": 0.4805294246335594, "grad_norm": 1.2058149576187134, "learning_rate": 1.1115432585754182e-05, "loss": 0.4841, "step": 17572 }, { "epoch": 0.4805567709472763, "grad_norm": 1.4184668064117432, "learning_rate": 1.1114552404387313e-05, "loss": 0.5364, "step": 17573 }, { "epoch": 0.4805841172609932, "grad_norm": 1.6169137954711914, "learning_rate": 1.1113672214277096e-05, "loss": 0.5217, "step": 17574 }, { "epoch": 0.48061146357471013, "grad_norm": 1.6840579509735107, "learning_rate": 1.1112792015430431e-05, "loss": 0.5378, "step": 17575 }, { "epoch": 0.48063880988842705, "grad_norm": 1.1230443716049194, "learning_rate": 1.1111911807854226e-05, "loss": 0.5116, "step": 17576 }, { "epoch": 0.4806661562021439, "grad_norm": 1.8906726837158203, "learning_rate": 1.1111031591555388e-05, "loss": 0.5272, "step": 17577 }, { "epoch": 0.48069350251586085, "grad_norm": 1.109531283378601, "learning_rate": 1.111015136654082e-05, "loss": 0.5118, "step": 17578 }, { "epoch": 0.4807208488295778, "grad_norm": 1.204743504524231, "learning_rate": 1.1109271132817425e-05, "loss": 0.7903, "step": 17579 }, { "epoch": 0.4807481951432947, "grad_norm": 1.2915147542953491, "learning_rate": 1.1108390890392111e-05, "loss": 0.5001, "step": 17580 }, { "epoch": 0.48077554145701157, "grad_norm": 1.548445224761963, "learning_rate": 1.1107510639271782e-05, "loss": 0.8015, "step": 17581 }, { "epoch": 0.4808028877707285, "grad_norm": 1.1174386739730835, "learning_rate": 1.1106630379463344e-05, "loss": 0.5193, "step": 17582 }, { "epoch": 0.4808302340844454, "grad_norm": 1.5062024593353271, "learning_rate": 1.1105750110973701e-05, "loss": 0.5257, "step": 17583 }, { "epoch": 0.48085758039816234, "grad_norm": 1.1209474802017212, "learning_rate": 1.1104869833809761e-05, "loss": 0.509, "step": 17584 }, { "epoch": 0.4808849267118792, "grad_norm": 1.4689712524414062, "learning_rate": 1.1103989547978428e-05, "loss": 0.3712, "step": 17585 }, { "epoch": 0.48091227302559614, "grad_norm": 1.2847800254821777, "learning_rate": 1.1103109253486605e-05, "loss": 0.5219, "step": 17586 }, { "epoch": 0.48093961933931306, "grad_norm": 1.2566826343536377, "learning_rate": 1.1102228950341202e-05, "loss": 0.4971, "step": 17587 }, { "epoch": 0.48096696565303, "grad_norm": 1.4450089931488037, "learning_rate": 1.1101348638549128e-05, "loss": 0.4042, "step": 17588 }, { "epoch": 0.48099431196674686, "grad_norm": 1.2387551069259644, "learning_rate": 1.1100468318117276e-05, "loss": 0.8157, "step": 17589 }, { "epoch": 0.4810216582804638, "grad_norm": 1.1332108974456787, "learning_rate": 1.1099587989052565e-05, "loss": 0.3506, "step": 17590 }, { "epoch": 0.4810490045941807, "grad_norm": 1.4885119199752808, "learning_rate": 1.1098707651361892e-05, "loss": 0.5078, "step": 17591 }, { "epoch": 0.48107635090789763, "grad_norm": 1.2858099937438965, "learning_rate": 1.109782730505217e-05, "loss": 0.4966, "step": 17592 }, { "epoch": 0.4811036972216145, "grad_norm": 1.6996372938156128, "learning_rate": 1.10969469501303e-05, "loss": 0.4944, "step": 17593 }, { "epoch": 0.4811310435353314, "grad_norm": 1.2021468877792358, "learning_rate": 1.1096066586603193e-05, "loss": 0.4997, "step": 17594 }, { "epoch": 0.48115838984904835, "grad_norm": 1.4217175245285034, "learning_rate": 1.109518621447775e-05, "loss": 0.5373, "step": 17595 }, { "epoch": 0.4811857361627653, "grad_norm": 1.2586162090301514, "learning_rate": 1.109430583376088e-05, "loss": 0.5146, "step": 17596 }, { "epoch": 0.48121308247648215, "grad_norm": 1.3156315088272095, "learning_rate": 1.1093425444459485e-05, "loss": 0.5363, "step": 17597 }, { "epoch": 0.48124042879019907, "grad_norm": 1.215806245803833, "learning_rate": 1.1092545046580477e-05, "loss": 0.5126, "step": 17598 }, { "epoch": 0.481267775103916, "grad_norm": 1.1647024154663086, "learning_rate": 1.1091664640130761e-05, "loss": 0.4936, "step": 17599 }, { "epoch": 0.4812951214176329, "grad_norm": 2.3186018466949463, "learning_rate": 1.1090784225117244e-05, "loss": 0.4015, "step": 17600 }, { "epoch": 0.4813224677313498, "grad_norm": 1.636046290397644, "learning_rate": 1.108990380154683e-05, "loss": 0.5396, "step": 17601 }, { "epoch": 0.4813498140450667, "grad_norm": 1.711897373199463, "learning_rate": 1.1089023369426428e-05, "loss": 0.5276, "step": 17602 }, { "epoch": 0.48137716035878364, "grad_norm": 1.2837363481521606, "learning_rate": 1.1088142928762944e-05, "loss": 0.5048, "step": 17603 }, { "epoch": 0.48140450667250057, "grad_norm": 1.3608218431472778, "learning_rate": 1.1087262479563288e-05, "loss": 0.5338, "step": 17604 }, { "epoch": 0.48143185298621743, "grad_norm": 1.1683682203292847, "learning_rate": 1.1086382021834362e-05, "loss": 0.5442, "step": 17605 }, { "epoch": 0.48145919929993436, "grad_norm": 1.2445083856582642, "learning_rate": 1.1085501555583071e-05, "loss": 0.4733, "step": 17606 }, { "epoch": 0.4814865456136513, "grad_norm": 1.24565851688385, "learning_rate": 1.1084621080816331e-05, "loss": 0.4835, "step": 17607 }, { "epoch": 0.4815138919273682, "grad_norm": 1.4830448627471924, "learning_rate": 1.1083740597541039e-05, "loss": 0.5115, "step": 17608 }, { "epoch": 0.4815412382410851, "grad_norm": 1.492200255393982, "learning_rate": 1.108286010576411e-05, "loss": 0.3679, "step": 17609 }, { "epoch": 0.481568584554802, "grad_norm": 1.2395063638687134, "learning_rate": 1.108197960549245e-05, "loss": 0.5285, "step": 17610 }, { "epoch": 0.48159593086851893, "grad_norm": 1.2326154708862305, "learning_rate": 1.1081099096732962e-05, "loss": 0.5154, "step": 17611 }, { "epoch": 0.48162327718223585, "grad_norm": 1.111732840538025, "learning_rate": 1.1080218579492556e-05, "loss": 0.5091, "step": 17612 }, { "epoch": 0.4816506234959527, "grad_norm": 1.4644153118133545, "learning_rate": 1.1079338053778142e-05, "loss": 0.5601, "step": 17613 }, { "epoch": 0.48167796980966965, "grad_norm": 1.3689366579055786, "learning_rate": 1.107845751959662e-05, "loss": 0.3693, "step": 17614 }, { "epoch": 0.4817053161233866, "grad_norm": 1.28171706199646, "learning_rate": 1.1077576976954908e-05, "loss": 0.5256, "step": 17615 }, { "epoch": 0.4817326624371035, "grad_norm": 2.047926664352417, "learning_rate": 1.1076696425859901e-05, "loss": 0.5175, "step": 17616 }, { "epoch": 0.48176000875082037, "grad_norm": 1.5496289730072021, "learning_rate": 1.1075815866318518e-05, "loss": 0.3567, "step": 17617 }, { "epoch": 0.4817873550645373, "grad_norm": 1.3809831142425537, "learning_rate": 1.107493529833766e-05, "loss": 0.4146, "step": 17618 }, { "epoch": 0.4818147013782542, "grad_norm": 1.359860897064209, "learning_rate": 1.1074054721924238e-05, "loss": 0.5328, "step": 17619 }, { "epoch": 0.48184204769197114, "grad_norm": 1.2837982177734375, "learning_rate": 1.1073174137085159e-05, "loss": 0.529, "step": 17620 }, { "epoch": 0.481869394005688, "grad_norm": 1.2720075845718384, "learning_rate": 1.1072293543827332e-05, "loss": 0.4959, "step": 17621 }, { "epoch": 0.48189674031940494, "grad_norm": 1.3821032047271729, "learning_rate": 1.1071412942157663e-05, "loss": 0.5363, "step": 17622 }, { "epoch": 0.48192408663312186, "grad_norm": 1.4288227558135986, "learning_rate": 1.1070532332083061e-05, "loss": 0.8028, "step": 17623 }, { "epoch": 0.4819514329468388, "grad_norm": 1.2401131391525269, "learning_rate": 1.1069651713610434e-05, "loss": 0.4985, "step": 17624 }, { "epoch": 0.48197877926055566, "grad_norm": 1.1838852167129517, "learning_rate": 1.1068771086746691e-05, "loss": 0.5095, "step": 17625 }, { "epoch": 0.4820061255742726, "grad_norm": 1.1855908632278442, "learning_rate": 1.106789045149874e-05, "loss": 0.5138, "step": 17626 }, { "epoch": 0.4820334718879895, "grad_norm": 1.1305431127548218, "learning_rate": 1.1067009807873485e-05, "loss": 0.4924, "step": 17627 }, { "epoch": 0.48206081820170643, "grad_norm": 1.3389434814453125, "learning_rate": 1.1066129155877845e-05, "loss": 0.5127, "step": 17628 }, { "epoch": 0.4820881645154233, "grad_norm": 1.1806150674819946, "learning_rate": 1.106524849551872e-05, "loss": 0.4878, "step": 17629 }, { "epoch": 0.4821155108291402, "grad_norm": 1.1096938848495483, "learning_rate": 1.1064367826803017e-05, "loss": 0.5206, "step": 17630 }, { "epoch": 0.48214285714285715, "grad_norm": 1.3424028158187866, "learning_rate": 1.1063487149737649e-05, "loss": 0.5344, "step": 17631 }, { "epoch": 0.4821702034565741, "grad_norm": 1.081749439239502, "learning_rate": 1.1062606464329524e-05, "loss": 0.5136, "step": 17632 }, { "epoch": 0.48219754977029095, "grad_norm": 1.1913585662841797, "learning_rate": 1.106172577058555e-05, "loss": 0.5062, "step": 17633 }, { "epoch": 0.48222489608400787, "grad_norm": 1.3042738437652588, "learning_rate": 1.1060845068512638e-05, "loss": 0.4873, "step": 17634 }, { "epoch": 0.4822522423977248, "grad_norm": 1.1481621265411377, "learning_rate": 1.1059964358117692e-05, "loss": 0.5049, "step": 17635 }, { "epoch": 0.4822795887114417, "grad_norm": 1.204984426498413, "learning_rate": 1.1059083639407624e-05, "loss": 0.5161, "step": 17636 }, { "epoch": 0.4823069350251586, "grad_norm": 1.2401303052902222, "learning_rate": 1.1058202912389346e-05, "loss": 0.5005, "step": 17637 }, { "epoch": 0.4823342813388755, "grad_norm": 1.400421380996704, "learning_rate": 1.1057322177069763e-05, "loss": 0.3964, "step": 17638 }, { "epoch": 0.48236162765259244, "grad_norm": 1.3600594997406006, "learning_rate": 1.1056441433455784e-05, "loss": 0.4931, "step": 17639 }, { "epoch": 0.48238897396630936, "grad_norm": 2.199561595916748, "learning_rate": 1.1055560681554321e-05, "loss": 0.4856, "step": 17640 }, { "epoch": 0.48241632028002623, "grad_norm": 1.8040480613708496, "learning_rate": 1.1054679921372277e-05, "loss": 0.367, "step": 17641 }, { "epoch": 0.48244366659374316, "grad_norm": 1.703635811805725, "learning_rate": 1.105379915291657e-05, "loss": 0.5149, "step": 17642 }, { "epoch": 0.4824710129074601, "grad_norm": 1.2956784963607788, "learning_rate": 1.1052918376194105e-05, "loss": 0.5133, "step": 17643 }, { "epoch": 0.482498359221177, "grad_norm": 1.5564576387405396, "learning_rate": 1.1052037591211789e-05, "loss": 0.5239, "step": 17644 }, { "epoch": 0.4825257055348939, "grad_norm": 1.661187767982483, "learning_rate": 1.105115679797654e-05, "loss": 0.3975, "step": 17645 }, { "epoch": 0.4825530518486108, "grad_norm": 1.43852961063385, "learning_rate": 1.1050275996495255e-05, "loss": 0.5149, "step": 17646 }, { "epoch": 0.48258039816232773, "grad_norm": 1.154710292816162, "learning_rate": 1.1049395186774854e-05, "loss": 0.5455, "step": 17647 }, { "epoch": 0.48260774447604465, "grad_norm": 1.4710980653762817, "learning_rate": 1.1048514368822243e-05, "loss": 0.4179, "step": 17648 }, { "epoch": 0.4826350907897615, "grad_norm": 1.833680272102356, "learning_rate": 1.1047633542644332e-05, "loss": 0.3732, "step": 17649 }, { "epoch": 0.48266243710347845, "grad_norm": 1.4211534261703491, "learning_rate": 1.104675270824803e-05, "loss": 0.5215, "step": 17650 }, { "epoch": 0.4826897834171954, "grad_norm": 1.4509055614471436, "learning_rate": 1.1045871865640248e-05, "loss": 0.3933, "step": 17651 }, { "epoch": 0.4827171297309123, "grad_norm": 1.9907881021499634, "learning_rate": 1.1044991014827893e-05, "loss": 0.5016, "step": 17652 }, { "epoch": 0.48274447604462917, "grad_norm": 1.3201749324798584, "learning_rate": 1.104411015581788e-05, "loss": 0.5113, "step": 17653 }, { "epoch": 0.4827718223583461, "grad_norm": 1.3002909421920776, "learning_rate": 1.1043229288617114e-05, "loss": 0.4932, "step": 17654 }, { "epoch": 0.482799168672063, "grad_norm": 1.3863201141357422, "learning_rate": 1.1042348413232512e-05, "loss": 0.4923, "step": 17655 }, { "epoch": 0.48282651498577994, "grad_norm": 5.144495964050293, "learning_rate": 1.1041467529670977e-05, "loss": 0.4076, "step": 17656 }, { "epoch": 0.4828538612994968, "grad_norm": 1.1282306909561157, "learning_rate": 1.1040586637939423e-05, "loss": 0.5117, "step": 17657 }, { "epoch": 0.48288120761321374, "grad_norm": 1.3331037759780884, "learning_rate": 1.1039705738044761e-05, "loss": 0.5374, "step": 17658 }, { "epoch": 0.48290855392693066, "grad_norm": 1.4788392782211304, "learning_rate": 1.10388248299939e-05, "loss": 0.4273, "step": 17659 }, { "epoch": 0.4829359002406476, "grad_norm": 1.5617023706436157, "learning_rate": 1.1037943913793747e-05, "loss": 0.5407, "step": 17660 }, { "epoch": 0.48296324655436446, "grad_norm": 1.2974756956100464, "learning_rate": 1.1037062989451221e-05, "loss": 0.5014, "step": 17661 }, { "epoch": 0.4829905928680814, "grad_norm": 1.3166414499282837, "learning_rate": 1.1036182056973224e-05, "loss": 0.8375, "step": 17662 }, { "epoch": 0.4830179391817983, "grad_norm": 1.373599648475647, "learning_rate": 1.103530111636667e-05, "loss": 0.533, "step": 17663 }, { "epoch": 0.48304528549551523, "grad_norm": 1.2934696674346924, "learning_rate": 1.1034420167638476e-05, "loss": 0.4925, "step": 17664 }, { "epoch": 0.4830726318092321, "grad_norm": 1.4121538400650024, "learning_rate": 1.1033539210795544e-05, "loss": 0.4297, "step": 17665 }, { "epoch": 0.483099978122949, "grad_norm": 1.2170664072036743, "learning_rate": 1.1032658245844784e-05, "loss": 0.4999, "step": 17666 }, { "epoch": 0.48312732443666595, "grad_norm": 1.6631070375442505, "learning_rate": 1.1031777272793114e-05, "loss": 0.5174, "step": 17667 }, { "epoch": 0.4831546707503829, "grad_norm": 1.4900785684585571, "learning_rate": 1.1030896291647442e-05, "loss": 0.4492, "step": 17668 }, { "epoch": 0.48318201706409974, "grad_norm": 1.2380545139312744, "learning_rate": 1.1030015302414678e-05, "loss": 0.5025, "step": 17669 }, { "epoch": 0.48320936337781667, "grad_norm": 1.2379393577575684, "learning_rate": 1.1029134305101732e-05, "loss": 0.5285, "step": 17670 }, { "epoch": 0.4832367096915336, "grad_norm": 1.346687912940979, "learning_rate": 1.1028253299715518e-05, "loss": 0.5207, "step": 17671 }, { "epoch": 0.4832640560052505, "grad_norm": 1.0797680616378784, "learning_rate": 1.1027372286262947e-05, "loss": 0.4966, "step": 17672 }, { "epoch": 0.4832914023189674, "grad_norm": 2.3934638500213623, "learning_rate": 1.102649126475093e-05, "loss": 0.7981, "step": 17673 }, { "epoch": 0.4833187486326843, "grad_norm": 1.7813642024993896, "learning_rate": 1.1025610235186374e-05, "loss": 0.538, "step": 17674 }, { "epoch": 0.48334609494640124, "grad_norm": 1.4972389936447144, "learning_rate": 1.10247291975762e-05, "loss": 0.5367, "step": 17675 }, { "epoch": 0.4833734412601181, "grad_norm": 1.1563326120376587, "learning_rate": 1.102384815192731e-05, "loss": 0.4897, "step": 17676 }, { "epoch": 0.48340078757383503, "grad_norm": 1.5184226036071777, "learning_rate": 1.1022967098246622e-05, "loss": 0.3468, "step": 17677 }, { "epoch": 0.48342813388755196, "grad_norm": 1.4699110984802246, "learning_rate": 1.1022086036541044e-05, "loss": 0.5154, "step": 17678 }, { "epoch": 0.4834554802012689, "grad_norm": 1.2399297952651978, "learning_rate": 1.1021204966817487e-05, "loss": 0.5239, "step": 17679 }, { "epoch": 0.48348282651498575, "grad_norm": 1.6320832967758179, "learning_rate": 1.1020323889082866e-05, "loss": 0.4986, "step": 17680 }, { "epoch": 0.4835101728287027, "grad_norm": 1.2404793500900269, "learning_rate": 1.1019442803344092e-05, "loss": 0.5142, "step": 17681 }, { "epoch": 0.4835375191424196, "grad_norm": 1.3422019481658936, "learning_rate": 1.1018561709608075e-05, "loss": 0.534, "step": 17682 }, { "epoch": 0.48356486545613653, "grad_norm": 1.3084570169448853, "learning_rate": 1.1017680607881731e-05, "loss": 0.3824, "step": 17683 }, { "epoch": 0.4835922117698534, "grad_norm": 1.6125394105911255, "learning_rate": 1.1016799498171964e-05, "loss": 0.5117, "step": 17684 }, { "epoch": 0.4836195580835703, "grad_norm": 1.7633448839187622, "learning_rate": 1.1015918380485692e-05, "loss": 0.3948, "step": 17685 }, { "epoch": 0.48364690439728725, "grad_norm": 1.7741798162460327, "learning_rate": 1.101503725482983e-05, "loss": 0.8208, "step": 17686 }, { "epoch": 0.48367425071100417, "grad_norm": 3.2121403217315674, "learning_rate": 1.1014156121211284e-05, "loss": 0.4848, "step": 17687 }, { "epoch": 0.48370159702472104, "grad_norm": 1.2251352071762085, "learning_rate": 1.1013274979636967e-05, "loss": 0.5021, "step": 17688 }, { "epoch": 0.48372894333843797, "grad_norm": 1.3755545616149902, "learning_rate": 1.1012393830113795e-05, "loss": 0.5204, "step": 17689 }, { "epoch": 0.4837562896521549, "grad_norm": 1.4307780265808105, "learning_rate": 1.1011512672648676e-05, "loss": 0.5427, "step": 17690 }, { "epoch": 0.4837836359658718, "grad_norm": 1.2883822917938232, "learning_rate": 1.1010631507248529e-05, "loss": 0.5367, "step": 17691 }, { "epoch": 0.4838109822795887, "grad_norm": 1.4132462739944458, "learning_rate": 1.100975033392026e-05, "loss": 0.4187, "step": 17692 }, { "epoch": 0.4838383285933056, "grad_norm": 1.2350859642028809, "learning_rate": 1.1008869152670783e-05, "loss": 0.5016, "step": 17693 }, { "epoch": 0.48386567490702254, "grad_norm": 1.4927276372909546, "learning_rate": 1.1007987963507013e-05, "loss": 0.383, "step": 17694 }, { "epoch": 0.48389302122073946, "grad_norm": 1.207951307296753, "learning_rate": 1.1007106766435858e-05, "loss": 0.4881, "step": 17695 }, { "epoch": 0.48392036753445633, "grad_norm": 1.3582172393798828, "learning_rate": 1.1006225561464237e-05, "loss": 0.4957, "step": 17696 }, { "epoch": 0.48394771384817326, "grad_norm": 1.4291815757751465, "learning_rate": 1.1005344348599059e-05, "loss": 0.5036, "step": 17697 }, { "epoch": 0.4839750601618902, "grad_norm": 1.398928165435791, "learning_rate": 1.1004463127847236e-05, "loss": 0.5377, "step": 17698 }, { "epoch": 0.4840024064756071, "grad_norm": 1.5868645906448364, "learning_rate": 1.1003581899215683e-05, "loss": 0.3905, "step": 17699 }, { "epoch": 0.484029752789324, "grad_norm": 1.4716007709503174, "learning_rate": 1.1002700662711314e-05, "loss": 0.4715, "step": 17700 }, { "epoch": 0.4840570991030409, "grad_norm": 1.2342393398284912, "learning_rate": 1.100181941834104e-05, "loss": 0.7855, "step": 17701 }, { "epoch": 0.4840844454167578, "grad_norm": 1.4364385604858398, "learning_rate": 1.1000938166111774e-05, "loss": 0.7564, "step": 17702 }, { "epoch": 0.48411179173047475, "grad_norm": 1.3083688020706177, "learning_rate": 1.100005690603043e-05, "loss": 0.4952, "step": 17703 }, { "epoch": 0.4841391380441916, "grad_norm": 1.4436098337173462, "learning_rate": 1.0999175638103922e-05, "loss": 0.4115, "step": 17704 }, { "epoch": 0.48416648435790854, "grad_norm": 1.226613998413086, "learning_rate": 1.099829436233916e-05, "loss": 0.4819, "step": 17705 }, { "epoch": 0.48419383067162547, "grad_norm": 1.1914528608322144, "learning_rate": 1.099741307874306e-05, "loss": 0.8138, "step": 17706 }, { "epoch": 0.4842211769853424, "grad_norm": 1.6745527982711792, "learning_rate": 1.0996531787322537e-05, "loss": 0.5295, "step": 17707 }, { "epoch": 0.48424852329905926, "grad_norm": 1.2228925228118896, "learning_rate": 1.0995650488084501e-05, "loss": 0.5016, "step": 17708 }, { "epoch": 0.4842758696127762, "grad_norm": 1.7006686925888062, "learning_rate": 1.0994769181035868e-05, "loss": 0.4475, "step": 17709 }, { "epoch": 0.4843032159264931, "grad_norm": 1.3273115158081055, "learning_rate": 1.099388786618355e-05, "loss": 0.8214, "step": 17710 }, { "epoch": 0.48433056224021004, "grad_norm": 1.1554614305496216, "learning_rate": 1.0993006543534462e-05, "loss": 0.8428, "step": 17711 }, { "epoch": 0.4843579085539269, "grad_norm": 1.394693374633789, "learning_rate": 1.0992125213095517e-05, "loss": 0.5239, "step": 17712 }, { "epoch": 0.48438525486764383, "grad_norm": 1.2831298112869263, "learning_rate": 1.099124387487363e-05, "loss": 0.4975, "step": 17713 }, { "epoch": 0.48441260118136076, "grad_norm": 1.265670895576477, "learning_rate": 1.0990362528875709e-05, "loss": 0.4988, "step": 17714 }, { "epoch": 0.4844399474950777, "grad_norm": 1.3551076650619507, "learning_rate": 1.0989481175108678e-05, "loss": 0.5474, "step": 17715 }, { "epoch": 0.48446729380879455, "grad_norm": 1.2231285572052002, "learning_rate": 1.0988599813579444e-05, "loss": 0.5136, "step": 17716 }, { "epoch": 0.4844946401225115, "grad_norm": 1.5075989961624146, "learning_rate": 1.0987718444294924e-05, "loss": 0.4928, "step": 17717 }, { "epoch": 0.4845219864362284, "grad_norm": 1.2925469875335693, "learning_rate": 1.098683706726203e-05, "loss": 0.4113, "step": 17718 }, { "epoch": 0.4845493327499453, "grad_norm": 1.352234125137329, "learning_rate": 1.0985955682487674e-05, "loss": 0.8097, "step": 17719 }, { "epoch": 0.4845766790636622, "grad_norm": 1.471981406211853, "learning_rate": 1.0985074289978777e-05, "loss": 0.3914, "step": 17720 }, { "epoch": 0.4846040253773791, "grad_norm": 1.185085415840149, "learning_rate": 1.0984192889742246e-05, "loss": 0.498, "step": 17721 }, { "epoch": 0.48463137169109605, "grad_norm": 1.2878321409225464, "learning_rate": 1.0983311481785002e-05, "loss": 0.5187, "step": 17722 }, { "epoch": 0.48465871800481297, "grad_norm": 3.447397232055664, "learning_rate": 1.0982430066113956e-05, "loss": 0.3628, "step": 17723 }, { "epoch": 0.48468606431852984, "grad_norm": 1.3668491840362549, "learning_rate": 1.098154864273602e-05, "loss": 0.4869, "step": 17724 }, { "epoch": 0.48471341063224677, "grad_norm": 1.1102181673049927, "learning_rate": 1.0980667211658112e-05, "loss": 0.48, "step": 17725 }, { "epoch": 0.4847407569459637, "grad_norm": 1.3096884489059448, "learning_rate": 1.0979785772887146e-05, "loss": 0.5172, "step": 17726 }, { "epoch": 0.4847681032596806, "grad_norm": 1.1391276121139526, "learning_rate": 1.0978904326430035e-05, "loss": 0.5048, "step": 17727 }, { "epoch": 0.4847954495733975, "grad_norm": 1.260656714439392, "learning_rate": 1.0978022872293697e-05, "loss": 0.5108, "step": 17728 }, { "epoch": 0.4848227958871144, "grad_norm": 1.3751510381698608, "learning_rate": 1.0977141410485045e-05, "loss": 0.4974, "step": 17729 }, { "epoch": 0.48485014220083134, "grad_norm": 1.3911553621292114, "learning_rate": 1.0976259941010993e-05, "loss": 0.5178, "step": 17730 }, { "epoch": 0.48487748851454826, "grad_norm": 1.2409437894821167, "learning_rate": 1.0975378463878457e-05, "loss": 0.349, "step": 17731 }, { "epoch": 0.48490483482826513, "grad_norm": 1.9535049200057983, "learning_rate": 1.0974496979094351e-05, "loss": 0.3983, "step": 17732 }, { "epoch": 0.48493218114198205, "grad_norm": 1.943543791770935, "learning_rate": 1.0973615486665589e-05, "loss": 0.5206, "step": 17733 }, { "epoch": 0.484959527455699, "grad_norm": 1.3287070989608765, "learning_rate": 1.097273398659909e-05, "loss": 0.5115, "step": 17734 }, { "epoch": 0.4849868737694159, "grad_norm": 1.1924902200698853, "learning_rate": 1.0971852478901767e-05, "loss": 0.5108, "step": 17735 }, { "epoch": 0.4850142200831328, "grad_norm": 1.3959639072418213, "learning_rate": 1.0970970963580532e-05, "loss": 0.521, "step": 17736 }, { "epoch": 0.4850415663968497, "grad_norm": 1.3229924440383911, "learning_rate": 1.0970089440642306e-05, "loss": 0.5005, "step": 17737 }, { "epoch": 0.4850689127105666, "grad_norm": 1.3755264282226562, "learning_rate": 1.0969207910094e-05, "loss": 0.5063, "step": 17738 }, { "epoch": 0.48509625902428355, "grad_norm": 1.2300175428390503, "learning_rate": 1.0968326371942529e-05, "loss": 0.4734, "step": 17739 }, { "epoch": 0.4851236053380004, "grad_norm": 1.3814958333969116, "learning_rate": 1.0967444826194813e-05, "loss": 0.3756, "step": 17740 }, { "epoch": 0.48515095165171734, "grad_norm": 3.1957783699035645, "learning_rate": 1.0966563272857762e-05, "loss": 0.4932, "step": 17741 }, { "epoch": 0.48517829796543427, "grad_norm": 1.221548080444336, "learning_rate": 1.0965681711938296e-05, "loss": 0.4931, "step": 17742 }, { "epoch": 0.4852056442791512, "grad_norm": 1.3439985513687134, "learning_rate": 1.0964800143443328e-05, "loss": 0.5219, "step": 17743 }, { "epoch": 0.48523299059286806, "grad_norm": 1.0418041944503784, "learning_rate": 1.0963918567379773e-05, "loss": 0.4884, "step": 17744 }, { "epoch": 0.485260336906585, "grad_norm": 1.4904800653457642, "learning_rate": 1.0963036983754551e-05, "loss": 0.4383, "step": 17745 }, { "epoch": 0.4852876832203019, "grad_norm": 1.3170565366744995, "learning_rate": 1.0962155392574572e-05, "loss": 0.8182, "step": 17746 }, { "epoch": 0.48531502953401884, "grad_norm": 1.3191429376602173, "learning_rate": 1.0961273793846758e-05, "loss": 0.5037, "step": 17747 }, { "epoch": 0.4853423758477357, "grad_norm": 2.35886812210083, "learning_rate": 1.096039218757802e-05, "loss": 0.3727, "step": 17748 }, { "epoch": 0.48536972216145263, "grad_norm": 1.4824484586715698, "learning_rate": 1.0959510573775273e-05, "loss": 0.5443, "step": 17749 }, { "epoch": 0.48539706847516956, "grad_norm": 1.551221489906311, "learning_rate": 1.0958628952445439e-05, "loss": 0.4897, "step": 17750 }, { "epoch": 0.4854244147888865, "grad_norm": 1.548114538192749, "learning_rate": 1.095774732359543e-05, "loss": 0.5422, "step": 17751 }, { "epoch": 0.48545176110260335, "grad_norm": 1.2456183433532715, "learning_rate": 1.0956865687232163e-05, "loss": 0.5023, "step": 17752 }, { "epoch": 0.4854791074163203, "grad_norm": 1.5525243282318115, "learning_rate": 1.0955984043362553e-05, "loss": 0.4935, "step": 17753 }, { "epoch": 0.4855064537300372, "grad_norm": 1.334426999092102, "learning_rate": 1.0955102391993517e-05, "loss": 0.8049, "step": 17754 }, { "epoch": 0.4855338000437541, "grad_norm": 1.080546259880066, "learning_rate": 1.095422073313197e-05, "loss": 0.5276, "step": 17755 }, { "epoch": 0.485561146357471, "grad_norm": 1.3388426303863525, "learning_rate": 1.0953339066784833e-05, "loss": 0.5191, "step": 17756 }, { "epoch": 0.4855884926711879, "grad_norm": 1.3484197854995728, "learning_rate": 1.095245739295902e-05, "loss": 0.5092, "step": 17757 }, { "epoch": 0.48561583898490485, "grad_norm": 1.453372836112976, "learning_rate": 1.0951575711661446e-05, "loss": 0.529, "step": 17758 }, { "epoch": 0.48564318529862177, "grad_norm": 1.3137073516845703, "learning_rate": 1.0950694022899026e-05, "loss": 0.4837, "step": 17759 }, { "epoch": 0.48567053161233864, "grad_norm": 1.2933887243270874, "learning_rate": 1.0949812326678679e-05, "loss": 0.4836, "step": 17760 }, { "epoch": 0.48569787792605557, "grad_norm": 1.2736148834228516, "learning_rate": 1.0948930623007325e-05, "loss": 0.7696, "step": 17761 }, { "epoch": 0.4857252242397725, "grad_norm": 1.274512529373169, "learning_rate": 1.0948048911891873e-05, "loss": 0.7948, "step": 17762 }, { "epoch": 0.4857525705534894, "grad_norm": 1.25362229347229, "learning_rate": 1.0947167193339245e-05, "loss": 0.5098, "step": 17763 }, { "epoch": 0.4857799168672063, "grad_norm": 1.1144682168960571, "learning_rate": 1.094628546735636e-05, "loss": 0.4975, "step": 17764 }, { "epoch": 0.4858072631809232, "grad_norm": 1.2227787971496582, "learning_rate": 1.094540373395013e-05, "loss": 0.4971, "step": 17765 }, { "epoch": 0.48583460949464014, "grad_norm": 1.1779699325561523, "learning_rate": 1.0944521993127475e-05, "loss": 0.5159, "step": 17766 }, { "epoch": 0.48586195580835706, "grad_norm": 1.3364821672439575, "learning_rate": 1.0943640244895308e-05, "loss": 0.5128, "step": 17767 }, { "epoch": 0.48588930212207393, "grad_norm": 1.3127403259277344, "learning_rate": 1.094275848926055e-05, "loss": 0.4859, "step": 17768 }, { "epoch": 0.48591664843579085, "grad_norm": 1.2678824663162231, "learning_rate": 1.0941876726230118e-05, "loss": 0.5167, "step": 17769 }, { "epoch": 0.4859439947495078, "grad_norm": 1.932651162147522, "learning_rate": 1.094099495581093e-05, "loss": 0.5153, "step": 17770 }, { "epoch": 0.4859713410632247, "grad_norm": 1.34846031665802, "learning_rate": 1.0940113178009897e-05, "loss": 0.8221, "step": 17771 }, { "epoch": 0.4859986873769416, "grad_norm": 1.3078131675720215, "learning_rate": 1.0939231392833945e-05, "loss": 0.5058, "step": 17772 }, { "epoch": 0.4860260336906585, "grad_norm": 1.5053449869155884, "learning_rate": 1.0938349600289984e-05, "loss": 0.5148, "step": 17773 }, { "epoch": 0.4860533800043754, "grad_norm": 1.2077044248580933, "learning_rate": 1.0937467800384933e-05, "loss": 0.5198, "step": 17774 }, { "epoch": 0.48608072631809235, "grad_norm": 1.36697256565094, "learning_rate": 1.0936585993125714e-05, "loss": 0.5538, "step": 17775 }, { "epoch": 0.4861080726318092, "grad_norm": 1.5887681245803833, "learning_rate": 1.093570417851924e-05, "loss": 0.3888, "step": 17776 }, { "epoch": 0.48613541894552614, "grad_norm": 1.2022188901901245, "learning_rate": 1.0934822356572432e-05, "loss": 0.5246, "step": 17777 }, { "epoch": 0.48616276525924307, "grad_norm": 1.3150578737258911, "learning_rate": 1.0933940527292204e-05, "loss": 0.5058, "step": 17778 }, { "epoch": 0.48619011157295994, "grad_norm": 1.202907919883728, "learning_rate": 1.0933058690685474e-05, "loss": 0.5622, "step": 17779 }, { "epoch": 0.48621745788667686, "grad_norm": 1.8437858819961548, "learning_rate": 1.0932176846759164e-05, "loss": 0.5129, "step": 17780 }, { "epoch": 0.4862448042003938, "grad_norm": 1.3242384195327759, "learning_rate": 1.0931294995520188e-05, "loss": 0.5296, "step": 17781 }, { "epoch": 0.4862721505141107, "grad_norm": 1.5029733180999756, "learning_rate": 1.0930413136975467e-05, "loss": 0.4222, "step": 17782 }, { "epoch": 0.4862994968278276, "grad_norm": 1.2443374395370483, "learning_rate": 1.0929531271131912e-05, "loss": 0.5403, "step": 17783 }, { "epoch": 0.4863268431415445, "grad_norm": 1.2326805591583252, "learning_rate": 1.0928649397996446e-05, "loss": 0.5091, "step": 17784 }, { "epoch": 0.48635418945526143, "grad_norm": 1.6255521774291992, "learning_rate": 1.0927767517575991e-05, "loss": 0.5462, "step": 17785 }, { "epoch": 0.48638153576897836, "grad_norm": 1.4712083339691162, "learning_rate": 1.092688562987746e-05, "loss": 0.5096, "step": 17786 }, { "epoch": 0.4864088820826952, "grad_norm": 1.270477294921875, "learning_rate": 1.0926003734907769e-05, "loss": 0.4998, "step": 17787 }, { "epoch": 0.48643622839641215, "grad_norm": 1.4710776805877686, "learning_rate": 1.0925121832673844e-05, "loss": 0.5479, "step": 17788 }, { "epoch": 0.4864635747101291, "grad_norm": 1.1743052005767822, "learning_rate": 1.0924239923182596e-05, "loss": 0.5047, "step": 17789 }, { "epoch": 0.486490921023846, "grad_norm": 1.3938016891479492, "learning_rate": 1.0923358006440945e-05, "loss": 0.4858, "step": 17790 }, { "epoch": 0.48651826733756287, "grad_norm": 1.2238813638687134, "learning_rate": 1.092247608245581e-05, "loss": 0.4931, "step": 17791 }, { "epoch": 0.4865456136512798, "grad_norm": 1.2196018695831299, "learning_rate": 1.0921594151234112e-05, "loss": 0.4985, "step": 17792 }, { "epoch": 0.4865729599649967, "grad_norm": 1.4762898683547974, "learning_rate": 1.0920712212782765e-05, "loss": 0.4246, "step": 17793 }, { "epoch": 0.48660030627871365, "grad_norm": 1.3577337265014648, "learning_rate": 1.0919830267108693e-05, "loss": 0.5085, "step": 17794 }, { "epoch": 0.4866276525924305, "grad_norm": 1.252495288848877, "learning_rate": 1.0918948314218808e-05, "loss": 0.5357, "step": 17795 }, { "epoch": 0.48665499890614744, "grad_norm": 1.2627493143081665, "learning_rate": 1.0918066354120033e-05, "loss": 0.5091, "step": 17796 }, { "epoch": 0.48668234521986437, "grad_norm": 1.407196283340454, "learning_rate": 1.0917184386819287e-05, "loss": 0.5107, "step": 17797 }, { "epoch": 0.4867096915335813, "grad_norm": 1.3988548517227173, "learning_rate": 1.0916302412323485e-05, "loss": 0.5031, "step": 17798 }, { "epoch": 0.48673703784729816, "grad_norm": 1.5603891611099243, "learning_rate": 1.0915420430639552e-05, "loss": 0.4897, "step": 17799 }, { "epoch": 0.4867643841610151, "grad_norm": 1.5939642190933228, "learning_rate": 1.09145384417744e-05, "loss": 0.5028, "step": 17800 }, { "epoch": 0.486791730474732, "grad_norm": 1.7381614446640015, "learning_rate": 1.0913656445734954e-05, "loss": 0.4906, "step": 17801 }, { "epoch": 0.48681907678844893, "grad_norm": 1.2581454515457153, "learning_rate": 1.0912774442528129e-05, "loss": 0.5263, "step": 17802 }, { "epoch": 0.4868464231021658, "grad_norm": 1.057243824005127, "learning_rate": 1.0911892432160844e-05, "loss": 0.4781, "step": 17803 }, { "epoch": 0.48687376941588273, "grad_norm": 1.4941364526748657, "learning_rate": 1.0911010414640024e-05, "loss": 0.43, "step": 17804 }, { "epoch": 0.48690111572959965, "grad_norm": 1.146457314491272, "learning_rate": 1.091012838997258e-05, "loss": 0.4868, "step": 17805 }, { "epoch": 0.4869284620433166, "grad_norm": 1.812194585800171, "learning_rate": 1.0909246358165435e-05, "loss": 0.4303, "step": 17806 }, { "epoch": 0.48695580835703345, "grad_norm": 1.158345103263855, "learning_rate": 1.090836431922551e-05, "loss": 0.5038, "step": 17807 }, { "epoch": 0.4869831546707504, "grad_norm": 1.4554420709609985, "learning_rate": 1.0907482273159722e-05, "loss": 0.5087, "step": 17808 }, { "epoch": 0.4870105009844673, "grad_norm": 1.215868592262268, "learning_rate": 1.090660021997499e-05, "loss": 0.5129, "step": 17809 }, { "epoch": 0.4870378472981842, "grad_norm": 1.3652671575546265, "learning_rate": 1.0905718159678235e-05, "loss": 0.4958, "step": 17810 }, { "epoch": 0.4870651936119011, "grad_norm": 1.2794930934906006, "learning_rate": 1.0904836092276374e-05, "loss": 0.4691, "step": 17811 }, { "epoch": 0.487092539925618, "grad_norm": 1.3517746925354004, "learning_rate": 1.0903954017776333e-05, "loss": 0.5121, "step": 17812 }, { "epoch": 0.48711988623933494, "grad_norm": 1.145340085029602, "learning_rate": 1.0903071936185023e-05, "loss": 0.4773, "step": 17813 }, { "epoch": 0.48714723255305187, "grad_norm": 1.3598116636276245, "learning_rate": 1.0902189847509368e-05, "loss": 0.5047, "step": 17814 }, { "epoch": 0.48717457886676874, "grad_norm": 1.2756905555725098, "learning_rate": 1.0901307751756289e-05, "loss": 0.8079, "step": 17815 }, { "epoch": 0.48720192518048566, "grad_norm": 1.5928622484207153, "learning_rate": 1.0900425648932705e-05, "loss": 0.5416, "step": 17816 }, { "epoch": 0.4872292714942026, "grad_norm": 1.0498100519180298, "learning_rate": 1.0899543539045532e-05, "loss": 0.4938, "step": 17817 }, { "epoch": 0.4872566178079195, "grad_norm": 1.1548752784729004, "learning_rate": 1.0898661422101698e-05, "loss": 0.5006, "step": 17818 }, { "epoch": 0.4872839641216364, "grad_norm": 1.7098605632781982, "learning_rate": 1.0897779298108113e-05, "loss": 0.3733, "step": 17819 }, { "epoch": 0.4873113104353533, "grad_norm": 1.6091152429580688, "learning_rate": 1.0896897167071707e-05, "loss": 0.4237, "step": 17820 }, { "epoch": 0.48733865674907023, "grad_norm": 3.2456071376800537, "learning_rate": 1.0896015028999392e-05, "loss": 0.5039, "step": 17821 }, { "epoch": 0.48736600306278716, "grad_norm": 1.3527421951293945, "learning_rate": 1.089513288389809e-05, "loss": 0.5185, "step": 17822 }, { "epoch": 0.487393349376504, "grad_norm": 1.2247971296310425, "learning_rate": 1.0894250731774728e-05, "loss": 0.5212, "step": 17823 }, { "epoch": 0.48742069569022095, "grad_norm": 1.4392646551132202, "learning_rate": 1.0893368572636216e-05, "loss": 0.5133, "step": 17824 }, { "epoch": 0.4874480420039379, "grad_norm": 1.8820521831512451, "learning_rate": 1.089248640648948e-05, "loss": 0.3895, "step": 17825 }, { "epoch": 0.4874753883176548, "grad_norm": 1.1756056547164917, "learning_rate": 1.089160423334144e-05, "loss": 0.4916, "step": 17826 }, { "epoch": 0.48750273463137167, "grad_norm": 1.3168171644210815, "learning_rate": 1.0890722053199015e-05, "loss": 0.5015, "step": 17827 }, { "epoch": 0.4875300809450886, "grad_norm": 1.3690348863601685, "learning_rate": 1.0889839866069124e-05, "loss": 0.493, "step": 17828 }, { "epoch": 0.4875574272588055, "grad_norm": 1.6826883554458618, "learning_rate": 1.0888957671958694e-05, "loss": 0.5168, "step": 17829 }, { "epoch": 0.48758477357252245, "grad_norm": 1.214042067527771, "learning_rate": 1.0888075470874638e-05, "loss": 0.5286, "step": 17830 }, { "epoch": 0.4876121198862393, "grad_norm": 1.720765471458435, "learning_rate": 1.088719326282388e-05, "loss": 0.4949, "step": 17831 }, { "epoch": 0.48763946619995624, "grad_norm": 1.5722761154174805, "learning_rate": 1.088631104781334e-05, "loss": 0.5105, "step": 17832 }, { "epoch": 0.48766681251367316, "grad_norm": 1.1716532707214355, "learning_rate": 1.088542882584994e-05, "loss": 0.5032, "step": 17833 }, { "epoch": 0.4876941588273901, "grad_norm": 1.3276166915893555, "learning_rate": 1.0884546596940604e-05, "loss": 0.4879, "step": 17834 }, { "epoch": 0.48772150514110696, "grad_norm": 1.1669652462005615, "learning_rate": 1.0883664361092243e-05, "loss": 0.5079, "step": 17835 }, { "epoch": 0.4877488514548239, "grad_norm": 1.2181851863861084, "learning_rate": 1.0882782118311788e-05, "loss": 0.5203, "step": 17836 }, { "epoch": 0.4877761977685408, "grad_norm": 1.4629405736923218, "learning_rate": 1.0881899868606154e-05, "loss": 0.5548, "step": 17837 }, { "epoch": 0.48780354408225773, "grad_norm": 1.1920552253723145, "learning_rate": 1.0881017611982263e-05, "loss": 0.5003, "step": 17838 }, { "epoch": 0.4878308903959746, "grad_norm": 1.2739969491958618, "learning_rate": 1.0880135348447039e-05, "loss": 0.5006, "step": 17839 }, { "epoch": 0.48785823670969153, "grad_norm": 1.4655567407608032, "learning_rate": 1.0879253078007398e-05, "loss": 0.5366, "step": 17840 }, { "epoch": 0.48788558302340845, "grad_norm": 1.1279150247573853, "learning_rate": 1.0878370800670265e-05, "loss": 0.5195, "step": 17841 }, { "epoch": 0.4879129293371254, "grad_norm": 1.173465371131897, "learning_rate": 1.0877488516442562e-05, "loss": 0.4951, "step": 17842 }, { "epoch": 0.48794027565084225, "grad_norm": 1.8482259511947632, "learning_rate": 1.0876606225331208e-05, "loss": 0.4236, "step": 17843 }, { "epoch": 0.4879676219645592, "grad_norm": 1.323012113571167, "learning_rate": 1.0875723927343122e-05, "loss": 0.502, "step": 17844 }, { "epoch": 0.4879949682782761, "grad_norm": 1.2196942567825317, "learning_rate": 1.0874841622485233e-05, "loss": 0.5022, "step": 17845 }, { "epoch": 0.488022314591993, "grad_norm": 1.8119549751281738, "learning_rate": 1.0873959310764453e-05, "loss": 0.5184, "step": 17846 }, { "epoch": 0.4880496609057099, "grad_norm": 1.2918968200683594, "learning_rate": 1.0873076992187711e-05, "loss": 0.4973, "step": 17847 }, { "epoch": 0.4880770072194268, "grad_norm": 1.09889554977417, "learning_rate": 1.0872194666761926e-05, "loss": 0.4925, "step": 17848 }, { "epoch": 0.48810435353314374, "grad_norm": 1.1997171640396118, "learning_rate": 1.0871312334494016e-05, "loss": 0.5007, "step": 17849 }, { "epoch": 0.48813169984686067, "grad_norm": 2.501918077468872, "learning_rate": 1.087042999539091e-05, "loss": 0.8203, "step": 17850 }, { "epoch": 0.48815904616057754, "grad_norm": 1.3657883405685425, "learning_rate": 1.0869547649459522e-05, "loss": 0.501, "step": 17851 }, { "epoch": 0.48818639247429446, "grad_norm": 1.3341925144195557, "learning_rate": 1.0868665296706779e-05, "loss": 0.5299, "step": 17852 }, { "epoch": 0.4882137387880114, "grad_norm": 1.2705057859420776, "learning_rate": 1.0867782937139601e-05, "loss": 0.4927, "step": 17853 }, { "epoch": 0.4882410851017283, "grad_norm": 1.4958058595657349, "learning_rate": 1.086690057076491e-05, "loss": 0.524, "step": 17854 }, { "epoch": 0.4882684314154452, "grad_norm": 1.2954834699630737, "learning_rate": 1.0866018197589632e-05, "loss": 0.494, "step": 17855 }, { "epoch": 0.4882957777291621, "grad_norm": 1.443135380744934, "learning_rate": 1.086513581762068e-05, "loss": 0.4911, "step": 17856 }, { "epoch": 0.48832312404287903, "grad_norm": 1.1352957487106323, "learning_rate": 1.0864253430864981e-05, "loss": 0.5152, "step": 17857 }, { "epoch": 0.48835047035659596, "grad_norm": 1.530465006828308, "learning_rate": 1.0863371037329463e-05, "loss": 0.5262, "step": 17858 }, { "epoch": 0.4883778166703128, "grad_norm": 1.5321056842803955, "learning_rate": 1.0862488637021035e-05, "loss": 0.4891, "step": 17859 }, { "epoch": 0.48840516298402975, "grad_norm": 1.5651308298110962, "learning_rate": 1.0861606229946629e-05, "loss": 0.4167, "step": 17860 }, { "epoch": 0.4884325092977467, "grad_norm": 1.1722819805145264, "learning_rate": 1.0860723816113165e-05, "loss": 0.5038, "step": 17861 }, { "epoch": 0.4884598556114636, "grad_norm": 1.5911200046539307, "learning_rate": 1.0859841395527564e-05, "loss": 0.4242, "step": 17862 }, { "epoch": 0.48848720192518047, "grad_norm": 1.3486616611480713, "learning_rate": 1.0858958968196749e-05, "loss": 0.4251, "step": 17863 }, { "epoch": 0.4885145482388974, "grad_norm": 1.3112925291061401, "learning_rate": 1.0858076534127645e-05, "loss": 0.5123, "step": 17864 }, { "epoch": 0.4885418945526143, "grad_norm": 1.4804160594940186, "learning_rate": 1.0857194093327168e-05, "loss": 0.7645, "step": 17865 }, { "epoch": 0.48856924086633124, "grad_norm": 1.6723800897598267, "learning_rate": 1.0856311645802249e-05, "loss": 0.5118, "step": 17866 }, { "epoch": 0.4885965871800481, "grad_norm": 1.5401142835617065, "learning_rate": 1.0855429191559803e-05, "loss": 0.4337, "step": 17867 }, { "epoch": 0.48862393349376504, "grad_norm": 1.439763069152832, "learning_rate": 1.0854546730606756e-05, "loss": 0.5361, "step": 17868 }, { "epoch": 0.48865127980748196, "grad_norm": 1.14096200466156, "learning_rate": 1.0853664262950032e-05, "loss": 0.4723, "step": 17869 }, { "epoch": 0.4886786261211989, "grad_norm": 1.5678784847259521, "learning_rate": 1.0852781788596551e-05, "loss": 0.3808, "step": 17870 }, { "epoch": 0.48870597243491576, "grad_norm": 1.5310851335525513, "learning_rate": 1.0851899307553237e-05, "loss": 0.4001, "step": 17871 }, { "epoch": 0.4887333187486327, "grad_norm": 2.2875940799713135, "learning_rate": 1.0851016819827013e-05, "loss": 0.5199, "step": 17872 }, { "epoch": 0.4887606650623496, "grad_norm": 1.7440634965896606, "learning_rate": 1.0850134325424801e-05, "loss": 0.3938, "step": 17873 }, { "epoch": 0.48878801137606653, "grad_norm": 1.262183666229248, "learning_rate": 1.0849251824353525e-05, "loss": 0.8431, "step": 17874 }, { "epoch": 0.4888153576897834, "grad_norm": 1.6522977352142334, "learning_rate": 1.0848369316620109e-05, "loss": 0.4329, "step": 17875 }, { "epoch": 0.48884270400350033, "grad_norm": 1.4783024787902832, "learning_rate": 1.0847486802231473e-05, "loss": 0.7837, "step": 17876 }, { "epoch": 0.48887005031721725, "grad_norm": 1.08999502658844, "learning_rate": 1.0846604281194543e-05, "loss": 0.7654, "step": 17877 }, { "epoch": 0.4888973966309341, "grad_norm": 1.424320101737976, "learning_rate": 1.0845721753516238e-05, "loss": 0.5064, "step": 17878 }, { "epoch": 0.48892474294465105, "grad_norm": 1.3909541368484497, "learning_rate": 1.0844839219203485e-05, "loss": 0.4328, "step": 17879 }, { "epoch": 0.488952089258368, "grad_norm": 1.211582899093628, "learning_rate": 1.0843956678263206e-05, "loss": 0.5439, "step": 17880 }, { "epoch": 0.4889794355720849, "grad_norm": 1.5432311296463013, "learning_rate": 1.0843074130702322e-05, "loss": 0.5619, "step": 17881 }, { "epoch": 0.48900678188580177, "grad_norm": 1.6685658693313599, "learning_rate": 1.0842191576527761e-05, "loss": 0.5478, "step": 17882 }, { "epoch": 0.4890341281995187, "grad_norm": 1.178774118423462, "learning_rate": 1.0841309015746446e-05, "loss": 0.4857, "step": 17883 }, { "epoch": 0.4890614745132356, "grad_norm": 1.1759029626846313, "learning_rate": 1.0840426448365295e-05, "loss": 0.5002, "step": 17884 }, { "epoch": 0.48908882082695254, "grad_norm": 1.043328881263733, "learning_rate": 1.0839543874391238e-05, "loss": 0.5239, "step": 17885 }, { "epoch": 0.4891161671406694, "grad_norm": 1.613847017288208, "learning_rate": 1.0838661293831194e-05, "loss": 0.366, "step": 17886 }, { "epoch": 0.48914351345438634, "grad_norm": 1.074607491493225, "learning_rate": 1.0837778706692088e-05, "loss": 0.4658, "step": 17887 }, { "epoch": 0.48917085976810326, "grad_norm": 1.1941648721694946, "learning_rate": 1.0836896112980846e-05, "loss": 0.5162, "step": 17888 }, { "epoch": 0.4891982060818202, "grad_norm": 1.2100367546081543, "learning_rate": 1.0836013512704386e-05, "loss": 0.4566, "step": 17889 }, { "epoch": 0.48922555239553706, "grad_norm": 1.5468716621398926, "learning_rate": 1.0835130905869637e-05, "loss": 0.5333, "step": 17890 }, { "epoch": 0.489252898709254, "grad_norm": 1.2937194108963013, "learning_rate": 1.0834248292483523e-05, "loss": 0.5099, "step": 17891 }, { "epoch": 0.4892802450229709, "grad_norm": 1.2509496212005615, "learning_rate": 1.0833365672552962e-05, "loss": 0.5188, "step": 17892 }, { "epoch": 0.48930759133668783, "grad_norm": 1.1554627418518066, "learning_rate": 1.0832483046084884e-05, "loss": 0.5012, "step": 17893 }, { "epoch": 0.4893349376504047, "grad_norm": 1.1246877908706665, "learning_rate": 1.0831600413086211e-05, "loss": 0.51, "step": 17894 }, { "epoch": 0.4893622839641216, "grad_norm": 1.2031217813491821, "learning_rate": 1.0830717773563868e-05, "loss": 0.5081, "step": 17895 }, { "epoch": 0.48938963027783855, "grad_norm": 1.3129360675811768, "learning_rate": 1.0829835127524778e-05, "loss": 0.4583, "step": 17896 }, { "epoch": 0.4894169765915555, "grad_norm": 1.2431421279907227, "learning_rate": 1.0828952474975863e-05, "loss": 0.7857, "step": 17897 }, { "epoch": 0.48944432290527234, "grad_norm": 1.4778482913970947, "learning_rate": 1.0828069815924047e-05, "loss": 0.5279, "step": 17898 }, { "epoch": 0.48947166921898927, "grad_norm": 1.2054409980773926, "learning_rate": 1.082718715037626e-05, "loss": 0.4968, "step": 17899 }, { "epoch": 0.4894990155327062, "grad_norm": 1.3768765926361084, "learning_rate": 1.082630447833942e-05, "loss": 0.5211, "step": 17900 }, { "epoch": 0.4895263618464231, "grad_norm": 1.0878522396087646, "learning_rate": 1.0825421799820456e-05, "loss": 0.5269, "step": 17901 }, { "epoch": 0.48955370816014, "grad_norm": 1.4339962005615234, "learning_rate": 1.082453911482629e-05, "loss": 0.4162, "step": 17902 }, { "epoch": 0.4895810544738569, "grad_norm": 1.6312767267227173, "learning_rate": 1.0823656423363844e-05, "loss": 0.5046, "step": 17903 }, { "epoch": 0.48960840078757384, "grad_norm": 1.3773961067199707, "learning_rate": 1.082277372544005e-05, "loss": 0.5353, "step": 17904 }, { "epoch": 0.48963574710129076, "grad_norm": 2.32707142829895, "learning_rate": 1.0821891021061824e-05, "loss": 0.7814, "step": 17905 }, { "epoch": 0.48966309341500763, "grad_norm": 1.4729093313217163, "learning_rate": 1.0821008310236094e-05, "loss": 0.5282, "step": 17906 }, { "epoch": 0.48969043972872456, "grad_norm": 1.5974904298782349, "learning_rate": 1.0820125592969785e-05, "loss": 0.4873, "step": 17907 }, { "epoch": 0.4897177860424415, "grad_norm": 1.4010668992996216, "learning_rate": 1.0819242869269823e-05, "loss": 0.521, "step": 17908 }, { "epoch": 0.4897451323561584, "grad_norm": 1.6853561401367188, "learning_rate": 1.0818360139143134e-05, "loss": 0.4365, "step": 17909 }, { "epoch": 0.4897724786698753, "grad_norm": 1.176680564880371, "learning_rate": 1.0817477402596635e-05, "loss": 0.5096, "step": 17910 }, { "epoch": 0.4897998249835922, "grad_norm": 1.4279452562332153, "learning_rate": 1.0816594659637258e-05, "loss": 0.5125, "step": 17911 }, { "epoch": 0.4898271712973091, "grad_norm": 1.4051212072372437, "learning_rate": 1.0815711910271926e-05, "loss": 0.5127, "step": 17912 }, { "epoch": 0.48985451761102605, "grad_norm": 1.5011086463928223, "learning_rate": 1.0814829154507562e-05, "loss": 0.5236, "step": 17913 }, { "epoch": 0.4898818639247429, "grad_norm": 1.4004143476486206, "learning_rate": 1.0813946392351093e-05, "loss": 0.5294, "step": 17914 }, { "epoch": 0.48990921023845985, "grad_norm": 1.3051204681396484, "learning_rate": 1.0813063623809446e-05, "loss": 0.5014, "step": 17915 }, { "epoch": 0.48993655655217677, "grad_norm": 1.2692134380340576, "learning_rate": 1.0812180848889542e-05, "loss": 0.5317, "step": 17916 }, { "epoch": 0.4899639028658937, "grad_norm": 1.5110231637954712, "learning_rate": 1.0811298067598305e-05, "loss": 0.5399, "step": 17917 }, { "epoch": 0.48999124917961057, "grad_norm": 1.4954874515533447, "learning_rate": 1.0810415279942668e-05, "loss": 0.4169, "step": 17918 }, { "epoch": 0.4900185954933275, "grad_norm": 1.7150845527648926, "learning_rate": 1.0809532485929549e-05, "loss": 0.4176, "step": 17919 }, { "epoch": 0.4900459418070444, "grad_norm": 1.1770248413085938, "learning_rate": 1.0808649685565875e-05, "loss": 0.5004, "step": 17920 }, { "epoch": 0.49007328812076134, "grad_norm": 1.2369993925094604, "learning_rate": 1.0807766878858573e-05, "loss": 0.8125, "step": 17921 }, { "epoch": 0.4901006344344782, "grad_norm": 1.9426361322402954, "learning_rate": 1.0806884065814564e-05, "loss": 0.8179, "step": 17922 }, { "epoch": 0.49012798074819514, "grad_norm": 1.371119499206543, "learning_rate": 1.080600124644078e-05, "loss": 0.4129, "step": 17923 }, { "epoch": 0.49015532706191206, "grad_norm": 1.568334937095642, "learning_rate": 1.0805118420744143e-05, "loss": 0.446, "step": 17924 }, { "epoch": 0.490182673375629, "grad_norm": 1.3653960227966309, "learning_rate": 1.0804235588731574e-05, "loss": 0.3953, "step": 17925 }, { "epoch": 0.49021001968934586, "grad_norm": 1.4617804288864136, "learning_rate": 1.080335275041001e-05, "loss": 0.5302, "step": 17926 }, { "epoch": 0.4902373660030628, "grad_norm": 1.224461555480957, "learning_rate": 1.0802469905786365e-05, "loss": 0.8131, "step": 17927 }, { "epoch": 0.4902647123167797, "grad_norm": 1.403489589691162, "learning_rate": 1.0801587054867573e-05, "loss": 0.7837, "step": 17928 }, { "epoch": 0.49029205863049663, "grad_norm": 1.520410418510437, "learning_rate": 1.0800704197660555e-05, "loss": 0.4193, "step": 17929 }, { "epoch": 0.4903194049442135, "grad_norm": 1.4049232006072998, "learning_rate": 1.0799821334172237e-05, "loss": 0.4929, "step": 17930 }, { "epoch": 0.4903467512579304, "grad_norm": 1.5376139879226685, "learning_rate": 1.0798938464409545e-05, "loss": 0.4581, "step": 17931 }, { "epoch": 0.49037409757164735, "grad_norm": 1.26004159450531, "learning_rate": 1.0798055588379407e-05, "loss": 0.5079, "step": 17932 }, { "epoch": 0.4904014438853643, "grad_norm": 1.5869667530059814, "learning_rate": 1.0797172706088746e-05, "loss": 0.3975, "step": 17933 }, { "epoch": 0.49042879019908114, "grad_norm": 1.3101165294647217, "learning_rate": 1.079628981754449e-05, "loss": 0.5249, "step": 17934 }, { "epoch": 0.49045613651279807, "grad_norm": 1.3452154397964478, "learning_rate": 1.0795406922753564e-05, "loss": 0.5033, "step": 17935 }, { "epoch": 0.490483482826515, "grad_norm": 1.3004772663116455, "learning_rate": 1.0794524021722894e-05, "loss": 0.4883, "step": 17936 }, { "epoch": 0.4905108291402319, "grad_norm": 1.238028883934021, "learning_rate": 1.079364111445941e-05, "loss": 0.4981, "step": 17937 }, { "epoch": 0.4905381754539488, "grad_norm": 1.517007827758789, "learning_rate": 1.079275820097003e-05, "loss": 0.5141, "step": 17938 }, { "epoch": 0.4905655217676657, "grad_norm": 1.4678783416748047, "learning_rate": 1.079187528126169e-05, "loss": 0.4882, "step": 17939 }, { "epoch": 0.49059286808138264, "grad_norm": 1.982396125793457, "learning_rate": 1.0790992355341308e-05, "loss": 0.3795, "step": 17940 }, { "epoch": 0.49062021439509956, "grad_norm": 1.4121854305267334, "learning_rate": 1.0790109423215814e-05, "loss": 0.7875, "step": 17941 }, { "epoch": 0.49064756070881643, "grad_norm": 1.424036979675293, "learning_rate": 1.0789226484892135e-05, "loss": 0.4376, "step": 17942 }, { "epoch": 0.49067490702253336, "grad_norm": 1.233854055404663, "learning_rate": 1.0788343540377195e-05, "loss": 0.7957, "step": 17943 }, { "epoch": 0.4907022533362503, "grad_norm": 1.3097724914550781, "learning_rate": 1.0787460589677924e-05, "loss": 0.5216, "step": 17944 }, { "epoch": 0.4907295996499672, "grad_norm": 1.3942824602127075, "learning_rate": 1.0786577632801245e-05, "loss": 0.3982, "step": 17945 }, { "epoch": 0.4907569459636841, "grad_norm": 1.1337841749191284, "learning_rate": 1.0785694669754085e-05, "loss": 0.7765, "step": 17946 }, { "epoch": 0.490784292277401, "grad_norm": 1.337575078010559, "learning_rate": 1.0784811700543372e-05, "loss": 0.5253, "step": 17947 }, { "epoch": 0.4908116385911179, "grad_norm": 1.1083139181137085, "learning_rate": 1.0783928725176033e-05, "loss": 0.4999, "step": 17948 }, { "epoch": 0.49083898490483485, "grad_norm": 1.2706223726272583, "learning_rate": 1.0783045743658993e-05, "loss": 0.4855, "step": 17949 }, { "epoch": 0.4908663312185517, "grad_norm": 1.299790620803833, "learning_rate": 1.0782162755999182e-05, "loss": 0.5062, "step": 17950 }, { "epoch": 0.49089367753226865, "grad_norm": 1.494340181350708, "learning_rate": 1.078127976220352e-05, "loss": 0.4965, "step": 17951 }, { "epoch": 0.49092102384598557, "grad_norm": 1.3421920537948608, "learning_rate": 1.0780396762278937e-05, "loss": 0.5019, "step": 17952 }, { "epoch": 0.4909483701597025, "grad_norm": 1.5717312097549438, "learning_rate": 1.0779513756232367e-05, "loss": 0.5087, "step": 17953 }, { "epoch": 0.49097571647341937, "grad_norm": 1.4088884592056274, "learning_rate": 1.0778630744070727e-05, "loss": 0.5032, "step": 17954 }, { "epoch": 0.4910030627871363, "grad_norm": 1.2829965353012085, "learning_rate": 1.0777747725800949e-05, "loss": 0.5075, "step": 17955 }, { "epoch": 0.4910304091008532, "grad_norm": 1.4587322473526, "learning_rate": 1.0776864701429959e-05, "loss": 0.5371, "step": 17956 }, { "epoch": 0.49105775541457014, "grad_norm": 1.2602343559265137, "learning_rate": 1.0775981670964684e-05, "loss": 0.5018, "step": 17957 }, { "epoch": 0.491085101728287, "grad_norm": 1.3117467164993286, "learning_rate": 1.077509863441205e-05, "loss": 0.389, "step": 17958 }, { "epoch": 0.49111244804200394, "grad_norm": 1.385026216506958, "learning_rate": 1.0774215591778988e-05, "loss": 0.469, "step": 17959 }, { "epoch": 0.49113979435572086, "grad_norm": 1.2849981784820557, "learning_rate": 1.077333254307242e-05, "loss": 0.3198, "step": 17960 }, { "epoch": 0.4911671406694378, "grad_norm": 1.2903239727020264, "learning_rate": 1.0772449488299277e-05, "loss": 0.5005, "step": 17961 }, { "epoch": 0.49119448698315465, "grad_norm": 1.6284842491149902, "learning_rate": 1.0771566427466484e-05, "loss": 0.5019, "step": 17962 }, { "epoch": 0.4912218332968716, "grad_norm": 1.406218409538269, "learning_rate": 1.0770683360580972e-05, "loss": 0.473, "step": 17963 }, { "epoch": 0.4912491796105885, "grad_norm": 1.5292108058929443, "learning_rate": 1.0769800287649665e-05, "loss": 0.4265, "step": 17964 }, { "epoch": 0.49127652592430543, "grad_norm": 1.3351105451583862, "learning_rate": 1.076891720867949e-05, "loss": 0.4839, "step": 17965 }, { "epoch": 0.4913038722380223, "grad_norm": 1.0885894298553467, "learning_rate": 1.0768034123677378e-05, "loss": 0.4596, "step": 17966 }, { "epoch": 0.4913312185517392, "grad_norm": 1.328078031539917, "learning_rate": 1.0767151032650252e-05, "loss": 0.534, "step": 17967 }, { "epoch": 0.49135856486545615, "grad_norm": 1.2178772687911987, "learning_rate": 1.0766267935605042e-05, "loss": 0.5073, "step": 17968 }, { "epoch": 0.4913859111791731, "grad_norm": 2.048208713531494, "learning_rate": 1.0765384832548677e-05, "loss": 0.3954, "step": 17969 }, { "epoch": 0.49141325749288994, "grad_norm": 1.1652158498764038, "learning_rate": 1.0764501723488084e-05, "loss": 0.507, "step": 17970 }, { "epoch": 0.49144060380660687, "grad_norm": 1.3047133684158325, "learning_rate": 1.0763618608430187e-05, "loss": 0.5079, "step": 17971 }, { "epoch": 0.4914679501203238, "grad_norm": 1.8353835344314575, "learning_rate": 1.0762735487381921e-05, "loss": 0.823, "step": 17972 }, { "epoch": 0.4914952964340407, "grad_norm": 1.3515586853027344, "learning_rate": 1.0761852360350206e-05, "loss": 0.7651, "step": 17973 }, { "epoch": 0.4915226427477576, "grad_norm": 1.2634116411209106, "learning_rate": 1.0760969227341977e-05, "loss": 0.8193, "step": 17974 }, { "epoch": 0.4915499890614745, "grad_norm": 1.5692903995513916, "learning_rate": 1.0760086088364156e-05, "loss": 0.4908, "step": 17975 }, { "epoch": 0.49157733537519144, "grad_norm": 1.4123696088790894, "learning_rate": 1.0759202943423674e-05, "loss": 0.4703, "step": 17976 }, { "epoch": 0.49160468168890836, "grad_norm": 1.7226370573043823, "learning_rate": 1.075831979252746e-05, "loss": 0.4175, "step": 17977 }, { "epoch": 0.49163202800262523, "grad_norm": 1.2287851572036743, "learning_rate": 1.0757436635682441e-05, "loss": 0.4856, "step": 17978 }, { "epoch": 0.49165937431634216, "grad_norm": 1.2684454917907715, "learning_rate": 1.0756553472895543e-05, "loss": 0.5148, "step": 17979 }, { "epoch": 0.4916867206300591, "grad_norm": 1.6344196796417236, "learning_rate": 1.0755670304173698e-05, "loss": 0.5246, "step": 17980 }, { "epoch": 0.49171406694377595, "grad_norm": 1.4275639057159424, "learning_rate": 1.0754787129523832e-05, "loss": 0.4958, "step": 17981 }, { "epoch": 0.4917414132574929, "grad_norm": 1.5376849174499512, "learning_rate": 1.0753903948952871e-05, "loss": 0.518, "step": 17982 }, { "epoch": 0.4917687595712098, "grad_norm": 1.3912231922149658, "learning_rate": 1.0753020762467748e-05, "loss": 0.5334, "step": 17983 }, { "epoch": 0.4917961058849267, "grad_norm": 1.2800638675689697, "learning_rate": 1.0752137570075387e-05, "loss": 0.5136, "step": 17984 }, { "epoch": 0.4918234521986436, "grad_norm": 1.395324945449829, "learning_rate": 1.075125437178272e-05, "loss": 0.4915, "step": 17985 }, { "epoch": 0.4918507985123605, "grad_norm": 1.2478770017623901, "learning_rate": 1.0750371167596672e-05, "loss": 0.487, "step": 17986 }, { "epoch": 0.49187814482607745, "grad_norm": 1.13681161403656, "learning_rate": 1.0749487957524174e-05, "loss": 0.5158, "step": 17987 }, { "epoch": 0.49190549113979437, "grad_norm": 1.9896748065948486, "learning_rate": 1.0748604741572155e-05, "loss": 0.5292, "step": 17988 }, { "epoch": 0.49193283745351124, "grad_norm": 1.3201777935028076, "learning_rate": 1.0747721519747542e-05, "loss": 0.5093, "step": 17989 }, { "epoch": 0.49196018376722817, "grad_norm": 1.5514984130859375, "learning_rate": 1.0746838292057263e-05, "loss": 0.5201, "step": 17990 }, { "epoch": 0.4919875300809451, "grad_norm": 1.2177166938781738, "learning_rate": 1.074595505850825e-05, "loss": 0.5003, "step": 17991 }, { "epoch": 0.492014876394662, "grad_norm": 1.2918223142623901, "learning_rate": 1.0745071819107427e-05, "loss": 0.5222, "step": 17992 }, { "epoch": 0.4920422227083789, "grad_norm": 1.3571926355361938, "learning_rate": 1.0744188573861727e-05, "loss": 0.5385, "step": 17993 }, { "epoch": 0.4920695690220958, "grad_norm": 1.4113659858703613, "learning_rate": 1.0743305322778077e-05, "loss": 0.4871, "step": 17994 }, { "epoch": 0.49209691533581273, "grad_norm": 1.2147421836853027, "learning_rate": 1.0742422065863404e-05, "loss": 0.5118, "step": 17995 }, { "epoch": 0.49212426164952966, "grad_norm": 1.2259232997894287, "learning_rate": 1.0741538803124642e-05, "loss": 0.5186, "step": 17996 }, { "epoch": 0.49215160796324653, "grad_norm": 1.5886597633361816, "learning_rate": 1.0740655534568714e-05, "loss": 0.5336, "step": 17997 }, { "epoch": 0.49217895427696345, "grad_norm": 1.6288297176361084, "learning_rate": 1.0739772260202555e-05, "loss": 0.4716, "step": 17998 }, { "epoch": 0.4922063005906804, "grad_norm": 1.3173778057098389, "learning_rate": 1.0738888980033089e-05, "loss": 0.5019, "step": 17999 }, { "epoch": 0.4922336469043973, "grad_norm": 1.128348708152771, "learning_rate": 1.0738005694067247e-05, "loss": 0.5049, "step": 18000 }, { "epoch": 0.4922609932181142, "grad_norm": 1.21356999874115, "learning_rate": 1.0737122402311955e-05, "loss": 0.4899, "step": 18001 }, { "epoch": 0.4922883395318311, "grad_norm": 1.1477365493774414, "learning_rate": 1.073623910477415e-05, "loss": 0.4717, "step": 18002 }, { "epoch": 0.492315685845548, "grad_norm": 1.5525223016738892, "learning_rate": 1.0735355801460754e-05, "loss": 0.5053, "step": 18003 }, { "epoch": 0.49234303215926495, "grad_norm": 1.5571645498275757, "learning_rate": 1.0734472492378701e-05, "loss": 0.4039, "step": 18004 }, { "epoch": 0.4923703784729818, "grad_norm": 1.36172354221344, "learning_rate": 1.0733589177534914e-05, "loss": 0.4294, "step": 18005 }, { "epoch": 0.49239772478669874, "grad_norm": 1.0653355121612549, "learning_rate": 1.0732705856936328e-05, "loss": 0.472, "step": 18006 }, { "epoch": 0.49242507110041567, "grad_norm": 1.9854710102081299, "learning_rate": 1.0731822530589871e-05, "loss": 0.4895, "step": 18007 }, { "epoch": 0.4924524174141326, "grad_norm": 1.278597354888916, "learning_rate": 1.073093919850247e-05, "loss": 0.5184, "step": 18008 }, { "epoch": 0.49247976372784946, "grad_norm": 1.3170669078826904, "learning_rate": 1.0730055860681061e-05, "loss": 0.4117, "step": 18009 }, { "epoch": 0.4925071100415664, "grad_norm": 1.222029209136963, "learning_rate": 1.0729172517132566e-05, "loss": 0.4872, "step": 18010 }, { "epoch": 0.4925344563552833, "grad_norm": 1.1851245164871216, "learning_rate": 1.0728289167863919e-05, "loss": 0.5115, "step": 18011 }, { "epoch": 0.49256180266900024, "grad_norm": 1.513181209564209, "learning_rate": 1.072740581288205e-05, "loss": 0.5193, "step": 18012 }, { "epoch": 0.4925891489827171, "grad_norm": 1.2727551460266113, "learning_rate": 1.0726522452193884e-05, "loss": 0.4987, "step": 18013 }, { "epoch": 0.49261649529643403, "grad_norm": 1.1749333143234253, "learning_rate": 1.0725639085806355e-05, "loss": 0.5009, "step": 18014 }, { "epoch": 0.49264384161015096, "grad_norm": 1.0604658126831055, "learning_rate": 1.0724755713726393e-05, "loss": 0.4636, "step": 18015 }, { "epoch": 0.4926711879238679, "grad_norm": 1.2151267528533936, "learning_rate": 1.0723872335960925e-05, "loss": 0.5189, "step": 18016 }, { "epoch": 0.49269853423758475, "grad_norm": 1.1770496368408203, "learning_rate": 1.0722988952516883e-05, "loss": 0.5048, "step": 18017 }, { "epoch": 0.4927258805513017, "grad_norm": 3.168612241744995, "learning_rate": 1.0722105563401197e-05, "loss": 0.8592, "step": 18018 }, { "epoch": 0.4927532268650186, "grad_norm": 1.1410510540008545, "learning_rate": 1.0721222168620794e-05, "loss": 0.4684, "step": 18019 }, { "epoch": 0.4927805731787355, "grad_norm": 1.993140459060669, "learning_rate": 1.0720338768182608e-05, "loss": 0.7992, "step": 18020 }, { "epoch": 0.4928079194924524, "grad_norm": 1.6140371561050415, "learning_rate": 1.0719455362093564e-05, "loss": 0.4089, "step": 18021 }, { "epoch": 0.4928352658061693, "grad_norm": 1.508125901222229, "learning_rate": 1.0718571950360597e-05, "loss": 0.4848, "step": 18022 }, { "epoch": 0.49286261211988625, "grad_norm": 1.2471833229064941, "learning_rate": 1.0717688532990635e-05, "loss": 0.5009, "step": 18023 }, { "epoch": 0.49288995843360317, "grad_norm": 1.6421087980270386, "learning_rate": 1.0716805109990609e-05, "loss": 0.4915, "step": 18024 }, { "epoch": 0.49291730474732004, "grad_norm": 1.4562737941741943, "learning_rate": 1.0715921681367447e-05, "loss": 0.5029, "step": 18025 }, { "epoch": 0.49294465106103696, "grad_norm": 1.1174519062042236, "learning_rate": 1.0715038247128085e-05, "loss": 0.4942, "step": 18026 }, { "epoch": 0.4929719973747539, "grad_norm": 1.1461102962493896, "learning_rate": 1.0714154807279444e-05, "loss": 0.4967, "step": 18027 }, { "epoch": 0.4929993436884708, "grad_norm": 1.3047748804092407, "learning_rate": 1.0713271361828464e-05, "loss": 0.5341, "step": 18028 }, { "epoch": 0.4930266900021877, "grad_norm": 1.1902351379394531, "learning_rate": 1.0712387910782067e-05, "loss": 0.4764, "step": 18029 }, { "epoch": 0.4930540363159046, "grad_norm": 1.5052331686019897, "learning_rate": 1.071150445414719e-05, "loss": 0.5514, "step": 18030 }, { "epoch": 0.49308138262962153, "grad_norm": 12.845630645751953, "learning_rate": 1.071062099193076e-05, "loss": 0.8396, "step": 18031 }, { "epoch": 0.49310872894333846, "grad_norm": 1.4049588441848755, "learning_rate": 1.0709737524139707e-05, "loss": 0.511, "step": 18032 }, { "epoch": 0.49313607525705533, "grad_norm": 1.4641209840774536, "learning_rate": 1.0708854050780966e-05, "loss": 0.509, "step": 18033 }, { "epoch": 0.49316342157077225, "grad_norm": 1.3340449333190918, "learning_rate": 1.0707970571861463e-05, "loss": 0.4336, "step": 18034 }, { "epoch": 0.4931907678844892, "grad_norm": 1.2994455099105835, "learning_rate": 1.070708708738813e-05, "loss": 0.5501, "step": 18035 }, { "epoch": 0.4932181141982061, "grad_norm": 1.4918944835662842, "learning_rate": 1.0706203597367896e-05, "loss": 0.5267, "step": 18036 }, { "epoch": 0.493245460511923, "grad_norm": 1.3395620584487915, "learning_rate": 1.0705320101807697e-05, "loss": 0.5166, "step": 18037 }, { "epoch": 0.4932728068256399, "grad_norm": 1.4550248384475708, "learning_rate": 1.0704436600714459e-05, "loss": 0.5078, "step": 18038 }, { "epoch": 0.4933001531393568, "grad_norm": 1.3146387338638306, "learning_rate": 1.0703553094095113e-05, "loss": 0.4974, "step": 18039 }, { "epoch": 0.49332749945307375, "grad_norm": 1.1369742155075073, "learning_rate": 1.070266958195659e-05, "loss": 0.4715, "step": 18040 }, { "epoch": 0.4933548457667906, "grad_norm": 1.636246919631958, "learning_rate": 1.0701786064305823e-05, "loss": 0.8254, "step": 18041 }, { "epoch": 0.49338219208050754, "grad_norm": 1.4297250509262085, "learning_rate": 1.0700902541149743e-05, "loss": 0.5094, "step": 18042 }, { "epoch": 0.49340953839422447, "grad_norm": 1.4809346199035645, "learning_rate": 1.0700019012495277e-05, "loss": 0.4709, "step": 18043 }, { "epoch": 0.4934368847079414, "grad_norm": 1.325851559638977, "learning_rate": 1.0699135478349361e-05, "loss": 0.5067, "step": 18044 }, { "epoch": 0.49346423102165826, "grad_norm": 1.2666107416152954, "learning_rate": 1.0698251938718924e-05, "loss": 0.5295, "step": 18045 }, { "epoch": 0.4934915773353752, "grad_norm": 1.2119663953781128, "learning_rate": 1.0697368393610896e-05, "loss": 0.5222, "step": 18046 }, { "epoch": 0.4935189236490921, "grad_norm": 1.185143232345581, "learning_rate": 1.0696484843032214e-05, "loss": 0.5498, "step": 18047 }, { "epoch": 0.49354626996280904, "grad_norm": 1.5734829902648926, "learning_rate": 1.0695601286989798e-05, "loss": 0.4939, "step": 18048 }, { "epoch": 0.4935736162765259, "grad_norm": 1.3920252323150635, "learning_rate": 1.0694717725490586e-05, "loss": 0.4916, "step": 18049 }, { "epoch": 0.49360096259024283, "grad_norm": 1.289953351020813, "learning_rate": 1.0693834158541514e-05, "loss": 0.5229, "step": 18050 }, { "epoch": 0.49362830890395976, "grad_norm": 1.0809389352798462, "learning_rate": 1.0692950586149504e-05, "loss": 0.3787, "step": 18051 }, { "epoch": 0.4936556552176767, "grad_norm": 1.3367472887039185, "learning_rate": 1.0692067008321494e-05, "loss": 0.492, "step": 18052 }, { "epoch": 0.49368300153139355, "grad_norm": 1.3015140295028687, "learning_rate": 1.0691183425064413e-05, "loss": 0.7818, "step": 18053 }, { "epoch": 0.4937103478451105, "grad_norm": 1.5500165224075317, "learning_rate": 1.0690299836385191e-05, "loss": 0.5364, "step": 18054 }, { "epoch": 0.4937376941588274, "grad_norm": 1.1382126808166504, "learning_rate": 1.068941624229076e-05, "loss": 0.4958, "step": 18055 }, { "epoch": 0.4937650404725443, "grad_norm": 1.442236304283142, "learning_rate": 1.0688532642788057e-05, "loss": 0.4868, "step": 18056 }, { "epoch": 0.4937923867862612, "grad_norm": 1.174034595489502, "learning_rate": 1.0687649037884004e-05, "loss": 0.4994, "step": 18057 }, { "epoch": 0.4938197330999781, "grad_norm": 1.1968681812286377, "learning_rate": 1.0686765427585542e-05, "loss": 0.4984, "step": 18058 }, { "epoch": 0.49384707941369504, "grad_norm": 1.369547963142395, "learning_rate": 1.0685881811899596e-05, "loss": 0.5268, "step": 18059 }, { "epoch": 0.49387442572741197, "grad_norm": 1.2427664995193481, "learning_rate": 1.06849981908331e-05, "loss": 0.5188, "step": 18060 }, { "epoch": 0.49390177204112884, "grad_norm": 1.3298791646957397, "learning_rate": 1.068411456439299e-05, "loss": 0.5303, "step": 18061 }, { "epoch": 0.49392911835484576, "grad_norm": 1.1778627634048462, "learning_rate": 1.0683230932586189e-05, "loss": 0.5116, "step": 18062 }, { "epoch": 0.4939564646685627, "grad_norm": 1.2786718606948853, "learning_rate": 1.0682347295419635e-05, "loss": 0.4967, "step": 18063 }, { "epoch": 0.4939838109822796, "grad_norm": 1.1255998611450195, "learning_rate": 1.068146365290026e-05, "loss": 0.5009, "step": 18064 }, { "epoch": 0.4940111572959965, "grad_norm": 1.5693159103393555, "learning_rate": 1.068058000503499e-05, "loss": 0.4226, "step": 18065 }, { "epoch": 0.4940385036097134, "grad_norm": 1.5765433311462402, "learning_rate": 1.0679696351830767e-05, "loss": 0.3691, "step": 18066 }, { "epoch": 0.49406584992343033, "grad_norm": 1.4102352857589722, "learning_rate": 1.0678812693294514e-05, "loss": 0.4022, "step": 18067 }, { "epoch": 0.49409319623714726, "grad_norm": 1.168494462966919, "learning_rate": 1.0677929029433164e-05, "loss": 0.8134, "step": 18068 }, { "epoch": 0.49412054255086413, "grad_norm": 1.4114773273468018, "learning_rate": 1.0677045360253659e-05, "loss": 0.4939, "step": 18069 }, { "epoch": 0.49414788886458105, "grad_norm": 1.3780637979507446, "learning_rate": 1.067616168576292e-05, "loss": 0.5381, "step": 18070 }, { "epoch": 0.494175235178298, "grad_norm": 1.2093218564987183, "learning_rate": 1.0675278005967882e-05, "loss": 0.8054, "step": 18071 }, { "epoch": 0.4942025814920149, "grad_norm": 1.320400357246399, "learning_rate": 1.067439432087548e-05, "loss": 0.4373, "step": 18072 }, { "epoch": 0.4942299278057318, "grad_norm": 1.4510351419448853, "learning_rate": 1.0673510630492643e-05, "loss": 0.5124, "step": 18073 }, { "epoch": 0.4942572741194487, "grad_norm": 1.2822651863098145, "learning_rate": 1.0672626934826304e-05, "loss": 0.51, "step": 18074 }, { "epoch": 0.4942846204331656, "grad_norm": 1.0897752046585083, "learning_rate": 1.0671743233883395e-05, "loss": 0.5357, "step": 18075 }, { "epoch": 0.49431196674688255, "grad_norm": 1.2096190452575684, "learning_rate": 1.0670859527670851e-05, "loss": 0.5151, "step": 18076 }, { "epoch": 0.4943393130605994, "grad_norm": 1.3426414728164673, "learning_rate": 1.0669975816195604e-05, "loss": 0.5269, "step": 18077 }, { "epoch": 0.49436665937431634, "grad_norm": 1.1311471462249756, "learning_rate": 1.0669092099464584e-05, "loss": 0.4818, "step": 18078 }, { "epoch": 0.49439400568803327, "grad_norm": 1.2715754508972168, "learning_rate": 1.0668208377484723e-05, "loss": 0.5137, "step": 18079 }, { "epoch": 0.4944213520017502, "grad_norm": 1.0737797021865845, "learning_rate": 1.066732465026296e-05, "loss": 0.4998, "step": 18080 }, { "epoch": 0.49444869831546706, "grad_norm": 1.1154749393463135, "learning_rate": 1.0666440917806217e-05, "loss": 0.5202, "step": 18081 }, { "epoch": 0.494476044629184, "grad_norm": 1.2210067510604858, "learning_rate": 1.0665557180121437e-05, "loss": 0.5251, "step": 18082 }, { "epoch": 0.4945033909429009, "grad_norm": 1.1933772563934326, "learning_rate": 1.0664673437215547e-05, "loss": 0.5239, "step": 18083 }, { "epoch": 0.4945307372566178, "grad_norm": 1.1632858514785767, "learning_rate": 1.0663789689095481e-05, "loss": 0.5171, "step": 18084 }, { "epoch": 0.4945580835703347, "grad_norm": 1.6607015132904053, "learning_rate": 1.066290593576817e-05, "loss": 0.5415, "step": 18085 }, { "epoch": 0.49458542988405163, "grad_norm": 1.258500099182129, "learning_rate": 1.0662022177240552e-05, "loss": 0.4815, "step": 18086 }, { "epoch": 0.49461277619776856, "grad_norm": 1.291778802871704, "learning_rate": 1.0661138413519555e-05, "loss": 0.5094, "step": 18087 }, { "epoch": 0.4946401225114854, "grad_norm": 1.4661163091659546, "learning_rate": 1.0660254644612113e-05, "loss": 0.4059, "step": 18088 }, { "epoch": 0.49466746882520235, "grad_norm": 1.5445256233215332, "learning_rate": 1.0659370870525158e-05, "loss": 0.4997, "step": 18089 }, { "epoch": 0.4946948151389193, "grad_norm": 1.246148943901062, "learning_rate": 1.0658487091265624e-05, "loss": 0.5159, "step": 18090 }, { "epoch": 0.4947221614526362, "grad_norm": 1.3365572690963745, "learning_rate": 1.0657603306840447e-05, "loss": 0.4549, "step": 18091 }, { "epoch": 0.49474950776635307, "grad_norm": 1.2395042181015015, "learning_rate": 1.0656719517256552e-05, "loss": 0.5079, "step": 18092 }, { "epoch": 0.49477685408007, "grad_norm": 2.359168529510498, "learning_rate": 1.0655835722520882e-05, "loss": 0.4952, "step": 18093 }, { "epoch": 0.4948042003937869, "grad_norm": 1.3694117069244385, "learning_rate": 1.0654951922640361e-05, "loss": 0.5538, "step": 18094 }, { "epoch": 0.49483154670750384, "grad_norm": 1.4715638160705566, "learning_rate": 1.065406811762193e-05, "loss": 0.5314, "step": 18095 }, { "epoch": 0.4948588930212207, "grad_norm": 1.271685004234314, "learning_rate": 1.0653184307472519e-05, "loss": 0.4972, "step": 18096 }, { "epoch": 0.49488623933493764, "grad_norm": 1.1093847751617432, "learning_rate": 1.065230049219906e-05, "loss": 0.4712, "step": 18097 }, { "epoch": 0.49491358564865456, "grad_norm": 1.1048015356063843, "learning_rate": 1.0651416671808488e-05, "loss": 0.4784, "step": 18098 }, { "epoch": 0.4949409319623715, "grad_norm": 1.2439769506454468, "learning_rate": 1.0650532846307733e-05, "loss": 0.5099, "step": 18099 }, { "epoch": 0.49496827827608836, "grad_norm": 1.7983713150024414, "learning_rate": 1.0649649015703731e-05, "loss": 0.5075, "step": 18100 }, { "epoch": 0.4949956245898053, "grad_norm": 1.5892598628997803, "learning_rate": 1.0648765180003417e-05, "loss": 0.526, "step": 18101 }, { "epoch": 0.4950229709035222, "grad_norm": 1.272531509399414, "learning_rate": 1.0647881339213722e-05, "loss": 0.5096, "step": 18102 }, { "epoch": 0.49505031721723913, "grad_norm": 1.1182000637054443, "learning_rate": 1.064699749334158e-05, "loss": 0.4921, "step": 18103 }, { "epoch": 0.495077663530956, "grad_norm": 1.2637579441070557, "learning_rate": 1.0646113642393928e-05, "loss": 0.4576, "step": 18104 }, { "epoch": 0.4951050098446729, "grad_norm": 1.2381194829940796, "learning_rate": 1.0645229786377692e-05, "loss": 0.4791, "step": 18105 }, { "epoch": 0.49513235615838985, "grad_norm": 1.6394927501678467, "learning_rate": 1.064434592529981e-05, "loss": 0.4384, "step": 18106 }, { "epoch": 0.4951597024721068, "grad_norm": 1.347049593925476, "learning_rate": 1.064346205916722e-05, "loss": 0.4079, "step": 18107 }, { "epoch": 0.49518704878582365, "grad_norm": 1.2103101015090942, "learning_rate": 1.0642578187986847e-05, "loss": 0.5351, "step": 18108 }, { "epoch": 0.49521439509954057, "grad_norm": 1.2331701517105103, "learning_rate": 1.0641694311765628e-05, "loss": 0.5003, "step": 18109 }, { "epoch": 0.4952417414132575, "grad_norm": 1.4282478094100952, "learning_rate": 1.0640810430510501e-05, "loss": 0.5206, "step": 18110 }, { "epoch": 0.4952690877269744, "grad_norm": 1.2630915641784668, "learning_rate": 1.0639926544228397e-05, "loss": 0.4951, "step": 18111 }, { "epoch": 0.4952964340406913, "grad_norm": 1.5071475505828857, "learning_rate": 1.0639042652926248e-05, "loss": 0.3993, "step": 18112 }, { "epoch": 0.4953237803544082, "grad_norm": 1.4051001071929932, "learning_rate": 1.0638158756610989e-05, "loss": 0.5221, "step": 18113 }, { "epoch": 0.49535112666812514, "grad_norm": 1.3926702737808228, "learning_rate": 1.0637274855289553e-05, "loss": 0.5444, "step": 18114 }, { "epoch": 0.49537847298184207, "grad_norm": 1.5298658609390259, "learning_rate": 1.0636390948968876e-05, "loss": 0.4676, "step": 18115 }, { "epoch": 0.49540581929555894, "grad_norm": 1.3951035737991333, "learning_rate": 1.063550703765589e-05, "loss": 0.5181, "step": 18116 }, { "epoch": 0.49543316560927586, "grad_norm": 1.2616605758666992, "learning_rate": 1.0634623121357535e-05, "loss": 0.5014, "step": 18117 }, { "epoch": 0.4954605119229928, "grad_norm": 1.2840018272399902, "learning_rate": 1.0633739200080735e-05, "loss": 0.5046, "step": 18118 }, { "epoch": 0.4954878582367097, "grad_norm": 1.2192590236663818, "learning_rate": 1.063285527383243e-05, "loss": 0.5064, "step": 18119 }, { "epoch": 0.4955152045504266, "grad_norm": 1.4210788011550903, "learning_rate": 1.0631971342619555e-05, "loss": 0.5192, "step": 18120 }, { "epoch": 0.4955425508641435, "grad_norm": 1.3279011249542236, "learning_rate": 1.0631087406449041e-05, "loss": 0.4768, "step": 18121 }, { "epoch": 0.49556989717786043, "grad_norm": 1.701111912727356, "learning_rate": 1.0630203465327826e-05, "loss": 0.4121, "step": 18122 }, { "epoch": 0.49559724349157736, "grad_norm": 1.369150161743164, "learning_rate": 1.0629319519262844e-05, "loss": 0.3897, "step": 18123 }, { "epoch": 0.4956245898052942, "grad_norm": 1.1316858530044556, "learning_rate": 1.0628435568261023e-05, "loss": 0.4927, "step": 18124 }, { "epoch": 0.49565193611901115, "grad_norm": 1.1467845439910889, "learning_rate": 1.0627551612329303e-05, "loss": 0.5256, "step": 18125 }, { "epoch": 0.4956792824327281, "grad_norm": 1.359163761138916, "learning_rate": 1.0626667651474618e-05, "loss": 0.5292, "step": 18126 }, { "epoch": 0.495706628746445, "grad_norm": 1.6050578355789185, "learning_rate": 1.06257836857039e-05, "loss": 0.4148, "step": 18127 }, { "epoch": 0.49573397506016187, "grad_norm": 1.1129670143127441, "learning_rate": 1.0624899715024088e-05, "loss": 0.5056, "step": 18128 }, { "epoch": 0.4957613213738788, "grad_norm": 1.1599743366241455, "learning_rate": 1.0624015739442112e-05, "loss": 0.5044, "step": 18129 }, { "epoch": 0.4957886676875957, "grad_norm": 1.1781563758850098, "learning_rate": 1.0623131758964909e-05, "loss": 0.4968, "step": 18130 }, { "epoch": 0.49581601400131264, "grad_norm": 1.3150781393051147, "learning_rate": 1.0622247773599412e-05, "loss": 0.5007, "step": 18131 }, { "epoch": 0.4958433603150295, "grad_norm": 1.172646164894104, "learning_rate": 1.0621363783352555e-05, "loss": 0.5353, "step": 18132 }, { "epoch": 0.49587070662874644, "grad_norm": 1.2086987495422363, "learning_rate": 1.0620479788231275e-05, "loss": 0.5298, "step": 18133 }, { "epoch": 0.49589805294246336, "grad_norm": 1.494904637336731, "learning_rate": 1.0619595788242507e-05, "loss": 0.8491, "step": 18134 }, { "epoch": 0.4959253992561803, "grad_norm": 1.0882185697555542, "learning_rate": 1.0618711783393182e-05, "loss": 0.4993, "step": 18135 }, { "epoch": 0.49595274556989716, "grad_norm": 1.1062390804290771, "learning_rate": 1.0617827773690239e-05, "loss": 0.4804, "step": 18136 }, { "epoch": 0.4959800918836141, "grad_norm": 1.174250841140747, "learning_rate": 1.061694375914061e-05, "loss": 0.5193, "step": 18137 }, { "epoch": 0.496007438197331, "grad_norm": 1.3024773597717285, "learning_rate": 1.061605973975123e-05, "loss": 0.5143, "step": 18138 }, { "epoch": 0.49603478451104793, "grad_norm": 1.5393497943878174, "learning_rate": 1.0615175715529039e-05, "loss": 0.538, "step": 18139 }, { "epoch": 0.4960621308247648, "grad_norm": 1.2054811716079712, "learning_rate": 1.0614291686480964e-05, "loss": 0.4826, "step": 18140 }, { "epoch": 0.4960894771384817, "grad_norm": 1.484153151512146, "learning_rate": 1.0613407652613946e-05, "loss": 0.4956, "step": 18141 }, { "epoch": 0.49611682345219865, "grad_norm": 1.0665948390960693, "learning_rate": 1.0612523613934915e-05, "loss": 0.5108, "step": 18142 }, { "epoch": 0.4961441697659156, "grad_norm": 1.336319088935852, "learning_rate": 1.0611639570450808e-05, "loss": 0.5093, "step": 18143 }, { "epoch": 0.49617151607963245, "grad_norm": 1.160446047782898, "learning_rate": 1.0610755522168563e-05, "loss": 0.4756, "step": 18144 }, { "epoch": 0.49619886239334937, "grad_norm": 1.8571381568908691, "learning_rate": 1.0609871469095112e-05, "loss": 0.4066, "step": 18145 }, { "epoch": 0.4962262087070663, "grad_norm": 1.265613079071045, "learning_rate": 1.060898741123739e-05, "loss": 0.4687, "step": 18146 }, { "epoch": 0.4962535550207832, "grad_norm": 1.2064422369003296, "learning_rate": 1.0608103348602336e-05, "loss": 0.5098, "step": 18147 }, { "epoch": 0.4962809013345001, "grad_norm": 1.2331740856170654, "learning_rate": 1.0607219281196879e-05, "loss": 0.4852, "step": 18148 }, { "epoch": 0.496308247648217, "grad_norm": 1.0908128023147583, "learning_rate": 1.0606335209027958e-05, "loss": 0.5227, "step": 18149 }, { "epoch": 0.49633559396193394, "grad_norm": 1.4408878087997437, "learning_rate": 1.0605451132102511e-05, "loss": 0.416, "step": 18150 }, { "epoch": 0.49636294027565087, "grad_norm": 1.327761173248291, "learning_rate": 1.0604567050427468e-05, "loss": 0.3569, "step": 18151 }, { "epoch": 0.49639028658936774, "grad_norm": 1.2641698122024536, "learning_rate": 1.0603682964009767e-05, "loss": 0.5554, "step": 18152 }, { "epoch": 0.49641763290308466, "grad_norm": 1.364882230758667, "learning_rate": 1.0602798872856343e-05, "loss": 0.5204, "step": 18153 }, { "epoch": 0.4964449792168016, "grad_norm": 1.5320090055465698, "learning_rate": 1.060191477697413e-05, "loss": 0.4111, "step": 18154 }, { "epoch": 0.4964723255305185, "grad_norm": 1.2899043560028076, "learning_rate": 1.0601030676370068e-05, "loss": 0.7914, "step": 18155 }, { "epoch": 0.4964996718442354, "grad_norm": 1.5260837078094482, "learning_rate": 1.0600146571051086e-05, "loss": 0.5088, "step": 18156 }, { "epoch": 0.4965270181579523, "grad_norm": 1.1744619607925415, "learning_rate": 1.0599262461024127e-05, "loss": 0.5186, "step": 18157 }, { "epoch": 0.49655436447166923, "grad_norm": 1.2823715209960938, "learning_rate": 1.0598378346296124e-05, "loss": 0.7791, "step": 18158 }, { "epoch": 0.49658171078538615, "grad_norm": 1.4605653285980225, "learning_rate": 1.0597494226874006e-05, "loss": 0.5378, "step": 18159 }, { "epoch": 0.496609057099103, "grad_norm": 1.3712676763534546, "learning_rate": 1.0596610102764716e-05, "loss": 0.5228, "step": 18160 }, { "epoch": 0.49663640341281995, "grad_norm": 1.1957517862319946, "learning_rate": 1.0595725973975186e-05, "loss": 0.5093, "step": 18161 }, { "epoch": 0.4966637497265369, "grad_norm": 1.4151859283447266, "learning_rate": 1.0594841840512355e-05, "loss": 0.4555, "step": 18162 }, { "epoch": 0.4966910960402538, "grad_norm": 1.3134729862213135, "learning_rate": 1.0593957702383157e-05, "loss": 0.51, "step": 18163 }, { "epoch": 0.49671844235397067, "grad_norm": 1.274536371231079, "learning_rate": 1.0593073559594529e-05, "loss": 0.5123, "step": 18164 }, { "epoch": 0.4967457886676876, "grad_norm": 1.728168249130249, "learning_rate": 1.0592189412153401e-05, "loss": 0.4937, "step": 18165 }, { "epoch": 0.4967731349814045, "grad_norm": 1.5041334629058838, "learning_rate": 1.059130526006672e-05, "loss": 0.3551, "step": 18166 }, { "epoch": 0.49680048129512144, "grad_norm": 2.002211093902588, "learning_rate": 1.0590421103341413e-05, "loss": 0.4475, "step": 18167 }, { "epoch": 0.4968278276088383, "grad_norm": 1.0848983526229858, "learning_rate": 1.0589536941984417e-05, "loss": 0.506, "step": 18168 }, { "epoch": 0.49685517392255524, "grad_norm": 1.1349506378173828, "learning_rate": 1.0588652776002673e-05, "loss": 0.4963, "step": 18169 }, { "epoch": 0.49688252023627216, "grad_norm": 1.5750739574432373, "learning_rate": 1.058776860540311e-05, "loss": 0.4187, "step": 18170 }, { "epoch": 0.4969098665499891, "grad_norm": 1.408522129058838, "learning_rate": 1.0586884430192672e-05, "loss": 0.5274, "step": 18171 }, { "epoch": 0.49693721286370596, "grad_norm": 1.790536642074585, "learning_rate": 1.0586000250378287e-05, "loss": 0.5608, "step": 18172 }, { "epoch": 0.4969645591774229, "grad_norm": 1.55375337600708, "learning_rate": 1.0585116065966896e-05, "loss": 0.5125, "step": 18173 }, { "epoch": 0.4969919054911398, "grad_norm": 1.270437479019165, "learning_rate": 1.0584231876965436e-05, "loss": 0.5175, "step": 18174 }, { "epoch": 0.49701925180485673, "grad_norm": 1.127086877822876, "learning_rate": 1.0583347683380843e-05, "loss": 0.4781, "step": 18175 }, { "epoch": 0.4970465981185736, "grad_norm": 1.278670310974121, "learning_rate": 1.0582463485220047e-05, "loss": 0.4832, "step": 18176 }, { "epoch": 0.4970739444322905, "grad_norm": 1.5720138549804688, "learning_rate": 1.058157928248999e-05, "loss": 0.3744, "step": 18177 }, { "epoch": 0.49710129074600745, "grad_norm": 1.458229899406433, "learning_rate": 1.0580695075197606e-05, "loss": 0.8054, "step": 18178 }, { "epoch": 0.4971286370597244, "grad_norm": 1.2742255926132202, "learning_rate": 1.0579810863349835e-05, "loss": 0.7779, "step": 18179 }, { "epoch": 0.49715598337344125, "grad_norm": 1.9900286197662354, "learning_rate": 1.0578926646953612e-05, "loss": 0.5318, "step": 18180 }, { "epoch": 0.49718332968715817, "grad_norm": 1.7349956035614014, "learning_rate": 1.0578042426015869e-05, "loss": 0.5177, "step": 18181 }, { "epoch": 0.4972106760008751, "grad_norm": 1.571915626525879, "learning_rate": 1.0577158200543549e-05, "loss": 0.5163, "step": 18182 }, { "epoch": 0.49723802231459197, "grad_norm": 1.1372884511947632, "learning_rate": 1.0576273970543582e-05, "loss": 0.4606, "step": 18183 }, { "epoch": 0.4972653686283089, "grad_norm": 1.4735876321792603, "learning_rate": 1.057538973602291e-05, "loss": 0.4907, "step": 18184 }, { "epoch": 0.4972927149420258, "grad_norm": 1.1872835159301758, "learning_rate": 1.0574505496988468e-05, "loss": 0.4927, "step": 18185 }, { "epoch": 0.49732006125574274, "grad_norm": 1.3648970127105713, "learning_rate": 1.0573621253447188e-05, "loss": 0.5083, "step": 18186 }, { "epoch": 0.4973474075694596, "grad_norm": 1.8167247772216797, "learning_rate": 1.0572737005406012e-05, "loss": 0.3738, "step": 18187 }, { "epoch": 0.49737475388317653, "grad_norm": 1.1402323246002197, "learning_rate": 1.057185275287188e-05, "loss": 0.5103, "step": 18188 }, { "epoch": 0.49740210019689346, "grad_norm": 1.2380249500274658, "learning_rate": 1.0570968495851722e-05, "loss": 0.5168, "step": 18189 }, { "epoch": 0.4974294465106104, "grad_norm": 1.5477960109710693, "learning_rate": 1.0570084234352475e-05, "loss": 0.4895, "step": 18190 }, { "epoch": 0.49745679282432725, "grad_norm": 1.3281527757644653, "learning_rate": 1.0569199968381079e-05, "loss": 0.5171, "step": 18191 }, { "epoch": 0.4974841391380442, "grad_norm": 1.1517549753189087, "learning_rate": 1.0568315697944469e-05, "loss": 0.5056, "step": 18192 }, { "epoch": 0.4975114854517611, "grad_norm": 1.4779064655303955, "learning_rate": 1.0567431423049585e-05, "loss": 0.4945, "step": 18193 }, { "epoch": 0.49753883176547803, "grad_norm": 1.4986777305603027, "learning_rate": 1.0566547143703356e-05, "loss": 0.5438, "step": 18194 }, { "epoch": 0.4975661780791949, "grad_norm": 1.1808767318725586, "learning_rate": 1.0565662859912726e-05, "loss": 0.5021, "step": 18195 }, { "epoch": 0.4975935243929118, "grad_norm": 1.5701923370361328, "learning_rate": 1.0564778571684633e-05, "loss": 0.4137, "step": 18196 }, { "epoch": 0.49762087070662875, "grad_norm": 1.3939518928527832, "learning_rate": 1.0563894279026008e-05, "loss": 0.5323, "step": 18197 }, { "epoch": 0.4976482170203457, "grad_norm": 1.675241231918335, "learning_rate": 1.056300998194379e-05, "loss": 0.7942, "step": 18198 }, { "epoch": 0.49767556333406254, "grad_norm": 1.3322858810424805, "learning_rate": 1.0562125680444921e-05, "loss": 0.5304, "step": 18199 }, { "epoch": 0.49770290964777947, "grad_norm": 2.2930643558502197, "learning_rate": 1.0561241374536331e-05, "loss": 0.8242, "step": 18200 }, { "epoch": 0.4977302559614964, "grad_norm": 1.4324746131896973, "learning_rate": 1.0560357064224964e-05, "loss": 0.5165, "step": 18201 }, { "epoch": 0.4977576022752133, "grad_norm": 1.4766672849655151, "learning_rate": 1.055947274951775e-05, "loss": 0.8031, "step": 18202 }, { "epoch": 0.4977849485889302, "grad_norm": 1.6844922304153442, "learning_rate": 1.055858843042163e-05, "loss": 0.515, "step": 18203 }, { "epoch": 0.4978122949026471, "grad_norm": 1.2454383373260498, "learning_rate": 1.0557704106943542e-05, "loss": 0.5455, "step": 18204 }, { "epoch": 0.49783964121636404, "grad_norm": 1.3207197189331055, "learning_rate": 1.0556819779090421e-05, "loss": 0.3592, "step": 18205 }, { "epoch": 0.49786698753008096, "grad_norm": 1.2621515989303589, "learning_rate": 1.0555935446869208e-05, "loss": 0.4761, "step": 18206 }, { "epoch": 0.49789433384379783, "grad_norm": 1.2906560897827148, "learning_rate": 1.0555051110286837e-05, "loss": 0.4759, "step": 18207 }, { "epoch": 0.49792168015751476, "grad_norm": 1.020512342453003, "learning_rate": 1.0554166769350243e-05, "loss": 0.4734, "step": 18208 }, { "epoch": 0.4979490264712317, "grad_norm": 1.4064279794692993, "learning_rate": 1.0553282424066371e-05, "loss": 0.5269, "step": 18209 }, { "epoch": 0.4979763727849486, "grad_norm": 1.2193000316619873, "learning_rate": 1.0552398074442153e-05, "loss": 0.4988, "step": 18210 }, { "epoch": 0.4980037190986655, "grad_norm": 1.3252232074737549, "learning_rate": 1.0551513720484527e-05, "loss": 0.3939, "step": 18211 }, { "epoch": 0.4980310654123824, "grad_norm": 1.300115942955017, "learning_rate": 1.0550629362200432e-05, "loss": 0.3795, "step": 18212 }, { "epoch": 0.4980584117260993, "grad_norm": 1.2054108381271362, "learning_rate": 1.0549744999596802e-05, "loss": 0.5253, "step": 18213 }, { "epoch": 0.49808575803981625, "grad_norm": 1.1608524322509766, "learning_rate": 1.054886063268058e-05, "loss": 0.4939, "step": 18214 }, { "epoch": 0.4981131043535331, "grad_norm": 1.123486876487732, "learning_rate": 1.0547976261458699e-05, "loss": 0.5076, "step": 18215 }, { "epoch": 0.49814045066725005, "grad_norm": 1.2661981582641602, "learning_rate": 1.0547091885938099e-05, "loss": 0.4853, "step": 18216 }, { "epoch": 0.49816779698096697, "grad_norm": 1.704156756401062, "learning_rate": 1.0546207506125718e-05, "loss": 0.4279, "step": 18217 }, { "epoch": 0.4981951432946839, "grad_norm": 1.558260440826416, "learning_rate": 1.0545323122028493e-05, "loss": 0.5104, "step": 18218 }, { "epoch": 0.49822248960840076, "grad_norm": 1.2331253290176392, "learning_rate": 1.054443873365336e-05, "loss": 0.4558, "step": 18219 }, { "epoch": 0.4982498359221177, "grad_norm": 1.4606462717056274, "learning_rate": 1.0543554341007258e-05, "loss": 0.4413, "step": 18220 }, { "epoch": 0.4982771822358346, "grad_norm": 1.3201290369033813, "learning_rate": 1.0542669944097126e-05, "loss": 0.4935, "step": 18221 }, { "epoch": 0.49830452854955154, "grad_norm": 1.4471393823623657, "learning_rate": 1.0541785542929901e-05, "loss": 0.4367, "step": 18222 }, { "epoch": 0.4983318748632684, "grad_norm": 1.4033293724060059, "learning_rate": 1.054090113751252e-05, "loss": 0.4112, "step": 18223 }, { "epoch": 0.49835922117698533, "grad_norm": 1.4686211347579956, "learning_rate": 1.0540016727851923e-05, "loss": 0.3608, "step": 18224 }, { "epoch": 0.49838656749070226, "grad_norm": 1.3232568502426147, "learning_rate": 1.0539132313955047e-05, "loss": 0.515, "step": 18225 }, { "epoch": 0.4984139138044192, "grad_norm": 1.125596046447754, "learning_rate": 1.053824789582883e-05, "loss": 0.3433, "step": 18226 }, { "epoch": 0.49844126011813605, "grad_norm": 2.335564613342285, "learning_rate": 1.0537363473480208e-05, "loss": 0.8327, "step": 18227 }, { "epoch": 0.498468606431853, "grad_norm": 1.4370778799057007, "learning_rate": 1.0536479046916126e-05, "loss": 0.539, "step": 18228 }, { "epoch": 0.4984959527455699, "grad_norm": 1.5390924215316772, "learning_rate": 1.0535594616143509e-05, "loss": 0.8206, "step": 18229 }, { "epoch": 0.49852329905928683, "grad_norm": 1.433133840560913, "learning_rate": 1.0534710181169307e-05, "loss": 0.5754, "step": 18230 }, { "epoch": 0.4985506453730037, "grad_norm": 1.2158174514770508, "learning_rate": 1.0533825742000456e-05, "loss": 0.519, "step": 18231 }, { "epoch": 0.4985779916867206, "grad_norm": 1.6708232164382935, "learning_rate": 1.0532941298643889e-05, "loss": 0.4021, "step": 18232 }, { "epoch": 0.49860533800043755, "grad_norm": 1.6327852010726929, "learning_rate": 1.0532056851106547e-05, "loss": 0.7737, "step": 18233 }, { "epoch": 0.4986326843141545, "grad_norm": 1.1953054666519165, "learning_rate": 1.053117239939537e-05, "loss": 0.4835, "step": 18234 }, { "epoch": 0.49866003062787134, "grad_norm": 1.163844108581543, "learning_rate": 1.0530287943517297e-05, "loss": 0.5077, "step": 18235 }, { "epoch": 0.49868737694158827, "grad_norm": 6.745936393737793, "learning_rate": 1.0529403483479263e-05, "loss": 0.4957, "step": 18236 }, { "epoch": 0.4987147232553052, "grad_norm": 1.135711669921875, "learning_rate": 1.0528519019288205e-05, "loss": 0.5362, "step": 18237 }, { "epoch": 0.4987420695690221, "grad_norm": 1.4660836458206177, "learning_rate": 1.0527634550951066e-05, "loss": 0.4019, "step": 18238 }, { "epoch": 0.498769415882739, "grad_norm": 1.6511988639831543, "learning_rate": 1.0526750078474786e-05, "loss": 0.7793, "step": 18239 }, { "epoch": 0.4987967621964559, "grad_norm": 2.480940341949463, "learning_rate": 1.0525865601866294e-05, "loss": 0.414, "step": 18240 }, { "epoch": 0.49882410851017284, "grad_norm": 1.450374960899353, "learning_rate": 1.0524981121132537e-05, "loss": 0.5008, "step": 18241 }, { "epoch": 0.49885145482388976, "grad_norm": 1.4744185209274292, "learning_rate": 1.0524096636280452e-05, "loss": 0.5108, "step": 18242 }, { "epoch": 0.49887880113760663, "grad_norm": 1.4427098035812378, "learning_rate": 1.0523212147316975e-05, "loss": 0.486, "step": 18243 }, { "epoch": 0.49890614745132356, "grad_norm": 2.282986640930176, "learning_rate": 1.0522327654249044e-05, "loss": 0.8196, "step": 18244 }, { "epoch": 0.4989334937650405, "grad_norm": 1.1703221797943115, "learning_rate": 1.0521443157083603e-05, "loss": 0.4831, "step": 18245 }, { "epoch": 0.4989608400787574, "grad_norm": 1.6773375272750854, "learning_rate": 1.0520558655827586e-05, "loss": 0.506, "step": 18246 }, { "epoch": 0.4989881863924743, "grad_norm": 2.270880937576294, "learning_rate": 1.0519674150487935e-05, "loss": 0.5316, "step": 18247 }, { "epoch": 0.4990155327061912, "grad_norm": 1.3024628162384033, "learning_rate": 1.0518789641071584e-05, "loss": 0.5278, "step": 18248 }, { "epoch": 0.4990428790199081, "grad_norm": 1.3506337404251099, "learning_rate": 1.0517905127585473e-05, "loss": 0.5076, "step": 18249 }, { "epoch": 0.49907022533362505, "grad_norm": 1.3788337707519531, "learning_rate": 1.0517020610036543e-05, "loss": 0.529, "step": 18250 }, { "epoch": 0.4990975716473419, "grad_norm": 1.651314377784729, "learning_rate": 1.0516136088431731e-05, "loss": 0.7773, "step": 18251 }, { "epoch": 0.49912491796105884, "grad_norm": 1.2338792085647583, "learning_rate": 1.0515251562777979e-05, "loss": 0.496, "step": 18252 }, { "epoch": 0.49915226427477577, "grad_norm": 1.2936066389083862, "learning_rate": 1.0514367033082224e-05, "loss": 0.5085, "step": 18253 }, { "epoch": 0.4991796105884927, "grad_norm": 1.5986860990524292, "learning_rate": 1.0513482499351402e-05, "loss": 0.3646, "step": 18254 }, { "epoch": 0.49920695690220956, "grad_norm": 1.452359676361084, "learning_rate": 1.0512597961592456e-05, "loss": 0.4727, "step": 18255 }, { "epoch": 0.4992343032159265, "grad_norm": 1.215894341468811, "learning_rate": 1.0511713419812321e-05, "loss": 0.3782, "step": 18256 }, { "epoch": 0.4992616495296434, "grad_norm": 1.1786667108535767, "learning_rate": 1.0510828874017939e-05, "loss": 0.5084, "step": 18257 }, { "epoch": 0.49928899584336034, "grad_norm": 1.5278408527374268, "learning_rate": 1.050994432421625e-05, "loss": 0.5078, "step": 18258 }, { "epoch": 0.4993163421570772, "grad_norm": 1.5696271657943726, "learning_rate": 1.050905977041419e-05, "loss": 0.5172, "step": 18259 }, { "epoch": 0.49934368847079413, "grad_norm": 1.2308540344238281, "learning_rate": 1.05081752126187e-05, "loss": 0.5312, "step": 18260 }, { "epoch": 0.49937103478451106, "grad_norm": 3.8285582065582275, "learning_rate": 1.0507290650836716e-05, "loss": 0.4701, "step": 18261 }, { "epoch": 0.499398381098228, "grad_norm": 1.227260708808899, "learning_rate": 1.0506406085075182e-05, "loss": 0.4921, "step": 18262 }, { "epoch": 0.49942572741194485, "grad_norm": 1.335911750793457, "learning_rate": 1.0505521515341036e-05, "loss": 0.5316, "step": 18263 }, { "epoch": 0.4994530737256618, "grad_norm": 1.1811027526855469, "learning_rate": 1.0504636941641216e-05, "loss": 0.4941, "step": 18264 }, { "epoch": 0.4994804200393787, "grad_norm": 1.4779491424560547, "learning_rate": 1.0503752363982658e-05, "loss": 0.4963, "step": 18265 }, { "epoch": 0.49950776635309563, "grad_norm": 1.260459542274475, "learning_rate": 1.0502867782372307e-05, "loss": 0.5034, "step": 18266 }, { "epoch": 0.4995351126668125, "grad_norm": 1.4746805429458618, "learning_rate": 1.0501983196817098e-05, "loss": 0.4838, "step": 18267 }, { "epoch": 0.4995624589805294, "grad_norm": 4.120375156402588, "learning_rate": 1.0501098607323971e-05, "loss": 0.3702, "step": 18268 }, { "epoch": 0.49958980529424635, "grad_norm": 1.4426897764205933, "learning_rate": 1.0500214013899871e-05, "loss": 0.3797, "step": 18269 }, { "epoch": 0.4996171516079633, "grad_norm": 1.7856239080429077, "learning_rate": 1.0499329416551729e-05, "loss": 0.4644, "step": 18270 }, { "epoch": 0.49964449792168014, "grad_norm": 1.462875485420227, "learning_rate": 1.0498444815286491e-05, "loss": 0.4055, "step": 18271 }, { "epoch": 0.49967184423539707, "grad_norm": 1.8875114917755127, "learning_rate": 1.0497560210111091e-05, "loss": 0.8087, "step": 18272 }, { "epoch": 0.499699190549114, "grad_norm": 1.8224881887435913, "learning_rate": 1.049667560103247e-05, "loss": 0.3616, "step": 18273 }, { "epoch": 0.4997265368628309, "grad_norm": 1.0419713258743286, "learning_rate": 1.0495790988057575e-05, "loss": 0.3601, "step": 18274 }, { "epoch": 0.4997538831765478, "grad_norm": 4.018672943115234, "learning_rate": 1.0494906371193334e-05, "loss": 0.8026, "step": 18275 }, { "epoch": 0.4997812294902647, "grad_norm": 1.2814043760299683, "learning_rate": 1.049402175044669e-05, "loss": 0.5065, "step": 18276 }, { "epoch": 0.49980857580398164, "grad_norm": 1.3957395553588867, "learning_rate": 1.049313712582459e-05, "loss": 0.5073, "step": 18277 }, { "epoch": 0.49983592211769856, "grad_norm": 1.1746190786361694, "learning_rate": 1.0492252497333963e-05, "loss": 0.4764, "step": 18278 }, { "epoch": 0.49986326843141543, "grad_norm": 1.3438827991485596, "learning_rate": 1.0491367864981759e-05, "loss": 0.4899, "step": 18279 }, { "epoch": 0.49989061474513236, "grad_norm": 1.2886807918548584, "learning_rate": 1.0490483228774911e-05, "loss": 0.8231, "step": 18280 }, { "epoch": 0.4999179610588493, "grad_norm": 1.3866536617279053, "learning_rate": 1.0489598588720358e-05, "loss": 0.5664, "step": 18281 }, { "epoch": 0.4999453073725662, "grad_norm": 1.3277556896209717, "learning_rate": 1.0488713944825041e-05, "loss": 0.7669, "step": 18282 }, { "epoch": 0.4999726536862831, "grad_norm": 1.3724002838134766, "learning_rate": 1.0487829297095904e-05, "loss": 0.5162, "step": 18283 }, { "epoch": 0.5, "grad_norm": 1.214479923248291, "learning_rate": 1.048694464553988e-05, "loss": 0.5111, "step": 18284 }, { "epoch": 0.5000273463137169, "grad_norm": 2.276094675064087, "learning_rate": 1.0486059990163915e-05, "loss": 0.4994, "step": 18285 }, { "epoch": 0.5000546926274338, "grad_norm": 1.3814072608947754, "learning_rate": 1.0485175330974943e-05, "loss": 0.4026, "step": 18286 }, { "epoch": 0.5000820389411508, "grad_norm": 1.3784453868865967, "learning_rate": 1.0484290667979908e-05, "loss": 0.4858, "step": 18287 }, { "epoch": 0.5001093852548676, "grad_norm": 1.6137961149215698, "learning_rate": 1.0483406001185751e-05, "loss": 0.4374, "step": 18288 }, { "epoch": 0.5001367315685845, "grad_norm": 1.7260867357254028, "learning_rate": 1.048252133059941e-05, "loss": 0.4493, "step": 18289 }, { "epoch": 0.5001640778823014, "grad_norm": 1.4865001440048218, "learning_rate": 1.0481636656227825e-05, "loss": 0.5166, "step": 18290 }, { "epoch": 0.5001914241960184, "grad_norm": 1.2323745489120483, "learning_rate": 1.0480751978077934e-05, "loss": 0.5164, "step": 18291 }, { "epoch": 0.5002187705097353, "grad_norm": 1.2856059074401855, "learning_rate": 1.0479867296156678e-05, "loss": 0.3822, "step": 18292 }, { "epoch": 0.5002461168234522, "grad_norm": 1.2097100019454956, "learning_rate": 1.0478982610471002e-05, "loss": 0.5095, "step": 18293 }, { "epoch": 0.5002734631371691, "grad_norm": 1.2194066047668457, "learning_rate": 1.0478097921027839e-05, "loss": 0.4788, "step": 18294 }, { "epoch": 0.5003008094508861, "grad_norm": 1.3776732683181763, "learning_rate": 1.0477213227834134e-05, "loss": 0.5149, "step": 18295 }, { "epoch": 0.5003281557646029, "grad_norm": 1.2817606925964355, "learning_rate": 1.0476328530896827e-05, "loss": 0.4834, "step": 18296 }, { "epoch": 0.5003555020783198, "grad_norm": 1.3302088975906372, "learning_rate": 1.0475443830222854e-05, "loss": 0.5215, "step": 18297 }, { "epoch": 0.5003828483920367, "grad_norm": 1.316392421722412, "learning_rate": 1.0474559125819159e-05, "loss": 0.507, "step": 18298 }, { "epoch": 0.5004101947057537, "grad_norm": 1.0904353857040405, "learning_rate": 1.0473674417692683e-05, "loss": 0.4844, "step": 18299 }, { "epoch": 0.5004375410194706, "grad_norm": 1.3940478563308716, "learning_rate": 1.0472789705850362e-05, "loss": 0.369, "step": 18300 }, { "epoch": 0.5004648873331875, "grad_norm": 1.8829478025436401, "learning_rate": 1.047190499029914e-05, "loss": 0.5212, "step": 18301 }, { "epoch": 0.5004922336469044, "grad_norm": 1.220119595527649, "learning_rate": 1.0471020271045956e-05, "loss": 0.5208, "step": 18302 }, { "epoch": 0.5005195799606214, "grad_norm": 1.6051321029663086, "learning_rate": 1.0470135548097748e-05, "loss": 0.4127, "step": 18303 }, { "epoch": 0.5005469262743382, "grad_norm": 1.6516814231872559, "learning_rate": 1.0469250821461463e-05, "loss": 0.4122, "step": 18304 }, { "epoch": 0.5005742725880551, "grad_norm": 1.3681988716125488, "learning_rate": 1.0468366091144033e-05, "loss": 0.5231, "step": 18305 }, { "epoch": 0.500601618901772, "grad_norm": 1.221259593963623, "learning_rate": 1.0467481357152405e-05, "loss": 0.503, "step": 18306 }, { "epoch": 0.5006289652154889, "grad_norm": 1.6351360082626343, "learning_rate": 1.0466596619493519e-05, "loss": 0.5186, "step": 18307 }, { "epoch": 0.5006563115292059, "grad_norm": 1.4350697994232178, "learning_rate": 1.0465711878174312e-05, "loss": 0.5118, "step": 18308 }, { "epoch": 0.5006836578429228, "grad_norm": 1.1246600151062012, "learning_rate": 1.0464827133201727e-05, "loss": 0.4707, "step": 18309 }, { "epoch": 0.5007110041566397, "grad_norm": 1.3522361516952515, "learning_rate": 1.0463942384582704e-05, "loss": 0.5325, "step": 18310 }, { "epoch": 0.5007383504703566, "grad_norm": 1.3411729335784912, "learning_rate": 1.046305763232418e-05, "loss": 0.4074, "step": 18311 }, { "epoch": 0.5007656967840735, "grad_norm": 1.468044638633728, "learning_rate": 1.0462172876433105e-05, "loss": 0.5184, "step": 18312 }, { "epoch": 0.5007930430977904, "grad_norm": 1.1770985126495361, "learning_rate": 1.046128811691641e-05, "loss": 0.4859, "step": 18313 }, { "epoch": 0.5008203894115073, "grad_norm": 1.292922019958496, "learning_rate": 1.0460403353781043e-05, "loss": 0.5071, "step": 18314 }, { "epoch": 0.5008477357252242, "grad_norm": 1.1867437362670898, "learning_rate": 1.045951858703394e-05, "loss": 0.5212, "step": 18315 }, { "epoch": 0.5008750820389412, "grad_norm": 1.3913052082061768, "learning_rate": 1.0458633816682043e-05, "loss": 0.5153, "step": 18316 }, { "epoch": 0.5009024283526581, "grad_norm": 1.2514058351516724, "learning_rate": 1.0457749042732291e-05, "loss": 0.4961, "step": 18317 }, { "epoch": 0.500929774666375, "grad_norm": 1.3962996006011963, "learning_rate": 1.0456864265191626e-05, "loss": 0.5165, "step": 18318 }, { "epoch": 0.5009571209800919, "grad_norm": 1.58318030834198, "learning_rate": 1.0455979484066992e-05, "loss": 0.4124, "step": 18319 }, { "epoch": 0.5009844672938087, "grad_norm": 1.4557682275772095, "learning_rate": 1.0455094699365327e-05, "loss": 0.3791, "step": 18320 }, { "epoch": 0.5010118136075257, "grad_norm": 1.724928855895996, "learning_rate": 1.045420991109357e-05, "loss": 0.3418, "step": 18321 }, { "epoch": 0.5010391599212426, "grad_norm": 1.36057448387146, "learning_rate": 1.0453325119258663e-05, "loss": 0.539, "step": 18322 }, { "epoch": 0.5010665062349595, "grad_norm": 1.1613832712173462, "learning_rate": 1.0452440323867551e-05, "loss": 0.4938, "step": 18323 }, { "epoch": 0.5010938525486764, "grad_norm": 1.2955148220062256, "learning_rate": 1.0451555524927173e-05, "loss": 0.5145, "step": 18324 }, { "epoch": 0.5011211988623934, "grad_norm": 1.275316834449768, "learning_rate": 1.045067072244447e-05, "loss": 0.4973, "step": 18325 }, { "epoch": 0.5011485451761103, "grad_norm": 1.2235503196716309, "learning_rate": 1.0449785916426376e-05, "loss": 0.5079, "step": 18326 }, { "epoch": 0.5011758914898272, "grad_norm": 1.286028265953064, "learning_rate": 1.044890110687984e-05, "loss": 0.5177, "step": 18327 }, { "epoch": 0.501203237803544, "grad_norm": 1.4062856435775757, "learning_rate": 1.0448016293811805e-05, "loss": 0.4921, "step": 18328 }, { "epoch": 0.501230584117261, "grad_norm": 1.3328440189361572, "learning_rate": 1.0447131477229205e-05, "loss": 0.4995, "step": 18329 }, { "epoch": 0.5012579304309779, "grad_norm": 1.203757405281067, "learning_rate": 1.0446246657138985e-05, "loss": 0.4879, "step": 18330 }, { "epoch": 0.5012852767446948, "grad_norm": 1.1559219360351562, "learning_rate": 1.0445361833548086e-05, "loss": 0.4933, "step": 18331 }, { "epoch": 0.5013126230584117, "grad_norm": 1.3518683910369873, "learning_rate": 1.0444477006463448e-05, "loss": 0.499, "step": 18332 }, { "epoch": 0.5013399693721287, "grad_norm": 1.2784134149551392, "learning_rate": 1.0443592175892015e-05, "loss": 0.4983, "step": 18333 }, { "epoch": 0.5013673156858456, "grad_norm": 1.3307844400405884, "learning_rate": 1.0442707341840725e-05, "loss": 0.4961, "step": 18334 }, { "epoch": 0.5013946619995625, "grad_norm": 1.4725171327590942, "learning_rate": 1.0441822504316521e-05, "loss": 0.3981, "step": 18335 }, { "epoch": 0.5014220083132793, "grad_norm": 1.198674201965332, "learning_rate": 1.0440937663326343e-05, "loss": 0.8152, "step": 18336 }, { "epoch": 0.5014493546269962, "grad_norm": 1.0280085802078247, "learning_rate": 1.0440052818877134e-05, "loss": 0.5079, "step": 18337 }, { "epoch": 0.5014767009407132, "grad_norm": 1.811816692352295, "learning_rate": 1.0439167970975832e-05, "loss": 0.5072, "step": 18338 }, { "epoch": 0.5015040472544301, "grad_norm": 1.2237287759780884, "learning_rate": 1.0438283119629384e-05, "loss": 0.515, "step": 18339 }, { "epoch": 0.501531393568147, "grad_norm": 1.3362650871276855, "learning_rate": 1.0437398264844725e-05, "loss": 0.528, "step": 18340 }, { "epoch": 0.501558739881864, "grad_norm": 1.043495535850525, "learning_rate": 1.0436513406628801e-05, "loss": 0.4732, "step": 18341 }, { "epoch": 0.5015860861955809, "grad_norm": 1.2855206727981567, "learning_rate": 1.0435628544988554e-05, "loss": 0.5097, "step": 18342 }, { "epoch": 0.5016134325092978, "grad_norm": 1.1723947525024414, "learning_rate": 1.043474367993092e-05, "loss": 0.5178, "step": 18343 }, { "epoch": 0.5016407788230146, "grad_norm": 1.4389287233352661, "learning_rate": 1.043385881146285e-05, "loss": 0.5316, "step": 18344 }, { "epoch": 0.5016681251367315, "grad_norm": 1.2984532117843628, "learning_rate": 1.0432973939591273e-05, "loss": 0.4862, "step": 18345 }, { "epoch": 0.5016954714504485, "grad_norm": 1.3033217191696167, "learning_rate": 1.0432089064323141e-05, "loss": 0.4993, "step": 18346 }, { "epoch": 0.5017228177641654, "grad_norm": 1.13349187374115, "learning_rate": 1.0431204185665394e-05, "loss": 0.5197, "step": 18347 }, { "epoch": 0.5017501640778823, "grad_norm": 1.1997395753860474, "learning_rate": 1.0430319303624966e-05, "loss": 0.486, "step": 18348 }, { "epoch": 0.5017775103915992, "grad_norm": 1.2724144458770752, "learning_rate": 1.0429434418208808e-05, "loss": 0.5065, "step": 18349 }, { "epoch": 0.5018048567053162, "grad_norm": 1.2254656553268433, "learning_rate": 1.0428549529423857e-05, "loss": 0.5265, "step": 18350 }, { "epoch": 0.5018322030190331, "grad_norm": 1.3944851160049438, "learning_rate": 1.0427664637277057e-05, "loss": 0.4821, "step": 18351 }, { "epoch": 0.5018595493327499, "grad_norm": 1.2957075834274292, "learning_rate": 1.0426779741775345e-05, "loss": 0.5318, "step": 18352 }, { "epoch": 0.5018868956464668, "grad_norm": 1.0723017454147339, "learning_rate": 1.042589484292567e-05, "loss": 0.5272, "step": 18353 }, { "epoch": 0.5019142419601837, "grad_norm": 1.1610413789749146, "learning_rate": 1.0425009940734965e-05, "loss": 0.4944, "step": 18354 }, { "epoch": 0.5019415882739007, "grad_norm": 1.338248372077942, "learning_rate": 1.0424125035210181e-05, "loss": 0.4972, "step": 18355 }, { "epoch": 0.5019689345876176, "grad_norm": 1.7066529989242554, "learning_rate": 1.0423240126358254e-05, "loss": 0.4006, "step": 18356 }, { "epoch": 0.5019962809013345, "grad_norm": 1.3222521543502808, "learning_rate": 1.0422355214186127e-05, "loss": 0.4846, "step": 18357 }, { "epoch": 0.5020236272150514, "grad_norm": 1.1734528541564941, "learning_rate": 1.0421470298700742e-05, "loss": 0.5132, "step": 18358 }, { "epoch": 0.5020509735287684, "grad_norm": 1.3987493515014648, "learning_rate": 1.0420585379909041e-05, "loss": 0.5429, "step": 18359 }, { "epoch": 0.5020783198424852, "grad_norm": 1.3445976972579956, "learning_rate": 1.0419700457817965e-05, "loss": 0.3923, "step": 18360 }, { "epoch": 0.5021056661562021, "grad_norm": 3.137829303741455, "learning_rate": 1.0418815532434459e-05, "loss": 0.7834, "step": 18361 }, { "epoch": 0.502133012469919, "grad_norm": 1.1869994401931763, "learning_rate": 1.0417930603765462e-05, "loss": 0.4992, "step": 18362 }, { "epoch": 0.502160358783636, "grad_norm": 1.1455365419387817, "learning_rate": 1.0417045671817918e-05, "loss": 0.4922, "step": 18363 }, { "epoch": 0.5021877050973529, "grad_norm": 1.0636811256408691, "learning_rate": 1.0416160736598766e-05, "loss": 0.5175, "step": 18364 }, { "epoch": 0.5022150514110698, "grad_norm": 1.3271325826644897, "learning_rate": 1.0415275798114951e-05, "loss": 0.5397, "step": 18365 }, { "epoch": 0.5022423977247867, "grad_norm": 1.3381130695343018, "learning_rate": 1.0414390856373417e-05, "loss": 0.518, "step": 18366 }, { "epoch": 0.5022697440385037, "grad_norm": 1.462605595588684, "learning_rate": 1.0413505911381102e-05, "loss": 0.4912, "step": 18367 }, { "epoch": 0.5022970903522205, "grad_norm": 1.2374787330627441, "learning_rate": 1.0412620963144948e-05, "loss": 0.7826, "step": 18368 }, { "epoch": 0.5023244366659374, "grad_norm": 1.4183193445205688, "learning_rate": 1.04117360116719e-05, "loss": 0.4885, "step": 18369 }, { "epoch": 0.5023517829796543, "grad_norm": 1.1713006496429443, "learning_rate": 1.0410851056968898e-05, "loss": 0.5181, "step": 18370 }, { "epoch": 0.5023791292933713, "grad_norm": 1.7436367273330688, "learning_rate": 1.0409966099042886e-05, "loss": 0.5275, "step": 18371 }, { "epoch": 0.5024064756070882, "grad_norm": 1.3152976036071777, "learning_rate": 1.0409081137900806e-05, "loss": 0.5066, "step": 18372 }, { "epoch": 0.5024338219208051, "grad_norm": 1.2519104480743408, "learning_rate": 1.0408196173549596e-05, "loss": 0.4951, "step": 18373 }, { "epoch": 0.502461168234522, "grad_norm": 2.865746021270752, "learning_rate": 1.0407311205996205e-05, "loss": 0.43, "step": 18374 }, { "epoch": 0.502488514548239, "grad_norm": 1.1690387725830078, "learning_rate": 1.040642623524757e-05, "loss": 0.48, "step": 18375 }, { "epoch": 0.5025158608619558, "grad_norm": 1.5367109775543213, "learning_rate": 1.0405541261310636e-05, "loss": 0.4402, "step": 18376 }, { "epoch": 0.5025432071756727, "grad_norm": 1.3424192667007446, "learning_rate": 1.0404656284192348e-05, "loss": 0.5075, "step": 18377 }, { "epoch": 0.5025705534893896, "grad_norm": 1.2135913372039795, "learning_rate": 1.040377130389964e-05, "loss": 0.4881, "step": 18378 }, { "epoch": 0.5025978998031065, "grad_norm": 1.5067170858383179, "learning_rate": 1.0402886320439467e-05, "loss": 0.5275, "step": 18379 }, { "epoch": 0.5026252461168235, "grad_norm": 1.1152689456939697, "learning_rate": 1.0402001333818758e-05, "loss": 0.489, "step": 18380 }, { "epoch": 0.5026525924305404, "grad_norm": 1.2623714208602905, "learning_rate": 1.0401116344044464e-05, "loss": 0.5332, "step": 18381 }, { "epoch": 0.5026799387442573, "grad_norm": 1.4638043642044067, "learning_rate": 1.0400231351123526e-05, "loss": 0.5138, "step": 18382 }, { "epoch": 0.5027072850579742, "grad_norm": 1.0721180438995361, "learning_rate": 1.0399346355062882e-05, "loss": 0.5117, "step": 18383 }, { "epoch": 0.502734631371691, "grad_norm": 1.1467046737670898, "learning_rate": 1.039846135586948e-05, "loss": 0.7811, "step": 18384 }, { "epoch": 0.502761977685408, "grad_norm": 1.900765061378479, "learning_rate": 1.0397576353550264e-05, "loss": 0.5247, "step": 18385 }, { "epoch": 0.5027893239991249, "grad_norm": 1.37485933303833, "learning_rate": 1.0396691348112173e-05, "loss": 0.3871, "step": 18386 }, { "epoch": 0.5028166703128418, "grad_norm": 1.292567491531372, "learning_rate": 1.0395806339562147e-05, "loss": 0.5098, "step": 18387 }, { "epoch": 0.5028440166265588, "grad_norm": 1.202978491783142, "learning_rate": 1.0394921327907135e-05, "loss": 0.4886, "step": 18388 }, { "epoch": 0.5028713629402757, "grad_norm": 13.75847053527832, "learning_rate": 1.0394036313154072e-05, "loss": 0.7829, "step": 18389 }, { "epoch": 0.5028987092539926, "grad_norm": 1.4221057891845703, "learning_rate": 1.0393151295309909e-05, "loss": 0.4174, "step": 18390 }, { "epoch": 0.5029260555677094, "grad_norm": 1.2377301454544067, "learning_rate": 1.0392266274381582e-05, "loss": 0.4983, "step": 18391 }, { "epoch": 0.5029534018814263, "grad_norm": 1.5418857336044312, "learning_rate": 1.0391381250376036e-05, "loss": 0.4628, "step": 18392 }, { "epoch": 0.5029807481951433, "grad_norm": 1.4990111589431763, "learning_rate": 1.0390496223300217e-05, "loss": 0.4011, "step": 18393 }, { "epoch": 0.5030080945088602, "grad_norm": 1.3764842748641968, "learning_rate": 1.0389611193161062e-05, "loss": 0.5432, "step": 18394 }, { "epoch": 0.5030354408225771, "grad_norm": 1.9891079664230347, "learning_rate": 1.0388726159965517e-05, "loss": 0.3491, "step": 18395 }, { "epoch": 0.503062787136294, "grad_norm": 1.3499541282653809, "learning_rate": 1.0387841123720529e-05, "loss": 0.5048, "step": 18396 }, { "epoch": 0.503090133450011, "grad_norm": 1.4387694597244263, "learning_rate": 1.038695608443303e-05, "loss": 0.5098, "step": 18397 }, { "epoch": 0.5031174797637279, "grad_norm": 1.2109473943710327, "learning_rate": 1.0386071042109975e-05, "loss": 0.5087, "step": 18398 }, { "epoch": 0.5031448260774447, "grad_norm": 1.2822591066360474, "learning_rate": 1.03851859967583e-05, "loss": 0.7916, "step": 18399 }, { "epoch": 0.5031721723911616, "grad_norm": 7.222444534301758, "learning_rate": 1.0384300948384947e-05, "loss": 0.5078, "step": 18400 }, { "epoch": 0.5031995187048786, "grad_norm": 1.3073005676269531, "learning_rate": 1.0383415896996865e-05, "loss": 0.5271, "step": 18401 }, { "epoch": 0.5032268650185955, "grad_norm": 1.2335433959960938, "learning_rate": 1.038253084260099e-05, "loss": 0.5447, "step": 18402 }, { "epoch": 0.5032542113323124, "grad_norm": 1.1623419523239136, "learning_rate": 1.0381645785204269e-05, "loss": 0.357, "step": 18403 }, { "epoch": 0.5032815576460293, "grad_norm": 1.604471206665039, "learning_rate": 1.0380760724813646e-05, "loss": 0.5168, "step": 18404 }, { "epoch": 0.5033089039597463, "grad_norm": 1.2444393634796143, "learning_rate": 1.037987566143606e-05, "loss": 0.4832, "step": 18405 }, { "epoch": 0.5033362502734632, "grad_norm": 1.2476221323013306, "learning_rate": 1.0378990595078455e-05, "loss": 0.5078, "step": 18406 }, { "epoch": 0.50336359658718, "grad_norm": 1.1325234174728394, "learning_rate": 1.0378105525747779e-05, "loss": 0.3879, "step": 18407 }, { "epoch": 0.5033909429008969, "grad_norm": 1.2947362661361694, "learning_rate": 1.037722045345097e-05, "loss": 0.7968, "step": 18408 }, { "epoch": 0.5034182892146138, "grad_norm": 1.2375149726867676, "learning_rate": 1.0376335378194975e-05, "loss": 0.4991, "step": 18409 }, { "epoch": 0.5034456355283308, "grad_norm": 1.98945152759552, "learning_rate": 1.0375450299986731e-05, "loss": 0.5271, "step": 18410 }, { "epoch": 0.5034729818420477, "grad_norm": 1.3329209089279175, "learning_rate": 1.0374565218833186e-05, "loss": 0.5377, "step": 18411 }, { "epoch": 0.5035003281557646, "grad_norm": 1.3364315032958984, "learning_rate": 1.0373680134741284e-05, "loss": 0.5471, "step": 18412 }, { "epoch": 0.5035276744694815, "grad_norm": 1.429168462753296, "learning_rate": 1.0372795047717963e-05, "loss": 0.4926, "step": 18413 }, { "epoch": 0.5035550207831985, "grad_norm": 1.534146785736084, "learning_rate": 1.0371909957770175e-05, "loss": 0.3838, "step": 18414 }, { "epoch": 0.5035823670969153, "grad_norm": 1.27995765209198, "learning_rate": 1.0371024864904853e-05, "loss": 0.5317, "step": 18415 }, { "epoch": 0.5036097134106322, "grad_norm": 1.3543708324432373, "learning_rate": 1.0370139769128946e-05, "loss": 0.387, "step": 18416 }, { "epoch": 0.5036370597243491, "grad_norm": 1.3207166194915771, "learning_rate": 1.03692546704494e-05, "loss": 0.4926, "step": 18417 }, { "epoch": 0.5036644060380661, "grad_norm": 1.3013298511505127, "learning_rate": 1.0368369568873154e-05, "loss": 0.5168, "step": 18418 }, { "epoch": 0.503691752351783, "grad_norm": 1.3667587041854858, "learning_rate": 1.036748446440715e-05, "loss": 0.5384, "step": 18419 }, { "epoch": 0.5037190986654999, "grad_norm": 1.5407235622406006, "learning_rate": 1.0366599357058335e-05, "loss": 0.5079, "step": 18420 }, { "epoch": 0.5037464449792168, "grad_norm": 1.2981560230255127, "learning_rate": 1.0365714246833654e-05, "loss": 0.4884, "step": 18421 }, { "epoch": 0.5037737912929338, "grad_norm": 1.2902817726135254, "learning_rate": 1.0364829133740043e-05, "loss": 0.5319, "step": 18422 }, { "epoch": 0.5038011376066506, "grad_norm": 1.1735495328903198, "learning_rate": 1.0363944017784456e-05, "loss": 0.4925, "step": 18423 }, { "epoch": 0.5038284839203675, "grad_norm": 1.2308661937713623, "learning_rate": 1.0363058898973825e-05, "loss": 0.5091, "step": 18424 }, { "epoch": 0.5038558302340844, "grad_norm": 1.0578302145004272, "learning_rate": 1.0362173777315104e-05, "loss": 0.4866, "step": 18425 }, { "epoch": 0.5038831765478013, "grad_norm": 1.3644590377807617, "learning_rate": 1.0361288652815227e-05, "loss": 0.5083, "step": 18426 }, { "epoch": 0.5039105228615183, "grad_norm": 1.130733609199524, "learning_rate": 1.0360403525481144e-05, "loss": 0.4895, "step": 18427 }, { "epoch": 0.5039378691752352, "grad_norm": 1.2439982891082764, "learning_rate": 1.03595183953198e-05, "loss": 0.5103, "step": 18428 }, { "epoch": 0.5039652154889521, "grad_norm": 1.2972211837768555, "learning_rate": 1.035863326233813e-05, "loss": 0.5118, "step": 18429 }, { "epoch": 0.503992561802669, "grad_norm": 1.2594501972198486, "learning_rate": 1.0357748126543084e-05, "loss": 0.5338, "step": 18430 }, { "epoch": 0.5040199081163859, "grad_norm": 2.958449125289917, "learning_rate": 1.0356862987941605e-05, "loss": 0.353, "step": 18431 }, { "epoch": 0.5040472544301028, "grad_norm": 1.3485655784606934, "learning_rate": 1.0355977846540637e-05, "loss": 0.5341, "step": 18432 }, { "epoch": 0.5040746007438197, "grad_norm": 2.187305450439453, "learning_rate": 1.0355092702347124e-05, "loss": 0.4959, "step": 18433 }, { "epoch": 0.5041019470575366, "grad_norm": 10.596551895141602, "learning_rate": 1.0354207555368008e-05, "loss": 0.3825, "step": 18434 }, { "epoch": 0.5041292933712536, "grad_norm": 1.4583439826965332, "learning_rate": 1.035332240561023e-05, "loss": 0.4854, "step": 18435 }, { "epoch": 0.5041566396849705, "grad_norm": 1.3185454607009888, "learning_rate": 1.0352437253080743e-05, "loss": 0.8043, "step": 18436 }, { "epoch": 0.5041839859986874, "grad_norm": 1.376959204673767, "learning_rate": 1.035155209778648e-05, "loss": 0.5097, "step": 18437 }, { "epoch": 0.5042113323124043, "grad_norm": 1.5560789108276367, "learning_rate": 1.0350666939734392e-05, "loss": 0.5037, "step": 18438 }, { "epoch": 0.5042386786261212, "grad_norm": 1.4311314821243286, "learning_rate": 1.0349781778931426e-05, "loss": 0.4283, "step": 18439 }, { "epoch": 0.5042660249398381, "grad_norm": 1.5779248476028442, "learning_rate": 1.0348896615384511e-05, "loss": 0.5323, "step": 18440 }, { "epoch": 0.504293371253555, "grad_norm": 1.2810087203979492, "learning_rate": 1.0348011449100604e-05, "loss": 0.494, "step": 18441 }, { "epoch": 0.5043207175672719, "grad_norm": 1.205020785331726, "learning_rate": 1.0347126280086647e-05, "loss": 0.4398, "step": 18442 }, { "epoch": 0.5043480638809889, "grad_norm": 1.187581181526184, "learning_rate": 1.034624110834958e-05, "loss": 0.4908, "step": 18443 }, { "epoch": 0.5043754101947058, "grad_norm": 1.4310840368270874, "learning_rate": 1.034535593389635e-05, "loss": 0.3951, "step": 18444 }, { "epoch": 0.5044027565084227, "grad_norm": 1.6399551630020142, "learning_rate": 1.0344470756733897e-05, "loss": 0.7923, "step": 18445 }, { "epoch": 0.5044301028221396, "grad_norm": 1.2248809337615967, "learning_rate": 1.034358557686917e-05, "loss": 0.5002, "step": 18446 }, { "epoch": 0.5044574491358564, "grad_norm": 1.4728598594665527, "learning_rate": 1.034270039430911e-05, "loss": 0.523, "step": 18447 }, { "epoch": 0.5044847954495734, "grad_norm": 1.7546213865280151, "learning_rate": 1.0341815209060663e-05, "loss": 0.5041, "step": 18448 }, { "epoch": 0.5045121417632903, "grad_norm": 1.7287561893463135, "learning_rate": 1.034093002113077e-05, "loss": 0.3818, "step": 18449 }, { "epoch": 0.5045394880770072, "grad_norm": 1.4562134742736816, "learning_rate": 1.034004483052638e-05, "loss": 0.4902, "step": 18450 }, { "epoch": 0.5045668343907241, "grad_norm": 1.296769380569458, "learning_rate": 1.0339159637254432e-05, "loss": 0.4976, "step": 18451 }, { "epoch": 0.5045941807044411, "grad_norm": 1.2475872039794922, "learning_rate": 1.0338274441321873e-05, "loss": 0.5049, "step": 18452 }, { "epoch": 0.504621527018158, "grad_norm": 1.3104584217071533, "learning_rate": 1.0337389242735646e-05, "loss": 0.4866, "step": 18453 }, { "epoch": 0.5046488733318749, "grad_norm": 1.959475040435791, "learning_rate": 1.0336504041502694e-05, "loss": 0.4916, "step": 18454 }, { "epoch": 0.5046762196455917, "grad_norm": 1.1918365955352783, "learning_rate": 1.0335618837629966e-05, "loss": 0.4888, "step": 18455 }, { "epoch": 0.5047035659593087, "grad_norm": 1.4364795684814453, "learning_rate": 1.0334733631124401e-05, "loss": 0.4907, "step": 18456 }, { "epoch": 0.5047309122730256, "grad_norm": 1.1829484701156616, "learning_rate": 1.0333848421992943e-05, "loss": 0.5053, "step": 18457 }, { "epoch": 0.5047582585867425, "grad_norm": 1.457995891571045, "learning_rate": 1.0332963210242539e-05, "loss": 0.4231, "step": 18458 }, { "epoch": 0.5047856049004594, "grad_norm": 1.4826678037643433, "learning_rate": 1.0332077995880132e-05, "loss": 0.4941, "step": 18459 }, { "epoch": 0.5048129512141764, "grad_norm": 4.317821979522705, "learning_rate": 1.0331192778912667e-05, "loss": 0.5102, "step": 18460 }, { "epoch": 0.5048402975278933, "grad_norm": 3.0255768299102783, "learning_rate": 1.033030755934709e-05, "loss": 0.4026, "step": 18461 }, { "epoch": 0.5048676438416102, "grad_norm": 1.4499914646148682, "learning_rate": 1.032942233719034e-05, "loss": 0.4274, "step": 18462 }, { "epoch": 0.504894990155327, "grad_norm": 1.2960660457611084, "learning_rate": 1.0328537112449366e-05, "loss": 0.5092, "step": 18463 }, { "epoch": 0.5049223364690439, "grad_norm": 1.277424931526184, "learning_rate": 1.0327651885131109e-05, "loss": 0.5048, "step": 18464 }, { "epoch": 0.5049496827827609, "grad_norm": 1.2841137647628784, "learning_rate": 1.0326766655242517e-05, "loss": 0.4911, "step": 18465 }, { "epoch": 0.5049770290964778, "grad_norm": 1.2134580612182617, "learning_rate": 1.0325881422790533e-05, "loss": 0.4815, "step": 18466 }, { "epoch": 0.5050043754101947, "grad_norm": 1.3204606771469116, "learning_rate": 1.03249961877821e-05, "loss": 0.5049, "step": 18467 }, { "epoch": 0.5050317217239116, "grad_norm": 1.8671513795852661, "learning_rate": 1.0324110950224165e-05, "loss": 0.4188, "step": 18468 }, { "epoch": 0.5050590680376286, "grad_norm": 1.5974714756011963, "learning_rate": 1.0323225710123668e-05, "loss": 0.4506, "step": 18469 }, { "epoch": 0.5050864143513455, "grad_norm": 1.2357391119003296, "learning_rate": 1.0322340467487555e-05, "loss": 0.5295, "step": 18470 }, { "epoch": 0.5051137606650623, "grad_norm": 1.4223177433013916, "learning_rate": 1.0321455222322775e-05, "loss": 0.5138, "step": 18471 }, { "epoch": 0.5051411069787792, "grad_norm": 1.5481973886489868, "learning_rate": 1.0320569974636268e-05, "loss": 0.4226, "step": 18472 }, { "epoch": 0.5051684532924962, "grad_norm": 1.624915361404419, "learning_rate": 1.0319684724434979e-05, "loss": 0.4836, "step": 18473 }, { "epoch": 0.5051957996062131, "grad_norm": 1.3821227550506592, "learning_rate": 1.031879947172586e-05, "loss": 0.4827, "step": 18474 }, { "epoch": 0.50522314591993, "grad_norm": 1.550559163093567, "learning_rate": 1.0317914216515841e-05, "loss": 0.4123, "step": 18475 }, { "epoch": 0.5052504922336469, "grad_norm": 1.4628307819366455, "learning_rate": 1.0317028958811877e-05, "loss": 0.4923, "step": 18476 }, { "epoch": 0.5052778385473639, "grad_norm": 1.1329201459884644, "learning_rate": 1.031614369862091e-05, "loss": 0.8033, "step": 18477 }, { "epoch": 0.5053051848610808, "grad_norm": 1.4299490451812744, "learning_rate": 1.0315258435949885e-05, "loss": 0.5062, "step": 18478 }, { "epoch": 0.5053325311747976, "grad_norm": 1.307629108428955, "learning_rate": 1.0314373170805747e-05, "loss": 0.5266, "step": 18479 }, { "epoch": 0.5053598774885145, "grad_norm": 1.6518309116363525, "learning_rate": 1.0313487903195438e-05, "loss": 0.4938, "step": 18480 }, { "epoch": 0.5053872238022314, "grad_norm": 1.4291514158248901, "learning_rate": 1.0312602633125904e-05, "loss": 0.5175, "step": 18481 }, { "epoch": 0.5054145701159484, "grad_norm": 1.9029136896133423, "learning_rate": 1.0311717360604092e-05, "loss": 0.5284, "step": 18482 }, { "epoch": 0.5054419164296653, "grad_norm": 1.5267785787582397, "learning_rate": 1.0310832085636944e-05, "loss": 0.5236, "step": 18483 }, { "epoch": 0.5054692627433822, "grad_norm": 1.2863911390304565, "learning_rate": 1.0309946808231407e-05, "loss": 0.4804, "step": 18484 }, { "epoch": 0.5054966090570991, "grad_norm": 1.5732778310775757, "learning_rate": 1.0309061528394424e-05, "loss": 0.4422, "step": 18485 }, { "epoch": 0.5055239553708161, "grad_norm": 1.6492027044296265, "learning_rate": 1.030817624613294e-05, "loss": 0.4075, "step": 18486 }, { "epoch": 0.5055513016845329, "grad_norm": 1.9319595098495483, "learning_rate": 1.0307290961453901e-05, "loss": 0.5169, "step": 18487 }, { "epoch": 0.5055786479982498, "grad_norm": 1.5768080949783325, "learning_rate": 1.030640567436425e-05, "loss": 0.5072, "step": 18488 }, { "epoch": 0.5056059943119667, "grad_norm": 1.510843276977539, "learning_rate": 1.0305520384870932e-05, "loss": 0.5294, "step": 18489 }, { "epoch": 0.5056333406256837, "grad_norm": 1.7215163707733154, "learning_rate": 1.0304635092980894e-05, "loss": 0.3567, "step": 18490 }, { "epoch": 0.5056606869394006, "grad_norm": 1.2145878076553345, "learning_rate": 1.0303749798701081e-05, "loss": 0.5436, "step": 18491 }, { "epoch": 0.5056880332531175, "grad_norm": 2.0554563999176025, "learning_rate": 1.0302864502038434e-05, "loss": 0.4101, "step": 18492 }, { "epoch": 0.5057153795668344, "grad_norm": 3.288466691970825, "learning_rate": 1.03019792029999e-05, "loss": 0.4968, "step": 18493 }, { "epoch": 0.5057427258805512, "grad_norm": 1.8389644622802734, "learning_rate": 1.0301093901592424e-05, "loss": 0.4864, "step": 18494 }, { "epoch": 0.5057700721942682, "grad_norm": 1.314103603363037, "learning_rate": 1.030020859782295e-05, "loss": 0.4897, "step": 18495 }, { "epoch": 0.5057974185079851, "grad_norm": 1.2353768348693848, "learning_rate": 1.029932329169843e-05, "loss": 0.4142, "step": 18496 }, { "epoch": 0.505824764821702, "grad_norm": 1.1746052503585815, "learning_rate": 1.0298437983225797e-05, "loss": 0.7965, "step": 18497 }, { "epoch": 0.505852111135419, "grad_norm": 1.4909542798995972, "learning_rate": 1.0297552672412006e-05, "loss": 0.4914, "step": 18498 }, { "epoch": 0.5058794574491359, "grad_norm": 1.4414020776748657, "learning_rate": 1.0296667359263994e-05, "loss": 0.4314, "step": 18499 }, { "epoch": 0.5059068037628528, "grad_norm": 1.4406168460845947, "learning_rate": 1.0295782043788711e-05, "loss": 0.4865, "step": 18500 }, { "epoch": 0.5059341500765697, "grad_norm": 2.7715086936950684, "learning_rate": 1.0294896725993104e-05, "loss": 0.4962, "step": 18501 }, { "epoch": 0.5059614963902865, "grad_norm": 1.708766222000122, "learning_rate": 1.0294011405884112e-05, "loss": 0.5011, "step": 18502 }, { "epoch": 0.5059888427040035, "grad_norm": 4.360052585601807, "learning_rate": 1.0293126083468682e-05, "loss": 0.5112, "step": 18503 }, { "epoch": 0.5060161890177204, "grad_norm": 1.3370304107666016, "learning_rate": 1.0292240758753765e-05, "loss": 0.5411, "step": 18504 }, { "epoch": 0.5060435353314373, "grad_norm": 1.259307861328125, "learning_rate": 1.02913554317463e-05, "loss": 0.4829, "step": 18505 }, { "epoch": 0.5060708816451542, "grad_norm": 1.3666540384292603, "learning_rate": 1.0290470102453231e-05, "loss": 0.5228, "step": 18506 }, { "epoch": 0.5060982279588712, "grad_norm": 1.2253313064575195, "learning_rate": 1.0289584770881509e-05, "loss": 0.7653, "step": 18507 }, { "epoch": 0.5061255742725881, "grad_norm": 1.281448483467102, "learning_rate": 1.0288699437038071e-05, "loss": 0.4907, "step": 18508 }, { "epoch": 0.506152920586305, "grad_norm": 1.2440096139907837, "learning_rate": 1.0287814100929876e-05, "loss": 0.5024, "step": 18509 }, { "epoch": 0.5061802669000218, "grad_norm": 1.5221426486968994, "learning_rate": 1.0286928762563855e-05, "loss": 0.4975, "step": 18510 }, { "epoch": 0.5062076132137387, "grad_norm": 1.307554006576538, "learning_rate": 1.028604342194696e-05, "loss": 0.7879, "step": 18511 }, { "epoch": 0.5062349595274557, "grad_norm": 1.3516937494277954, "learning_rate": 1.0285158079086133e-05, "loss": 0.5141, "step": 18512 }, { "epoch": 0.5062623058411726, "grad_norm": 1.4245374202728271, "learning_rate": 1.028427273398832e-05, "loss": 0.4882, "step": 18513 }, { "epoch": 0.5062896521548895, "grad_norm": 1.2364156246185303, "learning_rate": 1.0283387386660469e-05, "loss": 0.491, "step": 18514 }, { "epoch": 0.5063169984686064, "grad_norm": 1.110379695892334, "learning_rate": 1.0282502037109525e-05, "loss": 0.7616, "step": 18515 }, { "epoch": 0.5063443447823234, "grad_norm": 1.797119379043579, "learning_rate": 1.028161668534243e-05, "loss": 0.4826, "step": 18516 }, { "epoch": 0.5063716910960403, "grad_norm": 1.335708498954773, "learning_rate": 1.0280731331366134e-05, "loss": 0.492, "step": 18517 }, { "epoch": 0.5063990374097571, "grad_norm": 1.319057583808899, "learning_rate": 1.0279845975187576e-05, "loss": 0.7825, "step": 18518 }, { "epoch": 0.506426383723474, "grad_norm": 1.2448841333389282, "learning_rate": 1.0278960616813707e-05, "loss": 0.5155, "step": 18519 }, { "epoch": 0.506453730037191, "grad_norm": 1.62102210521698, "learning_rate": 1.0278075256251472e-05, "loss": 0.3975, "step": 18520 }, { "epoch": 0.5064810763509079, "grad_norm": 1.2101608514785767, "learning_rate": 1.0277189893507812e-05, "loss": 0.4832, "step": 18521 }, { "epoch": 0.5065084226646248, "grad_norm": 1.2207496166229248, "learning_rate": 1.0276304528589679e-05, "loss": 0.3657, "step": 18522 }, { "epoch": 0.5065357689783417, "grad_norm": 1.2997161149978638, "learning_rate": 1.027541916150401e-05, "loss": 0.5217, "step": 18523 }, { "epoch": 0.5065631152920587, "grad_norm": 2.3501594066619873, "learning_rate": 1.0274533792257757e-05, "loss": 0.7509, "step": 18524 }, { "epoch": 0.5065904616057756, "grad_norm": 1.508204698562622, "learning_rate": 1.0273648420857867e-05, "loss": 0.4455, "step": 18525 }, { "epoch": 0.5066178079194924, "grad_norm": 1.2854793071746826, "learning_rate": 1.0272763047311277e-05, "loss": 0.5033, "step": 18526 }, { "epoch": 0.5066451542332093, "grad_norm": 1.5039204359054565, "learning_rate": 1.027187767162494e-05, "loss": 0.512, "step": 18527 }, { "epoch": 0.5066725005469263, "grad_norm": 1.2711442708969116, "learning_rate": 1.0270992293805802e-05, "loss": 0.7939, "step": 18528 }, { "epoch": 0.5066998468606432, "grad_norm": 1.3934305906295776, "learning_rate": 1.0270106913860802e-05, "loss": 0.4379, "step": 18529 }, { "epoch": 0.5067271931743601, "grad_norm": 1.2360626459121704, "learning_rate": 1.0269221531796889e-05, "loss": 0.5026, "step": 18530 }, { "epoch": 0.506754539488077, "grad_norm": 1.2946312427520752, "learning_rate": 1.0268336147621012e-05, "loss": 0.4668, "step": 18531 }, { "epoch": 0.506781885801794, "grad_norm": 1.333937168121338, "learning_rate": 1.0267450761340109e-05, "loss": 0.5244, "step": 18532 }, { "epoch": 0.5068092321155109, "grad_norm": 1.8569098711013794, "learning_rate": 1.0266565372961134e-05, "loss": 0.4186, "step": 18533 }, { "epoch": 0.5068365784292277, "grad_norm": 1.2260351181030273, "learning_rate": 1.0265679982491029e-05, "loss": 0.5017, "step": 18534 }, { "epoch": 0.5068639247429446, "grad_norm": 1.4723827838897705, "learning_rate": 1.0264794589936736e-05, "loss": 0.4861, "step": 18535 }, { "epoch": 0.5068912710566615, "grad_norm": 1.5752954483032227, "learning_rate": 1.0263909195305206e-05, "loss": 0.4731, "step": 18536 }, { "epoch": 0.5069186173703785, "grad_norm": 1.2440592050552368, "learning_rate": 1.0263023798603382e-05, "loss": 0.7949, "step": 18537 }, { "epoch": 0.5069459636840954, "grad_norm": 2.2837722301483154, "learning_rate": 1.0262138399838208e-05, "loss": 0.5186, "step": 18538 }, { "epoch": 0.5069733099978123, "grad_norm": 1.2412315607070923, "learning_rate": 1.0261252999016638e-05, "loss": 0.4979, "step": 18539 }, { "epoch": 0.5070006563115292, "grad_norm": 1.208057165145874, "learning_rate": 1.0260367596145606e-05, "loss": 0.8307, "step": 18540 }, { "epoch": 0.5070280026252462, "grad_norm": 1.514444351196289, "learning_rate": 1.0259482191232068e-05, "loss": 0.496, "step": 18541 }, { "epoch": 0.507055348938963, "grad_norm": 1.4148075580596924, "learning_rate": 1.0258596784282962e-05, "loss": 0.4937, "step": 18542 }, { "epoch": 0.5070826952526799, "grad_norm": 2.67067289352417, "learning_rate": 1.0257711375305237e-05, "loss": 0.7965, "step": 18543 }, { "epoch": 0.5071100415663968, "grad_norm": 1.281084656715393, "learning_rate": 1.0256825964305843e-05, "loss": 0.5017, "step": 18544 }, { "epoch": 0.5071373878801138, "grad_norm": 1.3167390823364258, "learning_rate": 1.0255940551291716e-05, "loss": 0.5234, "step": 18545 }, { "epoch": 0.5071647341938307, "grad_norm": 1.2415244579315186, "learning_rate": 1.025505513626981e-05, "loss": 0.7914, "step": 18546 }, { "epoch": 0.5071920805075476, "grad_norm": 1.1396063566207886, "learning_rate": 1.025416971924707e-05, "loss": 0.4952, "step": 18547 }, { "epoch": 0.5072194268212645, "grad_norm": 1.749675989151001, "learning_rate": 1.0253284300230437e-05, "loss": 0.5232, "step": 18548 }, { "epoch": 0.5072467731349815, "grad_norm": 1.5042375326156616, "learning_rate": 1.0252398879226858e-05, "loss": 0.8133, "step": 18549 }, { "epoch": 0.5072741194486983, "grad_norm": 1.4892044067382812, "learning_rate": 1.0251513456243288e-05, "loss": 0.4944, "step": 18550 }, { "epoch": 0.5073014657624152, "grad_norm": 2.1478676795959473, "learning_rate": 1.025062803128666e-05, "loss": 0.4942, "step": 18551 }, { "epoch": 0.5073288120761321, "grad_norm": 1.1936237812042236, "learning_rate": 1.0249742604363928e-05, "loss": 0.476, "step": 18552 }, { "epoch": 0.507356158389849, "grad_norm": 1.2739434242248535, "learning_rate": 1.0248857175482035e-05, "loss": 0.4978, "step": 18553 }, { "epoch": 0.507383504703566, "grad_norm": 1.4134304523468018, "learning_rate": 1.0247971744647924e-05, "loss": 0.7796, "step": 18554 }, { "epoch": 0.5074108510172829, "grad_norm": 1.3302546739578247, "learning_rate": 1.024708631186855e-05, "loss": 0.5412, "step": 18555 }, { "epoch": 0.5074381973309998, "grad_norm": 0.9776875972747803, "learning_rate": 1.0246200877150849e-05, "loss": 0.3894, "step": 18556 }, { "epoch": 0.5074655436447167, "grad_norm": 1.346832275390625, "learning_rate": 1.0245315440501772e-05, "loss": 0.5105, "step": 18557 }, { "epoch": 0.5074928899584336, "grad_norm": 1.0744470357894897, "learning_rate": 1.0244430001928269e-05, "loss": 0.5009, "step": 18558 }, { "epoch": 0.5075202362721505, "grad_norm": 1.301604151725769, "learning_rate": 1.0243544561437278e-05, "loss": 0.3408, "step": 18559 }, { "epoch": 0.5075475825858674, "grad_norm": 1.5930758714675903, "learning_rate": 1.024265911903575e-05, "loss": 0.5272, "step": 18560 }, { "epoch": 0.5075749288995843, "grad_norm": 1.263723611831665, "learning_rate": 1.0241773674730627e-05, "loss": 0.7901, "step": 18561 }, { "epoch": 0.5076022752133013, "grad_norm": 1.416478157043457, "learning_rate": 1.024088822852886e-05, "loss": 0.5211, "step": 18562 }, { "epoch": 0.5076296215270182, "grad_norm": 1.343946099281311, "learning_rate": 1.0240002780437393e-05, "loss": 0.5349, "step": 18563 }, { "epoch": 0.5076569678407351, "grad_norm": 1.281624436378479, "learning_rate": 1.0239117330463169e-05, "loss": 0.4843, "step": 18564 }, { "epoch": 0.507684314154452, "grad_norm": 1.7047241926193237, "learning_rate": 1.0238231878613137e-05, "loss": 0.4642, "step": 18565 }, { "epoch": 0.5077116604681688, "grad_norm": 1.2995446920394897, "learning_rate": 1.0237346424894246e-05, "loss": 0.4941, "step": 18566 }, { "epoch": 0.5077390067818858, "grad_norm": 1.4802347421646118, "learning_rate": 1.0236460969313436e-05, "loss": 0.3946, "step": 18567 }, { "epoch": 0.5077663530956027, "grad_norm": 1.6510138511657715, "learning_rate": 1.0235575511877655e-05, "loss": 0.5378, "step": 18568 }, { "epoch": 0.5077936994093196, "grad_norm": 1.2755120992660522, "learning_rate": 1.0234690052593854e-05, "loss": 0.7698, "step": 18569 }, { "epoch": 0.5078210457230365, "grad_norm": 1.3588289022445679, "learning_rate": 1.0233804591468975e-05, "loss": 0.5119, "step": 18570 }, { "epoch": 0.5078483920367535, "grad_norm": 1.7841650247573853, "learning_rate": 1.0232919128509964e-05, "loss": 0.415, "step": 18571 }, { "epoch": 0.5078757383504704, "grad_norm": 1.337805986404419, "learning_rate": 1.0232033663723769e-05, "loss": 0.5004, "step": 18572 }, { "epoch": 0.5079030846641873, "grad_norm": 1.2889012098312378, "learning_rate": 1.0231148197117334e-05, "loss": 0.3976, "step": 18573 }, { "epoch": 0.5079304309779041, "grad_norm": 1.7855439186096191, "learning_rate": 1.0230262728697607e-05, "loss": 0.5226, "step": 18574 }, { "epoch": 0.5079577772916211, "grad_norm": 1.5079801082611084, "learning_rate": 1.022937725847153e-05, "loss": 0.5114, "step": 18575 }, { "epoch": 0.507985123605338, "grad_norm": 1.337296724319458, "learning_rate": 1.022849178644606e-05, "loss": 0.7692, "step": 18576 }, { "epoch": 0.5080124699190549, "grad_norm": 1.3198295831680298, "learning_rate": 1.0227606312628131e-05, "loss": 0.4852, "step": 18577 }, { "epoch": 0.5080398162327718, "grad_norm": 1.0938255786895752, "learning_rate": 1.0226720837024695e-05, "loss": 0.5368, "step": 18578 }, { "epoch": 0.5080671625464888, "grad_norm": 1.3381190299987793, "learning_rate": 1.0225835359642702e-05, "loss": 0.814, "step": 18579 }, { "epoch": 0.5080945088602057, "grad_norm": 1.2949447631835938, "learning_rate": 1.0224949880489092e-05, "loss": 0.4912, "step": 18580 }, { "epoch": 0.5081218551739226, "grad_norm": 1.3270314931869507, "learning_rate": 1.0224064399570812e-05, "loss": 0.3961, "step": 18581 }, { "epoch": 0.5081492014876394, "grad_norm": 1.623503565788269, "learning_rate": 1.022317891689481e-05, "loss": 0.4741, "step": 18582 }, { "epoch": 0.5081765478013563, "grad_norm": 1.2547253370285034, "learning_rate": 1.0222293432468034e-05, "loss": 0.5032, "step": 18583 }, { "epoch": 0.5082038941150733, "grad_norm": 1.4365789890289307, "learning_rate": 1.0221407946297425e-05, "loss": 0.7691, "step": 18584 }, { "epoch": 0.5082312404287902, "grad_norm": 1.5954368114471436, "learning_rate": 1.0220522458389936e-05, "loss": 0.3878, "step": 18585 }, { "epoch": 0.5082585867425071, "grad_norm": 1.4297535419464111, "learning_rate": 1.0219636968752508e-05, "loss": 0.4786, "step": 18586 }, { "epoch": 0.508285933056224, "grad_norm": 1.7280590534210205, "learning_rate": 1.0218751477392094e-05, "loss": 0.5104, "step": 18587 }, { "epoch": 0.508313279369941, "grad_norm": 1.2315540313720703, "learning_rate": 1.0217865984315634e-05, "loss": 0.476, "step": 18588 }, { "epoch": 0.5083406256836578, "grad_norm": 1.4756778478622437, "learning_rate": 1.0216980489530075e-05, "loss": 0.4157, "step": 18589 }, { "epoch": 0.5083679719973747, "grad_norm": 2.7663474082946777, "learning_rate": 1.0216094993042366e-05, "loss": 0.422, "step": 18590 }, { "epoch": 0.5083953183110916, "grad_norm": 1.590164303779602, "learning_rate": 1.0215209494859453e-05, "loss": 0.4926, "step": 18591 }, { "epoch": 0.5084226646248086, "grad_norm": 3.4022409915924072, "learning_rate": 1.0214323994988283e-05, "loss": 0.351, "step": 18592 }, { "epoch": 0.5084500109385255, "grad_norm": 1.5200802087783813, "learning_rate": 1.0213438493435801e-05, "loss": 0.4056, "step": 18593 }, { "epoch": 0.5084773572522424, "grad_norm": 1.5866107940673828, "learning_rate": 1.0212552990208952e-05, "loss": 0.5052, "step": 18594 }, { "epoch": 0.5085047035659593, "grad_norm": 1.2868468761444092, "learning_rate": 1.0211667485314687e-05, "loss": 0.5022, "step": 18595 }, { "epoch": 0.5085320498796763, "grad_norm": 1.3003120422363281, "learning_rate": 1.0210781978759949e-05, "loss": 0.7906, "step": 18596 }, { "epoch": 0.5085593961933931, "grad_norm": 1.4068928956985474, "learning_rate": 1.0209896470551687e-05, "loss": 0.3942, "step": 18597 }, { "epoch": 0.50858674250711, "grad_norm": 1.3004904985427856, "learning_rate": 1.0209010960696848e-05, "loss": 0.5094, "step": 18598 }, { "epoch": 0.5086140888208269, "grad_norm": 1.2494779825210571, "learning_rate": 1.0208125449202373e-05, "loss": 0.7458, "step": 18599 }, { "epoch": 0.5086414351345439, "grad_norm": 1.3545867204666138, "learning_rate": 1.0207239936075213e-05, "loss": 0.4918, "step": 18600 }, { "epoch": 0.5086687814482608, "grad_norm": 1.1122716665267944, "learning_rate": 1.0206354421322315e-05, "loss": 0.4918, "step": 18601 }, { "epoch": 0.5086961277619777, "grad_norm": 1.4091691970825195, "learning_rate": 1.0205468904950624e-05, "loss": 0.3691, "step": 18602 }, { "epoch": 0.5087234740756946, "grad_norm": 1.2684811353683472, "learning_rate": 1.0204583386967086e-05, "loss": 0.5071, "step": 18603 }, { "epoch": 0.5087508203894116, "grad_norm": 1.1954030990600586, "learning_rate": 1.0203697867378652e-05, "loss": 0.5007, "step": 18604 }, { "epoch": 0.5087781667031284, "grad_norm": 1.3619816303253174, "learning_rate": 1.0202812346192263e-05, "loss": 0.4961, "step": 18605 }, { "epoch": 0.5088055130168453, "grad_norm": 1.2808713912963867, "learning_rate": 1.0201926823414871e-05, "loss": 0.7884, "step": 18606 }, { "epoch": 0.5088328593305622, "grad_norm": 1.31521475315094, "learning_rate": 1.0201041299053418e-05, "loss": 0.5096, "step": 18607 }, { "epoch": 0.5088602056442791, "grad_norm": 1.11532723903656, "learning_rate": 1.0200155773114853e-05, "loss": 0.4808, "step": 18608 }, { "epoch": 0.5088875519579961, "grad_norm": 1.4909669160842896, "learning_rate": 1.0199270245606123e-05, "loss": 0.4498, "step": 18609 }, { "epoch": 0.508914898271713, "grad_norm": 1.462895393371582, "learning_rate": 1.019838471653417e-05, "loss": 0.4985, "step": 18610 }, { "epoch": 0.5089422445854299, "grad_norm": 1.2943092584609985, "learning_rate": 1.0197499185905948e-05, "loss": 0.4789, "step": 18611 }, { "epoch": 0.5089695908991468, "grad_norm": 1.3575894832611084, "learning_rate": 1.0196613653728403e-05, "loss": 0.499, "step": 18612 }, { "epoch": 0.5089969372128637, "grad_norm": 1.3053867816925049, "learning_rate": 1.0195728120008475e-05, "loss": 0.501, "step": 18613 }, { "epoch": 0.5090242835265806, "grad_norm": 1.395204782485962, "learning_rate": 1.0194842584753118e-05, "loss": 0.526, "step": 18614 }, { "epoch": 0.5090516298402975, "grad_norm": 1.3168381452560425, "learning_rate": 1.0193957047969277e-05, "loss": 0.538, "step": 18615 }, { "epoch": 0.5090789761540144, "grad_norm": 1.3457775115966797, "learning_rate": 1.0193071509663891e-05, "loss": 0.4947, "step": 18616 }, { "epoch": 0.5091063224677314, "grad_norm": 1.29669189453125, "learning_rate": 1.019218596984392e-05, "loss": 0.4789, "step": 18617 }, { "epoch": 0.5091336687814483, "grad_norm": 1.3575612306594849, "learning_rate": 1.01913004285163e-05, "loss": 0.5158, "step": 18618 }, { "epoch": 0.5091610150951652, "grad_norm": 1.6438050270080566, "learning_rate": 1.0190414885687982e-05, "loss": 0.347, "step": 18619 }, { "epoch": 0.5091883614088821, "grad_norm": 2.1623525619506836, "learning_rate": 1.0189529341365917e-05, "loss": 0.3577, "step": 18620 }, { "epoch": 0.5092157077225989, "grad_norm": 1.3303916454315186, "learning_rate": 1.0188643795557043e-05, "loss": 0.4956, "step": 18621 }, { "epoch": 0.5092430540363159, "grad_norm": 1.1088510751724243, "learning_rate": 1.018775824826831e-05, "loss": 0.4829, "step": 18622 }, { "epoch": 0.5092704003500328, "grad_norm": 1.2294056415557861, "learning_rate": 1.0186872699506673e-05, "loss": 0.5059, "step": 18623 }, { "epoch": 0.5092977466637497, "grad_norm": 1.3228962421417236, "learning_rate": 1.0185987149279069e-05, "loss": 0.4877, "step": 18624 }, { "epoch": 0.5093250929774666, "grad_norm": 2.3450803756713867, "learning_rate": 1.0185101597592449e-05, "loss": 0.4032, "step": 18625 }, { "epoch": 0.5093524392911836, "grad_norm": 1.1676008701324463, "learning_rate": 1.0184216044453756e-05, "loss": 0.4883, "step": 18626 }, { "epoch": 0.5093797856049005, "grad_norm": 1.3972907066345215, "learning_rate": 1.0183330489869943e-05, "loss": 0.5421, "step": 18627 }, { "epoch": 0.5094071319186174, "grad_norm": 1.1226725578308105, "learning_rate": 1.0182444933847952e-05, "loss": 0.4949, "step": 18628 }, { "epoch": 0.5094344782323342, "grad_norm": 1.299731731414795, "learning_rate": 1.0181559376394734e-05, "loss": 0.5118, "step": 18629 }, { "epoch": 0.5094618245460512, "grad_norm": 1.319775938987732, "learning_rate": 1.0180673817517232e-05, "loss": 0.7936, "step": 18630 }, { "epoch": 0.5094891708597681, "grad_norm": 1.199522852897644, "learning_rate": 1.0179788257222396e-05, "loss": 0.5155, "step": 18631 }, { "epoch": 0.509516517173485, "grad_norm": 1.612939715385437, "learning_rate": 1.0178902695517171e-05, "loss": 0.5313, "step": 18632 }, { "epoch": 0.5095438634872019, "grad_norm": 1.1853039264678955, "learning_rate": 1.0178017132408505e-05, "loss": 0.5185, "step": 18633 }, { "epoch": 0.5095712098009189, "grad_norm": 1.9214656352996826, "learning_rate": 1.0177131567903344e-05, "loss": 0.3511, "step": 18634 }, { "epoch": 0.5095985561146358, "grad_norm": 1.4137873649597168, "learning_rate": 1.0176246002008635e-05, "loss": 0.5043, "step": 18635 }, { "epoch": 0.5096259024283527, "grad_norm": 1.2596341371536255, "learning_rate": 1.017536043473133e-05, "loss": 0.7905, "step": 18636 }, { "epoch": 0.5096532487420695, "grad_norm": 1.4780150651931763, "learning_rate": 1.0174474866078367e-05, "loss": 0.3518, "step": 18637 }, { "epoch": 0.5096805950557864, "grad_norm": 1.4520570039749146, "learning_rate": 1.0173589296056699e-05, "loss": 0.387, "step": 18638 }, { "epoch": 0.5097079413695034, "grad_norm": 1.606919527053833, "learning_rate": 1.0172703724673275e-05, "loss": 0.4037, "step": 18639 }, { "epoch": 0.5097352876832203, "grad_norm": 1.3849058151245117, "learning_rate": 1.0171818151935033e-05, "loss": 0.476, "step": 18640 }, { "epoch": 0.5097626339969372, "grad_norm": 1.609160304069519, "learning_rate": 1.0170932577848931e-05, "loss": 0.4029, "step": 18641 }, { "epoch": 0.5097899803106541, "grad_norm": 1.3065201044082642, "learning_rate": 1.0170047002421907e-05, "loss": 0.4974, "step": 18642 }, { "epoch": 0.5098173266243711, "grad_norm": 1.2137576341629028, "learning_rate": 1.0169161425660915e-05, "loss": 0.4891, "step": 18643 }, { "epoch": 0.509844672938088, "grad_norm": 1.6825693845748901, "learning_rate": 1.0168275847572899e-05, "loss": 0.7698, "step": 18644 }, { "epoch": 0.5098720192518048, "grad_norm": 1.6095582246780396, "learning_rate": 1.0167390268164805e-05, "loss": 0.3836, "step": 18645 }, { "epoch": 0.5098993655655217, "grad_norm": 1.5916063785552979, "learning_rate": 1.016650468744358e-05, "loss": 0.356, "step": 18646 }, { "epoch": 0.5099267118792387, "grad_norm": 1.719497799873352, "learning_rate": 1.0165619105416178e-05, "loss": 0.5397, "step": 18647 }, { "epoch": 0.5099540581929556, "grad_norm": 1.429564118385315, "learning_rate": 1.0164733522089536e-05, "loss": 0.5519, "step": 18648 }, { "epoch": 0.5099814045066725, "grad_norm": 1.5493122339248657, "learning_rate": 1.016384793747061e-05, "loss": 0.5014, "step": 18649 }, { "epoch": 0.5100087508203894, "grad_norm": 1.124996542930603, "learning_rate": 1.0162962351566341e-05, "loss": 0.4981, "step": 18650 }, { "epoch": 0.5100360971341064, "grad_norm": 1.3304225206375122, "learning_rate": 1.0162076764383677e-05, "loss": 0.4969, "step": 18651 }, { "epoch": 0.5100634434478233, "grad_norm": 1.1038397550582886, "learning_rate": 1.0161191175929567e-05, "loss": 0.492, "step": 18652 }, { "epoch": 0.5100907897615401, "grad_norm": 1.262687087059021, "learning_rate": 1.0160305586210957e-05, "loss": 0.5102, "step": 18653 }, { "epoch": 0.510118136075257, "grad_norm": 1.3502471446990967, "learning_rate": 1.0159419995234795e-05, "loss": 0.4537, "step": 18654 }, { "epoch": 0.510145482388974, "grad_norm": 1.1040440797805786, "learning_rate": 1.015853440300803e-05, "loss": 0.4842, "step": 18655 }, { "epoch": 0.5101728287026909, "grad_norm": 1.3045318126678467, "learning_rate": 1.0157648809537606e-05, "loss": 0.4898, "step": 18656 }, { "epoch": 0.5102001750164078, "grad_norm": 1.650587558746338, "learning_rate": 1.0156763214830469e-05, "loss": 0.5095, "step": 18657 }, { "epoch": 0.5102275213301247, "grad_norm": 1.1673563718795776, "learning_rate": 1.0155877618893572e-05, "loss": 0.4824, "step": 18658 }, { "epoch": 0.5102548676438416, "grad_norm": 1.472521185874939, "learning_rate": 1.0154992021733856e-05, "loss": 0.5357, "step": 18659 }, { "epoch": 0.5102822139575586, "grad_norm": 1.2901668548583984, "learning_rate": 1.0154106423358275e-05, "loss": 0.5354, "step": 18660 }, { "epoch": 0.5103095602712754, "grad_norm": 1.174882173538208, "learning_rate": 1.015322082377377e-05, "loss": 0.5073, "step": 18661 }, { "epoch": 0.5103369065849923, "grad_norm": 1.2194944620132446, "learning_rate": 1.015233522298729e-05, "loss": 0.5399, "step": 18662 }, { "epoch": 0.5103642528987092, "grad_norm": 1.1896347999572754, "learning_rate": 1.0151449621005786e-05, "loss": 0.4926, "step": 18663 }, { "epoch": 0.5103915992124262, "grad_norm": 1.2224273681640625, "learning_rate": 1.01505640178362e-05, "loss": 0.4945, "step": 18664 }, { "epoch": 0.5104189455261431, "grad_norm": 1.334376573562622, "learning_rate": 1.0149678413485483e-05, "loss": 0.524, "step": 18665 }, { "epoch": 0.51044629183986, "grad_norm": 1.318170428276062, "learning_rate": 1.014879280796058e-05, "loss": 0.4922, "step": 18666 }, { "epoch": 0.5104736381535769, "grad_norm": 1.1733741760253906, "learning_rate": 1.0147907201268441e-05, "loss": 0.4949, "step": 18667 }, { "epoch": 0.5105009844672939, "grad_norm": 1.8044044971466064, "learning_rate": 1.014702159341601e-05, "loss": 0.7922, "step": 18668 }, { "epoch": 0.5105283307810107, "grad_norm": 1.2719533443450928, "learning_rate": 1.0146135984410234e-05, "loss": 0.5328, "step": 18669 }, { "epoch": 0.5105556770947276, "grad_norm": 1.5048466920852661, "learning_rate": 1.0145250374258065e-05, "loss": 0.4316, "step": 18670 }, { "epoch": 0.5105830234084445, "grad_norm": 1.2241966724395752, "learning_rate": 1.0144364762966447e-05, "loss": 0.5229, "step": 18671 }, { "epoch": 0.5106103697221615, "grad_norm": 1.2919641733169556, "learning_rate": 1.0143479150542328e-05, "loss": 0.4958, "step": 18672 }, { "epoch": 0.5106377160358784, "grad_norm": 1.358317255973816, "learning_rate": 1.0142593536992655e-05, "loss": 0.398, "step": 18673 }, { "epoch": 0.5106650623495953, "grad_norm": 1.33212149143219, "learning_rate": 1.0141707922324377e-05, "loss": 0.4903, "step": 18674 }, { "epoch": 0.5106924086633122, "grad_norm": 1.2474478483200073, "learning_rate": 1.014082230654444e-05, "loss": 0.51, "step": 18675 }, { "epoch": 0.5107197549770292, "grad_norm": 1.5817946195602417, "learning_rate": 1.013993668965979e-05, "loss": 0.4052, "step": 18676 }, { "epoch": 0.510747101290746, "grad_norm": 1.297187089920044, "learning_rate": 1.013905107167738e-05, "loss": 0.5153, "step": 18677 }, { "epoch": 0.5107744476044629, "grad_norm": 1.4907828569412231, "learning_rate": 1.0138165452604149e-05, "loss": 0.4231, "step": 18678 }, { "epoch": 0.5108017939181798, "grad_norm": 1.704825520515442, "learning_rate": 1.0137279832447053e-05, "loss": 0.7945, "step": 18679 }, { "epoch": 0.5108291402318967, "grad_norm": 1.0422887802124023, "learning_rate": 1.0136394211213033e-05, "loss": 0.3299, "step": 18680 }, { "epoch": 0.5108564865456137, "grad_norm": 1.0050324201583862, "learning_rate": 1.0135508588909038e-05, "loss": 0.5039, "step": 18681 }, { "epoch": 0.5108838328593306, "grad_norm": 1.2546058893203735, "learning_rate": 1.0134622965542018e-05, "loss": 0.5077, "step": 18682 }, { "epoch": 0.5109111791730475, "grad_norm": 1.2319040298461914, "learning_rate": 1.0133737341118917e-05, "loss": 0.5141, "step": 18683 }, { "epoch": 0.5109385254867644, "grad_norm": 1.2039811611175537, "learning_rate": 1.0132851715646685e-05, "loss": 0.5037, "step": 18684 }, { "epoch": 0.5109658718004813, "grad_norm": 7.747110843658447, "learning_rate": 1.013196608913227e-05, "loss": 0.4484, "step": 18685 }, { "epoch": 0.5109932181141982, "grad_norm": 1.2843421697616577, "learning_rate": 1.0131080461582616e-05, "loss": 0.5031, "step": 18686 }, { "epoch": 0.5110205644279151, "grad_norm": 1.2969772815704346, "learning_rate": 1.0130194833004674e-05, "loss": 0.5128, "step": 18687 }, { "epoch": 0.511047910741632, "grad_norm": 1.2560514211654663, "learning_rate": 1.012930920340539e-05, "loss": 0.4742, "step": 18688 }, { "epoch": 0.511075257055349, "grad_norm": 1.2036097049713135, "learning_rate": 1.0128423572791713e-05, "loss": 0.5056, "step": 18689 }, { "epoch": 0.5111026033690659, "grad_norm": 1.7049211263656616, "learning_rate": 1.0127537941170587e-05, "loss": 0.4924, "step": 18690 }, { "epoch": 0.5111299496827828, "grad_norm": 1.2942451238632202, "learning_rate": 1.012665230854896e-05, "loss": 0.4717, "step": 18691 }, { "epoch": 0.5111572959964996, "grad_norm": 1.3519078493118286, "learning_rate": 1.0125766674933785e-05, "loss": 0.5169, "step": 18692 }, { "epoch": 0.5111846423102165, "grad_norm": 1.5470867156982422, "learning_rate": 1.0124881040332006e-05, "loss": 0.5015, "step": 18693 }, { "epoch": 0.5112119886239335, "grad_norm": 1.3361009359359741, "learning_rate": 1.0123995404750568e-05, "loss": 0.52, "step": 18694 }, { "epoch": 0.5112393349376504, "grad_norm": 1.206076979637146, "learning_rate": 1.0123109768196422e-05, "loss": 0.349, "step": 18695 }, { "epoch": 0.5112666812513673, "grad_norm": 1.4868451356887817, "learning_rate": 1.0122224130676515e-05, "loss": 0.4289, "step": 18696 }, { "epoch": 0.5112940275650842, "grad_norm": 1.2624785900115967, "learning_rate": 1.0121338492197793e-05, "loss": 0.5119, "step": 18697 }, { "epoch": 0.5113213738788012, "grad_norm": 1.4610720872879028, "learning_rate": 1.0120452852767206e-05, "loss": 0.5181, "step": 18698 }, { "epoch": 0.5113487201925181, "grad_norm": 1.5152122974395752, "learning_rate": 1.0119567212391699e-05, "loss": 0.5278, "step": 18699 }, { "epoch": 0.5113760665062349, "grad_norm": 2.0157175064086914, "learning_rate": 1.0118681571078219e-05, "loss": 0.3716, "step": 18700 }, { "epoch": 0.5114034128199518, "grad_norm": 1.1602723598480225, "learning_rate": 1.011779592883372e-05, "loss": 0.3429, "step": 18701 }, { "epoch": 0.5114307591336688, "grad_norm": 1.2256323099136353, "learning_rate": 1.011691028566514e-05, "loss": 0.4883, "step": 18702 }, { "epoch": 0.5114581054473857, "grad_norm": 1.2281314134597778, "learning_rate": 1.0116024641579436e-05, "loss": 0.5135, "step": 18703 }, { "epoch": 0.5114854517611026, "grad_norm": 1.273066520690918, "learning_rate": 1.0115138996583553e-05, "loss": 0.4896, "step": 18704 }, { "epoch": 0.5115127980748195, "grad_norm": 1.3060462474822998, "learning_rate": 1.0114253350684433e-05, "loss": 0.5422, "step": 18705 }, { "epoch": 0.5115401443885365, "grad_norm": 1.3685063123703003, "learning_rate": 1.011336770388903e-05, "loss": 0.5129, "step": 18706 }, { "epoch": 0.5115674907022534, "grad_norm": 1.0475668907165527, "learning_rate": 1.0112482056204288e-05, "loss": 0.4766, "step": 18707 }, { "epoch": 0.5115948370159702, "grad_norm": 1.2269681692123413, "learning_rate": 1.0111596407637153e-05, "loss": 0.5216, "step": 18708 }, { "epoch": 0.5116221833296871, "grad_norm": 1.711509346961975, "learning_rate": 1.011071075819458e-05, "loss": 0.7873, "step": 18709 }, { "epoch": 0.511649529643404, "grad_norm": 1.2240949869155884, "learning_rate": 1.0109825107883508e-05, "loss": 0.5221, "step": 18710 }, { "epoch": 0.511676875957121, "grad_norm": 1.222990870475769, "learning_rate": 1.0108939456710891e-05, "loss": 0.5137, "step": 18711 }, { "epoch": 0.5117042222708379, "grad_norm": 1.2719817161560059, "learning_rate": 1.0108053804683677e-05, "loss": 0.5111, "step": 18712 }, { "epoch": 0.5117315685845548, "grad_norm": 1.147023320198059, "learning_rate": 1.0107168151808809e-05, "loss": 0.4936, "step": 18713 }, { "epoch": 0.5117589148982717, "grad_norm": 1.2355400323867798, "learning_rate": 1.0106282498093238e-05, "loss": 0.5157, "step": 18714 }, { "epoch": 0.5117862612119887, "grad_norm": 1.4606937170028687, "learning_rate": 1.0105396843543909e-05, "loss": 0.416, "step": 18715 }, { "epoch": 0.5118136075257055, "grad_norm": 1.468168020248413, "learning_rate": 1.0104511188167771e-05, "loss": 0.4914, "step": 18716 }, { "epoch": 0.5118409538394224, "grad_norm": 1.1919182538986206, "learning_rate": 1.0103625531971778e-05, "loss": 0.4805, "step": 18717 }, { "epoch": 0.5118683001531393, "grad_norm": 1.3112244606018066, "learning_rate": 1.0102739874962865e-05, "loss": 0.4744, "step": 18718 }, { "epoch": 0.5118956464668563, "grad_norm": 1.2726141214370728, "learning_rate": 1.010185421714799e-05, "loss": 0.4933, "step": 18719 }, { "epoch": 0.5119229927805732, "grad_norm": 1.249810814857483, "learning_rate": 1.01009685585341e-05, "loss": 0.5242, "step": 18720 }, { "epoch": 0.5119503390942901, "grad_norm": 1.7269470691680908, "learning_rate": 1.0100082899128134e-05, "loss": 0.7599, "step": 18721 }, { "epoch": 0.511977685408007, "grad_norm": 1.352587342262268, "learning_rate": 1.0099197238937046e-05, "loss": 0.5221, "step": 18722 }, { "epoch": 0.512005031721724, "grad_norm": 3.021758794784546, "learning_rate": 1.0098311577967788e-05, "loss": 0.3748, "step": 18723 }, { "epoch": 0.5120323780354408, "grad_norm": 1.3484607934951782, "learning_rate": 1.0097425916227302e-05, "loss": 0.7676, "step": 18724 }, { "epoch": 0.5120597243491577, "grad_norm": 1.3692480325698853, "learning_rate": 1.0096540253722536e-05, "loss": 0.4111, "step": 18725 }, { "epoch": 0.5120870706628746, "grad_norm": 1.3403648138046265, "learning_rate": 1.009565459046044e-05, "loss": 0.4849, "step": 18726 }, { "epoch": 0.5121144169765915, "grad_norm": 1.1360397338867188, "learning_rate": 1.0094768926447957e-05, "loss": 0.3623, "step": 18727 }, { "epoch": 0.5121417632903085, "grad_norm": 1.1670517921447754, "learning_rate": 1.0093883261692044e-05, "loss": 0.5022, "step": 18728 }, { "epoch": 0.5121691096040254, "grad_norm": 1.513569712638855, "learning_rate": 1.0092997596199639e-05, "loss": 0.5105, "step": 18729 }, { "epoch": 0.5121964559177423, "grad_norm": 1.5064722299575806, "learning_rate": 1.0092111929977698e-05, "loss": 0.5467, "step": 18730 }, { "epoch": 0.5122238022314592, "grad_norm": 1.4160645008087158, "learning_rate": 1.009122626303316e-05, "loss": 0.4994, "step": 18731 }, { "epoch": 0.5122511485451761, "grad_norm": 1.205646276473999, "learning_rate": 1.009034059537298e-05, "loss": 0.513, "step": 18732 }, { "epoch": 0.512278494858893, "grad_norm": 3.286280632019043, "learning_rate": 1.0089454927004106e-05, "loss": 0.4757, "step": 18733 }, { "epoch": 0.5123058411726099, "grad_norm": 1.6718552112579346, "learning_rate": 1.0088569257933478e-05, "loss": 0.4894, "step": 18734 }, { "epoch": 0.5123331874863268, "grad_norm": 1.33595609664917, "learning_rate": 1.008768358816805e-05, "loss": 0.5219, "step": 18735 }, { "epoch": 0.5123605338000438, "grad_norm": 1.174748420715332, "learning_rate": 1.0086797917714775e-05, "loss": 0.4862, "step": 18736 }, { "epoch": 0.5123878801137607, "grad_norm": 1.3903602361679077, "learning_rate": 1.0085912246580589e-05, "loss": 0.5253, "step": 18737 }, { "epoch": 0.5124152264274776, "grad_norm": 1.3959003686904907, "learning_rate": 1.0085026574772447e-05, "loss": 0.4018, "step": 18738 }, { "epoch": 0.5124425727411945, "grad_norm": 1.1917214393615723, "learning_rate": 1.0084140902297298e-05, "loss": 0.5162, "step": 18739 }, { "epoch": 0.5124699190549113, "grad_norm": 1.169298529624939, "learning_rate": 1.0083255229162083e-05, "loss": 0.487, "step": 18740 }, { "epoch": 0.5124972653686283, "grad_norm": 1.2294572591781616, "learning_rate": 1.008236955537376e-05, "loss": 0.5077, "step": 18741 }, { "epoch": 0.5125246116823452, "grad_norm": 1.331067442893982, "learning_rate": 1.0081483880939265e-05, "loss": 0.5233, "step": 18742 }, { "epoch": 0.5125519579960621, "grad_norm": 1.29213547706604, "learning_rate": 1.0080598205865553e-05, "loss": 0.5154, "step": 18743 }, { "epoch": 0.512579304309779, "grad_norm": 1.321919322013855, "learning_rate": 1.0079712530159574e-05, "loss": 0.3876, "step": 18744 }, { "epoch": 0.512606650623496, "grad_norm": 1.6068792343139648, "learning_rate": 1.007882685382827e-05, "loss": 0.7587, "step": 18745 }, { "epoch": 0.5126339969372129, "grad_norm": 1.557210922241211, "learning_rate": 1.007794117687859e-05, "loss": 0.5142, "step": 18746 }, { "epoch": 0.5126613432509298, "grad_norm": 1.300516963005066, "learning_rate": 1.0077055499317487e-05, "loss": 0.5015, "step": 18747 }, { "epoch": 0.5126886895646466, "grad_norm": 1.5322883129119873, "learning_rate": 1.0076169821151902e-05, "loss": 0.3809, "step": 18748 }, { "epoch": 0.5127160358783636, "grad_norm": 1.3710497617721558, "learning_rate": 1.0075284142388791e-05, "loss": 0.5303, "step": 18749 }, { "epoch": 0.5127433821920805, "grad_norm": 1.169372320175171, "learning_rate": 1.0074398463035091e-05, "loss": 0.4664, "step": 18750 }, { "epoch": 0.5127707285057974, "grad_norm": 1.351826548576355, "learning_rate": 1.0073512783097759e-05, "loss": 0.3842, "step": 18751 }, { "epoch": 0.5127980748195143, "grad_norm": 1.2522367238998413, "learning_rate": 1.0072627102583741e-05, "loss": 0.5072, "step": 18752 }, { "epoch": 0.5128254211332313, "grad_norm": 1.5022387504577637, "learning_rate": 1.007174142149998e-05, "loss": 0.3739, "step": 18753 }, { "epoch": 0.5128527674469482, "grad_norm": 1.2778003215789795, "learning_rate": 1.007085573985343e-05, "loss": 0.5031, "step": 18754 }, { "epoch": 0.5128801137606651, "grad_norm": 1.2179205417633057, "learning_rate": 1.0069970057651042e-05, "loss": 0.5374, "step": 18755 }, { "epoch": 0.5129074600743819, "grad_norm": 1.248410701751709, "learning_rate": 1.0069084374899752e-05, "loss": 0.5124, "step": 18756 }, { "epoch": 0.5129348063880989, "grad_norm": 1.265828251838684, "learning_rate": 1.0068198691606514e-05, "loss": 0.7528, "step": 18757 }, { "epoch": 0.5129621527018158, "grad_norm": 1.3728517293930054, "learning_rate": 1.0067313007778281e-05, "loss": 0.4942, "step": 18758 }, { "epoch": 0.5129894990155327, "grad_norm": 1.3179231882095337, "learning_rate": 1.0066427323421992e-05, "loss": 0.5023, "step": 18759 }, { "epoch": 0.5130168453292496, "grad_norm": 1.2700594663619995, "learning_rate": 1.0065541638544604e-05, "loss": 0.5123, "step": 18760 }, { "epoch": 0.5130441916429666, "grad_norm": 1.5993248224258423, "learning_rate": 1.0064655953153057e-05, "loss": 0.4096, "step": 18761 }, { "epoch": 0.5130715379566835, "grad_norm": 1.287145972251892, "learning_rate": 1.0063770267254302e-05, "loss": 0.5152, "step": 18762 }, { "epoch": 0.5130988842704004, "grad_norm": 1.2600328922271729, "learning_rate": 1.006288458085529e-05, "loss": 0.5167, "step": 18763 }, { "epoch": 0.5131262305841172, "grad_norm": 1.3102935552597046, "learning_rate": 1.0061998893962963e-05, "loss": 0.4693, "step": 18764 }, { "epoch": 0.5131535768978341, "grad_norm": 1.3614012002944946, "learning_rate": 1.0061113206584271e-05, "loss": 0.4998, "step": 18765 }, { "epoch": 0.5131809232115511, "grad_norm": 1.3071749210357666, "learning_rate": 1.0060227518726166e-05, "loss": 0.5123, "step": 18766 }, { "epoch": 0.513208269525268, "grad_norm": 1.6495267152786255, "learning_rate": 1.0059341830395593e-05, "loss": 0.4879, "step": 18767 }, { "epoch": 0.5132356158389849, "grad_norm": 1.1817235946655273, "learning_rate": 1.0058456141599502e-05, "loss": 0.4998, "step": 18768 }, { "epoch": 0.5132629621527018, "grad_norm": 1.2674012184143066, "learning_rate": 1.0057570452344836e-05, "loss": 0.5046, "step": 18769 }, { "epoch": 0.5132903084664188, "grad_norm": 1.4180831909179688, "learning_rate": 1.0056684762638544e-05, "loss": 0.5175, "step": 18770 }, { "epoch": 0.5133176547801357, "grad_norm": 1.1669018268585205, "learning_rate": 1.005579907248758e-05, "loss": 0.4872, "step": 18771 }, { "epoch": 0.5133450010938525, "grad_norm": 1.3247710466384888, "learning_rate": 1.0054913381898885e-05, "loss": 0.5154, "step": 18772 }, { "epoch": 0.5133723474075694, "grad_norm": 1.3060864210128784, "learning_rate": 1.0054027690879413e-05, "loss": 0.5302, "step": 18773 }, { "epoch": 0.5133996937212864, "grad_norm": 1.341338038444519, "learning_rate": 1.0053141999436108e-05, "loss": 0.4951, "step": 18774 }, { "epoch": 0.5134270400350033, "grad_norm": 1.2879379987716675, "learning_rate": 1.0052256307575918e-05, "loss": 0.4943, "step": 18775 }, { "epoch": 0.5134543863487202, "grad_norm": 1.273958683013916, "learning_rate": 1.0051370615305793e-05, "loss": 0.5245, "step": 18776 }, { "epoch": 0.5134817326624371, "grad_norm": 1.1630570888519287, "learning_rate": 1.005048492263268e-05, "loss": 0.4733, "step": 18777 }, { "epoch": 0.513509078976154, "grad_norm": 1.3176617622375488, "learning_rate": 1.0049599229563527e-05, "loss": 0.5082, "step": 18778 }, { "epoch": 0.513536425289871, "grad_norm": 1.6167083978652954, "learning_rate": 1.0048713536105282e-05, "loss": 0.8087, "step": 18779 }, { "epoch": 0.5135637716035878, "grad_norm": 1.0604877471923828, "learning_rate": 1.0047827842264892e-05, "loss": 0.5062, "step": 18780 }, { "epoch": 0.5135911179173047, "grad_norm": 1.6101365089416504, "learning_rate": 1.0046942148049306e-05, "loss": 0.4321, "step": 18781 }, { "epoch": 0.5136184642310216, "grad_norm": 1.3115590810775757, "learning_rate": 1.0046056453465473e-05, "loss": 0.4824, "step": 18782 }, { "epoch": 0.5136458105447386, "grad_norm": 1.0333675146102905, "learning_rate": 1.0045170758520337e-05, "loss": 0.5188, "step": 18783 }, { "epoch": 0.5136731568584555, "grad_norm": 1.2349352836608887, "learning_rate": 1.0044285063220856e-05, "loss": 0.4936, "step": 18784 }, { "epoch": 0.5137005031721724, "grad_norm": 1.330436110496521, "learning_rate": 1.0043399367573964e-05, "loss": 0.4331, "step": 18785 }, { "epoch": 0.5137278494858893, "grad_norm": 1.3424707651138306, "learning_rate": 1.0042513671586618e-05, "loss": 0.4112, "step": 18786 }, { "epoch": 0.5137551957996063, "grad_norm": 1.3036748170852661, "learning_rate": 1.0041627975265766e-05, "loss": 0.5319, "step": 18787 }, { "epoch": 0.5137825421133231, "grad_norm": 1.176749348640442, "learning_rate": 1.0040742278618354e-05, "loss": 0.4793, "step": 18788 }, { "epoch": 0.51380988842704, "grad_norm": 1.2923974990844727, "learning_rate": 1.0039856581651328e-05, "loss": 0.49, "step": 18789 }, { "epoch": 0.5138372347407569, "grad_norm": 1.1816444396972656, "learning_rate": 1.0038970884371641e-05, "loss": 0.5282, "step": 18790 }, { "epoch": 0.5138645810544739, "grad_norm": 1.1579805612564087, "learning_rate": 1.0038085186786239e-05, "loss": 0.4545, "step": 18791 }, { "epoch": 0.5138919273681908, "grad_norm": 1.146881103515625, "learning_rate": 1.0037199488902067e-05, "loss": 0.5129, "step": 18792 }, { "epoch": 0.5139192736819077, "grad_norm": 1.335857629776001, "learning_rate": 1.0036313790726078e-05, "loss": 0.4965, "step": 18793 }, { "epoch": 0.5139466199956246, "grad_norm": 1.2317345142364502, "learning_rate": 1.0035428092265216e-05, "loss": 0.5303, "step": 18794 }, { "epoch": 0.5139739663093414, "grad_norm": 0.9951464533805847, "learning_rate": 1.0034542393526432e-05, "loss": 0.5115, "step": 18795 }, { "epoch": 0.5140013126230584, "grad_norm": 1.146563172340393, "learning_rate": 1.0033656694516669e-05, "loss": 0.4749, "step": 18796 }, { "epoch": 0.5140286589367753, "grad_norm": 1.8064050674438477, "learning_rate": 1.003277099524288e-05, "loss": 0.4136, "step": 18797 }, { "epoch": 0.5140560052504922, "grad_norm": 1.1605795621871948, "learning_rate": 1.0031885295712014e-05, "loss": 0.5024, "step": 18798 }, { "epoch": 0.5140833515642091, "grad_norm": 1.8895469903945923, "learning_rate": 1.0030999595931014e-05, "loss": 0.3541, "step": 18799 }, { "epoch": 0.5141106978779261, "grad_norm": 1.2916805744171143, "learning_rate": 1.0030113895906833e-05, "loss": 0.5106, "step": 18800 }, { "epoch": 0.514138044191643, "grad_norm": 1.2936311960220337, "learning_rate": 1.0029228195646417e-05, "loss": 0.4803, "step": 18801 }, { "epoch": 0.5141653905053599, "grad_norm": 1.2012466192245483, "learning_rate": 1.0028342495156713e-05, "loss": 0.4987, "step": 18802 }, { "epoch": 0.5141927368190767, "grad_norm": 1.4508315324783325, "learning_rate": 1.0027456794444673e-05, "loss": 0.5138, "step": 18803 }, { "epoch": 0.5142200831327937, "grad_norm": 1.1227686405181885, "learning_rate": 1.0026571093517237e-05, "loss": 0.5319, "step": 18804 }, { "epoch": 0.5142474294465106, "grad_norm": 1.6547836065292358, "learning_rate": 1.0025685392381362e-05, "loss": 0.4941, "step": 18805 }, { "epoch": 0.5142747757602275, "grad_norm": 1.4382195472717285, "learning_rate": 1.0024799691043991e-05, "loss": 0.4753, "step": 18806 }, { "epoch": 0.5143021220739444, "grad_norm": 1.5054028034210205, "learning_rate": 1.0023913989512073e-05, "loss": 0.4618, "step": 18807 }, { "epoch": 0.5143294683876614, "grad_norm": 1.1465179920196533, "learning_rate": 1.0023028287792558e-05, "loss": 0.478, "step": 18808 }, { "epoch": 0.5143568147013783, "grad_norm": 1.0885764360427856, "learning_rate": 1.0022142585892393e-05, "loss": 0.4717, "step": 18809 }, { "epoch": 0.5143841610150952, "grad_norm": 1.3720316886901855, "learning_rate": 1.0021256883818524e-05, "loss": 0.4096, "step": 18810 }, { "epoch": 0.514411507328812, "grad_norm": 1.1771125793457031, "learning_rate": 1.00203711815779e-05, "loss": 0.4787, "step": 18811 }, { "epoch": 0.514438853642529, "grad_norm": 1.334080696105957, "learning_rate": 1.001948547917747e-05, "loss": 0.3713, "step": 18812 }, { "epoch": 0.5144661999562459, "grad_norm": 1.2926630973815918, "learning_rate": 1.0018599776624182e-05, "loss": 0.4983, "step": 18813 }, { "epoch": 0.5144935462699628, "grad_norm": 1.285622239112854, "learning_rate": 1.0017714073924987e-05, "loss": 0.4996, "step": 18814 }, { "epoch": 0.5145208925836797, "grad_norm": 1.1815484762191772, "learning_rate": 1.0016828371086827e-05, "loss": 0.4763, "step": 18815 }, { "epoch": 0.5145482388973966, "grad_norm": 1.3446696996688843, "learning_rate": 1.0015942668116655e-05, "loss": 0.5325, "step": 18816 }, { "epoch": 0.5145755852111136, "grad_norm": 1.322852373123169, "learning_rate": 1.0015056965021416e-05, "loss": 0.5022, "step": 18817 }, { "epoch": 0.5146029315248305, "grad_norm": 1.4904775619506836, "learning_rate": 1.0014171261808059e-05, "loss": 0.4918, "step": 18818 }, { "epoch": 0.5146302778385473, "grad_norm": 1.4629814624786377, "learning_rate": 1.0013285558483532e-05, "loss": 0.4975, "step": 18819 }, { "epoch": 0.5146576241522642, "grad_norm": 1.3836756944656372, "learning_rate": 1.0012399855054786e-05, "loss": 0.5359, "step": 18820 }, { "epoch": 0.5146849704659812, "grad_norm": 1.4143131971359253, "learning_rate": 1.0011514151528762e-05, "loss": 0.5438, "step": 18821 }, { "epoch": 0.5147123167796981, "grad_norm": 1.2126151323318481, "learning_rate": 1.0010628447912419e-05, "loss": 0.5236, "step": 18822 }, { "epoch": 0.514739663093415, "grad_norm": 1.5640619993209839, "learning_rate": 1.0009742744212693e-05, "loss": 0.4977, "step": 18823 }, { "epoch": 0.5147670094071319, "grad_norm": 1.268795371055603, "learning_rate": 1.0008857040436542e-05, "loss": 0.4858, "step": 18824 }, { "epoch": 0.5147943557208489, "grad_norm": 1.11351478099823, "learning_rate": 1.0007971336590909e-05, "loss": 0.5014, "step": 18825 }, { "epoch": 0.5148217020345658, "grad_norm": 1.1682124137878418, "learning_rate": 1.0007085632682744e-05, "loss": 0.5033, "step": 18826 }, { "epoch": 0.5148490483482826, "grad_norm": 1.6338766813278198, "learning_rate": 1.0006199928718992e-05, "loss": 0.3861, "step": 18827 }, { "epoch": 0.5148763946619995, "grad_norm": 1.1849100589752197, "learning_rate": 1.0005314224706606e-05, "loss": 0.478, "step": 18828 }, { "epoch": 0.5149037409757165, "grad_norm": 1.1283313035964966, "learning_rate": 1.000442852065253e-05, "loss": 0.4896, "step": 18829 }, { "epoch": 0.5149310872894334, "grad_norm": 1.1639654636383057, "learning_rate": 1.0003542816563712e-05, "loss": 0.7842, "step": 18830 }, { "epoch": 0.5149584336031503, "grad_norm": 1.40534508228302, "learning_rate": 1.0002657112447102e-05, "loss": 0.5218, "step": 18831 }, { "epoch": 0.5149857799168672, "grad_norm": 1.3749839067459106, "learning_rate": 1.0001771408309649e-05, "loss": 0.4959, "step": 18832 }, { "epoch": 0.5150131262305842, "grad_norm": 1.47858464717865, "learning_rate": 1.0000885704158301e-05, "loss": 0.4497, "step": 18833 }, { "epoch": 0.5150404725443011, "grad_norm": 1.2908718585968018, "learning_rate": 1e-05, "loss": 0.5291, "step": 18834 }, { "epoch": 0.5150678188580179, "grad_norm": 1.0854977369308472, "learning_rate": 9.999114295841704e-06, "loss": 0.8036, "step": 18835 }, { "epoch": 0.5150951651717348, "grad_norm": 1.1831380128860474, "learning_rate": 9.998228591690354e-06, "loss": 0.5161, "step": 18836 }, { "epoch": 0.5151225114854517, "grad_norm": 1.2540102005004883, "learning_rate": 9.9973428875529e-06, "loss": 0.5014, "step": 18837 }, { "epoch": 0.5151498577991687, "grad_norm": 2.797053813934326, "learning_rate": 9.996457183436292e-06, "loss": 0.7853, "step": 18838 }, { "epoch": 0.5151772041128856, "grad_norm": 1.1031230688095093, "learning_rate": 9.995571479347475e-06, "loss": 0.5358, "step": 18839 }, { "epoch": 0.5152045504266025, "grad_norm": 1.387459635734558, "learning_rate": 9.994685775293397e-06, "loss": 0.5062, "step": 18840 }, { "epoch": 0.5152318967403194, "grad_norm": 1.3220432996749878, "learning_rate": 9.99380007128101e-06, "loss": 0.5232, "step": 18841 }, { "epoch": 0.5152592430540364, "grad_norm": 1.6178367137908936, "learning_rate": 9.992914367317258e-06, "loss": 0.484, "step": 18842 }, { "epoch": 0.5152865893677532, "grad_norm": 1.219877004623413, "learning_rate": 9.992028663409091e-06, "loss": 0.5179, "step": 18843 }, { "epoch": 0.5153139356814701, "grad_norm": 5.787145137786865, "learning_rate": 9.99114295956346e-06, "loss": 0.757, "step": 18844 }, { "epoch": 0.515341281995187, "grad_norm": 1.4074490070343018, "learning_rate": 9.990257255787306e-06, "loss": 0.4313, "step": 18845 }, { "epoch": 0.515368628308904, "grad_norm": 1.5136945247650146, "learning_rate": 9.989371552087585e-06, "loss": 0.5151, "step": 18846 }, { "epoch": 0.5153959746226209, "grad_norm": 2.0780487060546875, "learning_rate": 9.988485848471236e-06, "loss": 0.7607, "step": 18847 }, { "epoch": 0.5154233209363378, "grad_norm": 1.3460596799850464, "learning_rate": 9.987600144945215e-06, "loss": 0.5054, "step": 18848 }, { "epoch": 0.5154506672500547, "grad_norm": 1.4846549034118652, "learning_rate": 9.986714441516468e-06, "loss": 0.5406, "step": 18849 }, { "epoch": 0.5154780135637717, "grad_norm": 1.2481285333633423, "learning_rate": 9.985828738191941e-06, "loss": 0.494, "step": 18850 }, { "epoch": 0.5155053598774885, "grad_norm": 1.752633810043335, "learning_rate": 9.984943034978587e-06, "loss": 0.5041, "step": 18851 }, { "epoch": 0.5155327061912054, "grad_norm": 1.0978367328643799, "learning_rate": 9.98405733188335e-06, "loss": 0.502, "step": 18852 }, { "epoch": 0.5155600525049223, "grad_norm": 1.6033549308776855, "learning_rate": 9.983171628913178e-06, "loss": 0.5078, "step": 18853 }, { "epoch": 0.5155873988186392, "grad_norm": 1.4012115001678467, "learning_rate": 9.982285926075018e-06, "loss": 0.412, "step": 18854 }, { "epoch": 0.5156147451323562, "grad_norm": 1.1714624166488647, "learning_rate": 9.981400223375823e-06, "loss": 0.4993, "step": 18855 }, { "epoch": 0.5156420914460731, "grad_norm": 1.3701711893081665, "learning_rate": 9.980514520822535e-06, "loss": 0.7478, "step": 18856 }, { "epoch": 0.51566943775979, "grad_norm": 1.0870170593261719, "learning_rate": 9.979628818422104e-06, "loss": 0.7742, "step": 18857 }, { "epoch": 0.5156967840735069, "grad_norm": 1.1668606996536255, "learning_rate": 9.978743116181481e-06, "loss": 0.5061, "step": 18858 }, { "epoch": 0.5157241303872238, "grad_norm": 1.1915725469589233, "learning_rate": 9.977857414107612e-06, "loss": 0.5244, "step": 18859 }, { "epoch": 0.5157514767009407, "grad_norm": 1.6457699537277222, "learning_rate": 9.976971712207447e-06, "loss": 0.4837, "step": 18860 }, { "epoch": 0.5157788230146576, "grad_norm": 1.3030245304107666, "learning_rate": 9.97608601048793e-06, "loss": 0.5274, "step": 18861 }, { "epoch": 0.5158061693283745, "grad_norm": 1.375277042388916, "learning_rate": 9.975200308956012e-06, "loss": 0.4893, "step": 18862 }, { "epoch": 0.5158335156420915, "grad_norm": 1.3643437623977661, "learning_rate": 9.974314607618643e-06, "loss": 0.5078, "step": 18863 }, { "epoch": 0.5158608619558084, "grad_norm": 1.530711054801941, "learning_rate": 9.973428906482766e-06, "loss": 0.4715, "step": 18864 }, { "epoch": 0.5158882082695253, "grad_norm": 1.1889114379882812, "learning_rate": 9.972543205555332e-06, "loss": 0.4967, "step": 18865 }, { "epoch": 0.5159155545832422, "grad_norm": 1.2810577154159546, "learning_rate": 9.97165750484329e-06, "loss": 0.5072, "step": 18866 }, { "epoch": 0.515942900896959, "grad_norm": 1.3860819339752197, "learning_rate": 9.970771804353585e-06, "loss": 0.527, "step": 18867 }, { "epoch": 0.515970247210676, "grad_norm": 1.1419931650161743, "learning_rate": 9.96988610409317e-06, "loss": 0.4893, "step": 18868 }, { "epoch": 0.5159975935243929, "grad_norm": 1.302220106124878, "learning_rate": 9.969000404068987e-06, "loss": 0.4758, "step": 18869 }, { "epoch": 0.5160249398381098, "grad_norm": 1.50872802734375, "learning_rate": 9.968114704287987e-06, "loss": 0.5028, "step": 18870 }, { "epoch": 0.5160522861518267, "grad_norm": 1.1739716529846191, "learning_rate": 9.967229004757121e-06, "loss": 0.4703, "step": 18871 }, { "epoch": 0.5160796324655437, "grad_norm": 1.1342451572418213, "learning_rate": 9.966343305483334e-06, "loss": 0.4969, "step": 18872 }, { "epoch": 0.5161069787792606, "grad_norm": 1.6922833919525146, "learning_rate": 9.965457606473571e-06, "loss": 0.5002, "step": 18873 }, { "epoch": 0.5161343250929775, "grad_norm": 1.6633081436157227, "learning_rate": 9.964571907734788e-06, "loss": 0.4051, "step": 18874 }, { "epoch": 0.5161616714066943, "grad_norm": 1.6720936298370361, "learning_rate": 9.963686209273924e-06, "loss": 0.7932, "step": 18875 }, { "epoch": 0.5161890177204113, "grad_norm": 1.1528047323226929, "learning_rate": 9.962800511097936e-06, "loss": 0.4878, "step": 18876 }, { "epoch": 0.5162163640341282, "grad_norm": 1.4201571941375732, "learning_rate": 9.961914813213763e-06, "loss": 0.5042, "step": 18877 }, { "epoch": 0.5162437103478451, "grad_norm": 1.4312124252319336, "learning_rate": 9.961029115628359e-06, "loss": 0.5289, "step": 18878 }, { "epoch": 0.516271056661562, "grad_norm": 1.28584623336792, "learning_rate": 9.960143418348672e-06, "loss": 0.5116, "step": 18879 }, { "epoch": 0.516298402975279, "grad_norm": 1.7100956439971924, "learning_rate": 9.959257721381646e-06, "loss": 0.3813, "step": 18880 }, { "epoch": 0.5163257492889959, "grad_norm": 1.228992223739624, "learning_rate": 9.958372024734236e-06, "loss": 0.4934, "step": 18881 }, { "epoch": 0.5163530956027128, "grad_norm": 1.2511096000671387, "learning_rate": 9.957486328413382e-06, "loss": 0.474, "step": 18882 }, { "epoch": 0.5163804419164296, "grad_norm": 1.644630789756775, "learning_rate": 9.956600632426038e-06, "loss": 0.354, "step": 18883 }, { "epoch": 0.5164077882301465, "grad_norm": 1.3136110305786133, "learning_rate": 9.955714936779148e-06, "loss": 0.4918, "step": 18884 }, { "epoch": 0.5164351345438635, "grad_norm": 1.3061543703079224, "learning_rate": 9.954829241479662e-06, "loss": 0.4963, "step": 18885 }, { "epoch": 0.5164624808575804, "grad_norm": 1.2768703699111938, "learning_rate": 9.953943546534528e-06, "loss": 0.5367, "step": 18886 }, { "epoch": 0.5164898271712973, "grad_norm": 1.3485528230667114, "learning_rate": 9.953057851950699e-06, "loss": 0.4888, "step": 18887 }, { "epoch": 0.5165171734850142, "grad_norm": 1.9750556945800781, "learning_rate": 9.952172157735114e-06, "loss": 0.3785, "step": 18888 }, { "epoch": 0.5165445197987312, "grad_norm": 1.4348266124725342, "learning_rate": 9.951286463894723e-06, "loss": 0.4028, "step": 18889 }, { "epoch": 0.5165718661124481, "grad_norm": 1.219115138053894, "learning_rate": 9.950400770436478e-06, "loss": 0.5263, "step": 18890 }, { "epoch": 0.5165992124261649, "grad_norm": 1.0342645645141602, "learning_rate": 9.949515077367325e-06, "loss": 0.478, "step": 18891 }, { "epoch": 0.5166265587398818, "grad_norm": 1.0884846448898315, "learning_rate": 9.94862938469421e-06, "loss": 0.4779, "step": 18892 }, { "epoch": 0.5166539050535988, "grad_norm": 1.2172771692276, "learning_rate": 9.947743692424085e-06, "loss": 0.4895, "step": 18893 }, { "epoch": 0.5166812513673157, "grad_norm": 1.2591595649719238, "learning_rate": 9.946858000563897e-06, "loss": 0.5474, "step": 18894 }, { "epoch": 0.5167085976810326, "grad_norm": 1.2478545904159546, "learning_rate": 9.945972309120589e-06, "loss": 0.5045, "step": 18895 }, { "epoch": 0.5167359439947495, "grad_norm": 1.5547945499420166, "learning_rate": 9.945086618101116e-06, "loss": 0.4935, "step": 18896 }, { "epoch": 0.5167632903084665, "grad_norm": 1.50547456741333, "learning_rate": 9.944200927512423e-06, "loss": 0.4692, "step": 18897 }, { "epoch": 0.5167906366221833, "grad_norm": 3.7301883697509766, "learning_rate": 9.943315237361459e-06, "loss": 0.4991, "step": 18898 }, { "epoch": 0.5168179829359002, "grad_norm": 1.4352670907974243, "learning_rate": 9.942429547655169e-06, "loss": 0.4337, "step": 18899 }, { "epoch": 0.5168453292496171, "grad_norm": 1.4372611045837402, "learning_rate": 9.941543858400503e-06, "loss": 0.4693, "step": 18900 }, { "epoch": 0.516872675563334, "grad_norm": 1.1987532377243042, "learning_rate": 9.940658169604409e-06, "loss": 0.5102, "step": 18901 }, { "epoch": 0.516900021877051, "grad_norm": 1.1921194791793823, "learning_rate": 9.939772481273835e-06, "loss": 0.5139, "step": 18902 }, { "epoch": 0.5169273681907679, "grad_norm": 1.170123815536499, "learning_rate": 9.93888679341573e-06, "loss": 0.492, "step": 18903 }, { "epoch": 0.5169547145044848, "grad_norm": 1.225954294204712, "learning_rate": 9.93800110603704e-06, "loss": 0.4854, "step": 18904 }, { "epoch": 0.5169820608182017, "grad_norm": 1.8440231084823608, "learning_rate": 9.937115419144714e-06, "loss": 0.7814, "step": 18905 }, { "epoch": 0.5170094071319186, "grad_norm": 1.0988085269927979, "learning_rate": 9.936229732745701e-06, "loss": 0.5062, "step": 18906 }, { "epoch": 0.5170367534456355, "grad_norm": 1.3464051485061646, "learning_rate": 9.935344046846946e-06, "loss": 0.5114, "step": 18907 }, { "epoch": 0.5170640997593524, "grad_norm": 2.0009429454803467, "learning_rate": 9.9344583614554e-06, "loss": 0.5015, "step": 18908 }, { "epoch": 0.5170914460730693, "grad_norm": 1.124313473701477, "learning_rate": 9.93357267657801e-06, "loss": 0.5047, "step": 18909 }, { "epoch": 0.5171187923867863, "grad_norm": 1.3263307809829712, "learning_rate": 9.932686992221722e-06, "loss": 0.5251, "step": 18910 }, { "epoch": 0.5171461387005032, "grad_norm": 1.2569984197616577, "learning_rate": 9.931801308393488e-06, "loss": 0.5034, "step": 18911 }, { "epoch": 0.5171734850142201, "grad_norm": 1.2629270553588867, "learning_rate": 9.93091562510025e-06, "loss": 0.4984, "step": 18912 }, { "epoch": 0.517200831327937, "grad_norm": 1.2325397729873657, "learning_rate": 9.93002994234896e-06, "loss": 0.5179, "step": 18913 }, { "epoch": 0.5172281776416539, "grad_norm": 1.2664923667907715, "learning_rate": 9.929144260146571e-06, "loss": 0.5409, "step": 18914 }, { "epoch": 0.5172555239553708, "grad_norm": 1.381304383277893, "learning_rate": 9.92825857850002e-06, "loss": 0.5004, "step": 18915 }, { "epoch": 0.5172828702690877, "grad_norm": 1.027831792831421, "learning_rate": 9.927372897416262e-06, "loss": 0.5198, "step": 18916 }, { "epoch": 0.5173102165828046, "grad_norm": 1.4958851337432861, "learning_rate": 9.926487216902243e-06, "loss": 0.5296, "step": 18917 }, { "epoch": 0.5173375628965216, "grad_norm": 1.1832789182662964, "learning_rate": 9.92560153696491e-06, "loss": 0.8265, "step": 18918 }, { "epoch": 0.5173649092102385, "grad_norm": 1.5365560054779053, "learning_rate": 9.924715857611214e-06, "loss": 0.3664, "step": 18919 }, { "epoch": 0.5173922555239554, "grad_norm": 1.4540523290634155, "learning_rate": 9.923830178848098e-06, "loss": 0.5087, "step": 18920 }, { "epoch": 0.5174196018376723, "grad_norm": 1.365072250366211, "learning_rate": 9.922944500682515e-06, "loss": 0.3314, "step": 18921 }, { "epoch": 0.5174469481513891, "grad_norm": 1.5084257125854492, "learning_rate": 9.922058823121413e-06, "loss": 0.3906, "step": 18922 }, { "epoch": 0.5174742944651061, "grad_norm": 1.3700823783874512, "learning_rate": 9.921173146171736e-06, "loss": 0.5565, "step": 18923 }, { "epoch": 0.517501640778823, "grad_norm": 1.1960458755493164, "learning_rate": 9.920287469840431e-06, "loss": 0.7449, "step": 18924 }, { "epoch": 0.5175289870925399, "grad_norm": 1.183236002922058, "learning_rate": 9.919401794134452e-06, "loss": 0.4991, "step": 18925 }, { "epoch": 0.5175563334062568, "grad_norm": 1.0565749406814575, "learning_rate": 9.91851611906074e-06, "loss": 0.5163, "step": 18926 }, { "epoch": 0.5175836797199738, "grad_norm": 1.4415620565414429, "learning_rate": 9.917630444626246e-06, "loss": 0.4859, "step": 18927 }, { "epoch": 0.5176110260336907, "grad_norm": 2.2090892791748047, "learning_rate": 9.91674477083792e-06, "loss": 0.3854, "step": 18928 }, { "epoch": 0.5176383723474076, "grad_norm": 1.1232129335403442, "learning_rate": 9.915859097702707e-06, "loss": 0.4978, "step": 18929 }, { "epoch": 0.5176657186611244, "grad_norm": 2.5427968502044678, "learning_rate": 9.914973425227555e-06, "loss": 0.4711, "step": 18930 }, { "epoch": 0.5176930649748414, "grad_norm": 1.3702032566070557, "learning_rate": 9.914087753419415e-06, "loss": 0.782, "step": 18931 }, { "epoch": 0.5177204112885583, "grad_norm": 1.4782294034957886, "learning_rate": 9.91320208228523e-06, "loss": 0.4452, "step": 18932 }, { "epoch": 0.5177477576022752, "grad_norm": 1.0879888534545898, "learning_rate": 9.912316411831951e-06, "loss": 0.4989, "step": 18933 }, { "epoch": 0.5177751039159921, "grad_norm": 1.2159016132354736, "learning_rate": 9.911430742066524e-06, "loss": 0.3799, "step": 18934 }, { "epoch": 0.517802450229709, "grad_norm": 1.1694889068603516, "learning_rate": 9.9105450729959e-06, "loss": 0.5176, "step": 18935 }, { "epoch": 0.517829796543426, "grad_norm": 1.2238714694976807, "learning_rate": 9.909659404627023e-06, "loss": 0.4929, "step": 18936 }, { "epoch": 0.5178571428571429, "grad_norm": 1.2080270051956177, "learning_rate": 9.908773736966842e-06, "loss": 0.5412, "step": 18937 }, { "epoch": 0.5178844891708597, "grad_norm": 1.149308204650879, "learning_rate": 9.907888070022306e-06, "loss": 0.5014, "step": 18938 }, { "epoch": 0.5179118354845766, "grad_norm": 1.1756854057312012, "learning_rate": 9.907002403800363e-06, "loss": 0.4851, "step": 18939 }, { "epoch": 0.5179391817982936, "grad_norm": 1.2743552923202515, "learning_rate": 9.90611673830796e-06, "loss": 0.3578, "step": 18940 }, { "epoch": 0.5179665281120105, "grad_norm": 1.237107276916504, "learning_rate": 9.905231073552044e-06, "loss": 0.5182, "step": 18941 }, { "epoch": 0.5179938744257274, "grad_norm": 1.228406548500061, "learning_rate": 9.904345409539563e-06, "loss": 0.5047, "step": 18942 }, { "epoch": 0.5180212207394443, "grad_norm": 1.8800784349441528, "learning_rate": 9.903459746277466e-06, "loss": 0.5154, "step": 18943 }, { "epoch": 0.5180485670531613, "grad_norm": 1.3278688192367554, "learning_rate": 9.902574083772703e-06, "loss": 0.4876, "step": 18944 }, { "epoch": 0.5180759133668782, "grad_norm": 1.3716042041778564, "learning_rate": 9.901688422032214e-06, "loss": 0.5251, "step": 18945 }, { "epoch": 0.518103259680595, "grad_norm": 2.325948476791382, "learning_rate": 9.900802761062956e-06, "loss": 0.4041, "step": 18946 }, { "epoch": 0.5181306059943119, "grad_norm": 1.2218492031097412, "learning_rate": 9.89991710087187e-06, "loss": 0.3566, "step": 18947 }, { "epoch": 0.5181579523080289, "grad_norm": 1.3791276216506958, "learning_rate": 9.899031441465906e-06, "loss": 0.7638, "step": 18948 }, { "epoch": 0.5181852986217458, "grad_norm": 1.1608144044876099, "learning_rate": 9.898145782852011e-06, "loss": 0.4834, "step": 18949 }, { "epoch": 0.5182126449354627, "grad_norm": 1.3603384494781494, "learning_rate": 9.897260125037137e-06, "loss": 0.5052, "step": 18950 }, { "epoch": 0.5182399912491796, "grad_norm": 1.2638026475906372, "learning_rate": 9.896374468028224e-06, "loss": 0.4949, "step": 18951 }, { "epoch": 0.5182673375628966, "grad_norm": 2.334512233734131, "learning_rate": 9.895488811832229e-06, "loss": 0.5444, "step": 18952 }, { "epoch": 0.5182946838766135, "grad_norm": 1.4113563299179077, "learning_rate": 9.894603156456091e-06, "loss": 0.5067, "step": 18953 }, { "epoch": 0.5183220301903303, "grad_norm": 1.2062064409255981, "learning_rate": 9.893717501906764e-06, "loss": 0.8034, "step": 18954 }, { "epoch": 0.5183493765040472, "grad_norm": 1.2792128324508667, "learning_rate": 9.892831848191193e-06, "loss": 0.4924, "step": 18955 }, { "epoch": 0.5183767228177641, "grad_norm": 1.246270775794983, "learning_rate": 9.891946195316323e-06, "loss": 0.497, "step": 18956 }, { "epoch": 0.5184040691314811, "grad_norm": 1.199469804763794, "learning_rate": 9.891060543289109e-06, "loss": 0.5077, "step": 18957 }, { "epoch": 0.518431415445198, "grad_norm": 1.246625542640686, "learning_rate": 9.890174892116496e-06, "loss": 0.5116, "step": 18958 }, { "epoch": 0.5184587617589149, "grad_norm": 1.2898567914962769, "learning_rate": 9.889289241805425e-06, "loss": 0.4647, "step": 18959 }, { "epoch": 0.5184861080726318, "grad_norm": 1.0844601392745972, "learning_rate": 9.888403592362849e-06, "loss": 0.5076, "step": 18960 }, { "epoch": 0.5185134543863488, "grad_norm": 1.299351692199707, "learning_rate": 9.887517943795719e-06, "loss": 0.5045, "step": 18961 }, { "epoch": 0.5185408007000656, "grad_norm": 1.5818066596984863, "learning_rate": 9.886632296110975e-06, "loss": 0.4165, "step": 18962 }, { "epoch": 0.5185681470137825, "grad_norm": 1.367583990097046, "learning_rate": 9.885746649315572e-06, "loss": 0.3731, "step": 18963 }, { "epoch": 0.5185954933274994, "grad_norm": 2.0640645027160645, "learning_rate": 9.884861003416452e-06, "loss": 0.5039, "step": 18964 }, { "epoch": 0.5186228396412164, "grad_norm": 1.3937832117080688, "learning_rate": 9.883975358420566e-06, "loss": 0.5147, "step": 18965 }, { "epoch": 0.5186501859549333, "grad_norm": 1.0865988731384277, "learning_rate": 9.883089714334862e-06, "loss": 0.4616, "step": 18966 }, { "epoch": 0.5186775322686502, "grad_norm": 1.2000267505645752, "learning_rate": 9.882204071166284e-06, "loss": 0.5089, "step": 18967 }, { "epoch": 0.5187048785823671, "grad_norm": 1.2127569913864136, "learning_rate": 9.881318428921783e-06, "loss": 0.5078, "step": 18968 }, { "epoch": 0.5187322248960841, "grad_norm": 1.2028052806854248, "learning_rate": 9.880432787608304e-06, "loss": 0.4841, "step": 18969 }, { "epoch": 0.5187595712098009, "grad_norm": 1.0983797311782837, "learning_rate": 9.879547147232798e-06, "loss": 0.5118, "step": 18970 }, { "epoch": 0.5187869175235178, "grad_norm": 1.2024105787277222, "learning_rate": 9.87866150780221e-06, "loss": 0.5078, "step": 18971 }, { "epoch": 0.5188142638372347, "grad_norm": 1.2230801582336426, "learning_rate": 9.877775869323489e-06, "loss": 0.4952, "step": 18972 }, { "epoch": 0.5188416101509516, "grad_norm": 1.383707880973816, "learning_rate": 9.87689023180358e-06, "loss": 0.404, "step": 18973 }, { "epoch": 0.5188689564646686, "grad_norm": 1.2623029947280884, "learning_rate": 9.876004595249435e-06, "loss": 0.4704, "step": 18974 }, { "epoch": 0.5188963027783855, "grad_norm": 1.3143961429595947, "learning_rate": 9.875118959667998e-06, "loss": 0.4936, "step": 18975 }, { "epoch": 0.5189236490921024, "grad_norm": 1.3741182088851929, "learning_rate": 9.874233325066218e-06, "loss": 0.7698, "step": 18976 }, { "epoch": 0.5189509954058193, "grad_norm": 1.3541269302368164, "learning_rate": 9.873347691451041e-06, "loss": 0.48, "step": 18977 }, { "epoch": 0.5189783417195362, "grad_norm": 1.3802410364151, "learning_rate": 9.872462058829415e-06, "loss": 0.4292, "step": 18978 }, { "epoch": 0.5190056880332531, "grad_norm": 1.059884786605835, "learning_rate": 9.871576427208292e-06, "loss": 0.5067, "step": 18979 }, { "epoch": 0.51903303434697, "grad_norm": 1.0649710893630981, "learning_rate": 9.870690796594611e-06, "loss": 0.4975, "step": 18980 }, { "epoch": 0.5190603806606869, "grad_norm": 1.3894306421279907, "learning_rate": 9.86980516699533e-06, "loss": 0.5274, "step": 18981 }, { "epoch": 0.5190877269744039, "grad_norm": 1.2333862781524658, "learning_rate": 9.868919538417386e-06, "loss": 0.4879, "step": 18982 }, { "epoch": 0.5191150732881208, "grad_norm": 1.1293315887451172, "learning_rate": 9.868033910867733e-06, "loss": 0.5058, "step": 18983 }, { "epoch": 0.5191424196018377, "grad_norm": 1.5915356874465942, "learning_rate": 9.867148284353318e-06, "loss": 0.5161, "step": 18984 }, { "epoch": 0.5191697659155546, "grad_norm": 1.1931904554367065, "learning_rate": 9.866262658881083e-06, "loss": 0.4943, "step": 18985 }, { "epoch": 0.5191971122292715, "grad_norm": 1.526556372642517, "learning_rate": 9.865377034457984e-06, "loss": 0.5228, "step": 18986 }, { "epoch": 0.5192244585429884, "grad_norm": 1.0948543548583984, "learning_rate": 9.864491411090963e-06, "loss": 0.3716, "step": 18987 }, { "epoch": 0.5192518048567053, "grad_norm": 1.4904747009277344, "learning_rate": 9.863605788786969e-06, "loss": 0.5406, "step": 18988 }, { "epoch": 0.5192791511704222, "grad_norm": 1.3846122026443481, "learning_rate": 9.86272016755295e-06, "loss": 0.5239, "step": 18989 }, { "epoch": 0.5193064974841392, "grad_norm": 1.3494911193847656, "learning_rate": 9.861834547395851e-06, "loss": 0.5083, "step": 18990 }, { "epoch": 0.5193338437978561, "grad_norm": 1.126806378364563, "learning_rate": 9.860948928322622e-06, "loss": 0.5092, "step": 18991 }, { "epoch": 0.519361190111573, "grad_norm": 1.339120864868164, "learning_rate": 9.86006331034021e-06, "loss": 0.4684, "step": 18992 }, { "epoch": 0.5193885364252899, "grad_norm": 1.5085128545761108, "learning_rate": 9.859177693455565e-06, "loss": 0.4291, "step": 18993 }, { "epoch": 0.5194158827390067, "grad_norm": 1.0887125730514526, "learning_rate": 9.858292077675627e-06, "loss": 0.4656, "step": 18994 }, { "epoch": 0.5194432290527237, "grad_norm": 1.126122236251831, "learning_rate": 9.857406463007348e-06, "loss": 0.5005, "step": 18995 }, { "epoch": 0.5194705753664406, "grad_norm": 1.2788492441177368, "learning_rate": 9.856520849457677e-06, "loss": 0.4816, "step": 18996 }, { "epoch": 0.5194979216801575, "grad_norm": 2.225376844406128, "learning_rate": 9.855635237033556e-06, "loss": 0.4866, "step": 18997 }, { "epoch": 0.5195252679938744, "grad_norm": 1.0990843772888184, "learning_rate": 9.854749625741939e-06, "loss": 0.4962, "step": 18998 }, { "epoch": 0.5195526143075914, "grad_norm": 1.4324613809585571, "learning_rate": 9.853864015589769e-06, "loss": 0.4915, "step": 18999 }, { "epoch": 0.5195799606213083, "grad_norm": 1.2903566360473633, "learning_rate": 9.852978406583996e-06, "loss": 0.4797, "step": 19000 }, { "epoch": 0.5196073069350251, "grad_norm": 1.2515090703964233, "learning_rate": 9.852092798731566e-06, "loss": 0.4992, "step": 19001 }, { "epoch": 0.519634653248742, "grad_norm": 1.25020432472229, "learning_rate": 9.851207192039424e-06, "loss": 0.4823, "step": 19002 }, { "epoch": 0.519661999562459, "grad_norm": 1.3703062534332275, "learning_rate": 9.850321586514519e-06, "loss": 0.493, "step": 19003 }, { "epoch": 0.5196893458761759, "grad_norm": 1.5818573236465454, "learning_rate": 9.849435982163803e-06, "loss": 0.4008, "step": 19004 }, { "epoch": 0.5197166921898928, "grad_norm": 1.2092865705490112, "learning_rate": 9.848550378994217e-06, "loss": 0.5178, "step": 19005 }, { "epoch": 0.5197440385036097, "grad_norm": 1.3260725736618042, "learning_rate": 9.847664777012713e-06, "loss": 0.3966, "step": 19006 }, { "epoch": 0.5197713848173267, "grad_norm": 1.39775550365448, "learning_rate": 9.846779176226233e-06, "loss": 0.4186, "step": 19007 }, { "epoch": 0.5197987311310436, "grad_norm": 1.4266042709350586, "learning_rate": 9.845893576641727e-06, "loss": 0.5126, "step": 19008 }, { "epoch": 0.5198260774447604, "grad_norm": 1.0880539417266846, "learning_rate": 9.845007978266146e-06, "loss": 0.34, "step": 19009 }, { "epoch": 0.5198534237584773, "grad_norm": 1.1979875564575195, "learning_rate": 9.844122381106431e-06, "loss": 0.4681, "step": 19010 }, { "epoch": 0.5198807700721942, "grad_norm": 1.3113197088241577, "learning_rate": 9.843236785169534e-06, "loss": 0.4978, "step": 19011 }, { "epoch": 0.5199081163859112, "grad_norm": 1.2446166276931763, "learning_rate": 9.842351190462399e-06, "loss": 0.5359, "step": 19012 }, { "epoch": 0.5199354626996281, "grad_norm": 1.330232858657837, "learning_rate": 9.841465596991972e-06, "loss": 0.4853, "step": 19013 }, { "epoch": 0.519962809013345, "grad_norm": 2.1369383335113525, "learning_rate": 9.840580004765208e-06, "loss": 0.467, "step": 19014 }, { "epoch": 0.5199901553270619, "grad_norm": 1.2005524635314941, "learning_rate": 9.839694413789044e-06, "loss": 0.4949, "step": 19015 }, { "epoch": 0.5200175016407789, "grad_norm": 1.2953464984893799, "learning_rate": 9.838808824070435e-06, "loss": 0.4832, "step": 19016 }, { "epoch": 0.5200448479544957, "grad_norm": 1.2775148153305054, "learning_rate": 9.837923235616327e-06, "loss": 0.5028, "step": 19017 }, { "epoch": 0.5200721942682126, "grad_norm": 1.244445562362671, "learning_rate": 9.837037648433662e-06, "loss": 0.8147, "step": 19018 }, { "epoch": 0.5200995405819295, "grad_norm": 1.442747712135315, "learning_rate": 9.836152062529395e-06, "loss": 0.4878, "step": 19019 }, { "epoch": 0.5201268868956465, "grad_norm": 1.3349858522415161, "learning_rate": 9.835266477910464e-06, "loss": 0.7733, "step": 19020 }, { "epoch": 0.5201542332093634, "grad_norm": 1.8454079627990723, "learning_rate": 9.834380894583822e-06, "loss": 0.5126, "step": 19021 }, { "epoch": 0.5201815795230803, "grad_norm": 1.538337230682373, "learning_rate": 9.83349531255642e-06, "loss": 0.5352, "step": 19022 }, { "epoch": 0.5202089258367972, "grad_norm": 1.2381867170333862, "learning_rate": 9.832609731835195e-06, "loss": 0.5084, "step": 19023 }, { "epoch": 0.5202362721505142, "grad_norm": 1.1806459426879883, "learning_rate": 9.831724152427104e-06, "loss": 0.4927, "step": 19024 }, { "epoch": 0.520263618464231, "grad_norm": 1.4212071895599365, "learning_rate": 9.830838574339085e-06, "loss": 0.3471, "step": 19025 }, { "epoch": 0.5202909647779479, "grad_norm": 1.2045499086380005, "learning_rate": 9.829952997578093e-06, "loss": 0.4938, "step": 19026 }, { "epoch": 0.5203183110916648, "grad_norm": 1.5453580617904663, "learning_rate": 9.829067422151072e-06, "loss": 0.3931, "step": 19027 }, { "epoch": 0.5203456574053817, "grad_norm": 1.3206428289413452, "learning_rate": 9.828181848064972e-06, "loss": 0.4762, "step": 19028 }, { "epoch": 0.5203730037190987, "grad_norm": 1.3602936267852783, "learning_rate": 9.827296275326732e-06, "loss": 0.7474, "step": 19029 }, { "epoch": 0.5204003500328156, "grad_norm": 1.110946536064148, "learning_rate": 9.826410703943304e-06, "loss": 0.3971, "step": 19030 }, { "epoch": 0.5204276963465325, "grad_norm": 1.608908772468567, "learning_rate": 9.825525133921638e-06, "loss": 0.3971, "step": 19031 }, { "epoch": 0.5204550426602494, "grad_norm": 1.1178157329559326, "learning_rate": 9.824639565268675e-06, "loss": 0.4728, "step": 19032 }, { "epoch": 0.5204823889739663, "grad_norm": 1.1493854522705078, "learning_rate": 9.823753997991368e-06, "loss": 0.4099, "step": 19033 }, { "epoch": 0.5205097352876832, "grad_norm": 1.4620481729507446, "learning_rate": 9.82286843209666e-06, "loss": 0.4174, "step": 19034 }, { "epoch": 0.5205370816014001, "grad_norm": 6.888040065765381, "learning_rate": 9.821982867591498e-06, "loss": 0.7756, "step": 19035 }, { "epoch": 0.520564427915117, "grad_norm": 1.5956460237503052, "learning_rate": 9.821097304482834e-06, "loss": 0.7658, "step": 19036 }, { "epoch": 0.520591774228834, "grad_norm": 1.5725133419036865, "learning_rate": 9.820211742777608e-06, "loss": 0.5199, "step": 19037 }, { "epoch": 0.5206191205425509, "grad_norm": 1.388790249824524, "learning_rate": 9.81932618248277e-06, "loss": 0.4953, "step": 19038 }, { "epoch": 0.5206464668562678, "grad_norm": 1.3458707332611084, "learning_rate": 9.81844062360527e-06, "loss": 0.485, "step": 19039 }, { "epoch": 0.5206738131699847, "grad_norm": 1.8612310886383057, "learning_rate": 9.817555066152051e-06, "loss": 0.371, "step": 19040 }, { "epoch": 0.5207011594837015, "grad_norm": 1.3595458269119263, "learning_rate": 9.816669510130062e-06, "loss": 0.391, "step": 19041 }, { "epoch": 0.5207285057974185, "grad_norm": 3.005558490753174, "learning_rate": 9.815783955546247e-06, "loss": 0.5001, "step": 19042 }, { "epoch": 0.5207558521111354, "grad_norm": 1.3482707738876343, "learning_rate": 9.814898402407555e-06, "loss": 0.8047, "step": 19043 }, { "epoch": 0.5207831984248523, "grad_norm": 1.403182029724121, "learning_rate": 9.814012850720935e-06, "loss": 0.4947, "step": 19044 }, { "epoch": 0.5208105447385692, "grad_norm": 1.1710150241851807, "learning_rate": 9.81312730049333e-06, "loss": 0.4807, "step": 19045 }, { "epoch": 0.5208378910522862, "grad_norm": 1.1630001068115234, "learning_rate": 9.812241751731691e-06, "loss": 0.4704, "step": 19046 }, { "epoch": 0.5208652373660031, "grad_norm": 1.506371021270752, "learning_rate": 9.81135620444296e-06, "loss": 0.4997, "step": 19047 }, { "epoch": 0.52089258367972, "grad_norm": 1.2134040594100952, "learning_rate": 9.810470658634087e-06, "loss": 0.5071, "step": 19048 }, { "epoch": 0.5209199299934368, "grad_norm": 1.3948967456817627, "learning_rate": 9.80958511431202e-06, "loss": 0.4661, "step": 19049 }, { "epoch": 0.5209472763071538, "grad_norm": 1.0702661275863647, "learning_rate": 9.808699571483702e-06, "loss": 0.4789, "step": 19050 }, { "epoch": 0.5209746226208707, "grad_norm": 1.2505366802215576, "learning_rate": 9.807814030156083e-06, "loss": 0.5062, "step": 19051 }, { "epoch": 0.5210019689345876, "grad_norm": 1.1821538209915161, "learning_rate": 9.80692849033611e-06, "loss": 0.5038, "step": 19052 }, { "epoch": 0.5210293152483045, "grad_norm": 1.4603906869888306, "learning_rate": 9.806042952030728e-06, "loss": 0.7891, "step": 19053 }, { "epoch": 0.5210566615620215, "grad_norm": 1.3741196393966675, "learning_rate": 9.805157415246885e-06, "loss": 0.496, "step": 19054 }, { "epoch": 0.5210840078757384, "grad_norm": 1.1959000825881958, "learning_rate": 9.804271879991525e-06, "loss": 0.4951, "step": 19055 }, { "epoch": 0.5211113541894553, "grad_norm": 1.2089844942092896, "learning_rate": 9.803386346271599e-06, "loss": 0.5226, "step": 19056 }, { "epoch": 0.5211387005031721, "grad_norm": 1.2276655435562134, "learning_rate": 9.802500814094052e-06, "loss": 0.3638, "step": 19057 }, { "epoch": 0.521166046816889, "grad_norm": 1.4823379516601562, "learning_rate": 9.801615283465829e-06, "loss": 0.5194, "step": 19058 }, { "epoch": 0.521193393130606, "grad_norm": 1.3297876119613647, "learning_rate": 9.800729754393879e-06, "loss": 0.5017, "step": 19059 }, { "epoch": 0.5212207394443229, "grad_norm": 1.553672432899475, "learning_rate": 9.799844226885148e-06, "loss": 0.467, "step": 19060 }, { "epoch": 0.5212480857580398, "grad_norm": 1.2104958295822144, "learning_rate": 9.798958700946583e-06, "loss": 0.4884, "step": 19061 }, { "epoch": 0.5212754320717568, "grad_norm": 1.250168800354004, "learning_rate": 9.79807317658513e-06, "loss": 0.5025, "step": 19062 }, { "epoch": 0.5213027783854737, "grad_norm": 1.199307918548584, "learning_rate": 9.79718765380774e-06, "loss": 0.5037, "step": 19063 }, { "epoch": 0.5213301246991906, "grad_norm": 1.0264344215393066, "learning_rate": 9.796302132621351e-06, "loss": 0.5149, "step": 19064 }, { "epoch": 0.5213574710129074, "grad_norm": 1.2557361125946045, "learning_rate": 9.795416613032916e-06, "loss": 0.5048, "step": 19065 }, { "epoch": 0.5213848173266243, "grad_norm": 1.1280535459518433, "learning_rate": 9.794531095049381e-06, "loss": 0.8518, "step": 19066 }, { "epoch": 0.5214121636403413, "grad_norm": 1.2212942838668823, "learning_rate": 9.79364557867769e-06, "loss": 0.5088, "step": 19067 }, { "epoch": 0.5214395099540582, "grad_norm": 1.3001712560653687, "learning_rate": 9.79276006392479e-06, "loss": 0.5356, "step": 19068 }, { "epoch": 0.5214668562677751, "grad_norm": 1.5542515516281128, "learning_rate": 9.791874550797632e-06, "loss": 0.5028, "step": 19069 }, { "epoch": 0.521494202581492, "grad_norm": 1.2774845361709595, "learning_rate": 9.790989039303157e-06, "loss": 0.7526, "step": 19070 }, { "epoch": 0.521521548895209, "grad_norm": 1.353930950164795, "learning_rate": 9.790103529448318e-06, "loss": 0.7443, "step": 19071 }, { "epoch": 0.5215488952089259, "grad_norm": 1.4411219358444214, "learning_rate": 9.789218021240053e-06, "loss": 0.4992, "step": 19072 }, { "epoch": 0.5215762415226427, "grad_norm": 1.3100508451461792, "learning_rate": 9.788332514685315e-06, "loss": 0.4914, "step": 19073 }, { "epoch": 0.5216035878363596, "grad_norm": 1.3221722841262817, "learning_rate": 9.787447009791051e-06, "loss": 0.7941, "step": 19074 }, { "epoch": 0.5216309341500766, "grad_norm": 1.2487046718597412, "learning_rate": 9.786561506564202e-06, "loss": 0.4878, "step": 19075 }, { "epoch": 0.5216582804637935, "grad_norm": 1.4525598287582397, "learning_rate": 9.785676005011722e-06, "loss": 0.5308, "step": 19076 }, { "epoch": 0.5216856267775104, "grad_norm": 1.6123807430267334, "learning_rate": 9.78479050514055e-06, "loss": 0.4957, "step": 19077 }, { "epoch": 0.5217129730912273, "grad_norm": 1.2910183668136597, "learning_rate": 9.783905006957635e-06, "loss": 0.4959, "step": 19078 }, { "epoch": 0.5217403194049443, "grad_norm": 1.180845856666565, "learning_rate": 9.783019510469928e-06, "loss": 0.4991, "step": 19079 }, { "epoch": 0.5217676657186612, "grad_norm": 1.149154543876648, "learning_rate": 9.78213401568437e-06, "loss": 0.4979, "step": 19080 }, { "epoch": 0.521795012032378, "grad_norm": 1.3119847774505615, "learning_rate": 9.781248522607909e-06, "loss": 0.5114, "step": 19081 }, { "epoch": 0.5218223583460949, "grad_norm": 1.49118971824646, "learning_rate": 9.780363031247494e-06, "loss": 0.4236, "step": 19082 }, { "epoch": 0.5218497046598118, "grad_norm": 1.1879115104675293, "learning_rate": 9.779477541610067e-06, "loss": 0.4979, "step": 19083 }, { "epoch": 0.5218770509735288, "grad_norm": 1.3386290073394775, "learning_rate": 9.778592053702578e-06, "loss": 0.4942, "step": 19084 }, { "epoch": 0.5219043972872457, "grad_norm": 1.2908223867416382, "learning_rate": 9.77770656753197e-06, "loss": 0.5149, "step": 19085 }, { "epoch": 0.5219317436009626, "grad_norm": 1.279134750366211, "learning_rate": 9.776821083105193e-06, "loss": 0.518, "step": 19086 }, { "epoch": 0.5219590899146795, "grad_norm": 1.7449243068695068, "learning_rate": 9.775935600429191e-06, "loss": 0.3953, "step": 19087 }, { "epoch": 0.5219864362283965, "grad_norm": 1.4748834371566772, "learning_rate": 9.775050119510911e-06, "loss": 0.4122, "step": 19088 }, { "epoch": 0.5220137825421133, "grad_norm": 1.0246027708053589, "learning_rate": 9.774164640357301e-06, "loss": 0.492, "step": 19089 }, { "epoch": 0.5220411288558302, "grad_norm": 1.3542225360870361, "learning_rate": 9.773279162975305e-06, "loss": 0.4892, "step": 19090 }, { "epoch": 0.5220684751695471, "grad_norm": 1.6107468605041504, "learning_rate": 9.772393687371869e-06, "loss": 0.4015, "step": 19091 }, { "epoch": 0.522095821483264, "grad_norm": 1.171488642692566, "learning_rate": 9.771508213553943e-06, "loss": 0.4801, "step": 19092 }, { "epoch": 0.522123167796981, "grad_norm": 1.3078336715698242, "learning_rate": 9.770622741528468e-06, "loss": 0.4799, "step": 19093 }, { "epoch": 0.5221505141106979, "grad_norm": 1.473246693611145, "learning_rate": 9.769737271302393e-06, "loss": 0.4142, "step": 19094 }, { "epoch": 0.5221778604244148, "grad_norm": 1.237391471862793, "learning_rate": 9.768851802882668e-06, "loss": 0.7781, "step": 19095 }, { "epoch": 0.5222052067381318, "grad_norm": 1.1036840677261353, "learning_rate": 9.767966336276233e-06, "loss": 0.4805, "step": 19096 }, { "epoch": 0.5222325530518486, "grad_norm": 2.7936415672302246, "learning_rate": 9.767080871490038e-06, "loss": 0.5006, "step": 19097 }, { "epoch": 0.5222598993655655, "grad_norm": 1.335585117340088, "learning_rate": 9.76619540853103e-06, "loss": 0.4912, "step": 19098 }, { "epoch": 0.5222872456792824, "grad_norm": 1.9206352233886719, "learning_rate": 9.765309947406149e-06, "loss": 0.7766, "step": 19099 }, { "epoch": 0.5223145919929993, "grad_norm": 1.2717242240905762, "learning_rate": 9.764424488122348e-06, "loss": 0.5073, "step": 19100 }, { "epoch": 0.5223419383067163, "grad_norm": 1.4601848125457764, "learning_rate": 9.76353903068657e-06, "loss": 0.4912, "step": 19101 }, { "epoch": 0.5223692846204332, "grad_norm": 1.4967230558395386, "learning_rate": 9.76265357510576e-06, "loss": 0.3958, "step": 19102 }, { "epoch": 0.5223966309341501, "grad_norm": 1.5848464965820312, "learning_rate": 9.761768121386865e-06, "loss": 0.5259, "step": 19103 }, { "epoch": 0.5224239772478669, "grad_norm": 1.2807214260101318, "learning_rate": 9.760882669536836e-06, "loss": 0.5083, "step": 19104 }, { "epoch": 0.5224513235615839, "grad_norm": 1.1978358030319214, "learning_rate": 9.759997219562612e-06, "loss": 0.5241, "step": 19105 }, { "epoch": 0.5224786698753008, "grad_norm": 1.4554320573806763, "learning_rate": 9.759111771471145e-06, "loss": 0.4203, "step": 19106 }, { "epoch": 0.5225060161890177, "grad_norm": 1.1039173603057861, "learning_rate": 9.758226325269376e-06, "loss": 0.5239, "step": 19107 }, { "epoch": 0.5225333625027346, "grad_norm": 1.0906866788864136, "learning_rate": 9.757340880964254e-06, "loss": 0.4897, "step": 19108 }, { "epoch": 0.5225607088164516, "grad_norm": 1.0219919681549072, "learning_rate": 9.756455438562726e-06, "loss": 0.4755, "step": 19109 }, { "epoch": 0.5225880551301685, "grad_norm": 2.1931886672973633, "learning_rate": 9.755569998071734e-06, "loss": 0.5299, "step": 19110 }, { "epoch": 0.5226154014438854, "grad_norm": 1.2677347660064697, "learning_rate": 9.75468455949823e-06, "loss": 0.5318, "step": 19111 }, { "epoch": 0.5226427477576022, "grad_norm": 3.378648042678833, "learning_rate": 9.753799122849153e-06, "loss": 0.7953, "step": 19112 }, { "epoch": 0.5226700940713191, "grad_norm": 1.1990150213241577, "learning_rate": 9.752913688131454e-06, "loss": 0.4901, "step": 19113 }, { "epoch": 0.5226974403850361, "grad_norm": 1.3690696954727173, "learning_rate": 9.752028255352079e-06, "loss": 0.4914, "step": 19114 }, { "epoch": 0.522724786698753, "grad_norm": 1.1151437759399414, "learning_rate": 9.75114282451797e-06, "loss": 0.5219, "step": 19115 }, { "epoch": 0.5227521330124699, "grad_norm": 1.2826080322265625, "learning_rate": 9.750257395636075e-06, "loss": 0.3728, "step": 19116 }, { "epoch": 0.5227794793261868, "grad_norm": 1.2640221118927002, "learning_rate": 9.749371968713344e-06, "loss": 0.5015, "step": 19117 }, { "epoch": 0.5228068256399038, "grad_norm": 1.2268718481063843, "learning_rate": 9.748486543756717e-06, "loss": 0.4815, "step": 19118 }, { "epoch": 0.5228341719536207, "grad_norm": 1.314763069152832, "learning_rate": 9.747601120773143e-06, "loss": 0.5308, "step": 19119 }, { "epoch": 0.5228615182673375, "grad_norm": 1.2361143827438354, "learning_rate": 9.746715699769566e-06, "loss": 0.503, "step": 19120 }, { "epoch": 0.5228888645810544, "grad_norm": 1.4838533401489258, "learning_rate": 9.745830280752934e-06, "loss": 0.4053, "step": 19121 }, { "epoch": 0.5229162108947714, "grad_norm": 1.3952971696853638, "learning_rate": 9.744944863730192e-06, "loss": 0.481, "step": 19122 }, { "epoch": 0.5229435572084883, "grad_norm": 1.0135234594345093, "learning_rate": 9.744059448708285e-06, "loss": 0.451, "step": 19123 }, { "epoch": 0.5229709035222052, "grad_norm": 1.3809471130371094, "learning_rate": 9.743174035694159e-06, "loss": 0.5269, "step": 19124 }, { "epoch": 0.5229982498359221, "grad_norm": 1.552868127822876, "learning_rate": 9.742288624694764e-06, "loss": 0.5273, "step": 19125 }, { "epoch": 0.5230255961496391, "grad_norm": 1.172791600227356, "learning_rate": 9.74140321571704e-06, "loss": 0.5075, "step": 19126 }, { "epoch": 0.523052942463356, "grad_norm": 1.0998947620391846, "learning_rate": 9.740517808767935e-06, "loss": 0.4985, "step": 19127 }, { "epoch": 0.5230802887770728, "grad_norm": 1.2973400354385376, "learning_rate": 9.739632403854394e-06, "loss": 0.7787, "step": 19128 }, { "epoch": 0.5231076350907897, "grad_norm": 1.558098554611206, "learning_rate": 9.738747000983364e-06, "loss": 0.4724, "step": 19129 }, { "epoch": 0.5231349814045066, "grad_norm": 1.4051934480667114, "learning_rate": 9.737861600161792e-06, "loss": 0.553, "step": 19130 }, { "epoch": 0.5231623277182236, "grad_norm": 1.3209586143493652, "learning_rate": 9.736976201396618e-06, "loss": 0.528, "step": 19131 }, { "epoch": 0.5231896740319405, "grad_norm": 1.4233695268630981, "learning_rate": 9.736090804694797e-06, "loss": 0.5239, "step": 19132 }, { "epoch": 0.5232170203456574, "grad_norm": 1.5715141296386719, "learning_rate": 9.735205410063266e-06, "loss": 0.3739, "step": 19133 }, { "epoch": 0.5232443666593743, "grad_norm": 1.4535170793533325, "learning_rate": 9.734320017508976e-06, "loss": 0.4815, "step": 19134 }, { "epoch": 0.5232717129730913, "grad_norm": 1.3260539770126343, "learning_rate": 9.733434627038869e-06, "loss": 0.4797, "step": 19135 }, { "epoch": 0.5232990592868081, "grad_norm": 1.2974478006362915, "learning_rate": 9.732549238659894e-06, "loss": 0.7743, "step": 19136 }, { "epoch": 0.523326405600525, "grad_norm": 1.2281458377838135, "learning_rate": 9.731663852378993e-06, "loss": 0.4133, "step": 19137 }, { "epoch": 0.5233537519142419, "grad_norm": 1.2756718397140503, "learning_rate": 9.730778468203113e-06, "loss": 0.3876, "step": 19138 }, { "epoch": 0.5233810982279589, "grad_norm": 1.266113519668579, "learning_rate": 9.729893086139203e-06, "loss": 0.4959, "step": 19139 }, { "epoch": 0.5234084445416758, "grad_norm": 1.4859098196029663, "learning_rate": 9.729007706194203e-06, "loss": 0.5073, "step": 19140 }, { "epoch": 0.5234357908553927, "grad_norm": 1.2337191104888916, "learning_rate": 9.728122328375064e-06, "loss": 0.487, "step": 19141 }, { "epoch": 0.5234631371691096, "grad_norm": 1.4877915382385254, "learning_rate": 9.727236952688726e-06, "loss": 0.4264, "step": 19142 }, { "epoch": 0.5234904834828266, "grad_norm": 3.0292115211486816, "learning_rate": 9.726351579142137e-06, "loss": 0.7769, "step": 19143 }, { "epoch": 0.5235178297965434, "grad_norm": 1.1258254051208496, "learning_rate": 9.725466207742246e-06, "loss": 0.4972, "step": 19144 }, { "epoch": 0.5235451761102603, "grad_norm": 1.1523579359054565, "learning_rate": 9.724580838495992e-06, "loss": 0.4993, "step": 19145 }, { "epoch": 0.5235725224239772, "grad_norm": 1.4011563062667847, "learning_rate": 9.723695471410325e-06, "loss": 0.4715, "step": 19146 }, { "epoch": 0.5235998687376942, "grad_norm": 1.048205018043518, "learning_rate": 9.722810106492191e-06, "loss": 0.4823, "step": 19147 }, { "epoch": 0.5236272150514111, "grad_norm": 1.328457236289978, "learning_rate": 9.721924743748531e-06, "loss": 0.5186, "step": 19148 }, { "epoch": 0.523654561365128, "grad_norm": 1.1991320848464966, "learning_rate": 9.721039383186296e-06, "loss": 0.5037, "step": 19149 }, { "epoch": 0.5236819076788449, "grad_norm": 1.366956353187561, "learning_rate": 9.720154024812426e-06, "loss": 0.516, "step": 19150 }, { "epoch": 0.5237092539925619, "grad_norm": 1.3916436433792114, "learning_rate": 9.71926866863387e-06, "loss": 0.494, "step": 19151 }, { "epoch": 0.5237366003062787, "grad_norm": 1.3458945751190186, "learning_rate": 9.718383314657573e-06, "loss": 0.5239, "step": 19152 }, { "epoch": 0.5237639466199956, "grad_norm": 1.4423317909240723, "learning_rate": 9.717497962890478e-06, "loss": 0.7838, "step": 19153 }, { "epoch": 0.5237912929337125, "grad_norm": 1.4726667404174805, "learning_rate": 9.716612613339534e-06, "loss": 0.4337, "step": 19154 }, { "epoch": 0.5238186392474294, "grad_norm": 1.5549900531768799, "learning_rate": 9.715727266011683e-06, "loss": 0.3956, "step": 19155 }, { "epoch": 0.5238459855611464, "grad_norm": 1.1751797199249268, "learning_rate": 9.71484192091387e-06, "loss": 0.4882, "step": 19156 }, { "epoch": 0.5238733318748633, "grad_norm": 1.2221013307571411, "learning_rate": 9.713956578053046e-06, "loss": 0.5172, "step": 19157 }, { "epoch": 0.5239006781885802, "grad_norm": 1.3168580532073975, "learning_rate": 9.713071237436148e-06, "loss": 0.4971, "step": 19158 }, { "epoch": 0.5239280245022971, "grad_norm": 1.3462458848953247, "learning_rate": 9.712185899070127e-06, "loss": 0.5015, "step": 19159 }, { "epoch": 0.523955370816014, "grad_norm": 1.4185724258422852, "learning_rate": 9.711300562961927e-06, "loss": 0.5165, "step": 19160 }, { "epoch": 0.5239827171297309, "grad_norm": 1.3159360885620117, "learning_rate": 9.710415229118493e-06, "loss": 0.5015, "step": 19161 }, { "epoch": 0.5240100634434478, "grad_norm": 1.4702047109603882, "learning_rate": 9.70952989754677e-06, "loss": 0.5183, "step": 19162 }, { "epoch": 0.5240374097571647, "grad_norm": 1.2536693811416626, "learning_rate": 9.708644568253702e-06, "loss": 0.5118, "step": 19163 }, { "epoch": 0.5240647560708817, "grad_norm": 1.1134073734283447, "learning_rate": 9.707759241246235e-06, "loss": 0.4732, "step": 19164 }, { "epoch": 0.5240921023845986, "grad_norm": 1.3503004312515259, "learning_rate": 9.706873916531318e-06, "loss": 0.5058, "step": 19165 }, { "epoch": 0.5241194486983155, "grad_norm": 1.4897456169128418, "learning_rate": 9.70598859411589e-06, "loss": 0.4821, "step": 19166 }, { "epoch": 0.5241467950120324, "grad_norm": 1.4405497312545776, "learning_rate": 9.7051032740069e-06, "loss": 0.5208, "step": 19167 }, { "epoch": 0.5241741413257492, "grad_norm": 1.414843201637268, "learning_rate": 9.704217956211289e-06, "loss": 0.4124, "step": 19168 }, { "epoch": 0.5242014876394662, "grad_norm": 1.6197569370269775, "learning_rate": 9.703332640736011e-06, "loss": 0.5035, "step": 19169 }, { "epoch": 0.5242288339531831, "grad_norm": 1.5328010320663452, "learning_rate": 9.702447327588001e-06, "loss": 0.3933, "step": 19170 }, { "epoch": 0.5242561802669, "grad_norm": 1.2859455347061157, "learning_rate": 9.701562016774208e-06, "loss": 0.4931, "step": 19171 }, { "epoch": 0.5242835265806169, "grad_norm": 1.3143199682235718, "learning_rate": 9.700676708301576e-06, "loss": 0.5131, "step": 19172 }, { "epoch": 0.5243108728943339, "grad_norm": 1.2309849262237549, "learning_rate": 9.699791402177051e-06, "loss": 0.5294, "step": 19173 }, { "epoch": 0.5243382192080508, "grad_norm": 1.2208203077316284, "learning_rate": 9.69890609840758e-06, "loss": 0.5282, "step": 19174 }, { "epoch": 0.5243655655217677, "grad_norm": 1.6604020595550537, "learning_rate": 9.698020797000104e-06, "loss": 0.4991, "step": 19175 }, { "epoch": 0.5243929118354845, "grad_norm": 1.2478573322296143, "learning_rate": 9.697135497961572e-06, "loss": 0.4998, "step": 19176 }, { "epoch": 0.5244202581492015, "grad_norm": 1.3063361644744873, "learning_rate": 9.696250201298924e-06, "loss": 0.4807, "step": 19177 }, { "epoch": 0.5244476044629184, "grad_norm": 1.3704537153244019, "learning_rate": 9.695364907019108e-06, "loss": 0.4203, "step": 19178 }, { "epoch": 0.5244749507766353, "grad_norm": 1.1485443115234375, "learning_rate": 9.694479615129071e-06, "loss": 0.462, "step": 19179 }, { "epoch": 0.5245022970903522, "grad_norm": 1.2934157848358154, "learning_rate": 9.693594325635752e-06, "loss": 0.5018, "step": 19180 }, { "epoch": 0.5245296434040692, "grad_norm": 1.1872260570526123, "learning_rate": 9.6927090385461e-06, "loss": 0.4901, "step": 19181 }, { "epoch": 0.5245569897177861, "grad_norm": 1.291764259338379, "learning_rate": 9.691823753867062e-06, "loss": 0.533, "step": 19182 }, { "epoch": 0.524584336031503, "grad_norm": 1.458061933517456, "learning_rate": 9.69093847160558e-06, "loss": 0.3716, "step": 19183 }, { "epoch": 0.5246116823452198, "grad_norm": 1.3380101919174194, "learning_rate": 9.690053191768597e-06, "loss": 0.493, "step": 19184 }, { "epoch": 0.5246390286589367, "grad_norm": 1.1016420125961304, "learning_rate": 9.689167914363058e-06, "loss": 0.5188, "step": 19185 }, { "epoch": 0.5246663749726537, "grad_norm": 1.2765744924545288, "learning_rate": 9.68828263939591e-06, "loss": 0.4854, "step": 19186 }, { "epoch": 0.5246937212863706, "grad_norm": 1.4064816236495972, "learning_rate": 9.6873973668741e-06, "loss": 0.3493, "step": 19187 }, { "epoch": 0.5247210676000875, "grad_norm": 1.2433509826660156, "learning_rate": 9.686512096804565e-06, "loss": 0.5166, "step": 19188 }, { "epoch": 0.5247484139138044, "grad_norm": 1.0804227590560913, "learning_rate": 9.685626829194258e-06, "loss": 0.5002, "step": 19189 }, { "epoch": 0.5247757602275214, "grad_norm": 1.5131444931030273, "learning_rate": 9.684741564050118e-06, "loss": 0.4119, "step": 19190 }, { "epoch": 0.5248031065412383, "grad_norm": 1.2826443910598755, "learning_rate": 9.683856301379092e-06, "loss": 0.8063, "step": 19191 }, { "epoch": 0.5248304528549551, "grad_norm": 1.2435261011123657, "learning_rate": 9.682971041188127e-06, "loss": 0.4909, "step": 19192 }, { "epoch": 0.524857799168672, "grad_norm": 1.5016745328903198, "learning_rate": 9.68208578348416e-06, "loss": 0.4323, "step": 19193 }, { "epoch": 0.524885145482389, "grad_norm": 1.8037309646606445, "learning_rate": 9.681200528274144e-06, "loss": 0.3863, "step": 19194 }, { "epoch": 0.5249124917961059, "grad_norm": 1.2896575927734375, "learning_rate": 9.680315275565021e-06, "loss": 0.4979, "step": 19195 }, { "epoch": 0.5249398381098228, "grad_norm": 1.419865608215332, "learning_rate": 9.679430025363732e-06, "loss": 0.5233, "step": 19196 }, { "epoch": 0.5249671844235397, "grad_norm": 1.1128737926483154, "learning_rate": 9.678544777677226e-06, "loss": 0.5309, "step": 19197 }, { "epoch": 0.5249945307372567, "grad_norm": 1.325248122215271, "learning_rate": 9.677659532512445e-06, "loss": 0.775, "step": 19198 }, { "epoch": 0.5250218770509735, "grad_norm": 1.7841358184814453, "learning_rate": 9.676774289876334e-06, "loss": 0.379, "step": 19199 }, { "epoch": 0.5250492233646904, "grad_norm": 1.1037688255310059, "learning_rate": 9.67588904977584e-06, "loss": 0.5043, "step": 19200 }, { "epoch": 0.5250765696784073, "grad_norm": 1.2673298120498657, "learning_rate": 9.6750038122179e-06, "loss": 0.5149, "step": 19201 }, { "epoch": 0.5251039159921242, "grad_norm": 1.2827204465866089, "learning_rate": 9.674118577209469e-06, "loss": 0.5185, "step": 19202 }, { "epoch": 0.5251312623058412, "grad_norm": 1.0779658555984497, "learning_rate": 9.673233344757484e-06, "loss": 0.5054, "step": 19203 }, { "epoch": 0.5251586086195581, "grad_norm": 1.3443489074707031, "learning_rate": 9.672348114868894e-06, "loss": 0.5118, "step": 19204 }, { "epoch": 0.525185954933275, "grad_norm": 1.2319833040237427, "learning_rate": 9.67146288755064e-06, "loss": 0.7617, "step": 19205 }, { "epoch": 0.525213301246992, "grad_norm": 1.8078885078430176, "learning_rate": 9.670577662809665e-06, "loss": 0.4925, "step": 19206 }, { "epoch": 0.5252406475607088, "grad_norm": 1.0798403024673462, "learning_rate": 9.669692440652915e-06, "loss": 0.4742, "step": 19207 }, { "epoch": 0.5252679938744257, "grad_norm": 1.1122249364852905, "learning_rate": 9.668807221087336e-06, "loss": 0.4722, "step": 19208 }, { "epoch": 0.5252953401881426, "grad_norm": 1.5129306316375732, "learning_rate": 9.667922004119871e-06, "loss": 0.5234, "step": 19209 }, { "epoch": 0.5253226865018595, "grad_norm": 1.081008791923523, "learning_rate": 9.667036789757465e-06, "loss": 0.4899, "step": 19210 }, { "epoch": 0.5253500328155765, "grad_norm": 1.3444818258285522, "learning_rate": 9.66615157800706e-06, "loss": 0.3959, "step": 19211 }, { "epoch": 0.5253773791292934, "grad_norm": 1.2506383657455444, "learning_rate": 9.665266368875604e-06, "loss": 0.5307, "step": 19212 }, { "epoch": 0.5254047254430103, "grad_norm": 1.4424406290054321, "learning_rate": 9.664381162370038e-06, "loss": 0.4908, "step": 19213 }, { "epoch": 0.5254320717567272, "grad_norm": 1.1923298835754395, "learning_rate": 9.663495958497309e-06, "loss": 0.5357, "step": 19214 }, { "epoch": 0.525459418070444, "grad_norm": 1.2192559242248535, "learning_rate": 9.662610757264357e-06, "loss": 0.5023, "step": 19215 }, { "epoch": 0.525486764384161, "grad_norm": 1.5539448261260986, "learning_rate": 9.661725558678129e-06, "loss": 0.7662, "step": 19216 }, { "epoch": 0.5255141106978779, "grad_norm": 1.0693110227584839, "learning_rate": 9.66084036274557e-06, "loss": 0.481, "step": 19217 }, { "epoch": 0.5255414570115948, "grad_norm": 1.6415215730667114, "learning_rate": 9.659955169473621e-06, "loss": 0.5294, "step": 19218 }, { "epoch": 0.5255688033253118, "grad_norm": 1.5090711116790771, "learning_rate": 9.659069978869232e-06, "loss": 0.4592, "step": 19219 }, { "epoch": 0.5255961496390287, "grad_norm": 1.3517698049545288, "learning_rate": 9.658184790939339e-06, "loss": 0.487, "step": 19220 }, { "epoch": 0.5256234959527456, "grad_norm": 1.087142825126648, "learning_rate": 9.657299605690891e-06, "loss": 0.4677, "step": 19221 }, { "epoch": 0.5256508422664625, "grad_norm": 1.1988108158111572, "learning_rate": 9.656414423130833e-06, "loss": 0.5034, "step": 19222 }, { "epoch": 0.5256781885801793, "grad_norm": 1.4404367208480835, "learning_rate": 9.655529243266106e-06, "loss": 0.4723, "step": 19223 }, { "epoch": 0.5257055348938963, "grad_norm": 1.441590428352356, "learning_rate": 9.654644066103653e-06, "loss": 0.3935, "step": 19224 }, { "epoch": 0.5257328812076132, "grad_norm": 1.6429946422576904, "learning_rate": 9.653758891650424e-06, "loss": 0.4031, "step": 19225 }, { "epoch": 0.5257602275213301, "grad_norm": 1.2320114374160767, "learning_rate": 9.652873719913356e-06, "loss": 0.5025, "step": 19226 }, { "epoch": 0.525787573835047, "grad_norm": 1.3245208263397217, "learning_rate": 9.651988550899398e-06, "loss": 0.3658, "step": 19227 }, { "epoch": 0.525814920148764, "grad_norm": 1.1607915163040161, "learning_rate": 9.65110338461549e-06, "loss": 0.4896, "step": 19228 }, { "epoch": 0.5258422664624809, "grad_norm": 1.241031527519226, "learning_rate": 9.65021822106858e-06, "loss": 0.515, "step": 19229 }, { "epoch": 0.5258696127761978, "grad_norm": 1.2010828256607056, "learning_rate": 9.64933306026561e-06, "loss": 0.5025, "step": 19230 }, { "epoch": 0.5258969590899146, "grad_norm": 1.2584681510925293, "learning_rate": 9.64844790221352e-06, "loss": 0.5162, "step": 19231 }, { "epoch": 0.5259243054036316, "grad_norm": 1.3314099311828613, "learning_rate": 9.64756274691926e-06, "loss": 0.3632, "step": 19232 }, { "epoch": 0.5259516517173485, "grad_norm": 1.058179497718811, "learning_rate": 9.646677594389769e-06, "loss": 0.4826, "step": 19233 }, { "epoch": 0.5259789980310654, "grad_norm": 1.6996965408325195, "learning_rate": 9.645792444631993e-06, "loss": 0.5025, "step": 19234 }, { "epoch": 0.5260063443447823, "grad_norm": 1.157667636871338, "learning_rate": 9.644907297652879e-06, "loss": 0.4888, "step": 19235 }, { "epoch": 0.5260336906584993, "grad_norm": 1.274001121520996, "learning_rate": 9.644022153459364e-06, "loss": 0.4878, "step": 19236 }, { "epoch": 0.5260610369722162, "grad_norm": 1.6084462404251099, "learning_rate": 9.643137012058394e-06, "loss": 0.4751, "step": 19237 }, { "epoch": 0.5260883832859331, "grad_norm": 1.3355927467346191, "learning_rate": 9.642251873456918e-06, "loss": 0.4944, "step": 19238 }, { "epoch": 0.5261157295996499, "grad_norm": 1.5842597484588623, "learning_rate": 9.641366737661877e-06, "loss": 0.382, "step": 19239 }, { "epoch": 0.5261430759133668, "grad_norm": 1.6463936567306519, "learning_rate": 9.640481604680207e-06, "loss": 0.4954, "step": 19240 }, { "epoch": 0.5261704222270838, "grad_norm": 1.6437273025512695, "learning_rate": 9.639596474518862e-06, "loss": 0.5151, "step": 19241 }, { "epoch": 0.5261977685408007, "grad_norm": 1.2768752574920654, "learning_rate": 9.638711347184778e-06, "loss": 0.5086, "step": 19242 }, { "epoch": 0.5262251148545176, "grad_norm": 1.4884597063064575, "learning_rate": 9.637826222684901e-06, "loss": 0.5028, "step": 19243 }, { "epoch": 0.5262524611682345, "grad_norm": 1.4539459943771362, "learning_rate": 9.63694110102618e-06, "loss": 0.4646, "step": 19244 }, { "epoch": 0.5262798074819515, "grad_norm": 1.6578470468521118, "learning_rate": 9.63605598221555e-06, "loss": 0.3948, "step": 19245 }, { "epoch": 0.5263071537956684, "grad_norm": 1.4848129749298096, "learning_rate": 9.635170866259958e-06, "loss": 0.4849, "step": 19246 }, { "epoch": 0.5263345001093852, "grad_norm": 1.2518951892852783, "learning_rate": 9.634285753166351e-06, "loss": 0.5146, "step": 19247 }, { "epoch": 0.5263618464231021, "grad_norm": 2.7215309143066406, "learning_rate": 9.633400642941666e-06, "loss": 0.3272, "step": 19248 }, { "epoch": 0.5263891927368191, "grad_norm": 1.1010451316833496, "learning_rate": 9.632515535592852e-06, "loss": 0.51, "step": 19249 }, { "epoch": 0.526416539050536, "grad_norm": 1.261064887046814, "learning_rate": 9.63163043112685e-06, "loss": 0.4754, "step": 19250 }, { "epoch": 0.5264438853642529, "grad_norm": 1.329127311706543, "learning_rate": 9.630745329550601e-06, "loss": 0.5089, "step": 19251 }, { "epoch": 0.5264712316779698, "grad_norm": 1.1537318229675293, "learning_rate": 9.629860230871055e-06, "loss": 0.3564, "step": 19252 }, { "epoch": 0.5264985779916868, "grad_norm": 1.398547887802124, "learning_rate": 9.628975135095149e-06, "loss": 0.4942, "step": 19253 }, { "epoch": 0.5265259243054037, "grad_norm": 2.1669445037841797, "learning_rate": 9.628090042229828e-06, "loss": 0.3837, "step": 19254 }, { "epoch": 0.5265532706191205, "grad_norm": 1.163297176361084, "learning_rate": 9.627204952282038e-06, "loss": 0.7879, "step": 19255 }, { "epoch": 0.5265806169328374, "grad_norm": 1.705659031867981, "learning_rate": 9.62631986525872e-06, "loss": 0.4096, "step": 19256 }, { "epoch": 0.5266079632465543, "grad_norm": 1.6831899881362915, "learning_rate": 9.625434781166817e-06, "loss": 0.4105, "step": 19257 }, { "epoch": 0.5266353095602713, "grad_norm": 1.4117743968963623, "learning_rate": 9.624549700013272e-06, "loss": 0.7873, "step": 19258 }, { "epoch": 0.5266626558739882, "grad_norm": 1.3060425519943237, "learning_rate": 9.623664621805029e-06, "loss": 0.4804, "step": 19259 }, { "epoch": 0.5266900021877051, "grad_norm": 1.4953744411468506, "learning_rate": 9.622779546549032e-06, "loss": 0.4702, "step": 19260 }, { "epoch": 0.526717348501422, "grad_norm": 1.412814736366272, "learning_rate": 9.621894474252223e-06, "loss": 0.5118, "step": 19261 }, { "epoch": 0.526744694815139, "grad_norm": 1.4146720170974731, "learning_rate": 9.621009404921547e-06, "loss": 0.382, "step": 19262 }, { "epoch": 0.5267720411288558, "grad_norm": 2.34291410446167, "learning_rate": 9.620124338563943e-06, "loss": 0.792, "step": 19263 }, { "epoch": 0.5267993874425727, "grad_norm": 1.3058788776397705, "learning_rate": 9.619239275186355e-06, "loss": 0.4993, "step": 19264 }, { "epoch": 0.5268267337562896, "grad_norm": 1.1730561256408691, "learning_rate": 9.618354214795733e-06, "loss": 0.4806, "step": 19265 }, { "epoch": 0.5268540800700066, "grad_norm": 1.6332592964172363, "learning_rate": 9.617469157399012e-06, "loss": 0.5086, "step": 19266 }, { "epoch": 0.5268814263837235, "grad_norm": 1.5109845399856567, "learning_rate": 9.616584103003137e-06, "loss": 0.3917, "step": 19267 }, { "epoch": 0.5269087726974404, "grad_norm": 1.9713375568389893, "learning_rate": 9.615699051615055e-06, "loss": 0.5118, "step": 19268 }, { "epoch": 0.5269361190111573, "grad_norm": 1.4829492568969727, "learning_rate": 9.614814003241702e-06, "loss": 0.3403, "step": 19269 }, { "epoch": 0.5269634653248743, "grad_norm": 1.3111815452575684, "learning_rate": 9.613928957890027e-06, "loss": 0.5179, "step": 19270 }, { "epoch": 0.5269908116385911, "grad_norm": 1.4811850786209106, "learning_rate": 9.61304391556697e-06, "loss": 0.4857, "step": 19271 }, { "epoch": 0.527018157952308, "grad_norm": 1.583450436592102, "learning_rate": 9.612158876279473e-06, "loss": 0.4073, "step": 19272 }, { "epoch": 0.5270455042660249, "grad_norm": 2.5391085147857666, "learning_rate": 9.611273840034483e-06, "loss": 0.5075, "step": 19273 }, { "epoch": 0.5270728505797418, "grad_norm": 1.5898423194885254, "learning_rate": 9.610388806838943e-06, "loss": 0.4312, "step": 19274 }, { "epoch": 0.5271001968934588, "grad_norm": 1.2167946100234985, "learning_rate": 9.609503776699788e-06, "loss": 0.4933, "step": 19275 }, { "epoch": 0.5271275432071757, "grad_norm": 4.841585159301758, "learning_rate": 9.608618749623968e-06, "loss": 0.509, "step": 19276 }, { "epoch": 0.5271548895208926, "grad_norm": 1.244529128074646, "learning_rate": 9.607733725618423e-06, "loss": 0.5097, "step": 19277 }, { "epoch": 0.5271822358346095, "grad_norm": 1.4752585887908936, "learning_rate": 9.606848704690098e-06, "loss": 0.4982, "step": 19278 }, { "epoch": 0.5272095821483264, "grad_norm": 1.2059640884399414, "learning_rate": 9.605963686845933e-06, "loss": 0.5001, "step": 19279 }, { "epoch": 0.5272369284620433, "grad_norm": 1.24424147605896, "learning_rate": 9.605078672092872e-06, "loss": 0.7781, "step": 19280 }, { "epoch": 0.5272642747757602, "grad_norm": 1.1446058750152588, "learning_rate": 9.604193660437856e-06, "loss": 0.493, "step": 19281 }, { "epoch": 0.5272916210894771, "grad_norm": 1.3574138879776, "learning_rate": 9.603308651887832e-06, "loss": 0.5339, "step": 19282 }, { "epoch": 0.5273189674031941, "grad_norm": 1.4629569053649902, "learning_rate": 9.602423646449739e-06, "loss": 0.5127, "step": 19283 }, { "epoch": 0.527346313716911, "grad_norm": 1.0887339115142822, "learning_rate": 9.601538644130521e-06, "loss": 0.4995, "step": 19284 }, { "epoch": 0.5273736600306279, "grad_norm": 1.1776360273361206, "learning_rate": 9.60065364493712e-06, "loss": 0.5032, "step": 19285 }, { "epoch": 0.5274010063443448, "grad_norm": 14.440381050109863, "learning_rate": 9.599768648876477e-06, "loss": 0.3755, "step": 19286 }, { "epoch": 0.5274283526580616, "grad_norm": 1.185412049293518, "learning_rate": 9.59888365595554e-06, "loss": 0.538, "step": 19287 }, { "epoch": 0.5274556989717786, "grad_norm": 1.165951132774353, "learning_rate": 9.597998666181243e-06, "loss": 0.4793, "step": 19288 }, { "epoch": 0.5274830452854955, "grad_norm": 1.3125066757202148, "learning_rate": 9.597113679560536e-06, "loss": 0.4886, "step": 19289 }, { "epoch": 0.5275103915992124, "grad_norm": 1.340580701828003, "learning_rate": 9.596228696100361e-06, "loss": 0.4941, "step": 19290 }, { "epoch": 0.5275377379129293, "grad_norm": 1.1618046760559082, "learning_rate": 9.595343715807654e-06, "loss": 0.5079, "step": 19291 }, { "epoch": 0.5275650842266463, "grad_norm": 1.322864294052124, "learning_rate": 9.594458738689366e-06, "loss": 0.4882, "step": 19292 }, { "epoch": 0.5275924305403632, "grad_norm": 1.3618448972702026, "learning_rate": 9.593573764752431e-06, "loss": 0.5015, "step": 19293 }, { "epoch": 0.5276197768540801, "grad_norm": 1.057616114616394, "learning_rate": 9.592688794003797e-06, "loss": 0.4854, "step": 19294 }, { "epoch": 0.5276471231677969, "grad_norm": 1.233747124671936, "learning_rate": 9.591803826450408e-06, "loss": 0.4994, "step": 19295 }, { "epoch": 0.5276744694815139, "grad_norm": 1.327770709991455, "learning_rate": 9.5909188620992e-06, "loss": 0.7655, "step": 19296 }, { "epoch": 0.5277018157952308, "grad_norm": 1.2956424951553345, "learning_rate": 9.590033900957118e-06, "loss": 0.8315, "step": 19297 }, { "epoch": 0.5277291621089477, "grad_norm": 1.7268879413604736, "learning_rate": 9.589148943031105e-06, "loss": 0.8026, "step": 19298 }, { "epoch": 0.5277565084226646, "grad_norm": 1.1739052534103394, "learning_rate": 9.588263988328102e-06, "loss": 0.4973, "step": 19299 }, { "epoch": 0.5277838547363816, "grad_norm": 1.198407769203186, "learning_rate": 9.587379036855054e-06, "loss": 0.4811, "step": 19300 }, { "epoch": 0.5278112010500985, "grad_norm": 2.303548574447632, "learning_rate": 9.5864940886189e-06, "loss": 0.5118, "step": 19301 }, { "epoch": 0.5278385473638153, "grad_norm": 1.1985609531402588, "learning_rate": 9.585609143626584e-06, "loss": 0.5005, "step": 19302 }, { "epoch": 0.5278658936775322, "grad_norm": 1.3102205991744995, "learning_rate": 9.58472420188505e-06, "loss": 0.4875, "step": 19303 }, { "epoch": 0.5278932399912492, "grad_norm": 1.2870539426803589, "learning_rate": 9.583839263401234e-06, "loss": 0.4922, "step": 19304 }, { "epoch": 0.5279205863049661, "grad_norm": 1.3528975248336792, "learning_rate": 9.582954328182084e-06, "loss": 0.4887, "step": 19305 }, { "epoch": 0.527947932618683, "grad_norm": 1.1649829149246216, "learning_rate": 9.58206939623454e-06, "loss": 0.5154, "step": 19306 }, { "epoch": 0.5279752789323999, "grad_norm": 1.2488617897033691, "learning_rate": 9.581184467565541e-06, "loss": 0.5041, "step": 19307 }, { "epoch": 0.5280026252461169, "grad_norm": 2.486940860748291, "learning_rate": 9.580299542182037e-06, "loss": 0.3217, "step": 19308 }, { "epoch": 0.5280299715598338, "grad_norm": 1.557213544845581, "learning_rate": 9.57941462009096e-06, "loss": 0.4199, "step": 19309 }, { "epoch": 0.5280573178735506, "grad_norm": 1.4328726530075073, "learning_rate": 9.578529701299264e-06, "loss": 0.433, "step": 19310 }, { "epoch": 0.5280846641872675, "grad_norm": 1.1414704322814941, "learning_rate": 9.577644785813878e-06, "loss": 0.5178, "step": 19311 }, { "epoch": 0.5281120105009844, "grad_norm": 1.2200312614440918, "learning_rate": 9.576759873641753e-06, "loss": 0.4942, "step": 19312 }, { "epoch": 0.5281393568147014, "grad_norm": 1.3144456148147583, "learning_rate": 9.575874964789824e-06, "loss": 0.4634, "step": 19313 }, { "epoch": 0.5281667031284183, "grad_norm": 1.3293815851211548, "learning_rate": 9.574990059265039e-06, "loss": 0.4835, "step": 19314 }, { "epoch": 0.5281940494421352, "grad_norm": 1.429075837135315, "learning_rate": 9.574105157074336e-06, "loss": 0.4924, "step": 19315 }, { "epoch": 0.5282213957558521, "grad_norm": 1.0784517526626587, "learning_rate": 9.573220258224658e-06, "loss": 0.4747, "step": 19316 }, { "epoch": 0.5282487420695691, "grad_norm": 1.2876582145690918, "learning_rate": 9.57233536272295e-06, "loss": 0.5022, "step": 19317 }, { "epoch": 0.5282760883832859, "grad_norm": 1.5642248392105103, "learning_rate": 9.571450470576145e-06, "loss": 0.5176, "step": 19318 }, { "epoch": 0.5283034346970028, "grad_norm": 0.9897289872169495, "learning_rate": 9.570565581791194e-06, "loss": 0.3351, "step": 19319 }, { "epoch": 0.5283307810107197, "grad_norm": 1.4789537191390991, "learning_rate": 9.569680696375037e-06, "loss": 0.5008, "step": 19320 }, { "epoch": 0.5283581273244367, "grad_norm": 1.3693403005599976, "learning_rate": 9.568795814334611e-06, "loss": 0.4682, "step": 19321 }, { "epoch": 0.5283854736381536, "grad_norm": 1.3174339532852173, "learning_rate": 9.567910935676862e-06, "loss": 0.5277, "step": 19322 }, { "epoch": 0.5284128199518705, "grad_norm": 1.4597268104553223, "learning_rate": 9.567026060408729e-06, "loss": 0.4126, "step": 19323 }, { "epoch": 0.5284401662655874, "grad_norm": 1.308384895324707, "learning_rate": 9.566141188537154e-06, "loss": 0.5089, "step": 19324 }, { "epoch": 0.5284675125793044, "grad_norm": 1.288558840751648, "learning_rate": 9.565256320069081e-06, "loss": 0.5108, "step": 19325 }, { "epoch": 0.5284948588930212, "grad_norm": 1.1717231273651123, "learning_rate": 9.564371455011449e-06, "loss": 0.4863, "step": 19326 }, { "epoch": 0.5285222052067381, "grad_norm": 1.3333052396774292, "learning_rate": 9.563486593371202e-06, "loss": 0.5177, "step": 19327 }, { "epoch": 0.528549551520455, "grad_norm": 1.1708415746688843, "learning_rate": 9.562601735155279e-06, "loss": 0.4907, "step": 19328 }, { "epoch": 0.5285768978341719, "grad_norm": 1.2755874395370483, "learning_rate": 9.56171688037062e-06, "loss": 0.5043, "step": 19329 }, { "epoch": 0.5286042441478889, "grad_norm": 1.095322847366333, "learning_rate": 9.560832029024173e-06, "loss": 0.4828, "step": 19330 }, { "epoch": 0.5286315904616058, "grad_norm": 1.3770040273666382, "learning_rate": 9.559947181122871e-06, "loss": 0.7725, "step": 19331 }, { "epoch": 0.5286589367753227, "grad_norm": 1.151900291442871, "learning_rate": 9.559062336673659e-06, "loss": 0.5139, "step": 19332 }, { "epoch": 0.5286862830890396, "grad_norm": 1.2122259140014648, "learning_rate": 9.558177495683482e-06, "loss": 0.4934, "step": 19333 }, { "epoch": 0.5287136294027565, "grad_norm": 1.3923895359039307, "learning_rate": 9.557292658159278e-06, "loss": 0.762, "step": 19334 }, { "epoch": 0.5287409757164734, "grad_norm": 1.2420169115066528, "learning_rate": 9.556407824107988e-06, "loss": 0.7956, "step": 19335 }, { "epoch": 0.5287683220301903, "grad_norm": 1.1524651050567627, "learning_rate": 9.555522993536552e-06, "loss": 0.4762, "step": 19336 }, { "epoch": 0.5287956683439072, "grad_norm": 1.3261648416519165, "learning_rate": 9.554638166451914e-06, "loss": 0.5336, "step": 19337 }, { "epoch": 0.5288230146576242, "grad_norm": 1.1585720777511597, "learning_rate": 9.553753342861017e-06, "loss": 0.5152, "step": 19338 }, { "epoch": 0.5288503609713411, "grad_norm": 1.6722753047943115, "learning_rate": 9.552868522770795e-06, "loss": 0.5179, "step": 19339 }, { "epoch": 0.528877707285058, "grad_norm": 1.2988924980163574, "learning_rate": 9.551983706188198e-06, "loss": 0.4938, "step": 19340 }, { "epoch": 0.5289050535987749, "grad_norm": 1.785205602645874, "learning_rate": 9.55109889312016e-06, "loss": 0.4174, "step": 19341 }, { "epoch": 0.5289323999124917, "grad_norm": 4.747771263122559, "learning_rate": 9.550214083573624e-06, "loss": 0.8039, "step": 19342 }, { "epoch": 0.5289597462262087, "grad_norm": 1.3797340393066406, "learning_rate": 9.549329277555535e-06, "loss": 0.4831, "step": 19343 }, { "epoch": 0.5289870925399256, "grad_norm": 2.020536184310913, "learning_rate": 9.548444475072828e-06, "loss": 0.5084, "step": 19344 }, { "epoch": 0.5290144388536425, "grad_norm": 1.5693070888519287, "learning_rate": 9.547559676132452e-06, "loss": 0.4989, "step": 19345 }, { "epoch": 0.5290417851673594, "grad_norm": 1.1412897109985352, "learning_rate": 9.546674880741338e-06, "loss": 0.4872, "step": 19346 }, { "epoch": 0.5290691314810764, "grad_norm": 1.2785581350326538, "learning_rate": 9.545790088906435e-06, "loss": 0.7597, "step": 19347 }, { "epoch": 0.5290964777947933, "grad_norm": 1.5243208408355713, "learning_rate": 9.544905300634678e-06, "loss": 0.5075, "step": 19348 }, { "epoch": 0.5291238241085102, "grad_norm": 1.2561548948287964, "learning_rate": 9.544020515933013e-06, "loss": 0.4826, "step": 19349 }, { "epoch": 0.529151170422227, "grad_norm": 1.3622745275497437, "learning_rate": 9.543135734808377e-06, "loss": 0.4919, "step": 19350 }, { "epoch": 0.529178516735944, "grad_norm": 1.20238196849823, "learning_rate": 9.542250957267714e-06, "loss": 0.48, "step": 19351 }, { "epoch": 0.5292058630496609, "grad_norm": 1.2773983478546143, "learning_rate": 9.541366183317964e-06, "loss": 0.527, "step": 19352 }, { "epoch": 0.5292332093633778, "grad_norm": 1.2198951244354248, "learning_rate": 9.540481412966064e-06, "loss": 0.503, "step": 19353 }, { "epoch": 0.5292605556770947, "grad_norm": 1.2595652341842651, "learning_rate": 9.53959664621896e-06, "loss": 0.5052, "step": 19354 }, { "epoch": 0.5292879019908117, "grad_norm": 1.319054365158081, "learning_rate": 9.538711883083592e-06, "loss": 0.4836, "step": 19355 }, { "epoch": 0.5293152483045286, "grad_norm": 1.2919776439666748, "learning_rate": 9.537827123566897e-06, "loss": 0.3389, "step": 19356 }, { "epoch": 0.5293425946182455, "grad_norm": 1.2883307933807373, "learning_rate": 9.536942367675822e-06, "loss": 0.5049, "step": 19357 }, { "epoch": 0.5293699409319623, "grad_norm": 1.1307258605957031, "learning_rate": 9.5360576154173e-06, "loss": 0.5105, "step": 19358 }, { "epoch": 0.5293972872456792, "grad_norm": 1.4015036821365356, "learning_rate": 9.535172866798276e-06, "loss": 0.4892, "step": 19359 }, { "epoch": 0.5294246335593962, "grad_norm": 1.2604444026947021, "learning_rate": 9.534288121825691e-06, "loss": 0.5185, "step": 19360 }, { "epoch": 0.5294519798731131, "grad_norm": 1.615089774131775, "learning_rate": 9.533403380506484e-06, "loss": 0.4618, "step": 19361 }, { "epoch": 0.52947932618683, "grad_norm": 1.254033088684082, "learning_rate": 9.532518642847598e-06, "loss": 0.5064, "step": 19362 }, { "epoch": 0.529506672500547, "grad_norm": 1.215752363204956, "learning_rate": 9.531633908855968e-06, "loss": 0.5101, "step": 19363 }, { "epoch": 0.5295340188142639, "grad_norm": 1.4063446521759033, "learning_rate": 9.53074917853854e-06, "loss": 0.5367, "step": 19364 }, { "epoch": 0.5295613651279808, "grad_norm": 1.107061743736267, "learning_rate": 9.529864451902253e-06, "loss": 0.4937, "step": 19365 }, { "epoch": 0.5295887114416976, "grad_norm": 1.159675121307373, "learning_rate": 9.528979728954048e-06, "loss": 0.5101, "step": 19366 }, { "epoch": 0.5296160577554145, "grad_norm": 1.3377840518951416, "learning_rate": 9.528095009700862e-06, "loss": 0.5319, "step": 19367 }, { "epoch": 0.5296434040691315, "grad_norm": 1.1572670936584473, "learning_rate": 9.527210294149641e-06, "loss": 0.4773, "step": 19368 }, { "epoch": 0.5296707503828484, "grad_norm": 1.1480790376663208, "learning_rate": 9.52632558230732e-06, "loss": 0.4684, "step": 19369 }, { "epoch": 0.5296980966965653, "grad_norm": 1.109803318977356, "learning_rate": 9.525440874180844e-06, "loss": 0.4867, "step": 19370 }, { "epoch": 0.5297254430102822, "grad_norm": 1.164994478225708, "learning_rate": 9.524556169777148e-06, "loss": 0.4853, "step": 19371 }, { "epoch": 0.5297527893239992, "grad_norm": 1.1939963102340698, "learning_rate": 9.523671469103174e-06, "loss": 0.4779, "step": 19372 }, { "epoch": 0.5297801356377161, "grad_norm": 2.1089069843292236, "learning_rate": 9.522786772165868e-06, "loss": 0.7719, "step": 19373 }, { "epoch": 0.5298074819514329, "grad_norm": 1.2899596691131592, "learning_rate": 9.521902078972161e-06, "loss": 0.4671, "step": 19374 }, { "epoch": 0.5298348282651498, "grad_norm": 1.4316431283950806, "learning_rate": 9.521017389528998e-06, "loss": 0.4177, "step": 19375 }, { "epoch": 0.5298621745788668, "grad_norm": 1.3128430843353271, "learning_rate": 9.520132703843322e-06, "loss": 0.8062, "step": 19376 }, { "epoch": 0.5298895208925837, "grad_norm": 1.2266732454299927, "learning_rate": 9.519248021922066e-06, "loss": 0.3908, "step": 19377 }, { "epoch": 0.5299168672063006, "grad_norm": 1.2037535905838013, "learning_rate": 9.518363343772178e-06, "loss": 0.4436, "step": 19378 }, { "epoch": 0.5299442135200175, "grad_norm": 1.556134819984436, "learning_rate": 9.51747866940059e-06, "loss": 0.5214, "step": 19379 }, { "epoch": 0.5299715598337345, "grad_norm": 1.4579241275787354, "learning_rate": 9.516593998814252e-06, "loss": 0.4772, "step": 19380 }, { "epoch": 0.5299989061474514, "grad_norm": 1.4107060432434082, "learning_rate": 9.515709332020094e-06, "loss": 0.4745, "step": 19381 }, { "epoch": 0.5300262524611682, "grad_norm": 1.2861894369125366, "learning_rate": 9.514824669025062e-06, "loss": 0.4865, "step": 19382 }, { "epoch": 0.5300535987748851, "grad_norm": 1.298382043838501, "learning_rate": 9.51394000983609e-06, "loss": 0.49, "step": 19383 }, { "epoch": 0.530080945088602, "grad_norm": 1.324622631072998, "learning_rate": 9.513055354460123e-06, "loss": 0.7308, "step": 19384 }, { "epoch": 0.530108291402319, "grad_norm": 1.307143211364746, "learning_rate": 9.512170702904103e-06, "loss": 0.3834, "step": 19385 }, { "epoch": 0.5301356377160359, "grad_norm": 1.2136142253875732, "learning_rate": 9.511286055174962e-06, "loss": 0.5027, "step": 19386 }, { "epoch": 0.5301629840297528, "grad_norm": 1.5626388788223267, "learning_rate": 9.510401411279649e-06, "loss": 0.7391, "step": 19387 }, { "epoch": 0.5301903303434697, "grad_norm": 1.3065820932388306, "learning_rate": 9.509516771225094e-06, "loss": 0.5402, "step": 19388 }, { "epoch": 0.5302176766571867, "grad_norm": 1.2309849262237549, "learning_rate": 9.508632135018244e-06, "loss": 0.4974, "step": 19389 }, { "epoch": 0.5302450229709035, "grad_norm": 1.3352009057998657, "learning_rate": 9.507747502666038e-06, "loss": 0.3858, "step": 19390 }, { "epoch": 0.5302723692846204, "grad_norm": 1.1712379455566406, "learning_rate": 9.506862874175413e-06, "loss": 0.4831, "step": 19391 }, { "epoch": 0.5302997155983373, "grad_norm": 1.8992213010787964, "learning_rate": 9.505978249553311e-06, "loss": 0.4684, "step": 19392 }, { "epoch": 0.5303270619120543, "grad_norm": 1.266648530960083, "learning_rate": 9.50509362880667e-06, "loss": 0.5062, "step": 19393 }, { "epoch": 0.5303544082257712, "grad_norm": 3.505113363265991, "learning_rate": 9.504209011942429e-06, "loss": 0.4414, "step": 19394 }, { "epoch": 0.5303817545394881, "grad_norm": 1.1378984451293945, "learning_rate": 9.503324398967531e-06, "loss": 0.5133, "step": 19395 }, { "epoch": 0.530409100853205, "grad_norm": 1.5368988513946533, "learning_rate": 9.502439789888912e-06, "loss": 0.4843, "step": 19396 }, { "epoch": 0.530436447166922, "grad_norm": 1.2609106302261353, "learning_rate": 9.501555184713512e-06, "loss": 0.5139, "step": 19397 }, { "epoch": 0.5304637934806388, "grad_norm": 1.4629005193710327, "learning_rate": 9.500670583448274e-06, "loss": 0.4985, "step": 19398 }, { "epoch": 0.5304911397943557, "grad_norm": 1.2058613300323486, "learning_rate": 9.499785986100132e-06, "loss": 0.4812, "step": 19399 }, { "epoch": 0.5305184861080726, "grad_norm": 0.9018003344535828, "learning_rate": 9.49890139267603e-06, "loss": 0.3893, "step": 19400 }, { "epoch": 0.5305458324217895, "grad_norm": 3.4081521034240723, "learning_rate": 9.498016803182905e-06, "loss": 0.5079, "step": 19401 }, { "epoch": 0.5305731787355065, "grad_norm": 1.2546660900115967, "learning_rate": 9.497132217627696e-06, "loss": 0.5229, "step": 19402 }, { "epoch": 0.5306005250492234, "grad_norm": 1.2735024690628052, "learning_rate": 9.496247636017346e-06, "loss": 0.5397, "step": 19403 }, { "epoch": 0.5306278713629403, "grad_norm": 1.4213327169418335, "learning_rate": 9.495363058358787e-06, "loss": 0.5377, "step": 19404 }, { "epoch": 0.5306552176766571, "grad_norm": 1.1693775653839111, "learning_rate": 9.494478484658966e-06, "loss": 0.4876, "step": 19405 }, { "epoch": 0.5306825639903741, "grad_norm": 1.4030145406723022, "learning_rate": 9.493593914924817e-06, "loss": 0.7708, "step": 19406 }, { "epoch": 0.530709910304091, "grad_norm": 1.3147053718566895, "learning_rate": 9.492709349163282e-06, "loss": 0.4642, "step": 19407 }, { "epoch": 0.5307372566178079, "grad_norm": 1.5665264129638672, "learning_rate": 9.491824787381303e-06, "loss": 0.7366, "step": 19408 }, { "epoch": 0.5307646029315248, "grad_norm": 1.3731443881988525, "learning_rate": 9.49094022958581e-06, "loss": 0.5078, "step": 19409 }, { "epoch": 0.5307919492452418, "grad_norm": 1.4921560287475586, "learning_rate": 9.49005567578375e-06, "loss": 0.3981, "step": 19410 }, { "epoch": 0.5308192955589587, "grad_norm": 1.2956138849258423, "learning_rate": 9.489171125982063e-06, "loss": 0.4685, "step": 19411 }, { "epoch": 0.5308466418726756, "grad_norm": 1.405053734779358, "learning_rate": 9.488286580187679e-06, "loss": 0.7777, "step": 19412 }, { "epoch": 0.5308739881863924, "grad_norm": 1.0583943128585815, "learning_rate": 9.487402038407547e-06, "loss": 0.4747, "step": 19413 }, { "epoch": 0.5309013345001093, "grad_norm": 1.363736629486084, "learning_rate": 9.4865175006486e-06, "loss": 0.4508, "step": 19414 }, { "epoch": 0.5309286808138263, "grad_norm": 1.389195203781128, "learning_rate": 9.485632966917781e-06, "loss": 0.4883, "step": 19415 }, { "epoch": 0.5309560271275432, "grad_norm": 1.6642087697982788, "learning_rate": 9.484748437222025e-06, "loss": 0.4371, "step": 19416 }, { "epoch": 0.5309833734412601, "grad_norm": 1.2034419775009155, "learning_rate": 9.483863911568272e-06, "loss": 0.486, "step": 19417 }, { "epoch": 0.531010719754977, "grad_norm": 1.230351448059082, "learning_rate": 9.48297938996346e-06, "loss": 0.5002, "step": 19418 }, { "epoch": 0.531038066068694, "grad_norm": 1.3179669380187988, "learning_rate": 9.48209487241453e-06, "loss": 0.5185, "step": 19419 }, { "epoch": 0.5310654123824109, "grad_norm": 1.3687608242034912, "learning_rate": 9.481210358928421e-06, "loss": 0.49, "step": 19420 }, { "epoch": 0.5310927586961277, "grad_norm": 1.594258189201355, "learning_rate": 9.48032584951207e-06, "loss": 0.3787, "step": 19421 }, { "epoch": 0.5311201050098446, "grad_norm": 1.1530522108078003, "learning_rate": 9.479441344172419e-06, "loss": 0.4971, "step": 19422 }, { "epoch": 0.5311474513235616, "grad_norm": 1.4785640239715576, "learning_rate": 9.4785568429164e-06, "loss": 0.3714, "step": 19423 }, { "epoch": 0.5311747976372785, "grad_norm": 1.1823104619979858, "learning_rate": 9.477672345750957e-06, "loss": 0.5108, "step": 19424 }, { "epoch": 0.5312021439509954, "grad_norm": 1.3157405853271484, "learning_rate": 9.47678785268303e-06, "loss": 0.4769, "step": 19425 }, { "epoch": 0.5312294902647123, "grad_norm": 1.5477464199066162, "learning_rate": 9.475903363719553e-06, "loss": 0.4769, "step": 19426 }, { "epoch": 0.5312568365784293, "grad_norm": 1.2903910875320435, "learning_rate": 9.475018878867467e-06, "loss": 0.4843, "step": 19427 }, { "epoch": 0.5312841828921462, "grad_norm": 1.2881863117218018, "learning_rate": 9.47413439813371e-06, "loss": 0.5006, "step": 19428 }, { "epoch": 0.531311529205863, "grad_norm": 3.2911481857299805, "learning_rate": 9.473249921525218e-06, "loss": 0.7739, "step": 19429 }, { "epoch": 0.5313388755195799, "grad_norm": 1.268661379814148, "learning_rate": 9.472365449048935e-06, "loss": 0.4855, "step": 19430 }, { "epoch": 0.5313662218332968, "grad_norm": 1.1574256420135498, "learning_rate": 9.471480980711796e-06, "loss": 0.516, "step": 19431 }, { "epoch": 0.5313935681470138, "grad_norm": 1.4240057468414307, "learning_rate": 9.47059651652074e-06, "loss": 0.4296, "step": 19432 }, { "epoch": 0.5314209144607307, "grad_norm": 1.1997758150100708, "learning_rate": 9.469712056482708e-06, "loss": 0.5027, "step": 19433 }, { "epoch": 0.5314482607744476, "grad_norm": 1.2391949892044067, "learning_rate": 9.46882760060463e-06, "loss": 0.5352, "step": 19434 }, { "epoch": 0.5314756070881645, "grad_norm": 2.0545785427093506, "learning_rate": 9.467943148893456e-06, "loss": 0.4865, "step": 19435 }, { "epoch": 0.5315029534018815, "grad_norm": 1.3373743295669556, "learning_rate": 9.467058701356114e-06, "loss": 0.3849, "step": 19436 }, { "epoch": 0.5315302997155983, "grad_norm": 2.26042103767395, "learning_rate": 9.466174257999547e-06, "loss": 0.5123, "step": 19437 }, { "epoch": 0.5315576460293152, "grad_norm": 1.3742716312408447, "learning_rate": 9.465289818830694e-06, "loss": 0.4625, "step": 19438 }, { "epoch": 0.5315849923430321, "grad_norm": 1.1237003803253174, "learning_rate": 9.464405383856493e-06, "loss": 0.4697, "step": 19439 }, { "epoch": 0.5316123386567491, "grad_norm": 1.266007661819458, "learning_rate": 9.463520953083878e-06, "loss": 0.495, "step": 19440 }, { "epoch": 0.531639684970466, "grad_norm": 1.399917483329773, "learning_rate": 9.462636526519793e-06, "loss": 0.7716, "step": 19441 }, { "epoch": 0.5316670312841829, "grad_norm": 1.155901312828064, "learning_rate": 9.461752104171171e-06, "loss": 0.4957, "step": 19442 }, { "epoch": 0.5316943775978998, "grad_norm": 1.233644962310791, "learning_rate": 9.460867686044955e-06, "loss": 0.476, "step": 19443 }, { "epoch": 0.5317217239116168, "grad_norm": 1.3727210760116577, "learning_rate": 9.459983272148077e-06, "loss": 0.4935, "step": 19444 }, { "epoch": 0.5317490702253336, "grad_norm": 1.3505206108093262, "learning_rate": 9.459098862487479e-06, "loss": 0.3735, "step": 19445 }, { "epoch": 0.5317764165390505, "grad_norm": 1.4421859979629517, "learning_rate": 9.4582144570701e-06, "loss": 0.5289, "step": 19446 }, { "epoch": 0.5318037628527674, "grad_norm": 1.5320242643356323, "learning_rate": 9.457330055902874e-06, "loss": 0.4038, "step": 19447 }, { "epoch": 0.5318311091664844, "grad_norm": 1.3935363292694092, "learning_rate": 9.456445658992744e-06, "loss": 0.5305, "step": 19448 }, { "epoch": 0.5318584554802013, "grad_norm": 1.5016595125198364, "learning_rate": 9.455561266346642e-06, "loss": 0.4712, "step": 19449 }, { "epoch": 0.5318858017939182, "grad_norm": 1.415299892425537, "learning_rate": 9.454676877971514e-06, "loss": 0.5123, "step": 19450 }, { "epoch": 0.5319131481076351, "grad_norm": 1.6028562784194946, "learning_rate": 9.453792493874287e-06, "loss": 0.4698, "step": 19451 }, { "epoch": 0.531940494421352, "grad_norm": 1.3988654613494873, "learning_rate": 9.452908114061906e-06, "loss": 0.4044, "step": 19452 }, { "epoch": 0.5319678407350689, "grad_norm": 1.586876630783081, "learning_rate": 9.452023738541305e-06, "loss": 0.5287, "step": 19453 }, { "epoch": 0.5319951870487858, "grad_norm": 1.2201249599456787, "learning_rate": 9.451139367319424e-06, "loss": 0.4706, "step": 19454 }, { "epoch": 0.5320225333625027, "grad_norm": 1.2626680135726929, "learning_rate": 9.450255000403203e-06, "loss": 0.4501, "step": 19455 }, { "epoch": 0.5320498796762196, "grad_norm": 1.606696605682373, "learning_rate": 9.449370637799573e-06, "loss": 0.4851, "step": 19456 }, { "epoch": 0.5320772259899366, "grad_norm": 1.5822858810424805, "learning_rate": 9.448486279515478e-06, "loss": 0.5264, "step": 19457 }, { "epoch": 0.5321045723036535, "grad_norm": 2.3923206329345703, "learning_rate": 9.447601925557852e-06, "loss": 0.7401, "step": 19458 }, { "epoch": 0.5321319186173704, "grad_norm": 1.2182059288024902, "learning_rate": 9.446717575933632e-06, "loss": 0.7594, "step": 19459 }, { "epoch": 0.5321592649310873, "grad_norm": 1.2532981634140015, "learning_rate": 9.445833230649759e-06, "loss": 0.5117, "step": 19460 }, { "epoch": 0.5321866112448042, "grad_norm": 1.401574969291687, "learning_rate": 9.444948889713167e-06, "loss": 0.51, "step": 19461 }, { "epoch": 0.5322139575585211, "grad_norm": 2.1438913345336914, "learning_rate": 9.444064553130795e-06, "loss": 0.5165, "step": 19462 }, { "epoch": 0.532241303872238, "grad_norm": 1.2021650075912476, "learning_rate": 9.44318022090958e-06, "loss": 0.5114, "step": 19463 }, { "epoch": 0.5322686501859549, "grad_norm": 1.1729429960250854, "learning_rate": 9.44229589305646e-06, "loss": 0.5265, "step": 19464 }, { "epoch": 0.5322959964996719, "grad_norm": 1.2368863821029663, "learning_rate": 9.441411569578373e-06, "loss": 0.4836, "step": 19465 }, { "epoch": 0.5323233428133888, "grad_norm": 1.795874834060669, "learning_rate": 9.440527250482253e-06, "loss": 0.5174, "step": 19466 }, { "epoch": 0.5323506891271057, "grad_norm": 1.4038267135620117, "learning_rate": 9.43964293577504e-06, "loss": 0.3574, "step": 19467 }, { "epoch": 0.5323780354408226, "grad_norm": 1.1981929540634155, "learning_rate": 9.438758625463672e-06, "loss": 0.5176, "step": 19468 }, { "epoch": 0.5324053817545394, "grad_norm": 1.623670220375061, "learning_rate": 9.437874319555082e-06, "loss": 0.4074, "step": 19469 }, { "epoch": 0.5324327280682564, "grad_norm": 1.1986531019210815, "learning_rate": 9.436990018056212e-06, "loss": 0.462, "step": 19470 }, { "epoch": 0.5324600743819733, "grad_norm": 1.3060452938079834, "learning_rate": 9.436105720973995e-06, "loss": 0.5018, "step": 19471 }, { "epoch": 0.5324874206956902, "grad_norm": 1.1131540536880493, "learning_rate": 9.435221428315369e-06, "loss": 0.4915, "step": 19472 }, { "epoch": 0.5325147670094071, "grad_norm": 1.2688910961151123, "learning_rate": 9.434337140087276e-06, "loss": 0.5002, "step": 19473 }, { "epoch": 0.5325421133231241, "grad_norm": 1.0501827001571655, "learning_rate": 9.433452856296646e-06, "loss": 0.5284, "step": 19474 }, { "epoch": 0.532569459636841, "grad_norm": 1.7368561029434204, "learning_rate": 9.432568576950418e-06, "loss": 0.5114, "step": 19475 }, { "epoch": 0.5325968059505579, "grad_norm": 0.9991858005523682, "learning_rate": 9.431684302055533e-06, "loss": 0.3857, "step": 19476 }, { "epoch": 0.5326241522642747, "grad_norm": 1.1132469177246094, "learning_rate": 9.430800031618921e-06, "loss": 0.4859, "step": 19477 }, { "epoch": 0.5326514985779917, "grad_norm": 3.772738218307495, "learning_rate": 9.429915765647527e-06, "loss": 0.4932, "step": 19478 }, { "epoch": 0.5326788448917086, "grad_norm": 1.3813676834106445, "learning_rate": 9.42903150414828e-06, "loss": 0.375, "step": 19479 }, { "epoch": 0.5327061912054255, "grad_norm": 1.177600622177124, "learning_rate": 9.42814724712812e-06, "loss": 0.5061, "step": 19480 }, { "epoch": 0.5327335375191424, "grad_norm": 1.2133132219314575, "learning_rate": 9.427262994593986e-06, "loss": 0.4829, "step": 19481 }, { "epoch": 0.5327608838328594, "grad_norm": 1.2695488929748535, "learning_rate": 9.426378746552812e-06, "loss": 0.492, "step": 19482 }, { "epoch": 0.5327882301465763, "grad_norm": 1.1792820692062378, "learning_rate": 9.425494503011536e-06, "loss": 0.3844, "step": 19483 }, { "epoch": 0.5328155764602932, "grad_norm": 1.329693078994751, "learning_rate": 9.424610263977091e-06, "loss": 0.5066, "step": 19484 }, { "epoch": 0.53284292277401, "grad_norm": 1.2863378524780273, "learning_rate": 9.423726029456423e-06, "loss": 0.4807, "step": 19485 }, { "epoch": 0.5328702690877269, "grad_norm": 1.2800989151000977, "learning_rate": 9.422841799456456e-06, "loss": 0.7437, "step": 19486 }, { "epoch": 0.5328976154014439, "grad_norm": 1.184130311012268, "learning_rate": 9.421957573984136e-06, "loss": 0.5055, "step": 19487 }, { "epoch": 0.5329249617151608, "grad_norm": 1.2492778301239014, "learning_rate": 9.421073353046393e-06, "loss": 0.4984, "step": 19488 }, { "epoch": 0.5329523080288777, "grad_norm": 1.1848257780075073, "learning_rate": 9.420189136650169e-06, "loss": 0.3699, "step": 19489 }, { "epoch": 0.5329796543425946, "grad_norm": 1.4389622211456299, "learning_rate": 9.419304924802397e-06, "loss": 0.5195, "step": 19490 }, { "epoch": 0.5330070006563116, "grad_norm": 1.1683948040008545, "learning_rate": 9.418420717510015e-06, "loss": 0.495, "step": 19491 }, { "epoch": 0.5330343469700285, "grad_norm": 1.428513765335083, "learning_rate": 9.41753651477996e-06, "loss": 0.5265, "step": 19492 }, { "epoch": 0.5330616932837453, "grad_norm": 1.3455708026885986, "learning_rate": 9.416652316619164e-06, "loss": 0.543, "step": 19493 }, { "epoch": 0.5330890395974622, "grad_norm": 1.3270974159240723, "learning_rate": 9.415768123034566e-06, "loss": 0.4799, "step": 19494 }, { "epoch": 0.5331163859111792, "grad_norm": 1.0900760889053345, "learning_rate": 9.414883934033106e-06, "loss": 0.5136, "step": 19495 }, { "epoch": 0.5331437322248961, "grad_norm": 1.5961428880691528, "learning_rate": 9.413999749621714e-06, "loss": 0.4163, "step": 19496 }, { "epoch": 0.533171078538613, "grad_norm": 1.181011438369751, "learning_rate": 9.413115569807331e-06, "loss": 0.5019, "step": 19497 }, { "epoch": 0.5331984248523299, "grad_norm": 1.141695499420166, "learning_rate": 9.412231394596891e-06, "loss": 0.5202, "step": 19498 }, { "epoch": 0.5332257711660469, "grad_norm": 1.2926661968231201, "learning_rate": 9.411347223997329e-06, "loss": 0.4961, "step": 19499 }, { "epoch": 0.5332531174797638, "grad_norm": 2.3257851600646973, "learning_rate": 9.410463058015585e-06, "loss": 0.4901, "step": 19500 }, { "epoch": 0.5332804637934806, "grad_norm": 1.1394569873809814, "learning_rate": 9.40957889665859e-06, "loss": 0.8025, "step": 19501 }, { "epoch": 0.5333078101071975, "grad_norm": 1.1361194849014282, "learning_rate": 9.408694739933282e-06, "loss": 0.7891, "step": 19502 }, { "epoch": 0.5333351564209144, "grad_norm": 1.3343617916107178, "learning_rate": 9.4078105878466e-06, "loss": 0.3993, "step": 19503 }, { "epoch": 0.5333625027346314, "grad_norm": 1.4115899801254272, "learning_rate": 9.406926440405475e-06, "loss": 0.505, "step": 19504 }, { "epoch": 0.5333898490483483, "grad_norm": 1.2200294733047485, "learning_rate": 9.406042297616845e-06, "loss": 0.5029, "step": 19505 }, { "epoch": 0.5334171953620652, "grad_norm": 1.1118041276931763, "learning_rate": 9.405158159487648e-06, "loss": 0.5134, "step": 19506 }, { "epoch": 0.5334445416757821, "grad_norm": 1.1585077047348022, "learning_rate": 9.404274026024816e-06, "loss": 0.5082, "step": 19507 }, { "epoch": 0.533471887989499, "grad_norm": 1.2400132417678833, "learning_rate": 9.40338989723529e-06, "loss": 0.5204, "step": 19508 }, { "epoch": 0.5334992343032159, "grad_norm": 1.4016950130462646, "learning_rate": 9.402505773125998e-06, "loss": 0.4789, "step": 19509 }, { "epoch": 0.5335265806169328, "grad_norm": 1.0836364030838013, "learning_rate": 9.401621653703881e-06, "loss": 0.505, "step": 19510 }, { "epoch": 0.5335539269306497, "grad_norm": 1.5541980266571045, "learning_rate": 9.400737538975876e-06, "loss": 0.4073, "step": 19511 }, { "epoch": 0.5335812732443667, "grad_norm": 1.3543241024017334, "learning_rate": 9.399853428948914e-06, "loss": 0.4955, "step": 19512 }, { "epoch": 0.5336086195580836, "grad_norm": 1.2212109565734863, "learning_rate": 9.398969323629935e-06, "loss": 0.772, "step": 19513 }, { "epoch": 0.5336359658718005, "grad_norm": 1.4016720056533813, "learning_rate": 9.39808522302587e-06, "loss": 0.4996, "step": 19514 }, { "epoch": 0.5336633121855174, "grad_norm": 1.2752108573913574, "learning_rate": 9.397201127143658e-06, "loss": 0.4812, "step": 19515 }, { "epoch": 0.5336906584992342, "grad_norm": 1.132022500038147, "learning_rate": 9.396317035990235e-06, "loss": 0.483, "step": 19516 }, { "epoch": 0.5337180048129512, "grad_norm": 1.2915750741958618, "learning_rate": 9.395432949572532e-06, "loss": 0.5183, "step": 19517 }, { "epoch": 0.5337453511266681, "grad_norm": 3.363600015640259, "learning_rate": 9.394548867897489e-06, "loss": 0.4923, "step": 19518 }, { "epoch": 0.533772697440385, "grad_norm": 1.7418068647384644, "learning_rate": 9.39366479097204e-06, "loss": 0.4741, "step": 19519 }, { "epoch": 0.533800043754102, "grad_norm": 1.4255927801132202, "learning_rate": 9.392780718803121e-06, "loss": 0.3489, "step": 19520 }, { "epoch": 0.5338273900678189, "grad_norm": 1.4707832336425781, "learning_rate": 9.391896651397669e-06, "loss": 0.532, "step": 19521 }, { "epoch": 0.5338547363815358, "grad_norm": 1.5004727840423584, "learning_rate": 9.391012588762615e-06, "loss": 0.5161, "step": 19522 }, { "epoch": 0.5338820826952527, "grad_norm": 1.4486016035079956, "learning_rate": 9.390128530904891e-06, "loss": 0.4121, "step": 19523 }, { "epoch": 0.5339094290089695, "grad_norm": 0.9979230165481567, "learning_rate": 9.38924447783144e-06, "loss": 0.4792, "step": 19524 }, { "epoch": 0.5339367753226865, "grad_norm": 1.3838152885437012, "learning_rate": 9.388360429549195e-06, "loss": 0.343, "step": 19525 }, { "epoch": 0.5339641216364034, "grad_norm": 1.7881888151168823, "learning_rate": 9.38747638606509e-06, "loss": 0.79, "step": 19526 }, { "epoch": 0.5339914679501203, "grad_norm": 1.2189280986785889, "learning_rate": 9.38659234738606e-06, "loss": 0.4935, "step": 19527 }, { "epoch": 0.5340188142638372, "grad_norm": 1.0899531841278076, "learning_rate": 9.38570831351904e-06, "loss": 0.4831, "step": 19528 }, { "epoch": 0.5340461605775542, "grad_norm": 1.613340973854065, "learning_rate": 9.384824284470965e-06, "loss": 0.4107, "step": 19529 }, { "epoch": 0.5340735068912711, "grad_norm": 1.0673953294754028, "learning_rate": 9.383940260248772e-06, "loss": 0.4954, "step": 19530 }, { "epoch": 0.534100853204988, "grad_norm": 1.381557822227478, "learning_rate": 9.383056240859393e-06, "loss": 0.5141, "step": 19531 }, { "epoch": 0.5341281995187048, "grad_norm": 1.144340991973877, "learning_rate": 9.382172226309763e-06, "loss": 0.7615, "step": 19532 }, { "epoch": 0.5341555458324218, "grad_norm": 1.390089511871338, "learning_rate": 9.381288216606821e-06, "loss": 0.7626, "step": 19533 }, { "epoch": 0.5341828921461387, "grad_norm": 1.1747267246246338, "learning_rate": 9.380404211757497e-06, "loss": 0.789, "step": 19534 }, { "epoch": 0.5342102384598556, "grad_norm": 1.1641311645507812, "learning_rate": 9.379520211768728e-06, "loss": 0.5062, "step": 19535 }, { "epoch": 0.5342375847735725, "grad_norm": 1.787057876586914, "learning_rate": 9.378636216647447e-06, "loss": 0.5448, "step": 19536 }, { "epoch": 0.5342649310872895, "grad_norm": 1.4490951299667358, "learning_rate": 9.377752226400591e-06, "loss": 0.5037, "step": 19537 }, { "epoch": 0.5342922774010064, "grad_norm": 1.4016779661178589, "learning_rate": 9.376868241035095e-06, "loss": 0.4891, "step": 19538 }, { "epoch": 0.5343196237147233, "grad_norm": 1.1277693510055542, "learning_rate": 9.375984260557891e-06, "loss": 0.3852, "step": 19539 }, { "epoch": 0.5343469700284401, "grad_norm": 1.2486419677734375, "learning_rate": 9.375100284975913e-06, "loss": 0.506, "step": 19540 }, { "epoch": 0.534374316342157, "grad_norm": 1.0710432529449463, "learning_rate": 9.374216314296101e-06, "loss": 0.4723, "step": 19541 }, { "epoch": 0.534401662655874, "grad_norm": 1.692209005355835, "learning_rate": 9.373332348525384e-06, "loss": 0.4816, "step": 19542 }, { "epoch": 0.5344290089695909, "grad_norm": 1.49550199508667, "learning_rate": 9.3724483876707e-06, "loss": 0.4083, "step": 19543 }, { "epoch": 0.5344563552833078, "grad_norm": 1.374481201171875, "learning_rate": 9.37156443173898e-06, "loss": 0.4868, "step": 19544 }, { "epoch": 0.5344837015970247, "grad_norm": 1.1397382020950317, "learning_rate": 9.37068048073716e-06, "loss": 0.4649, "step": 19545 }, { "epoch": 0.5345110479107417, "grad_norm": 1.167648196220398, "learning_rate": 9.369796534672176e-06, "loss": 0.4874, "step": 19546 }, { "epoch": 0.5345383942244586, "grad_norm": 1.2320274114608765, "learning_rate": 9.368912593550959e-06, "loss": 0.4915, "step": 19547 }, { "epoch": 0.5345657405381754, "grad_norm": 1.1987686157226562, "learning_rate": 9.368028657380448e-06, "loss": 0.496, "step": 19548 }, { "epoch": 0.5345930868518923, "grad_norm": 1.2805615663528442, "learning_rate": 9.36714472616757e-06, "loss": 0.5202, "step": 19549 }, { "epoch": 0.5346204331656093, "grad_norm": 1.3704863786697388, "learning_rate": 9.366260799919266e-06, "loss": 0.478, "step": 19550 }, { "epoch": 0.5346477794793262, "grad_norm": 1.4693794250488281, "learning_rate": 9.36537687864247e-06, "loss": 0.5096, "step": 19551 }, { "epoch": 0.5346751257930431, "grad_norm": 1.3073748350143433, "learning_rate": 9.36449296234411e-06, "loss": 0.4919, "step": 19552 }, { "epoch": 0.53470247210676, "grad_norm": 1.4281724691390991, "learning_rate": 9.363609051031124e-06, "loss": 0.4908, "step": 19553 }, { "epoch": 0.534729818420477, "grad_norm": 1.46432626247406, "learning_rate": 9.362725144710449e-06, "loss": 0.4065, "step": 19554 }, { "epoch": 0.5347571647341939, "grad_norm": 1.271746277809143, "learning_rate": 9.361841243389013e-06, "loss": 0.4792, "step": 19555 }, { "epoch": 0.5347845110479107, "grad_norm": 1.3582465648651123, "learning_rate": 9.360957347073757e-06, "loss": 0.523, "step": 19556 }, { "epoch": 0.5348118573616276, "grad_norm": 1.2177906036376953, "learning_rate": 9.36007345577161e-06, "loss": 0.5179, "step": 19557 }, { "epoch": 0.5348392036753445, "grad_norm": 1.176626205444336, "learning_rate": 9.359189569489504e-06, "loss": 0.3698, "step": 19558 }, { "epoch": 0.5348665499890615, "grad_norm": 1.2170857191085815, "learning_rate": 9.358305688234373e-06, "loss": 0.4973, "step": 19559 }, { "epoch": 0.5348938963027784, "grad_norm": 1.3102725744247437, "learning_rate": 9.357421812013158e-06, "loss": 0.4989, "step": 19560 }, { "epoch": 0.5349212426164953, "grad_norm": 1.322657585144043, "learning_rate": 9.356537940832787e-06, "loss": 0.5261, "step": 19561 }, { "epoch": 0.5349485889302122, "grad_norm": 1.0688902139663696, "learning_rate": 9.355654074700191e-06, "loss": 0.4843, "step": 19562 }, { "epoch": 0.5349759352439292, "grad_norm": 1.3349534273147583, "learning_rate": 9.354770213622313e-06, "loss": 0.7728, "step": 19563 }, { "epoch": 0.535003281557646, "grad_norm": 1.3434457778930664, "learning_rate": 9.353886357606077e-06, "loss": 0.4844, "step": 19564 }, { "epoch": 0.5350306278713629, "grad_norm": 1.8618371486663818, "learning_rate": 9.353002506658425e-06, "loss": 0.5214, "step": 19565 }, { "epoch": 0.5350579741850798, "grad_norm": 1.530248761177063, "learning_rate": 9.352118660786281e-06, "loss": 0.3674, "step": 19566 }, { "epoch": 0.5350853204987968, "grad_norm": 1.2709274291992188, "learning_rate": 9.351234819996585e-06, "loss": 0.4815, "step": 19567 }, { "epoch": 0.5351126668125137, "grad_norm": 1.1865882873535156, "learning_rate": 9.350350984296272e-06, "loss": 0.5251, "step": 19568 }, { "epoch": 0.5351400131262306, "grad_norm": 1.601810097694397, "learning_rate": 9.34946715369227e-06, "loss": 0.5222, "step": 19569 }, { "epoch": 0.5351673594399475, "grad_norm": 1.6402028799057007, "learning_rate": 9.348583328191517e-06, "loss": 0.4806, "step": 19570 }, { "epoch": 0.5351947057536645, "grad_norm": 1.1823973655700684, "learning_rate": 9.347699507800945e-06, "loss": 0.5129, "step": 19571 }, { "epoch": 0.5352220520673813, "grad_norm": 1.1562433242797852, "learning_rate": 9.346815692527483e-06, "loss": 0.504, "step": 19572 }, { "epoch": 0.5352493983810982, "grad_norm": 1.2588133811950684, "learning_rate": 9.345931882378073e-06, "loss": 0.5032, "step": 19573 }, { "epoch": 0.5352767446948151, "grad_norm": 1.331963300704956, "learning_rate": 9.34504807735964e-06, "loss": 0.5063, "step": 19574 }, { "epoch": 0.535304091008532, "grad_norm": 1.27892005443573, "learning_rate": 9.34416427747912e-06, "loss": 0.7861, "step": 19575 }, { "epoch": 0.535331437322249, "grad_norm": 1.357749104499817, "learning_rate": 9.34328048274345e-06, "loss": 0.4955, "step": 19576 }, { "epoch": 0.5353587836359659, "grad_norm": 1.6033347845077515, "learning_rate": 9.342396693159556e-06, "loss": 0.5087, "step": 19577 }, { "epoch": 0.5353861299496828, "grad_norm": 1.190395712852478, "learning_rate": 9.34151290873438e-06, "loss": 0.7916, "step": 19578 }, { "epoch": 0.5354134762633997, "grad_norm": 1.1801711320877075, "learning_rate": 9.340629129474845e-06, "loss": 0.492, "step": 19579 }, { "epoch": 0.5354408225771166, "grad_norm": 1.2699567079544067, "learning_rate": 9.339745355387889e-06, "loss": 0.4864, "step": 19580 }, { "epoch": 0.5354681688908335, "grad_norm": 1.6635197401046753, "learning_rate": 9.338861586480447e-06, "loss": 0.4709, "step": 19581 }, { "epoch": 0.5354955152045504, "grad_norm": 1.1616719961166382, "learning_rate": 9.33797782275945e-06, "loss": 0.5064, "step": 19582 }, { "epoch": 0.5355228615182673, "grad_norm": 1.2726032733917236, "learning_rate": 9.337094064231829e-06, "loss": 0.5109, "step": 19583 }, { "epoch": 0.5355502078319843, "grad_norm": 1.1881294250488281, "learning_rate": 9.33621031090452e-06, "loss": 0.7866, "step": 19584 }, { "epoch": 0.5355775541457012, "grad_norm": 1.1724382638931274, "learning_rate": 9.335326562784453e-06, "loss": 0.4903, "step": 19585 }, { "epoch": 0.5356049004594181, "grad_norm": 1.638155460357666, "learning_rate": 9.334442819878565e-06, "loss": 0.4641, "step": 19586 }, { "epoch": 0.535632246773135, "grad_norm": 1.2893893718719482, "learning_rate": 9.333559082193783e-06, "loss": 0.511, "step": 19587 }, { "epoch": 0.5356595930868518, "grad_norm": 1.1542019844055176, "learning_rate": 9.332675349737042e-06, "loss": 0.5181, "step": 19588 }, { "epoch": 0.5356869394005688, "grad_norm": 1.8050897121429443, "learning_rate": 9.331791622515277e-06, "loss": 0.4838, "step": 19589 }, { "epoch": 0.5357142857142857, "grad_norm": 1.3153704404830933, "learning_rate": 9.330907900535416e-06, "loss": 0.508, "step": 19590 }, { "epoch": 0.5357416320280026, "grad_norm": 1.6303261518478394, "learning_rate": 9.3300241838044e-06, "loss": 0.5152, "step": 19591 }, { "epoch": 0.5357689783417195, "grad_norm": 1.5600676536560059, "learning_rate": 9.32914047232915e-06, "loss": 0.4874, "step": 19592 }, { "epoch": 0.5357963246554365, "grad_norm": 1.6467300653457642, "learning_rate": 9.32825676611661e-06, "loss": 0.4851, "step": 19593 }, { "epoch": 0.5358236709691534, "grad_norm": 1.619287133216858, "learning_rate": 9.327373065173701e-06, "loss": 0.4981, "step": 19594 }, { "epoch": 0.5358510172828703, "grad_norm": 1.2925289869308472, "learning_rate": 9.326489369507364e-06, "loss": 0.528, "step": 19595 }, { "epoch": 0.5358783635965871, "grad_norm": 1.209588646888733, "learning_rate": 9.325605679124525e-06, "loss": 0.5229, "step": 19596 }, { "epoch": 0.5359057099103041, "grad_norm": 1.49349045753479, "learning_rate": 9.32472199403212e-06, "loss": 0.3418, "step": 19597 }, { "epoch": 0.535933056224021, "grad_norm": 1.3461554050445557, "learning_rate": 9.323838314237085e-06, "loss": 0.5113, "step": 19598 }, { "epoch": 0.5359604025377379, "grad_norm": 1.8642224073410034, "learning_rate": 9.322954639746344e-06, "loss": 0.5271, "step": 19599 }, { "epoch": 0.5359877488514548, "grad_norm": 1.1734172105789185, "learning_rate": 9.322070970566837e-06, "loss": 0.4863, "step": 19600 }, { "epoch": 0.5360150951651718, "grad_norm": 1.226514220237732, "learning_rate": 9.32118730670549e-06, "loss": 0.5091, "step": 19601 }, { "epoch": 0.5360424414788887, "grad_norm": 1.3660361766815186, "learning_rate": 9.320303648169236e-06, "loss": 0.5383, "step": 19602 }, { "epoch": 0.5360697877926056, "grad_norm": 1.291682481765747, "learning_rate": 9.319419994965011e-06, "loss": 0.5271, "step": 19603 }, { "epoch": 0.5360971341063224, "grad_norm": 1.5761491060256958, "learning_rate": 9.318536347099745e-06, "loss": 0.4789, "step": 19604 }, { "epoch": 0.5361244804200394, "grad_norm": 1.4421554803848267, "learning_rate": 9.317652704580368e-06, "loss": 0.3868, "step": 19605 }, { "epoch": 0.5361518267337563, "grad_norm": 1.356073260307312, "learning_rate": 9.316769067413816e-06, "loss": 0.7749, "step": 19606 }, { "epoch": 0.5361791730474732, "grad_norm": 1.424676775932312, "learning_rate": 9.315885435607016e-06, "loss": 0.3847, "step": 19607 }, { "epoch": 0.5362065193611901, "grad_norm": 1.4391424655914307, "learning_rate": 9.315001809166903e-06, "loss": 0.5155, "step": 19608 }, { "epoch": 0.536233865674907, "grad_norm": 1.4531902074813843, "learning_rate": 9.314118188100407e-06, "loss": 0.7748, "step": 19609 }, { "epoch": 0.536261211988624, "grad_norm": 1.3923935890197754, "learning_rate": 9.313234572414461e-06, "loss": 0.5137, "step": 19610 }, { "epoch": 0.5362885583023408, "grad_norm": 1.3193830251693726, "learning_rate": 9.312350962115998e-06, "loss": 0.7341, "step": 19611 }, { "epoch": 0.5363159046160577, "grad_norm": 1.5233571529388428, "learning_rate": 9.311467357211948e-06, "loss": 0.5075, "step": 19612 }, { "epoch": 0.5363432509297746, "grad_norm": 1.1995372772216797, "learning_rate": 9.310583757709242e-06, "loss": 0.485, "step": 19613 }, { "epoch": 0.5363705972434916, "grad_norm": 1.5800659656524658, "learning_rate": 9.309700163614812e-06, "loss": 0.4336, "step": 19614 }, { "epoch": 0.5363979435572085, "grad_norm": 1.5771846771240234, "learning_rate": 9.30881657493559e-06, "loss": 0.5396, "step": 19615 }, { "epoch": 0.5364252898709254, "grad_norm": 1.8223799467086792, "learning_rate": 9.30793299167851e-06, "loss": 0.3959, "step": 19616 }, { "epoch": 0.5364526361846423, "grad_norm": 1.4902398586273193, "learning_rate": 9.307049413850498e-06, "loss": 0.4806, "step": 19617 }, { "epoch": 0.5364799824983593, "grad_norm": 1.322556734085083, "learning_rate": 9.306165841458488e-06, "loss": 0.3968, "step": 19618 }, { "epoch": 0.5365073288120761, "grad_norm": 1.2210590839385986, "learning_rate": 9.305282274509414e-06, "loss": 0.7661, "step": 19619 }, { "epoch": 0.536534675125793, "grad_norm": 1.2187613248825073, "learning_rate": 9.304398713010203e-06, "loss": 0.5032, "step": 19620 }, { "epoch": 0.5365620214395099, "grad_norm": 1.4803506135940552, "learning_rate": 9.303515156967791e-06, "loss": 0.5315, "step": 19621 }, { "epoch": 0.5365893677532269, "grad_norm": 1.204941749572754, "learning_rate": 9.302631606389103e-06, "loss": 0.4821, "step": 19622 }, { "epoch": 0.5366167140669438, "grad_norm": 1.6239110231399536, "learning_rate": 9.301748061281074e-06, "loss": 0.5118, "step": 19623 }, { "epoch": 0.5366440603806607, "grad_norm": 1.3219267129898071, "learning_rate": 9.300864521650639e-06, "loss": 0.5072, "step": 19624 }, { "epoch": 0.5366714066943776, "grad_norm": 1.5148426294326782, "learning_rate": 9.299980987504721e-06, "loss": 0.5037, "step": 19625 }, { "epoch": 0.5366987530080946, "grad_norm": 1.3367259502410889, "learning_rate": 9.299097458850262e-06, "loss": 0.5084, "step": 19626 }, { "epoch": 0.5367260993218114, "grad_norm": 1.1672183275222778, "learning_rate": 9.29821393569418e-06, "loss": 0.5033, "step": 19627 }, { "epoch": 0.5367534456355283, "grad_norm": 1.3938853740692139, "learning_rate": 9.297330418043415e-06, "loss": 0.4968, "step": 19628 }, { "epoch": 0.5367807919492452, "grad_norm": 1.4396262168884277, "learning_rate": 9.296446905904892e-06, "loss": 0.4835, "step": 19629 }, { "epoch": 0.5368081382629621, "grad_norm": 1.3828054666519165, "learning_rate": 9.295563399285548e-06, "loss": 0.7948, "step": 19630 }, { "epoch": 0.5368354845766791, "grad_norm": 1.168447732925415, "learning_rate": 9.294679898192308e-06, "loss": 0.4828, "step": 19631 }, { "epoch": 0.536862830890396, "grad_norm": 1.342143177986145, "learning_rate": 9.293796402632105e-06, "loss": 0.7431, "step": 19632 }, { "epoch": 0.5368901772041129, "grad_norm": 1.3430603742599487, "learning_rate": 9.292912912611876e-06, "loss": 0.7889, "step": 19633 }, { "epoch": 0.5369175235178298, "grad_norm": 1.1486469507217407, "learning_rate": 9.29202942813854e-06, "loss": 0.455, "step": 19634 }, { "epoch": 0.5369448698315467, "grad_norm": 1.1715788841247559, "learning_rate": 9.291145949219036e-06, "loss": 0.487, "step": 19635 }, { "epoch": 0.5369722161452636, "grad_norm": 1.3991265296936035, "learning_rate": 9.290262475860295e-06, "loss": 0.5102, "step": 19636 }, { "epoch": 0.5369995624589805, "grad_norm": 1.4473505020141602, "learning_rate": 9.289379008069243e-06, "loss": 0.5289, "step": 19637 }, { "epoch": 0.5370269087726974, "grad_norm": 1.3289601802825928, "learning_rate": 9.288495545852814e-06, "loss": 0.4972, "step": 19638 }, { "epoch": 0.5370542550864144, "grad_norm": 2.1048243045806885, "learning_rate": 9.287612089217935e-06, "loss": 0.7883, "step": 19639 }, { "epoch": 0.5370816014001313, "grad_norm": 1.465298056602478, "learning_rate": 9.286728638171538e-06, "loss": 0.4471, "step": 19640 }, { "epoch": 0.5371089477138482, "grad_norm": 1.8486754894256592, "learning_rate": 9.285845192720558e-06, "loss": 0.3593, "step": 19641 }, { "epoch": 0.5371362940275651, "grad_norm": 1.258305549621582, "learning_rate": 9.284961752871918e-06, "loss": 0.7626, "step": 19642 }, { "epoch": 0.537163640341282, "grad_norm": 1.3317185640335083, "learning_rate": 9.284078318632556e-06, "loss": 0.4932, "step": 19643 }, { "epoch": 0.5371909866549989, "grad_norm": 1.2991000413894653, "learning_rate": 9.283194890009393e-06, "loss": 0.4695, "step": 19644 }, { "epoch": 0.5372183329687158, "grad_norm": 1.5336757898330688, "learning_rate": 9.282311467009367e-06, "loss": 0.4087, "step": 19645 }, { "epoch": 0.5372456792824327, "grad_norm": 1.2611969709396362, "learning_rate": 9.281428049639406e-06, "loss": 0.4969, "step": 19646 }, { "epoch": 0.5372730255961496, "grad_norm": 1.272042989730835, "learning_rate": 9.280544637906437e-06, "loss": 0.5009, "step": 19647 }, { "epoch": 0.5373003719098666, "grad_norm": 1.818953037261963, "learning_rate": 9.279661231817395e-06, "loss": 0.3912, "step": 19648 }, { "epoch": 0.5373277182235835, "grad_norm": 1.2716600894927979, "learning_rate": 9.278777831379208e-06, "loss": 0.5105, "step": 19649 }, { "epoch": 0.5373550645373004, "grad_norm": 1.6643645763397217, "learning_rate": 9.277894436598806e-06, "loss": 0.4835, "step": 19650 }, { "epoch": 0.5373824108510172, "grad_norm": 1.394277572631836, "learning_rate": 9.27701104748312e-06, "loss": 0.4172, "step": 19651 }, { "epoch": 0.5374097571647342, "grad_norm": 1.3858610391616821, "learning_rate": 9.276127664039077e-06, "loss": 0.7726, "step": 19652 }, { "epoch": 0.5374371034784511, "grad_norm": 1.1330591440200806, "learning_rate": 9.275244286273608e-06, "loss": 0.4878, "step": 19653 }, { "epoch": 0.537464449792168, "grad_norm": 1.1667211055755615, "learning_rate": 9.274360914193647e-06, "loss": 0.4836, "step": 19654 }, { "epoch": 0.5374917961058849, "grad_norm": 1.3856666088104248, "learning_rate": 9.273477547806116e-06, "loss": 0.4063, "step": 19655 }, { "epoch": 0.5375191424196019, "grad_norm": 1.429428219795227, "learning_rate": 9.272594187117954e-06, "loss": 0.4687, "step": 19656 }, { "epoch": 0.5375464887333188, "grad_norm": 1.167281150817871, "learning_rate": 9.271710832136081e-06, "loss": 0.4518, "step": 19657 }, { "epoch": 0.5375738350470357, "grad_norm": 1.7482281923294067, "learning_rate": 9.270827482867434e-06, "loss": 0.3838, "step": 19658 }, { "epoch": 0.5376011813607525, "grad_norm": 1.495987892150879, "learning_rate": 9.269944139318942e-06, "loss": 0.4919, "step": 19659 }, { "epoch": 0.5376285276744694, "grad_norm": 1.367424488067627, "learning_rate": 9.26906080149753e-06, "loss": 0.5402, "step": 19660 }, { "epoch": 0.5376558739881864, "grad_norm": 1.5079578161239624, "learning_rate": 9.268177469410134e-06, "loss": 0.3576, "step": 19661 }, { "epoch": 0.5376832203019033, "grad_norm": 1.3217133283615112, "learning_rate": 9.267294143063677e-06, "loss": 0.4925, "step": 19662 }, { "epoch": 0.5377105666156202, "grad_norm": 1.2059162855148315, "learning_rate": 9.266410822465091e-06, "loss": 0.4634, "step": 19663 }, { "epoch": 0.5377379129293371, "grad_norm": 1.21306574344635, "learning_rate": 9.265527507621306e-06, "loss": 0.5, "step": 19664 }, { "epoch": 0.5377652592430541, "grad_norm": 1.2882838249206543, "learning_rate": 9.264644198539252e-06, "loss": 0.5148, "step": 19665 }, { "epoch": 0.537792605556771, "grad_norm": 1.1629801988601685, "learning_rate": 9.263760895225856e-06, "loss": 0.5313, "step": 19666 }, { "epoch": 0.5378199518704878, "grad_norm": 1.623224139213562, "learning_rate": 9.262877597688046e-06, "loss": 0.5214, "step": 19667 }, { "epoch": 0.5378472981842047, "grad_norm": 1.44316828250885, "learning_rate": 9.261994305932758e-06, "loss": 0.5131, "step": 19668 }, { "epoch": 0.5378746444979217, "grad_norm": 1.2670978307724, "learning_rate": 9.261111019966916e-06, "loss": 0.4904, "step": 19669 }, { "epoch": 0.5379019908116386, "grad_norm": 1.50118887424469, "learning_rate": 9.260227739797448e-06, "loss": 0.3798, "step": 19670 }, { "epoch": 0.5379293371253555, "grad_norm": 1.4061373472213745, "learning_rate": 9.259344465431289e-06, "loss": 0.5249, "step": 19671 }, { "epoch": 0.5379566834390724, "grad_norm": 1.4172383546829224, "learning_rate": 9.258461196875361e-06, "loss": 0.785, "step": 19672 }, { "epoch": 0.5379840297527894, "grad_norm": 1.0988444089889526, "learning_rate": 9.2575779341366e-06, "loss": 0.4708, "step": 19673 }, { "epoch": 0.5380113760665063, "grad_norm": 1.3515355587005615, "learning_rate": 9.256694677221928e-06, "loss": 0.4948, "step": 19674 }, { "epoch": 0.5380387223802231, "grad_norm": 1.6593058109283447, "learning_rate": 9.255811426138275e-06, "loss": 0.5226, "step": 19675 }, { "epoch": 0.53806606869394, "grad_norm": 1.3673126697540283, "learning_rate": 9.254928180892576e-06, "loss": 0.4904, "step": 19676 }, { "epoch": 0.538093415007657, "grad_norm": 1.1918097734451294, "learning_rate": 9.254044941491753e-06, "loss": 0.495, "step": 19677 }, { "epoch": 0.5381207613213739, "grad_norm": 1.2497528791427612, "learning_rate": 9.25316170794274e-06, "loss": 0.5092, "step": 19678 }, { "epoch": 0.5381481076350908, "grad_norm": 1.2847950458526611, "learning_rate": 9.252278480252461e-06, "loss": 0.3462, "step": 19679 }, { "epoch": 0.5381754539488077, "grad_norm": 1.2487919330596924, "learning_rate": 9.251395258427847e-06, "loss": 0.5248, "step": 19680 }, { "epoch": 0.5382028002625246, "grad_norm": 1.184615135192871, "learning_rate": 9.25051204247583e-06, "loss": 0.4702, "step": 19681 }, { "epoch": 0.5382301465762416, "grad_norm": 1.075435757637024, "learning_rate": 9.24962883240333e-06, "loss": 0.4601, "step": 19682 }, { "epoch": 0.5382574928899584, "grad_norm": 1.1835869550704956, "learning_rate": 9.248745628217283e-06, "loss": 0.4662, "step": 19683 }, { "epoch": 0.5382848392036753, "grad_norm": 1.6655644178390503, "learning_rate": 9.247862429924616e-06, "loss": 0.4021, "step": 19684 }, { "epoch": 0.5383121855173922, "grad_norm": 1.3070929050445557, "learning_rate": 9.246979237532256e-06, "loss": 0.5321, "step": 19685 }, { "epoch": 0.5383395318311092, "grad_norm": 1.4514256715774536, "learning_rate": 9.246096051047132e-06, "loss": 0.4077, "step": 19686 }, { "epoch": 0.5383668781448261, "grad_norm": 1.427232265472412, "learning_rate": 9.245212870476172e-06, "loss": 0.5118, "step": 19687 }, { "epoch": 0.538394224458543, "grad_norm": 1.3410640954971313, "learning_rate": 9.244329695826302e-06, "loss": 0.5129, "step": 19688 }, { "epoch": 0.5384215707722599, "grad_norm": 1.17605459690094, "learning_rate": 9.243446527104459e-06, "loss": 0.5034, "step": 19689 }, { "epoch": 0.5384489170859769, "grad_norm": 1.346182942390442, "learning_rate": 9.24256336431756e-06, "loss": 0.5289, "step": 19690 }, { "epoch": 0.5384762633996937, "grad_norm": 1.6665388345718384, "learning_rate": 9.241680207472538e-06, "loss": 0.4197, "step": 19691 }, { "epoch": 0.5385036097134106, "grad_norm": 1.124648928642273, "learning_rate": 9.240797056576325e-06, "loss": 0.4944, "step": 19692 }, { "epoch": 0.5385309560271275, "grad_norm": 1.4135717153549194, "learning_rate": 9.239913911635843e-06, "loss": 0.498, "step": 19693 }, { "epoch": 0.5385583023408445, "grad_norm": 1.3320162296295166, "learning_rate": 9.239030772658025e-06, "loss": 0.7729, "step": 19694 }, { "epoch": 0.5385856486545614, "grad_norm": 1.2005984783172607, "learning_rate": 9.238147639649794e-06, "loss": 0.5059, "step": 19695 }, { "epoch": 0.5386129949682783, "grad_norm": 1.5825787782669067, "learning_rate": 9.237264512618079e-06, "loss": 0.4269, "step": 19696 }, { "epoch": 0.5386403412819952, "grad_norm": 1.376261591911316, "learning_rate": 9.236381391569814e-06, "loss": 0.499, "step": 19697 }, { "epoch": 0.5386676875957122, "grad_norm": 1.1660023927688599, "learning_rate": 9.235498276511921e-06, "loss": 0.4931, "step": 19698 }, { "epoch": 0.538695033909429, "grad_norm": 1.1927798986434937, "learning_rate": 9.234615167451328e-06, "loss": 0.4822, "step": 19699 }, { "epoch": 0.5387223802231459, "grad_norm": 1.3915953636169434, "learning_rate": 9.233732064394962e-06, "loss": 0.384, "step": 19700 }, { "epoch": 0.5387497265368628, "grad_norm": 1.4396568536758423, "learning_rate": 9.232848967349753e-06, "loss": 0.4012, "step": 19701 }, { "epoch": 0.5387770728505797, "grad_norm": 1.1743464469909668, "learning_rate": 9.231965876322628e-06, "loss": 0.4873, "step": 19702 }, { "epoch": 0.5388044191642967, "grad_norm": 1.3697623014450073, "learning_rate": 9.231082791320515e-06, "loss": 0.5015, "step": 19703 }, { "epoch": 0.5388317654780136, "grad_norm": 1.3827733993530273, "learning_rate": 9.23019971235034e-06, "loss": 0.3794, "step": 19704 }, { "epoch": 0.5388591117917305, "grad_norm": 1.4893534183502197, "learning_rate": 9.229316639419031e-06, "loss": 0.4202, "step": 19705 }, { "epoch": 0.5388864581054474, "grad_norm": 2.143684148788452, "learning_rate": 9.22843357253352e-06, "loss": 0.4027, "step": 19706 }, { "epoch": 0.5389138044191643, "grad_norm": 1.266770362854004, "learning_rate": 9.227550511700728e-06, "loss": 0.4782, "step": 19707 }, { "epoch": 0.5389411507328812, "grad_norm": 1.3515952825546265, "learning_rate": 9.226667456927586e-06, "loss": 0.5125, "step": 19708 }, { "epoch": 0.5389684970465981, "grad_norm": 1.4621495008468628, "learning_rate": 9.225784408221017e-06, "loss": 0.5071, "step": 19709 }, { "epoch": 0.538995843360315, "grad_norm": 1.2836158275604248, "learning_rate": 9.224901365587951e-06, "loss": 0.4507, "step": 19710 }, { "epoch": 0.539023189674032, "grad_norm": 1.243225336074829, "learning_rate": 9.22401832903532e-06, "loss": 0.5317, "step": 19711 }, { "epoch": 0.5390505359877489, "grad_norm": 1.4708404541015625, "learning_rate": 9.223135298570044e-06, "loss": 0.3445, "step": 19712 }, { "epoch": 0.5390778823014658, "grad_norm": 1.4464936256408691, "learning_rate": 9.222252274199053e-06, "loss": 0.5155, "step": 19713 }, { "epoch": 0.5391052286151826, "grad_norm": 1.2613298892974854, "learning_rate": 9.221369255929277e-06, "loss": 0.4926, "step": 19714 }, { "epoch": 0.5391325749288995, "grad_norm": 1.172263741493225, "learning_rate": 9.220486243767637e-06, "loss": 0.4691, "step": 19715 }, { "epoch": 0.5391599212426165, "grad_norm": 1.0920569896697998, "learning_rate": 9.219603237721064e-06, "loss": 0.4765, "step": 19716 }, { "epoch": 0.5391872675563334, "grad_norm": 1.8981537818908691, "learning_rate": 9.218720237796484e-06, "loss": 0.4975, "step": 19717 }, { "epoch": 0.5392146138700503, "grad_norm": 1.2424049377441406, "learning_rate": 9.217837244000822e-06, "loss": 0.5001, "step": 19718 }, { "epoch": 0.5392419601837672, "grad_norm": 1.3947118520736694, "learning_rate": 9.21695425634101e-06, "loss": 0.758, "step": 19719 }, { "epoch": 0.5392693064974842, "grad_norm": 1.1274845600128174, "learning_rate": 9.216071274823969e-06, "loss": 0.5073, "step": 19720 }, { "epoch": 0.5392966528112011, "grad_norm": 1.325256586074829, "learning_rate": 9.21518829945663e-06, "loss": 0.4874, "step": 19721 }, { "epoch": 0.5393239991249179, "grad_norm": 1.2342302799224854, "learning_rate": 9.214305330245916e-06, "loss": 0.5073, "step": 19722 }, { "epoch": 0.5393513454386348, "grad_norm": 1.2475123405456543, "learning_rate": 9.213422367198756e-06, "loss": 0.4991, "step": 19723 }, { "epoch": 0.5393786917523518, "grad_norm": 1.244753360748291, "learning_rate": 9.212539410322079e-06, "loss": 0.4574, "step": 19724 }, { "epoch": 0.5394060380660687, "grad_norm": 1.607067346572876, "learning_rate": 9.211656459622805e-06, "loss": 0.4829, "step": 19725 }, { "epoch": 0.5394333843797856, "grad_norm": 1.8502936363220215, "learning_rate": 9.210773515107865e-06, "loss": 0.355, "step": 19726 }, { "epoch": 0.5394607306935025, "grad_norm": 1.4250150918960571, "learning_rate": 9.209890576784188e-06, "loss": 0.4834, "step": 19727 }, { "epoch": 0.5394880770072195, "grad_norm": 1.1274410486221313, "learning_rate": 9.209007644658692e-06, "loss": 0.5191, "step": 19728 }, { "epoch": 0.5395154233209364, "grad_norm": 1.9286928176879883, "learning_rate": 9.208124718738313e-06, "loss": 0.4956, "step": 19729 }, { "epoch": 0.5395427696346532, "grad_norm": 1.7556554079055786, "learning_rate": 9.207241799029968e-06, "loss": 0.7798, "step": 19730 }, { "epoch": 0.5395701159483701, "grad_norm": 1.4806387424468994, "learning_rate": 9.206358885540592e-06, "loss": 0.4211, "step": 19731 }, { "epoch": 0.539597462262087, "grad_norm": 1.2689582109451294, "learning_rate": 9.205475978277108e-06, "loss": 0.4898, "step": 19732 }, { "epoch": 0.539624808575804, "grad_norm": 2.0604918003082275, "learning_rate": 9.204593077246441e-06, "loss": 0.3703, "step": 19733 }, { "epoch": 0.5396521548895209, "grad_norm": 1.1264941692352295, "learning_rate": 9.203710182455515e-06, "loss": 0.4792, "step": 19734 }, { "epoch": 0.5396795012032378, "grad_norm": 1.345777988433838, "learning_rate": 9.20282729391126e-06, "loss": 0.3798, "step": 19735 }, { "epoch": 0.5397068475169547, "grad_norm": 1.526065707206726, "learning_rate": 9.201944411620598e-06, "loss": 0.4654, "step": 19736 }, { "epoch": 0.5397341938306717, "grad_norm": 1.5186563730239868, "learning_rate": 9.20106153559046e-06, "loss": 0.7945, "step": 19737 }, { "epoch": 0.5397615401443885, "grad_norm": 1.1601009368896484, "learning_rate": 9.200178665827768e-06, "loss": 0.485, "step": 19738 }, { "epoch": 0.5397888864581054, "grad_norm": 1.425052285194397, "learning_rate": 9.19929580233945e-06, "loss": 0.5186, "step": 19739 }, { "epoch": 0.5398162327718223, "grad_norm": 1.3950024843215942, "learning_rate": 9.19841294513243e-06, "loss": 0.3837, "step": 19740 }, { "epoch": 0.5398435790855393, "grad_norm": 1.7004685401916504, "learning_rate": 9.197530094213638e-06, "loss": 0.7537, "step": 19741 }, { "epoch": 0.5398709253992562, "grad_norm": 1.5281319618225098, "learning_rate": 9.196647249589992e-06, "loss": 0.4841, "step": 19742 }, { "epoch": 0.5398982717129731, "grad_norm": 1.7054256200790405, "learning_rate": 9.195764411268427e-06, "loss": 0.4626, "step": 19743 }, { "epoch": 0.53992561802669, "grad_norm": 1.2018455266952515, "learning_rate": 9.194881579255862e-06, "loss": 0.7671, "step": 19744 }, { "epoch": 0.539952964340407, "grad_norm": 1.2014034986495972, "learning_rate": 9.193998753559221e-06, "loss": 0.5109, "step": 19745 }, { "epoch": 0.5399803106541238, "grad_norm": 1.3777437210083008, "learning_rate": 9.193115934185439e-06, "loss": 0.5366, "step": 19746 }, { "epoch": 0.5400076569678407, "grad_norm": 1.427918791770935, "learning_rate": 9.19223312114143e-06, "loss": 0.5025, "step": 19747 }, { "epoch": 0.5400350032815576, "grad_norm": 1.3526591062545776, "learning_rate": 9.191350314434127e-06, "loss": 0.5177, "step": 19748 }, { "epoch": 0.5400623495952745, "grad_norm": 1.3818479776382446, "learning_rate": 9.190467514070455e-06, "loss": 0.3928, "step": 19749 }, { "epoch": 0.5400896959089915, "grad_norm": 1.2419637441635132, "learning_rate": 9.189584720057334e-06, "loss": 0.5048, "step": 19750 }, { "epoch": 0.5401170422227084, "grad_norm": 1.2043516635894775, "learning_rate": 9.188701932401697e-06, "loss": 0.4836, "step": 19751 }, { "epoch": 0.5401443885364253, "grad_norm": 1.547761082649231, "learning_rate": 9.187819151110462e-06, "loss": 0.5352, "step": 19752 }, { "epoch": 0.5401717348501422, "grad_norm": 2.4763264656066895, "learning_rate": 9.186936376190556e-06, "loss": 0.784, "step": 19753 }, { "epoch": 0.5401990811638591, "grad_norm": 1.7605457305908203, "learning_rate": 9.186053607648909e-06, "loss": 0.7634, "step": 19754 }, { "epoch": 0.540226427477576, "grad_norm": 1.2216503620147705, "learning_rate": 9.18517084549244e-06, "loss": 0.4006, "step": 19755 }, { "epoch": 0.5402537737912929, "grad_norm": 1.352681279182434, "learning_rate": 9.184288089728075e-06, "loss": 0.476, "step": 19756 }, { "epoch": 0.5402811201050098, "grad_norm": 1.24271559715271, "learning_rate": 9.183405340362744e-06, "loss": 0.4817, "step": 19757 }, { "epoch": 0.5403084664187268, "grad_norm": 1.12294602394104, "learning_rate": 9.182522597403367e-06, "loss": 0.5105, "step": 19758 }, { "epoch": 0.5403358127324437, "grad_norm": 1.3805280923843384, "learning_rate": 9.181639860856871e-06, "loss": 0.523, "step": 19759 }, { "epoch": 0.5403631590461606, "grad_norm": 1.9353141784667969, "learning_rate": 9.180757130730177e-06, "loss": 0.4125, "step": 19760 }, { "epoch": 0.5403905053598775, "grad_norm": 1.1688001155853271, "learning_rate": 9.179874407030215e-06, "loss": 0.7758, "step": 19761 }, { "epoch": 0.5404178516735944, "grad_norm": 1.1381744146347046, "learning_rate": 9.178991689763908e-06, "loss": 0.5177, "step": 19762 }, { "epoch": 0.5404451979873113, "grad_norm": 1.3962323665618896, "learning_rate": 9.178108978938178e-06, "loss": 0.4887, "step": 19763 }, { "epoch": 0.5404725443010282, "grad_norm": 1.239894986152649, "learning_rate": 9.177226274559953e-06, "loss": 0.4871, "step": 19764 }, { "epoch": 0.5404998906147451, "grad_norm": 1.1975631713867188, "learning_rate": 9.176343576636156e-06, "loss": 0.4934, "step": 19765 }, { "epoch": 0.540527236928462, "grad_norm": 1.318968415260315, "learning_rate": 9.175460885173712e-06, "loss": 0.5118, "step": 19766 }, { "epoch": 0.540554583242179, "grad_norm": 1.405694603919983, "learning_rate": 9.174578200179549e-06, "loss": 0.4797, "step": 19767 }, { "epoch": 0.5405819295558959, "grad_norm": 1.2495735883712769, "learning_rate": 9.173695521660585e-06, "loss": 0.5192, "step": 19768 }, { "epoch": 0.5406092758696128, "grad_norm": 1.479388952255249, "learning_rate": 9.172812849623746e-06, "loss": 0.4663, "step": 19769 }, { "epoch": 0.5406366221833296, "grad_norm": 1.8392198085784912, "learning_rate": 9.171930184075956e-06, "loss": 0.5078, "step": 19770 }, { "epoch": 0.5406639684970466, "grad_norm": 1.408189058303833, "learning_rate": 9.171047525024144e-06, "loss": 0.5047, "step": 19771 }, { "epoch": 0.5406913148107635, "grad_norm": 1.12515389919281, "learning_rate": 9.170164872475227e-06, "loss": 0.5056, "step": 19772 }, { "epoch": 0.5407186611244804, "grad_norm": 1.458170771598816, "learning_rate": 9.169282226436138e-06, "loss": 0.5037, "step": 19773 }, { "epoch": 0.5407460074381973, "grad_norm": 1.2545942068099976, "learning_rate": 9.168399586913792e-06, "loss": 0.4847, "step": 19774 }, { "epoch": 0.5407733537519143, "grad_norm": 1.1496204137802124, "learning_rate": 9.167516953915117e-06, "loss": 0.4638, "step": 19775 }, { "epoch": 0.5408007000656312, "grad_norm": 1.4917960166931152, "learning_rate": 9.166634327447041e-06, "loss": 0.5203, "step": 19776 }, { "epoch": 0.5408280463793481, "grad_norm": 1.630475401878357, "learning_rate": 9.16575170751648e-06, "loss": 0.7781, "step": 19777 }, { "epoch": 0.5408553926930649, "grad_norm": 1.3159997463226318, "learning_rate": 9.164869094130364e-06, "loss": 0.491, "step": 19778 }, { "epoch": 0.5408827390067819, "grad_norm": 1.3119795322418213, "learning_rate": 9.163986487295616e-06, "loss": 0.5104, "step": 19779 }, { "epoch": 0.5409100853204988, "grad_norm": 1.2392852306365967, "learning_rate": 9.163103887019157e-06, "loss": 0.4964, "step": 19780 }, { "epoch": 0.5409374316342157, "grad_norm": 2.6293556690216064, "learning_rate": 9.162221293307915e-06, "loss": 0.4676, "step": 19781 }, { "epoch": 0.5409647779479326, "grad_norm": 1.4705618619918823, "learning_rate": 9.161338706168809e-06, "loss": 0.4007, "step": 19782 }, { "epoch": 0.5409921242616496, "grad_norm": 1.4176074266433716, "learning_rate": 9.160456125608765e-06, "loss": 0.503, "step": 19783 }, { "epoch": 0.5410194705753665, "grad_norm": 1.2680400609970093, "learning_rate": 9.159573551634707e-06, "loss": 0.4825, "step": 19784 }, { "epoch": 0.5410468168890834, "grad_norm": 1.287420392036438, "learning_rate": 9.158690984253557e-06, "loss": 0.492, "step": 19785 }, { "epoch": 0.5410741632028002, "grad_norm": 2.337542772293091, "learning_rate": 9.15780842347224e-06, "loss": 0.4713, "step": 19786 }, { "epoch": 0.5411015095165171, "grad_norm": 1.442274570465088, "learning_rate": 9.15692586929768e-06, "loss": 0.5073, "step": 19787 }, { "epoch": 0.5411288558302341, "grad_norm": 2.2581028938293457, "learning_rate": 9.156043321736796e-06, "loss": 0.5277, "step": 19788 }, { "epoch": 0.541156202143951, "grad_norm": 1.2473909854888916, "learning_rate": 9.15516078079652e-06, "loss": 0.4631, "step": 19789 }, { "epoch": 0.5411835484576679, "grad_norm": 1.2041370868682861, "learning_rate": 9.154278246483765e-06, "loss": 0.4684, "step": 19790 }, { "epoch": 0.5412108947713848, "grad_norm": 1.1497530937194824, "learning_rate": 9.15339571880546e-06, "loss": 0.4984, "step": 19791 }, { "epoch": 0.5412382410851018, "grad_norm": 1.1857571601867676, "learning_rate": 9.15251319776853e-06, "loss": 0.7741, "step": 19792 }, { "epoch": 0.5412655873988187, "grad_norm": 1.201192855834961, "learning_rate": 9.151630683379893e-06, "loss": 0.5019, "step": 19793 }, { "epoch": 0.5412929337125355, "grad_norm": 1.2272610664367676, "learning_rate": 9.150748175646476e-06, "loss": 0.4848, "step": 19794 }, { "epoch": 0.5413202800262524, "grad_norm": 1.1554397344589233, "learning_rate": 9.149865674575199e-06, "loss": 0.8175, "step": 19795 }, { "epoch": 0.5413476263399694, "grad_norm": 1.2428513765335083, "learning_rate": 9.148983180172987e-06, "loss": 0.5062, "step": 19796 }, { "epoch": 0.5413749726536863, "grad_norm": 1.2914209365844727, "learning_rate": 9.148100692446764e-06, "loss": 0.4618, "step": 19797 }, { "epoch": 0.5414023189674032, "grad_norm": 1.2616634368896484, "learning_rate": 9.147218211403449e-06, "loss": 0.498, "step": 19798 }, { "epoch": 0.5414296652811201, "grad_norm": 1.1350239515304565, "learning_rate": 9.14633573704997e-06, "loss": 0.3594, "step": 19799 }, { "epoch": 0.5414570115948371, "grad_norm": 1.2451143264770508, "learning_rate": 9.145453269393244e-06, "loss": 0.4832, "step": 19800 }, { "epoch": 0.541484357908554, "grad_norm": 5.016088962554932, "learning_rate": 9.144570808440197e-06, "loss": 0.7766, "step": 19801 }, { "epoch": 0.5415117042222708, "grad_norm": 1.4908021688461304, "learning_rate": 9.143688354197756e-06, "loss": 0.5059, "step": 19802 }, { "epoch": 0.5415390505359877, "grad_norm": 1.183669090270996, "learning_rate": 9.142805906672836e-06, "loss": 0.4862, "step": 19803 }, { "epoch": 0.5415663968497046, "grad_norm": 1.9151594638824463, "learning_rate": 9.14192346587236e-06, "loss": 0.7645, "step": 19804 }, { "epoch": 0.5415937431634216, "grad_norm": 1.2153546810150146, "learning_rate": 9.141041031803255e-06, "loss": 0.5105, "step": 19805 }, { "epoch": 0.5416210894771385, "grad_norm": 1.3028618097305298, "learning_rate": 9.140158604472441e-06, "loss": 0.4955, "step": 19806 }, { "epoch": 0.5416484357908554, "grad_norm": 1.3696844577789307, "learning_rate": 9.13927618388684e-06, "loss": 0.3611, "step": 19807 }, { "epoch": 0.5416757821045723, "grad_norm": 1.3147072792053223, "learning_rate": 9.138393770053376e-06, "loss": 0.3798, "step": 19808 }, { "epoch": 0.5417031284182892, "grad_norm": 1.3846584558486938, "learning_rate": 9.13751136297897e-06, "loss": 0.4713, "step": 19809 }, { "epoch": 0.5417304747320061, "grad_norm": 1.2491647005081177, "learning_rate": 9.136628962670542e-06, "loss": 0.4556, "step": 19810 }, { "epoch": 0.541757821045723, "grad_norm": 1.198297381401062, "learning_rate": 9.135746569135022e-06, "loss": 0.5094, "step": 19811 }, { "epoch": 0.5417851673594399, "grad_norm": 1.2170374393463135, "learning_rate": 9.134864182379324e-06, "loss": 0.5213, "step": 19812 }, { "epoch": 0.5418125136731569, "grad_norm": 1.1299412250518799, "learning_rate": 9.133981802410371e-06, "loss": 0.5059, "step": 19813 }, { "epoch": 0.5418398599868738, "grad_norm": 1.1923719644546509, "learning_rate": 9.13309942923509e-06, "loss": 0.5205, "step": 19814 }, { "epoch": 0.5418672063005907, "grad_norm": 1.1578733921051025, "learning_rate": 9.1322170628604e-06, "loss": 0.4995, "step": 19815 }, { "epoch": 0.5418945526143076, "grad_norm": 1.410709023475647, "learning_rate": 9.131334703293225e-06, "loss": 0.5031, "step": 19816 }, { "epoch": 0.5419218989280244, "grad_norm": 1.1883248090744019, "learning_rate": 9.130452350540481e-06, "loss": 0.5092, "step": 19817 }, { "epoch": 0.5419492452417414, "grad_norm": 1.3420644998550415, "learning_rate": 9.129570004609094e-06, "loss": 0.4946, "step": 19818 }, { "epoch": 0.5419765915554583, "grad_norm": 1.339105248451233, "learning_rate": 9.128687665505986e-06, "loss": 0.4027, "step": 19819 }, { "epoch": 0.5420039378691752, "grad_norm": 1.116450548171997, "learning_rate": 9.127805333238077e-06, "loss": 0.5373, "step": 19820 }, { "epoch": 0.5420312841828921, "grad_norm": 1.5347830057144165, "learning_rate": 9.12692300781229e-06, "loss": 0.4749, "step": 19821 }, { "epoch": 0.5420586304966091, "grad_norm": 1.3329582214355469, "learning_rate": 9.126040689235549e-06, "loss": 0.5049, "step": 19822 }, { "epoch": 0.542085976810326, "grad_norm": 1.2443028688430786, "learning_rate": 9.125158377514772e-06, "loss": 0.4873, "step": 19823 }, { "epoch": 0.5421133231240429, "grad_norm": 1.6278187036514282, "learning_rate": 9.12427607265688e-06, "loss": 0.3934, "step": 19824 }, { "epoch": 0.5421406694377597, "grad_norm": 1.2397079467773438, "learning_rate": 9.123393774668795e-06, "loss": 0.5289, "step": 19825 }, { "epoch": 0.5421680157514767, "grad_norm": 1.1039702892303467, "learning_rate": 9.12251148355744e-06, "loss": 0.5009, "step": 19826 }, { "epoch": 0.5421953620651936, "grad_norm": 1.2313638925552368, "learning_rate": 9.121629199329736e-06, "loss": 0.5461, "step": 19827 }, { "epoch": 0.5422227083789105, "grad_norm": 1.2127352952957153, "learning_rate": 9.120746921992603e-06, "loss": 0.4993, "step": 19828 }, { "epoch": 0.5422500546926274, "grad_norm": 1.2981319427490234, "learning_rate": 9.119864651552965e-06, "loss": 0.5001, "step": 19829 }, { "epoch": 0.5422774010063444, "grad_norm": 1.149383306503296, "learning_rate": 9.118982388017739e-06, "loss": 0.4739, "step": 19830 }, { "epoch": 0.5423047473200613, "grad_norm": 4.400455474853516, "learning_rate": 9.118100131393848e-06, "loss": 0.3806, "step": 19831 }, { "epoch": 0.5423320936337782, "grad_norm": 1.2888140678405762, "learning_rate": 9.117217881688216e-06, "loss": 0.4781, "step": 19832 }, { "epoch": 0.542359439947495, "grad_norm": 1.1695884466171265, "learning_rate": 9.116335638907757e-06, "loss": 0.4566, "step": 19833 }, { "epoch": 0.542386786261212, "grad_norm": 1.562359094619751, "learning_rate": 9.115453403059397e-06, "loss": 0.4087, "step": 19834 }, { "epoch": 0.5424141325749289, "grad_norm": 1.1976354122161865, "learning_rate": 9.11457117415006e-06, "loss": 0.5114, "step": 19835 }, { "epoch": 0.5424414788886458, "grad_norm": 1.8063114881515503, "learning_rate": 9.113688952186661e-06, "loss": 0.3753, "step": 19836 }, { "epoch": 0.5424688252023627, "grad_norm": 1.1783957481384277, "learning_rate": 9.112806737176125e-06, "loss": 0.4578, "step": 19837 }, { "epoch": 0.5424961715160797, "grad_norm": 1.1137514114379883, "learning_rate": 9.111924529125369e-06, "loss": 0.3803, "step": 19838 }, { "epoch": 0.5425235178297966, "grad_norm": 1.3068840503692627, "learning_rate": 9.111042328041313e-06, "loss": 0.4932, "step": 19839 }, { "epoch": 0.5425508641435135, "grad_norm": 1.282230257987976, "learning_rate": 9.11016013393088e-06, "loss": 0.4995, "step": 19840 }, { "epoch": 0.5425782104572303, "grad_norm": 1.1330825090408325, "learning_rate": 9.109277946800992e-06, "loss": 0.5079, "step": 19841 }, { "epoch": 0.5426055567709472, "grad_norm": 1.3167989253997803, "learning_rate": 9.108395766658566e-06, "loss": 0.5325, "step": 19842 }, { "epoch": 0.5426329030846642, "grad_norm": 1.2149540185928345, "learning_rate": 9.107513593510524e-06, "loss": 0.5005, "step": 19843 }, { "epoch": 0.5426602493983811, "grad_norm": 1.2957072257995605, "learning_rate": 9.106631427363789e-06, "loss": 0.7838, "step": 19844 }, { "epoch": 0.542687595712098, "grad_norm": 2.6935951709747314, "learning_rate": 9.105749268225277e-06, "loss": 0.8049, "step": 19845 }, { "epoch": 0.5427149420258149, "grad_norm": 1.2201356887817383, "learning_rate": 9.104867116101913e-06, "loss": 0.5062, "step": 19846 }, { "epoch": 0.5427422883395319, "grad_norm": 1.96077561378479, "learning_rate": 9.103984971000613e-06, "loss": 0.3688, "step": 19847 }, { "epoch": 0.5427696346532488, "grad_norm": 1.3934587240219116, "learning_rate": 9.103102832928297e-06, "loss": 0.5159, "step": 19848 }, { "epoch": 0.5427969809669656, "grad_norm": 1.3561463356018066, "learning_rate": 9.102220701891889e-06, "loss": 0.5054, "step": 19849 }, { "epoch": 0.5428243272806825, "grad_norm": 1.4832873344421387, "learning_rate": 9.101338577898306e-06, "loss": 0.5311, "step": 19850 }, { "epoch": 0.5428516735943995, "grad_norm": 1.2420628070831299, "learning_rate": 9.10045646095447e-06, "loss": 0.4634, "step": 19851 }, { "epoch": 0.5428790199081164, "grad_norm": 1.2504149675369263, "learning_rate": 9.099574351067298e-06, "loss": 0.5088, "step": 19852 }, { "epoch": 0.5429063662218333, "grad_norm": 1.038621187210083, "learning_rate": 9.098692248243713e-06, "loss": 0.3231, "step": 19853 }, { "epoch": 0.5429337125355502, "grad_norm": 1.5458083152770996, "learning_rate": 9.097810152490635e-06, "loss": 0.737, "step": 19854 }, { "epoch": 0.5429610588492672, "grad_norm": 1.529680609703064, "learning_rate": 9.09692806381498e-06, "loss": 0.5117, "step": 19855 }, { "epoch": 0.5429884051629841, "grad_norm": 1.147335171699524, "learning_rate": 9.09604598222367e-06, "loss": 0.4909, "step": 19856 }, { "epoch": 0.5430157514767009, "grad_norm": 1.2696471214294434, "learning_rate": 9.095163907723628e-06, "loss": 0.8051, "step": 19857 }, { "epoch": 0.5430430977904178, "grad_norm": 1.4546016454696655, "learning_rate": 9.094281840321768e-06, "loss": 0.4847, "step": 19858 }, { "epoch": 0.5430704441041347, "grad_norm": 1.4180649518966675, "learning_rate": 9.093399780025013e-06, "loss": 0.3461, "step": 19859 }, { "epoch": 0.5430977904178517, "grad_norm": 1.6003468036651611, "learning_rate": 9.092517726840281e-06, "loss": 0.4061, "step": 19860 }, { "epoch": 0.5431251367315686, "grad_norm": 1.163991928100586, "learning_rate": 9.091635680774492e-06, "loss": 0.4948, "step": 19861 }, { "epoch": 0.5431524830452855, "grad_norm": 1.3820128440856934, "learning_rate": 9.090753641834566e-06, "loss": 0.5048, "step": 19862 }, { "epoch": 0.5431798293590024, "grad_norm": 1.6011574268341064, "learning_rate": 9.089871610027423e-06, "loss": 0.4098, "step": 19863 }, { "epoch": 0.5432071756727194, "grad_norm": 1.4827340841293335, "learning_rate": 9.08898958535998e-06, "loss": 0.3677, "step": 19864 }, { "epoch": 0.5432345219864362, "grad_norm": 1.3493181467056274, "learning_rate": 9.088107567839156e-06, "loss": 0.4972, "step": 19865 }, { "epoch": 0.5432618683001531, "grad_norm": 1.64491868019104, "learning_rate": 9.087225557471871e-06, "loss": 0.3801, "step": 19866 }, { "epoch": 0.54328921461387, "grad_norm": 1.4358415603637695, "learning_rate": 9.086343554265048e-06, "loss": 0.4802, "step": 19867 }, { "epoch": 0.543316560927587, "grad_norm": 1.4002631902694702, "learning_rate": 9.0854615582256e-06, "loss": 0.5358, "step": 19868 }, { "epoch": 0.5433439072413039, "grad_norm": 1.382128357887268, "learning_rate": 9.084579569360448e-06, "loss": 0.5177, "step": 19869 }, { "epoch": 0.5433712535550208, "grad_norm": 1.247457504272461, "learning_rate": 9.083697587676515e-06, "loss": 0.5027, "step": 19870 }, { "epoch": 0.5433985998687377, "grad_norm": 1.1539170742034912, "learning_rate": 9.082815613180715e-06, "loss": 0.516, "step": 19871 }, { "epoch": 0.5434259461824547, "grad_norm": 1.1775720119476318, "learning_rate": 9.081933645879968e-06, "loss": 0.4764, "step": 19872 }, { "epoch": 0.5434532924961715, "grad_norm": 1.2361527681350708, "learning_rate": 9.081051685781197e-06, "loss": 0.4724, "step": 19873 }, { "epoch": 0.5434806388098884, "grad_norm": 1.170084834098816, "learning_rate": 9.080169732891312e-06, "loss": 0.4873, "step": 19874 }, { "epoch": 0.5435079851236053, "grad_norm": 1.3127472400665283, "learning_rate": 9.079287787217238e-06, "loss": 0.4939, "step": 19875 }, { "epoch": 0.5435353314373222, "grad_norm": 1.3560504913330078, "learning_rate": 9.078405848765893e-06, "loss": 0.3984, "step": 19876 }, { "epoch": 0.5435626777510392, "grad_norm": 1.0971192121505737, "learning_rate": 9.077523917544193e-06, "loss": 0.5074, "step": 19877 }, { "epoch": 0.5435900240647561, "grad_norm": 1.34722900390625, "learning_rate": 9.076641993559058e-06, "loss": 0.5037, "step": 19878 }, { "epoch": 0.543617370378473, "grad_norm": 1.3810317516326904, "learning_rate": 9.075760076817409e-06, "loss": 0.5575, "step": 19879 }, { "epoch": 0.5436447166921899, "grad_norm": 1.0366084575653076, "learning_rate": 9.07487816732616e-06, "loss": 0.3412, "step": 19880 }, { "epoch": 0.5436720630059068, "grad_norm": 1.1836949586868286, "learning_rate": 9.073996265092234e-06, "loss": 0.509, "step": 19881 }, { "epoch": 0.5436994093196237, "grad_norm": 1.291717529296875, "learning_rate": 9.073114370122545e-06, "loss": 0.4778, "step": 19882 }, { "epoch": 0.5437267556333406, "grad_norm": 1.146836757659912, "learning_rate": 9.072232482424012e-06, "loss": 0.4572, "step": 19883 }, { "epoch": 0.5437541019470575, "grad_norm": 1.0967316627502441, "learning_rate": 9.071350602003555e-06, "loss": 0.4514, "step": 19884 }, { "epoch": 0.5437814482607745, "grad_norm": 1.4969048500061035, "learning_rate": 9.07046872886809e-06, "loss": 0.4726, "step": 19885 }, { "epoch": 0.5438087945744914, "grad_norm": 1.3294153213500977, "learning_rate": 9.069586863024538e-06, "loss": 0.4759, "step": 19886 }, { "epoch": 0.5438361408882083, "grad_norm": 1.625139594078064, "learning_rate": 9.068705004479815e-06, "loss": 0.3769, "step": 19887 }, { "epoch": 0.5438634872019252, "grad_norm": 1.1701730489730835, "learning_rate": 9.067823153240838e-06, "loss": 0.5033, "step": 19888 }, { "epoch": 0.543890833515642, "grad_norm": 1.2837482690811157, "learning_rate": 9.066941309314528e-06, "loss": 0.4918, "step": 19889 }, { "epoch": 0.543918179829359, "grad_norm": 1.4426902532577515, "learning_rate": 9.0660594727078e-06, "loss": 0.4694, "step": 19890 }, { "epoch": 0.5439455261430759, "grad_norm": 1.2951586246490479, "learning_rate": 9.06517764342757e-06, "loss": 0.4914, "step": 19891 }, { "epoch": 0.5439728724567928, "grad_norm": 1.3038324117660522, "learning_rate": 9.064295821480762e-06, "loss": 0.5061, "step": 19892 }, { "epoch": 0.5440002187705097, "grad_norm": 1.4475466012954712, "learning_rate": 9.063414006874288e-06, "loss": 0.5178, "step": 19893 }, { "epoch": 0.5440275650842267, "grad_norm": 1.5349643230438232, "learning_rate": 9.06253219961507e-06, "loss": 0.4725, "step": 19894 }, { "epoch": 0.5440549113979436, "grad_norm": 1.408012866973877, "learning_rate": 9.06165039971002e-06, "loss": 0.7609, "step": 19895 }, { "epoch": 0.5440822577116605, "grad_norm": 1.2132872343063354, "learning_rate": 9.060768607166058e-06, "loss": 0.4713, "step": 19896 }, { "epoch": 0.5441096040253773, "grad_norm": 1.3304461240768433, "learning_rate": 9.059886821990104e-06, "loss": 0.5365, "step": 19897 }, { "epoch": 0.5441369503390943, "grad_norm": 1.1633988618850708, "learning_rate": 9.059005044189073e-06, "loss": 0.4902, "step": 19898 }, { "epoch": 0.5441642966528112, "grad_norm": 1.2609540224075317, "learning_rate": 9.058123273769882e-06, "loss": 0.4636, "step": 19899 }, { "epoch": 0.5441916429665281, "grad_norm": 1.2349144220352173, "learning_rate": 9.05724151073945e-06, "loss": 0.4967, "step": 19900 }, { "epoch": 0.544218989280245, "grad_norm": 1.2796746492385864, "learning_rate": 9.056359755104692e-06, "loss": 0.4998, "step": 19901 }, { "epoch": 0.544246335593962, "grad_norm": 1.197756052017212, "learning_rate": 9.055478006872529e-06, "loss": 0.4781, "step": 19902 }, { "epoch": 0.5442736819076789, "grad_norm": 1.2396626472473145, "learning_rate": 9.054596266049871e-06, "loss": 0.4896, "step": 19903 }, { "epoch": 0.5443010282213958, "grad_norm": 1.3266023397445679, "learning_rate": 9.053714532643641e-06, "loss": 0.495, "step": 19904 }, { "epoch": 0.5443283745351126, "grad_norm": 1.6330499649047852, "learning_rate": 9.052832806660753e-06, "loss": 0.4887, "step": 19905 }, { "epoch": 0.5443557208488295, "grad_norm": 1.5705209970474243, "learning_rate": 9.051951088108127e-06, "loss": 0.4842, "step": 19906 }, { "epoch": 0.5443830671625465, "grad_norm": 1.2011606693267822, "learning_rate": 9.051069376992678e-06, "loss": 0.4934, "step": 19907 }, { "epoch": 0.5444104134762634, "grad_norm": 1.6486361026763916, "learning_rate": 9.050187673321323e-06, "loss": 0.7632, "step": 19908 }, { "epoch": 0.5444377597899803, "grad_norm": 1.1944276094436646, "learning_rate": 9.049305977100979e-06, "loss": 0.5114, "step": 19909 }, { "epoch": 0.5444651061036972, "grad_norm": 1.138195514678955, "learning_rate": 9.04842428833856e-06, "loss": 0.4807, "step": 19910 }, { "epoch": 0.5444924524174142, "grad_norm": 1.4684727191925049, "learning_rate": 9.047542607040986e-06, "loss": 0.5018, "step": 19911 }, { "epoch": 0.544519798731131, "grad_norm": 1.826273798942566, "learning_rate": 9.04666093321517e-06, "loss": 0.5321, "step": 19912 }, { "epoch": 0.5445471450448479, "grad_norm": 1.569865345954895, "learning_rate": 9.045779266868032e-06, "loss": 0.4922, "step": 19913 }, { "epoch": 0.5445744913585648, "grad_norm": 1.4098260402679443, "learning_rate": 9.044897608006488e-06, "loss": 0.4994, "step": 19914 }, { "epoch": 0.5446018376722818, "grad_norm": 1.3135950565338135, "learning_rate": 9.044015956637452e-06, "loss": 0.5032, "step": 19915 }, { "epoch": 0.5446291839859987, "grad_norm": 1.2128385305404663, "learning_rate": 9.043134312767844e-06, "loss": 0.7769, "step": 19916 }, { "epoch": 0.5446565302997156, "grad_norm": 1.1880464553833008, "learning_rate": 9.042252676404576e-06, "loss": 0.4609, "step": 19917 }, { "epoch": 0.5446838766134325, "grad_norm": 1.269468903541565, "learning_rate": 9.041371047554563e-06, "loss": 0.4969, "step": 19918 }, { "epoch": 0.5447112229271495, "grad_norm": 1.3970015048980713, "learning_rate": 9.04048942622473e-06, "loss": 0.8022, "step": 19919 }, { "epoch": 0.5447385692408663, "grad_norm": 1.268505334854126, "learning_rate": 9.039607812421985e-06, "loss": 0.5198, "step": 19920 }, { "epoch": 0.5447659155545832, "grad_norm": 1.3750767707824707, "learning_rate": 9.038726206153245e-06, "loss": 0.5217, "step": 19921 }, { "epoch": 0.5447932618683001, "grad_norm": 1.335073471069336, "learning_rate": 9.037844607425431e-06, "loss": 0.4937, "step": 19922 }, { "epoch": 0.544820608182017, "grad_norm": 1.2240468263626099, "learning_rate": 9.036963016245452e-06, "loss": 0.4822, "step": 19923 }, { "epoch": 0.544847954495734, "grad_norm": 1.1531779766082764, "learning_rate": 9.036081432620229e-06, "loss": 0.5029, "step": 19924 }, { "epoch": 0.5448753008094509, "grad_norm": 1.2838095426559448, "learning_rate": 9.035199856556676e-06, "loss": 0.7203, "step": 19925 }, { "epoch": 0.5449026471231678, "grad_norm": 1.2603294849395752, "learning_rate": 9.034318288061706e-06, "loss": 0.5005, "step": 19926 }, { "epoch": 0.5449299934368848, "grad_norm": 1.0295213460922241, "learning_rate": 9.033436727142241e-06, "loss": 0.4851, "step": 19927 }, { "epoch": 0.5449573397506016, "grad_norm": 1.1732646226882935, "learning_rate": 9.032555173805192e-06, "loss": 0.5042, "step": 19928 }, { "epoch": 0.5449846860643185, "grad_norm": 1.1509685516357422, "learning_rate": 9.031673628057474e-06, "loss": 0.4533, "step": 19929 }, { "epoch": 0.5450120323780354, "grad_norm": 0.9881243109703064, "learning_rate": 9.030792089906005e-06, "loss": 0.3277, "step": 19930 }, { "epoch": 0.5450393786917523, "grad_norm": 1.6076421737670898, "learning_rate": 9.029910559357696e-06, "loss": 0.5296, "step": 19931 }, { "epoch": 0.5450667250054693, "grad_norm": 1.5767370462417603, "learning_rate": 9.02902903641947e-06, "loss": 0.5038, "step": 19932 }, { "epoch": 0.5450940713191862, "grad_norm": 1.1708394289016724, "learning_rate": 9.028147521098236e-06, "loss": 0.515, "step": 19933 }, { "epoch": 0.5451214176329031, "grad_norm": 1.2550791501998901, "learning_rate": 9.027266013400911e-06, "loss": 0.5241, "step": 19934 }, { "epoch": 0.54514876394662, "grad_norm": 1.4528374671936035, "learning_rate": 9.026384513334413e-06, "loss": 0.4776, "step": 19935 }, { "epoch": 0.5451761102603369, "grad_norm": 1.0841840505599976, "learning_rate": 9.02550302090565e-06, "loss": 0.5019, "step": 19936 }, { "epoch": 0.5452034565740538, "grad_norm": 0.8982940316200256, "learning_rate": 9.024621536121544e-06, "loss": 0.3418, "step": 19937 }, { "epoch": 0.5452308028877707, "grad_norm": 1.4213042259216309, "learning_rate": 9.023740058989008e-06, "loss": 0.3793, "step": 19938 }, { "epoch": 0.5452581492014876, "grad_norm": 1.750732183456421, "learning_rate": 9.022858589514955e-06, "loss": 0.3324, "step": 19939 }, { "epoch": 0.5452854955152046, "grad_norm": 1.173305869102478, "learning_rate": 9.021977127706303e-06, "loss": 0.5116, "step": 19940 }, { "epoch": 0.5453128418289215, "grad_norm": 1.2760981321334839, "learning_rate": 9.021095673569963e-06, "loss": 0.504, "step": 19941 }, { "epoch": 0.5453401881426384, "grad_norm": 1.3494340181350708, "learning_rate": 9.020214227112854e-06, "loss": 0.4796, "step": 19942 }, { "epoch": 0.5453675344563553, "grad_norm": 1.3680555820465088, "learning_rate": 9.019332788341891e-06, "loss": 0.3845, "step": 19943 }, { "epoch": 0.5453948807700721, "grad_norm": 1.1540440320968628, "learning_rate": 9.018451357263985e-06, "loss": 0.5103, "step": 19944 }, { "epoch": 0.5454222270837891, "grad_norm": 1.2964273691177368, "learning_rate": 9.01756993388605e-06, "loss": 0.5014, "step": 19945 }, { "epoch": 0.545449573397506, "grad_norm": 1.1488337516784668, "learning_rate": 9.016688518215003e-06, "loss": 0.4742, "step": 19946 }, { "epoch": 0.5454769197112229, "grad_norm": 1.3616843223571777, "learning_rate": 9.015807110257757e-06, "loss": 0.4954, "step": 19947 }, { "epoch": 0.5455042660249398, "grad_norm": 1.4013168811798096, "learning_rate": 9.014925710021227e-06, "loss": 0.5074, "step": 19948 }, { "epoch": 0.5455316123386568, "grad_norm": 1.2216377258300781, "learning_rate": 9.014044317512329e-06, "loss": 0.4769, "step": 19949 }, { "epoch": 0.5455589586523737, "grad_norm": 1.391000747680664, "learning_rate": 9.013162932737975e-06, "loss": 0.3889, "step": 19950 }, { "epoch": 0.5455863049660906, "grad_norm": 1.3743432760238647, "learning_rate": 9.01228155570508e-06, "loss": 0.3939, "step": 19951 }, { "epoch": 0.5456136512798074, "grad_norm": 1.6478157043457031, "learning_rate": 9.011400186420559e-06, "loss": 0.4022, "step": 19952 }, { "epoch": 0.5456409975935244, "grad_norm": 1.354344367980957, "learning_rate": 9.010518824891325e-06, "loss": 0.496, "step": 19953 }, { "epoch": 0.5456683439072413, "grad_norm": 1.3047794103622437, "learning_rate": 9.009637471124293e-06, "loss": 0.3763, "step": 19954 }, { "epoch": 0.5456956902209582, "grad_norm": 0.9023354649543762, "learning_rate": 9.008756125126374e-06, "loss": 0.3597, "step": 19955 }, { "epoch": 0.5457230365346751, "grad_norm": 1.3701213598251343, "learning_rate": 9.007874786904485e-06, "loss": 0.4707, "step": 19956 }, { "epoch": 0.5457503828483921, "grad_norm": 1.4883179664611816, "learning_rate": 9.006993456465541e-06, "loss": 0.4788, "step": 19957 }, { "epoch": 0.545777729162109, "grad_norm": 1.3637584447860718, "learning_rate": 9.006112133816453e-06, "loss": 0.3854, "step": 19958 }, { "epoch": 0.5458050754758259, "grad_norm": 1.3597973585128784, "learning_rate": 9.005230818964136e-06, "loss": 0.4855, "step": 19959 }, { "epoch": 0.5458324217895427, "grad_norm": 1.0723907947540283, "learning_rate": 9.004349511915502e-06, "loss": 0.5142, "step": 19960 }, { "epoch": 0.5458597681032596, "grad_norm": 1.6532622575759888, "learning_rate": 9.003468212677466e-06, "loss": 0.4774, "step": 19961 }, { "epoch": 0.5458871144169766, "grad_norm": 1.3144407272338867, "learning_rate": 9.002586921256943e-06, "loss": 0.4935, "step": 19962 }, { "epoch": 0.5459144607306935, "grad_norm": 1.2857096195220947, "learning_rate": 9.001705637660842e-06, "loss": 0.4958, "step": 19963 }, { "epoch": 0.5459418070444104, "grad_norm": 1.641502022743225, "learning_rate": 9.000824361896081e-06, "loss": 0.3862, "step": 19964 }, { "epoch": 0.5459691533581273, "grad_norm": 1.4222790002822876, "learning_rate": 8.999943093969574e-06, "loss": 0.5034, "step": 19965 }, { "epoch": 0.5459964996718443, "grad_norm": 1.2154639959335327, "learning_rate": 8.999061833888227e-06, "loss": 0.5119, "step": 19966 }, { "epoch": 0.5460238459855612, "grad_norm": 1.2963405847549438, "learning_rate": 8.998180581658963e-06, "loss": 0.7788, "step": 19967 }, { "epoch": 0.546051192299278, "grad_norm": 1.2876977920532227, "learning_rate": 8.997299337288687e-06, "loss": 0.4949, "step": 19968 }, { "epoch": 0.5460785386129949, "grad_norm": 1.267433524131775, "learning_rate": 8.996418100784317e-06, "loss": 0.5061, "step": 19969 }, { "epoch": 0.5461058849267119, "grad_norm": 1.3135803937911987, "learning_rate": 8.995536872152765e-06, "loss": 0.5101, "step": 19970 }, { "epoch": 0.5461332312404288, "grad_norm": 1.310692548751831, "learning_rate": 8.994655651400943e-06, "loss": 0.482, "step": 19971 }, { "epoch": 0.5461605775541457, "grad_norm": 1.6373237371444702, "learning_rate": 8.993774438535766e-06, "loss": 0.4606, "step": 19972 }, { "epoch": 0.5461879238678626, "grad_norm": 1.1222881078720093, "learning_rate": 8.992893233564142e-06, "loss": 0.5148, "step": 19973 }, { "epoch": 0.5462152701815796, "grad_norm": 1.2802494764328003, "learning_rate": 8.992012036492989e-06, "loss": 0.495, "step": 19974 }, { "epoch": 0.5462426164952965, "grad_norm": 1.4352596998214722, "learning_rate": 8.991130847329219e-06, "loss": 0.5014, "step": 19975 }, { "epoch": 0.5462699628090133, "grad_norm": 1.1178317070007324, "learning_rate": 8.990249666079742e-06, "loss": 0.4796, "step": 19976 }, { "epoch": 0.5462973091227302, "grad_norm": 1.1082879304885864, "learning_rate": 8.989368492751473e-06, "loss": 0.4881, "step": 19977 }, { "epoch": 0.5463246554364471, "grad_norm": 1.0146903991699219, "learning_rate": 8.988487327351325e-06, "loss": 0.3988, "step": 19978 }, { "epoch": 0.5463520017501641, "grad_norm": 1.4001764059066772, "learning_rate": 8.98760616988621e-06, "loss": 0.3747, "step": 19979 }, { "epoch": 0.546379348063881, "grad_norm": 1.167359709739685, "learning_rate": 8.986725020363036e-06, "loss": 0.4906, "step": 19980 }, { "epoch": 0.5464066943775979, "grad_norm": 1.2984026670455933, "learning_rate": 8.985843878788723e-06, "loss": 0.4805, "step": 19981 }, { "epoch": 0.5464340406913148, "grad_norm": 1.3800532817840576, "learning_rate": 8.984962745170177e-06, "loss": 0.3431, "step": 19982 }, { "epoch": 0.5464613870050318, "grad_norm": 1.239319086074829, "learning_rate": 8.98408161951431e-06, "loss": 0.5058, "step": 19983 }, { "epoch": 0.5464887333187486, "grad_norm": 1.443063497543335, "learning_rate": 8.983200501828041e-06, "loss": 0.38, "step": 19984 }, { "epoch": 0.5465160796324655, "grad_norm": 1.3814984560012817, "learning_rate": 8.982319392118274e-06, "loss": 0.5002, "step": 19985 }, { "epoch": 0.5465434259461824, "grad_norm": 1.0806853771209717, "learning_rate": 8.981438290391928e-06, "loss": 0.4658, "step": 19986 }, { "epoch": 0.5465707722598994, "grad_norm": 1.2394338846206665, "learning_rate": 8.980557196655913e-06, "loss": 0.5502, "step": 19987 }, { "epoch": 0.5465981185736163, "grad_norm": 1.2628692388534546, "learning_rate": 8.979676110917136e-06, "loss": 0.5061, "step": 19988 }, { "epoch": 0.5466254648873332, "grad_norm": 1.538720965385437, "learning_rate": 8.978795033182517e-06, "loss": 0.3982, "step": 19989 }, { "epoch": 0.5466528112010501, "grad_norm": 1.127647042274475, "learning_rate": 8.977913963458959e-06, "loss": 0.4757, "step": 19990 }, { "epoch": 0.5466801575147671, "grad_norm": 1.4061362743377686, "learning_rate": 8.97703290175338e-06, "loss": 0.3562, "step": 19991 }, { "epoch": 0.5467075038284839, "grad_norm": 1.4344960451126099, "learning_rate": 8.976151848072694e-06, "loss": 0.4722, "step": 19992 }, { "epoch": 0.5467348501422008, "grad_norm": 1.344974398612976, "learning_rate": 8.975270802423803e-06, "loss": 0.5009, "step": 19993 }, { "epoch": 0.5467621964559177, "grad_norm": 1.7632180452346802, "learning_rate": 8.974389764813627e-06, "loss": 0.7289, "step": 19994 }, { "epoch": 0.5467895427696347, "grad_norm": 1.1245311498641968, "learning_rate": 8.973508735249074e-06, "loss": 0.4814, "step": 19995 }, { "epoch": 0.5468168890833516, "grad_norm": 1.1634525060653687, "learning_rate": 8.972627713737056e-06, "loss": 0.5012, "step": 19996 }, { "epoch": 0.5468442353970685, "grad_norm": 1.3262887001037598, "learning_rate": 8.971746700284484e-06, "loss": 0.4854, "step": 19997 }, { "epoch": 0.5468715817107854, "grad_norm": 1.2175564765930176, "learning_rate": 8.970865694898271e-06, "loss": 0.5044, "step": 19998 }, { "epoch": 0.5468989280245024, "grad_norm": 1.2027208805084229, "learning_rate": 8.969984697585326e-06, "loss": 0.4947, "step": 19999 }, { "epoch": 0.5469262743382192, "grad_norm": 1.2017271518707275, "learning_rate": 8.969103708352563e-06, "loss": 0.4893, "step": 20000 }, { "epoch": 0.5469536206519361, "grad_norm": 1.1477177143096924, "learning_rate": 8.968222727206888e-06, "loss": 0.4944, "step": 20001 }, { "epoch": 0.546980966965653, "grad_norm": 1.431254267692566, "learning_rate": 8.967341754155219e-06, "loss": 0.3788, "step": 20002 }, { "epoch": 0.5470083132793699, "grad_norm": 1.3018126487731934, "learning_rate": 8.966460789204461e-06, "loss": 0.4924, "step": 20003 }, { "epoch": 0.5470356595930869, "grad_norm": 1.9387710094451904, "learning_rate": 8.965579832361525e-06, "loss": 0.4843, "step": 20004 }, { "epoch": 0.5470630059068038, "grad_norm": 1.135142207145691, "learning_rate": 8.96469888363333e-06, "loss": 0.4623, "step": 20005 }, { "epoch": 0.5470903522205207, "grad_norm": 1.3227918148040771, "learning_rate": 8.963817943026776e-06, "loss": 0.5002, "step": 20006 }, { "epoch": 0.5471176985342376, "grad_norm": 1.107962965965271, "learning_rate": 8.96293701054878e-06, "loss": 0.349, "step": 20007 }, { "epoch": 0.5471450448479545, "grad_norm": 1.6894805431365967, "learning_rate": 8.962056086206253e-06, "loss": 0.495, "step": 20008 }, { "epoch": 0.5471723911616714, "grad_norm": 1.4108963012695312, "learning_rate": 8.961175170006102e-06, "loss": 0.4011, "step": 20009 }, { "epoch": 0.5471997374753883, "grad_norm": 1.3035305738449097, "learning_rate": 8.960294261955242e-06, "loss": 0.5067, "step": 20010 }, { "epoch": 0.5472270837891052, "grad_norm": 1.5162829160690308, "learning_rate": 8.959413362060577e-06, "loss": 0.402, "step": 20011 }, { "epoch": 0.5472544301028222, "grad_norm": 1.9351309537887573, "learning_rate": 8.958532470329022e-06, "loss": 0.7713, "step": 20012 }, { "epoch": 0.5472817764165391, "grad_norm": 1.2772190570831299, "learning_rate": 8.957651586767492e-06, "loss": 0.4835, "step": 20013 }, { "epoch": 0.547309122730256, "grad_norm": 1.212339162826538, "learning_rate": 8.956770711382889e-06, "loss": 0.3484, "step": 20014 }, { "epoch": 0.5473364690439728, "grad_norm": 0.914453387260437, "learning_rate": 8.955889844182126e-06, "loss": 0.3758, "step": 20015 }, { "epoch": 0.5473638153576897, "grad_norm": 1.2168490886688232, "learning_rate": 8.955008985172112e-06, "loss": 0.738, "step": 20016 }, { "epoch": 0.5473911616714067, "grad_norm": 1.3649227619171143, "learning_rate": 8.954128134359759e-06, "loss": 0.4891, "step": 20017 }, { "epoch": 0.5474185079851236, "grad_norm": 1.1934555768966675, "learning_rate": 8.953247291751975e-06, "loss": 0.4811, "step": 20018 }, { "epoch": 0.5474458542988405, "grad_norm": 1.2509775161743164, "learning_rate": 8.952366457355675e-06, "loss": 0.5138, "step": 20019 }, { "epoch": 0.5474732006125574, "grad_norm": 1.1929638385772705, "learning_rate": 8.95148563117776e-06, "loss": 0.4768, "step": 20020 }, { "epoch": 0.5475005469262744, "grad_norm": 1.6567118167877197, "learning_rate": 8.950604813225148e-06, "loss": 0.4984, "step": 20021 }, { "epoch": 0.5475278932399913, "grad_norm": 1.800254464149475, "learning_rate": 8.949724003504749e-06, "loss": 0.5059, "step": 20022 }, { "epoch": 0.5475552395537081, "grad_norm": 1.607364296913147, "learning_rate": 8.948843202023466e-06, "loss": 0.5239, "step": 20023 }, { "epoch": 0.547582585867425, "grad_norm": 1.3303653001785278, "learning_rate": 8.947962408788213e-06, "loss": 0.7759, "step": 20024 }, { "epoch": 0.547609932181142, "grad_norm": 1.1395893096923828, "learning_rate": 8.947081623805897e-06, "loss": 0.4821, "step": 20025 }, { "epoch": 0.5476372784948589, "grad_norm": 1.2709542512893677, "learning_rate": 8.946200847083431e-06, "loss": 0.7698, "step": 20026 }, { "epoch": 0.5476646248085758, "grad_norm": 1.6604900360107422, "learning_rate": 8.945320078627724e-06, "loss": 0.5041, "step": 20027 }, { "epoch": 0.5476919711222927, "grad_norm": 1.2575786113739014, "learning_rate": 8.944439318445684e-06, "loss": 0.524, "step": 20028 }, { "epoch": 0.5477193174360097, "grad_norm": 1.1404774188995361, "learning_rate": 8.943558566544218e-06, "loss": 0.5111, "step": 20029 }, { "epoch": 0.5477466637497266, "grad_norm": 1.3537205457687378, "learning_rate": 8.94267782293024e-06, "loss": 0.484, "step": 20030 }, { "epoch": 0.5477740100634434, "grad_norm": 1.2916624546051025, "learning_rate": 8.941797087610657e-06, "loss": 0.5073, "step": 20031 }, { "epoch": 0.5478013563771603, "grad_norm": 1.6652374267578125, "learning_rate": 8.940916360592378e-06, "loss": 0.7816, "step": 20032 }, { "epoch": 0.5478287026908772, "grad_norm": 1.135819435119629, "learning_rate": 8.94003564188231e-06, "loss": 0.4753, "step": 20033 }, { "epoch": 0.5478560490045942, "grad_norm": 1.079626202583313, "learning_rate": 8.939154931487365e-06, "loss": 0.5084, "step": 20034 }, { "epoch": 0.5478833953183111, "grad_norm": 1.486111044883728, "learning_rate": 8.938274229414453e-06, "loss": 0.4644, "step": 20035 }, { "epoch": 0.547910741632028, "grad_norm": 1.2869409322738647, "learning_rate": 8.937393535670478e-06, "loss": 0.4969, "step": 20036 }, { "epoch": 0.547938087945745, "grad_norm": 1.376611351966858, "learning_rate": 8.936512850262355e-06, "loss": 0.4996, "step": 20037 }, { "epoch": 0.5479654342594619, "grad_norm": 1.3652406930923462, "learning_rate": 8.935632173196986e-06, "loss": 0.4072, "step": 20038 }, { "epoch": 0.5479927805731787, "grad_norm": 1.284198522567749, "learning_rate": 8.934751504481284e-06, "loss": 0.5073, "step": 20039 }, { "epoch": 0.5480201268868956, "grad_norm": 1.5069096088409424, "learning_rate": 8.933870844122158e-06, "loss": 0.3879, "step": 20040 }, { "epoch": 0.5480474732006125, "grad_norm": 1.2879552841186523, "learning_rate": 8.932990192126514e-06, "loss": 0.4808, "step": 20041 }, { "epoch": 0.5480748195143295, "grad_norm": 1.6718475818634033, "learning_rate": 8.93210954850126e-06, "loss": 0.4114, "step": 20042 }, { "epoch": 0.5481021658280464, "grad_norm": 1.2950948476791382, "learning_rate": 8.93122891325331e-06, "loss": 0.7995, "step": 20043 }, { "epoch": 0.5481295121417633, "grad_norm": 2.255424737930298, "learning_rate": 8.930348286389566e-06, "loss": 0.4086, "step": 20044 }, { "epoch": 0.5481568584554802, "grad_norm": 1.3639521598815918, "learning_rate": 8.92946766791694e-06, "loss": 0.767, "step": 20045 }, { "epoch": 0.5481842047691972, "grad_norm": 1.095286250114441, "learning_rate": 8.928587057842339e-06, "loss": 0.4961, "step": 20046 }, { "epoch": 0.548211551082914, "grad_norm": 1.3005212545394897, "learning_rate": 8.927706456172667e-06, "loss": 0.4761, "step": 20047 }, { "epoch": 0.5482388973966309, "grad_norm": 1.3785728216171265, "learning_rate": 8.926825862914841e-06, "loss": 0.5252, "step": 20048 }, { "epoch": 0.5482662437103478, "grad_norm": 1.3479379415512085, "learning_rate": 8.925945278075767e-06, "loss": 0.5181, "step": 20049 }, { "epoch": 0.5482935900240647, "grad_norm": 1.4550292491912842, "learning_rate": 8.925064701662344e-06, "loss": 0.52, "step": 20050 }, { "epoch": 0.5483209363377817, "grad_norm": 1.366318702697754, "learning_rate": 8.924184133681485e-06, "loss": 0.5003, "step": 20051 }, { "epoch": 0.5483482826514986, "grad_norm": 1.6363139152526855, "learning_rate": 8.923303574140104e-06, "loss": 0.4861, "step": 20052 }, { "epoch": 0.5483756289652155, "grad_norm": 1.1904678344726562, "learning_rate": 8.922423023045099e-06, "loss": 0.5007, "step": 20053 }, { "epoch": 0.5484029752789324, "grad_norm": 1.5662920475006104, "learning_rate": 8.921542480403385e-06, "loss": 0.5068, "step": 20054 }, { "epoch": 0.5484303215926493, "grad_norm": 1.9648624658584595, "learning_rate": 8.920661946221863e-06, "loss": 0.4901, "step": 20055 }, { "epoch": 0.5484576679063662, "grad_norm": 1.7180792093276978, "learning_rate": 8.919781420507447e-06, "loss": 0.496, "step": 20056 }, { "epoch": 0.5484850142200831, "grad_norm": 1.2219040393829346, "learning_rate": 8.918900903267043e-06, "loss": 0.4836, "step": 20057 }, { "epoch": 0.5485123605338, "grad_norm": 1.0657758712768555, "learning_rate": 8.918020394507553e-06, "loss": 0.3266, "step": 20058 }, { "epoch": 0.548539706847517, "grad_norm": 1.290044903755188, "learning_rate": 8.917139894235893e-06, "loss": 0.5252, "step": 20059 }, { "epoch": 0.5485670531612339, "grad_norm": 1.1887052059173584, "learning_rate": 8.916259402458963e-06, "loss": 0.5061, "step": 20060 }, { "epoch": 0.5485943994749508, "grad_norm": 1.3425779342651367, "learning_rate": 8.915378919183672e-06, "loss": 0.4984, "step": 20061 }, { "epoch": 0.5486217457886677, "grad_norm": 1.2024863958358765, "learning_rate": 8.91449844441693e-06, "loss": 0.4831, "step": 20062 }, { "epoch": 0.5486490921023846, "grad_norm": 1.2599557638168335, "learning_rate": 8.913617978165644e-06, "loss": 0.4748, "step": 20063 }, { "epoch": 0.5486764384161015, "grad_norm": 1.4777368307113647, "learning_rate": 8.912737520436716e-06, "loss": 0.5123, "step": 20064 }, { "epoch": 0.5487037847298184, "grad_norm": 1.2138011455535889, "learning_rate": 8.911857071237057e-06, "loss": 0.4756, "step": 20065 }, { "epoch": 0.5487311310435353, "grad_norm": 1.2390575408935547, "learning_rate": 8.910976630573573e-06, "loss": 0.4688, "step": 20066 }, { "epoch": 0.5487584773572523, "grad_norm": 1.3044072389602661, "learning_rate": 8.910096198453172e-06, "loss": 0.485, "step": 20067 }, { "epoch": 0.5487858236709692, "grad_norm": 1.458486795425415, "learning_rate": 8.909215774882759e-06, "loss": 0.5186, "step": 20068 }, { "epoch": 0.5488131699846861, "grad_norm": 1.2183595895767212, "learning_rate": 8.90833535986924e-06, "loss": 0.4919, "step": 20069 }, { "epoch": 0.548840516298403, "grad_norm": 1.160224199295044, "learning_rate": 8.907454953419526e-06, "loss": 0.4556, "step": 20070 }, { "epoch": 0.5488678626121198, "grad_norm": 1.5376578569412231, "learning_rate": 8.906574555540519e-06, "loss": 0.3909, "step": 20071 }, { "epoch": 0.5488952089258368, "grad_norm": 1.3922405242919922, "learning_rate": 8.905694166239124e-06, "loss": 0.5116, "step": 20072 }, { "epoch": 0.5489225552395537, "grad_norm": 1.5105048418045044, "learning_rate": 8.904813785522255e-06, "loss": 0.4568, "step": 20073 }, { "epoch": 0.5489499015532706, "grad_norm": 1.5867798328399658, "learning_rate": 8.90393341339681e-06, "loss": 0.4686, "step": 20074 }, { "epoch": 0.5489772478669875, "grad_norm": 3.6372716426849365, "learning_rate": 8.903053049869701e-06, "loss": 0.4775, "step": 20075 }, { "epoch": 0.5490045941807045, "grad_norm": 1.1974725723266602, "learning_rate": 8.90217269494783e-06, "loss": 0.4706, "step": 20076 }, { "epoch": 0.5490319404944214, "grad_norm": 1.428413987159729, "learning_rate": 8.901292348638106e-06, "loss": 0.5134, "step": 20077 }, { "epoch": 0.5490592868081383, "grad_norm": 1.2771985530853271, "learning_rate": 8.900412010947437e-06, "loss": 0.7583, "step": 20078 }, { "epoch": 0.5490866331218551, "grad_norm": 1.359409213066101, "learning_rate": 8.899531681882724e-06, "loss": 0.5085, "step": 20079 }, { "epoch": 0.549113979435572, "grad_norm": 1.3634274005889893, "learning_rate": 8.898651361450876e-06, "loss": 0.4788, "step": 20080 }, { "epoch": 0.549141325749289, "grad_norm": 1.1962103843688965, "learning_rate": 8.897771049658796e-06, "loss": 0.4892, "step": 20081 }, { "epoch": 0.5491686720630059, "grad_norm": 1.4083685874938965, "learning_rate": 8.896890746513393e-06, "loss": 0.5022, "step": 20082 }, { "epoch": 0.5491960183767228, "grad_norm": 1.2884100675582886, "learning_rate": 8.896010452021573e-06, "loss": 0.479, "step": 20083 }, { "epoch": 0.5492233646904398, "grad_norm": 1.2744511365890503, "learning_rate": 8.895130166190244e-06, "loss": 0.4574, "step": 20084 }, { "epoch": 0.5492507110041567, "grad_norm": 1.626252293586731, "learning_rate": 8.894249889026302e-06, "loss": 0.4133, "step": 20085 }, { "epoch": 0.5492780573178736, "grad_norm": 1.197357416152954, "learning_rate": 8.893369620536661e-06, "loss": 0.5182, "step": 20086 }, { "epoch": 0.5493054036315904, "grad_norm": 1.137939214706421, "learning_rate": 8.892489360728223e-06, "loss": 0.3751, "step": 20087 }, { "epoch": 0.5493327499453073, "grad_norm": 1.4266616106033325, "learning_rate": 8.891609109607892e-06, "loss": 0.4692, "step": 20088 }, { "epoch": 0.5493600962590243, "grad_norm": 1.256091833114624, "learning_rate": 8.890728867182581e-06, "loss": 0.5109, "step": 20089 }, { "epoch": 0.5493874425727412, "grad_norm": 1.1594849824905396, "learning_rate": 8.889848633459186e-06, "loss": 0.3749, "step": 20090 }, { "epoch": 0.5494147888864581, "grad_norm": 1.4853843450546265, "learning_rate": 8.888968408444615e-06, "loss": 0.4749, "step": 20091 }, { "epoch": 0.549442135200175, "grad_norm": 1.578598141670227, "learning_rate": 8.888088192145776e-06, "loss": 0.4769, "step": 20092 }, { "epoch": 0.549469481513892, "grad_norm": 1.433201551437378, "learning_rate": 8.887207984569572e-06, "loss": 0.4719, "step": 20093 }, { "epoch": 0.5494968278276089, "grad_norm": 1.7007390260696411, "learning_rate": 8.886327785722907e-06, "loss": 0.4646, "step": 20094 }, { "epoch": 0.5495241741413257, "grad_norm": 1.4541308879852295, "learning_rate": 8.885447595612689e-06, "loss": 0.5287, "step": 20095 }, { "epoch": 0.5495515204550426, "grad_norm": 1.2803242206573486, "learning_rate": 8.884567414245821e-06, "loss": 0.4936, "step": 20096 }, { "epoch": 0.5495788667687596, "grad_norm": 1.2907600402832031, "learning_rate": 8.883687241629208e-06, "loss": 0.4969, "step": 20097 }, { "epoch": 0.5496062130824765, "grad_norm": 1.5704162120819092, "learning_rate": 8.882807077769752e-06, "loss": 0.3965, "step": 20098 }, { "epoch": 0.5496335593961934, "grad_norm": 1.2807432413101196, "learning_rate": 8.88192692267436e-06, "loss": 0.5244, "step": 20099 }, { "epoch": 0.5496609057099103, "grad_norm": 1.362302303314209, "learning_rate": 8.88104677634994e-06, "loss": 0.5046, "step": 20100 }, { "epoch": 0.5496882520236273, "grad_norm": 1.1095502376556396, "learning_rate": 8.880166638803391e-06, "loss": 0.4785, "step": 20101 }, { "epoch": 0.5497155983373442, "grad_norm": 1.219719409942627, "learning_rate": 8.879286510041621e-06, "loss": 0.5021, "step": 20102 }, { "epoch": 0.549742944651061, "grad_norm": 1.312921166419983, "learning_rate": 8.87840639007153e-06, "loss": 0.472, "step": 20103 }, { "epoch": 0.5497702909647779, "grad_norm": 1.3095616102218628, "learning_rate": 8.877526278900027e-06, "loss": 0.5157, "step": 20104 }, { "epoch": 0.5497976372784948, "grad_norm": 1.2504042387008667, "learning_rate": 8.876646176534016e-06, "loss": 0.5048, "step": 20105 }, { "epoch": 0.5498249835922118, "grad_norm": 1.364786982536316, "learning_rate": 8.875766082980397e-06, "loss": 0.5369, "step": 20106 }, { "epoch": 0.5498523299059287, "grad_norm": 1.391261100769043, "learning_rate": 8.874885998246076e-06, "loss": 0.4895, "step": 20107 }, { "epoch": 0.5498796762196456, "grad_norm": 1.2109779119491577, "learning_rate": 8.874005922337961e-06, "loss": 0.4545, "step": 20108 }, { "epoch": 0.5499070225333625, "grad_norm": 1.2154711484909058, "learning_rate": 8.87312585526295e-06, "loss": 0.4764, "step": 20109 }, { "epoch": 0.5499343688470795, "grad_norm": 1.2510476112365723, "learning_rate": 8.872245797027952e-06, "loss": 0.7516, "step": 20110 }, { "epoch": 0.5499617151607963, "grad_norm": 1.3883942365646362, "learning_rate": 8.871365747639865e-06, "loss": 0.4857, "step": 20111 }, { "epoch": 0.5499890614745132, "grad_norm": 1.4385186433792114, "learning_rate": 8.870485707105598e-06, "loss": 0.4984, "step": 20112 }, { "epoch": 0.5500164077882301, "grad_norm": 1.3160604238510132, "learning_rate": 8.869605675432054e-06, "loss": 0.5107, "step": 20113 }, { "epoch": 0.5500437541019471, "grad_norm": 1.461450219154358, "learning_rate": 8.868725652626133e-06, "loss": 0.4987, "step": 20114 }, { "epoch": 0.550071100415664, "grad_norm": 1.1993626356124878, "learning_rate": 8.867845638694743e-06, "loss": 0.4678, "step": 20115 }, { "epoch": 0.5500984467293809, "grad_norm": 1.2668925523757935, "learning_rate": 8.866965633644784e-06, "loss": 0.468, "step": 20116 }, { "epoch": 0.5501257930430978, "grad_norm": 1.2603693008422852, "learning_rate": 8.866085637483161e-06, "loss": 0.7527, "step": 20117 }, { "epoch": 0.5501531393568146, "grad_norm": 1.1697548627853394, "learning_rate": 8.865205650216778e-06, "loss": 0.5029, "step": 20118 }, { "epoch": 0.5501804856705316, "grad_norm": 2.6837925910949707, "learning_rate": 8.86432567185254e-06, "loss": 0.4924, "step": 20119 }, { "epoch": 0.5502078319842485, "grad_norm": 1.4649511575698853, "learning_rate": 8.863445702397344e-06, "loss": 0.3808, "step": 20120 }, { "epoch": 0.5502351782979654, "grad_norm": 1.3394653797149658, "learning_rate": 8.862565741858094e-06, "loss": 0.5069, "step": 20121 }, { "epoch": 0.5502625246116823, "grad_norm": 1.274511456489563, "learning_rate": 8.861685790241701e-06, "loss": 0.3541, "step": 20122 }, { "epoch": 0.5502898709253993, "grad_norm": 1.2494747638702393, "learning_rate": 8.860805847555059e-06, "loss": 0.5097, "step": 20123 }, { "epoch": 0.5503172172391162, "grad_norm": 1.3105437755584717, "learning_rate": 8.859925913805076e-06, "loss": 0.502, "step": 20124 }, { "epoch": 0.5503445635528331, "grad_norm": 1.3209680318832397, "learning_rate": 8.859045988998652e-06, "loss": 0.4884, "step": 20125 }, { "epoch": 0.5503719098665499, "grad_norm": 1.3474735021591187, "learning_rate": 8.85816607314269e-06, "loss": 0.4817, "step": 20126 }, { "epoch": 0.5503992561802669, "grad_norm": 1.2225381135940552, "learning_rate": 8.857286166244098e-06, "loss": 0.4887, "step": 20127 }, { "epoch": 0.5504266024939838, "grad_norm": 1.1497911214828491, "learning_rate": 8.856406268309772e-06, "loss": 0.4893, "step": 20128 }, { "epoch": 0.5504539488077007, "grad_norm": 1.2954500913619995, "learning_rate": 8.855526379346615e-06, "loss": 0.5303, "step": 20129 }, { "epoch": 0.5504812951214176, "grad_norm": 1.393650770187378, "learning_rate": 8.854646499361536e-06, "loss": 0.4246, "step": 20130 }, { "epoch": 0.5505086414351346, "grad_norm": 1.5704915523529053, "learning_rate": 8.85376662836143e-06, "loss": 0.4819, "step": 20131 }, { "epoch": 0.5505359877488515, "grad_norm": 1.6453732252120972, "learning_rate": 8.852886766353204e-06, "loss": 0.3823, "step": 20132 }, { "epoch": 0.5505633340625684, "grad_norm": 1.2653329372406006, "learning_rate": 8.852006913343756e-06, "loss": 0.7638, "step": 20133 }, { "epoch": 0.5505906803762852, "grad_norm": 1.0802987813949585, "learning_rate": 8.85112706933999e-06, "loss": 0.492, "step": 20134 }, { "epoch": 0.5506180266900021, "grad_norm": 1.4354193210601807, "learning_rate": 8.850247234348813e-06, "loss": 0.408, "step": 20135 }, { "epoch": 0.5506453730037191, "grad_norm": 1.2161446809768677, "learning_rate": 8.84936740837712e-06, "loss": 0.49, "step": 20136 }, { "epoch": 0.550672719317436, "grad_norm": 1.5981268882751465, "learning_rate": 8.848487591431815e-06, "loss": 0.513, "step": 20137 }, { "epoch": 0.5507000656311529, "grad_norm": 1.22757089138031, "learning_rate": 8.847607783519804e-06, "loss": 0.4984, "step": 20138 }, { "epoch": 0.5507274119448698, "grad_norm": 1.5181704759597778, "learning_rate": 8.846727984647983e-06, "loss": 0.7777, "step": 20139 }, { "epoch": 0.5507547582585868, "grad_norm": 1.3394560813903809, "learning_rate": 8.845848194823257e-06, "loss": 0.5066, "step": 20140 }, { "epoch": 0.5507821045723037, "grad_norm": 1.4891356229782104, "learning_rate": 8.844968414052529e-06, "loss": 0.4984, "step": 20141 }, { "epoch": 0.5508094508860205, "grad_norm": 1.3730233907699585, "learning_rate": 8.844088642342695e-06, "loss": 0.3404, "step": 20142 }, { "epoch": 0.5508367971997374, "grad_norm": 1.13801908493042, "learning_rate": 8.843208879700664e-06, "loss": 0.8121, "step": 20143 }, { "epoch": 0.5508641435134544, "grad_norm": 1.385152816772461, "learning_rate": 8.84232912613333e-06, "loss": 0.484, "step": 20144 }, { "epoch": 0.5508914898271713, "grad_norm": 1.2261734008789062, "learning_rate": 8.841449381647602e-06, "loss": 0.5284, "step": 20145 }, { "epoch": 0.5509188361408882, "grad_norm": 1.3512914180755615, "learning_rate": 8.840569646250375e-06, "loss": 0.372, "step": 20146 }, { "epoch": 0.5509461824546051, "grad_norm": 1.9885269403457642, "learning_rate": 8.839689919948555e-06, "loss": 0.5001, "step": 20147 }, { "epoch": 0.5509735287683221, "grad_norm": 2.3734381198883057, "learning_rate": 8.83881020274904e-06, "loss": 0.513, "step": 20148 }, { "epoch": 0.551000875082039, "grad_norm": 1.3525962829589844, "learning_rate": 8.837930494658731e-06, "loss": 0.5358, "step": 20149 }, { "epoch": 0.5510282213957558, "grad_norm": 1.246677279472351, "learning_rate": 8.837050795684531e-06, "loss": 0.4959, "step": 20150 }, { "epoch": 0.5510555677094727, "grad_norm": 1.9732969999313354, "learning_rate": 8.83617110583334e-06, "loss": 0.4679, "step": 20151 }, { "epoch": 0.5510829140231897, "grad_norm": 1.300321340560913, "learning_rate": 8.83529142511206e-06, "loss": 0.4665, "step": 20152 }, { "epoch": 0.5511102603369066, "grad_norm": 1.134036898612976, "learning_rate": 8.834411753527593e-06, "loss": 0.4883, "step": 20153 }, { "epoch": 0.5511376066506235, "grad_norm": 1.2547297477722168, "learning_rate": 8.833532091086839e-06, "loss": 0.5077, "step": 20154 }, { "epoch": 0.5511649529643404, "grad_norm": 1.1899802684783936, "learning_rate": 8.832652437796692e-06, "loss": 0.5239, "step": 20155 }, { "epoch": 0.5511922992780574, "grad_norm": 1.178700566291809, "learning_rate": 8.831772793664058e-06, "loss": 0.5122, "step": 20156 }, { "epoch": 0.5512196455917743, "grad_norm": 1.6048812866210938, "learning_rate": 8.83089315869584e-06, "loss": 0.3926, "step": 20157 }, { "epoch": 0.5512469919054911, "grad_norm": 1.2974015474319458, "learning_rate": 8.830013532898936e-06, "loss": 0.4864, "step": 20158 }, { "epoch": 0.551274338219208, "grad_norm": 1.8937382698059082, "learning_rate": 8.829133916280246e-06, "loss": 0.5445, "step": 20159 }, { "epoch": 0.5513016845329249, "grad_norm": 1.4031832218170166, "learning_rate": 8.828254308846673e-06, "loss": 0.7469, "step": 20160 }, { "epoch": 0.5513290308466419, "grad_norm": 1.29220449924469, "learning_rate": 8.827374710605111e-06, "loss": 0.4702, "step": 20161 }, { "epoch": 0.5513563771603588, "grad_norm": 1.1920325756072998, "learning_rate": 8.82649512156247e-06, "loss": 0.7777, "step": 20162 }, { "epoch": 0.5513837234740757, "grad_norm": 1.4253178834915161, "learning_rate": 8.82561554172564e-06, "loss": 0.525, "step": 20163 }, { "epoch": 0.5514110697877926, "grad_norm": 1.381027102470398, "learning_rate": 8.824735971101525e-06, "loss": 0.3947, "step": 20164 }, { "epoch": 0.5514384161015096, "grad_norm": 1.1703944206237793, "learning_rate": 8.823856409697031e-06, "loss": 0.5057, "step": 20165 }, { "epoch": 0.5514657624152264, "grad_norm": 1.159216046333313, "learning_rate": 8.822976857519047e-06, "loss": 0.5113, "step": 20166 }, { "epoch": 0.5514931087289433, "grad_norm": 1.142684817314148, "learning_rate": 8.822097314574482e-06, "loss": 0.5079, "step": 20167 }, { "epoch": 0.5515204550426602, "grad_norm": 1.3007898330688477, "learning_rate": 8.821217780870229e-06, "loss": 0.504, "step": 20168 }, { "epoch": 0.5515478013563772, "grad_norm": 1.2498464584350586, "learning_rate": 8.820338256413192e-06, "loss": 0.4986, "step": 20169 }, { "epoch": 0.5515751476700941, "grad_norm": 1.4445799589157104, "learning_rate": 8.819458741210271e-06, "loss": 0.491, "step": 20170 }, { "epoch": 0.551602493983811, "grad_norm": 1.3656377792358398, "learning_rate": 8.818579235268361e-06, "loss": 0.4856, "step": 20171 }, { "epoch": 0.5516298402975279, "grad_norm": 1.3764265775680542, "learning_rate": 8.817699738594366e-06, "loss": 0.509, "step": 20172 }, { "epoch": 0.5516571866112449, "grad_norm": 1.3751039505004883, "learning_rate": 8.816820251195184e-06, "loss": 0.3979, "step": 20173 }, { "epoch": 0.5516845329249617, "grad_norm": 1.0671049356460571, "learning_rate": 8.815940773077712e-06, "loss": 0.4707, "step": 20174 }, { "epoch": 0.5517118792386786, "grad_norm": 1.1521940231323242, "learning_rate": 8.815061304248856e-06, "loss": 0.4865, "step": 20175 }, { "epoch": 0.5517392255523955, "grad_norm": 1.0956555604934692, "learning_rate": 8.814181844715507e-06, "loss": 0.4819, "step": 20176 }, { "epoch": 0.5517665718661124, "grad_norm": 1.113568902015686, "learning_rate": 8.813302394484568e-06, "loss": 0.481, "step": 20177 }, { "epoch": 0.5517939181798294, "grad_norm": 1.4313949346542358, "learning_rate": 8.812422953562938e-06, "loss": 0.4987, "step": 20178 }, { "epoch": 0.5518212644935463, "grad_norm": 1.4191038608551025, "learning_rate": 8.811543521957516e-06, "loss": 0.4996, "step": 20179 }, { "epoch": 0.5518486108072632, "grad_norm": 1.1982009410858154, "learning_rate": 8.810664099675202e-06, "loss": 0.4996, "step": 20180 }, { "epoch": 0.5518759571209801, "grad_norm": 1.5912364721298218, "learning_rate": 8.809784686722891e-06, "loss": 0.4185, "step": 20181 }, { "epoch": 0.551903303434697, "grad_norm": 1.2422893047332764, "learning_rate": 8.808905283107485e-06, "loss": 0.4963, "step": 20182 }, { "epoch": 0.5519306497484139, "grad_norm": 1.1086318492889404, "learning_rate": 8.808025888835883e-06, "loss": 0.4973, "step": 20183 }, { "epoch": 0.5519579960621308, "grad_norm": 1.106225609779358, "learning_rate": 8.80714650391498e-06, "loss": 0.4894, "step": 20184 }, { "epoch": 0.5519853423758477, "grad_norm": 1.151490330696106, "learning_rate": 8.806267128351679e-06, "loss": 0.4589, "step": 20185 }, { "epoch": 0.5520126886895647, "grad_norm": 1.2401909828186035, "learning_rate": 8.805387762152877e-06, "loss": 0.4895, "step": 20186 }, { "epoch": 0.5520400350032816, "grad_norm": 1.4188880920410156, "learning_rate": 8.804508405325469e-06, "loss": 0.4611, "step": 20187 }, { "epoch": 0.5520673813169985, "grad_norm": 1.3742221593856812, "learning_rate": 8.80362905787636e-06, "loss": 0.7218, "step": 20188 }, { "epoch": 0.5520947276307154, "grad_norm": 1.3833048343658447, "learning_rate": 8.802749719812446e-06, "loss": 0.7797, "step": 20189 }, { "epoch": 0.5521220739444322, "grad_norm": 1.6372029781341553, "learning_rate": 8.801870391140619e-06, "loss": 0.4924, "step": 20190 }, { "epoch": 0.5521494202581492, "grad_norm": 1.383232593536377, "learning_rate": 8.800991071867782e-06, "loss": 0.5136, "step": 20191 }, { "epoch": 0.5521767665718661, "grad_norm": 1.19548499584198, "learning_rate": 8.800111762000833e-06, "loss": 0.5165, "step": 20192 }, { "epoch": 0.552204112885583, "grad_norm": 1.0066958665847778, "learning_rate": 8.79923246154667e-06, "loss": 0.3634, "step": 20193 }, { "epoch": 0.5522314591993, "grad_norm": 1.3933885097503662, "learning_rate": 8.798353170512188e-06, "loss": 0.515, "step": 20194 }, { "epoch": 0.5522588055130169, "grad_norm": 1.4966654777526855, "learning_rate": 8.797473888904291e-06, "loss": 0.4989, "step": 20195 }, { "epoch": 0.5522861518267338, "grad_norm": 1.7085710763931274, "learning_rate": 8.79659461672987e-06, "loss": 0.5282, "step": 20196 }, { "epoch": 0.5523134981404507, "grad_norm": 1.2569165229797363, "learning_rate": 8.795715353995827e-06, "loss": 0.3681, "step": 20197 }, { "epoch": 0.5523408444541675, "grad_norm": 1.3250932693481445, "learning_rate": 8.794836100709057e-06, "loss": 0.516, "step": 20198 }, { "epoch": 0.5523681907678845, "grad_norm": 1.2799803018569946, "learning_rate": 8.793956856876458e-06, "loss": 0.4737, "step": 20199 }, { "epoch": 0.5523955370816014, "grad_norm": 1.1273319721221924, "learning_rate": 8.793077622504933e-06, "loss": 0.4881, "step": 20200 }, { "epoch": 0.5524228833953183, "grad_norm": 1.2949246168136597, "learning_rate": 8.79219839760137e-06, "loss": 0.506, "step": 20201 }, { "epoch": 0.5524502297090352, "grad_norm": 1.3029643297195435, "learning_rate": 8.791319182172669e-06, "loss": 0.5064, "step": 20202 }, { "epoch": 0.5524775760227522, "grad_norm": 1.0851809978485107, "learning_rate": 8.790439976225732e-06, "loss": 0.4771, "step": 20203 }, { "epoch": 0.5525049223364691, "grad_norm": 1.140760064125061, "learning_rate": 8.789560779767452e-06, "loss": 0.4691, "step": 20204 }, { "epoch": 0.552532268650186, "grad_norm": 1.2735010385513306, "learning_rate": 8.78868159280473e-06, "loss": 0.4953, "step": 20205 }, { "epoch": 0.5525596149639028, "grad_norm": 1.123355746269226, "learning_rate": 8.787802415344457e-06, "loss": 0.4786, "step": 20206 }, { "epoch": 0.5525869612776197, "grad_norm": 1.6219249963760376, "learning_rate": 8.786923247393531e-06, "loss": 0.5, "step": 20207 }, { "epoch": 0.5526143075913367, "grad_norm": 1.0842130184173584, "learning_rate": 8.786044088958855e-06, "loss": 0.5082, "step": 20208 }, { "epoch": 0.5526416539050536, "grad_norm": 1.2732741832733154, "learning_rate": 8.785164940047317e-06, "loss": 0.502, "step": 20209 }, { "epoch": 0.5526690002187705, "grad_norm": 1.1442458629608154, "learning_rate": 8.784285800665824e-06, "loss": 0.4709, "step": 20210 }, { "epoch": 0.5526963465324874, "grad_norm": 1.4798775911331177, "learning_rate": 8.783406670821262e-06, "loss": 0.8233, "step": 20211 }, { "epoch": 0.5527236928462044, "grad_norm": 1.3602584600448608, "learning_rate": 8.782527550520533e-06, "loss": 0.4872, "step": 20212 }, { "epoch": 0.5527510391599213, "grad_norm": 1.2376829385757446, "learning_rate": 8.781648439770535e-06, "loss": 0.4996, "step": 20213 }, { "epoch": 0.5527783854736381, "grad_norm": 1.5283422470092773, "learning_rate": 8.780769338578159e-06, "loss": 0.5135, "step": 20214 }, { "epoch": 0.552805731787355, "grad_norm": 1.2301567792892456, "learning_rate": 8.779890246950306e-06, "loss": 0.7791, "step": 20215 }, { "epoch": 0.552833078101072, "grad_norm": 1.100122094154358, "learning_rate": 8.779011164893871e-06, "loss": 0.489, "step": 20216 }, { "epoch": 0.5528604244147889, "grad_norm": 1.2069380283355713, "learning_rate": 8.778132092415749e-06, "loss": 0.5203, "step": 20217 }, { "epoch": 0.5528877707285058, "grad_norm": 3.0460846424102783, "learning_rate": 8.777253029522838e-06, "loss": 0.4945, "step": 20218 }, { "epoch": 0.5529151170422227, "grad_norm": 1.292036533355713, "learning_rate": 8.776373976222032e-06, "loss": 0.5131, "step": 20219 }, { "epoch": 0.5529424633559397, "grad_norm": 1.9381393194198608, "learning_rate": 8.775494932520228e-06, "loss": 0.5125, "step": 20220 }, { "epoch": 0.5529698096696565, "grad_norm": 1.306855320930481, "learning_rate": 8.774615898424322e-06, "loss": 0.5201, "step": 20221 }, { "epoch": 0.5529971559833734, "grad_norm": 1.2835063934326172, "learning_rate": 8.77373687394121e-06, "loss": 0.5331, "step": 20222 }, { "epoch": 0.5530245022970903, "grad_norm": 1.2400689125061035, "learning_rate": 8.772857859077786e-06, "loss": 0.5149, "step": 20223 }, { "epoch": 0.5530518486108073, "grad_norm": 1.3477553129196167, "learning_rate": 8.771978853840947e-06, "loss": 0.4919, "step": 20224 }, { "epoch": 0.5530791949245242, "grad_norm": 1.1304597854614258, "learning_rate": 8.771099858237591e-06, "loss": 0.4606, "step": 20225 }, { "epoch": 0.5531065412382411, "grad_norm": 1.2225651741027832, "learning_rate": 8.770220872274608e-06, "loss": 0.4642, "step": 20226 }, { "epoch": 0.553133887551958, "grad_norm": 1.3096460103988647, "learning_rate": 8.769341895958896e-06, "loss": 0.5036, "step": 20227 }, { "epoch": 0.553161233865675, "grad_norm": 1.4933396577835083, "learning_rate": 8.76846292929735e-06, "loss": 0.5206, "step": 20228 }, { "epoch": 0.5531885801793918, "grad_norm": 1.4611680507659912, "learning_rate": 8.767583972296866e-06, "loss": 0.7704, "step": 20229 }, { "epoch": 0.5532159264931087, "grad_norm": 1.13397216796875, "learning_rate": 8.76670502496434e-06, "loss": 0.5204, "step": 20230 }, { "epoch": 0.5532432728068256, "grad_norm": 1.0737744569778442, "learning_rate": 8.765826087306665e-06, "loss": 0.5071, "step": 20231 }, { "epoch": 0.5532706191205425, "grad_norm": 1.286730408668518, "learning_rate": 8.76494715933074e-06, "loss": 0.5138, "step": 20232 }, { "epoch": 0.5532979654342595, "grad_norm": 1.5196802616119385, "learning_rate": 8.764068241043451e-06, "loss": 0.4866, "step": 20233 }, { "epoch": 0.5533253117479764, "grad_norm": 1.3835394382476807, "learning_rate": 8.763189332451703e-06, "loss": 0.3737, "step": 20234 }, { "epoch": 0.5533526580616933, "grad_norm": 1.39313542842865, "learning_rate": 8.762310433562386e-06, "loss": 0.5032, "step": 20235 }, { "epoch": 0.5533800043754102, "grad_norm": 1.2458478212356567, "learning_rate": 8.761431544382395e-06, "loss": 0.4777, "step": 20236 }, { "epoch": 0.553407350689127, "grad_norm": 1.312650203704834, "learning_rate": 8.760552664918623e-06, "loss": 0.4066, "step": 20237 }, { "epoch": 0.553434697002844, "grad_norm": 1.3319711685180664, "learning_rate": 8.759673795177971e-06, "loss": 0.4933, "step": 20238 }, { "epoch": 0.5534620433165609, "grad_norm": 1.2669309377670288, "learning_rate": 8.758794935167325e-06, "loss": 0.4891, "step": 20239 }, { "epoch": 0.5534893896302778, "grad_norm": 1.7843410968780518, "learning_rate": 8.757916084893586e-06, "loss": 0.7624, "step": 20240 }, { "epoch": 0.5535167359439948, "grad_norm": 1.3048651218414307, "learning_rate": 8.757037244363644e-06, "loss": 0.5119, "step": 20241 }, { "epoch": 0.5535440822577117, "grad_norm": 1.271399974822998, "learning_rate": 8.756158413584395e-06, "loss": 0.5023, "step": 20242 }, { "epoch": 0.5535714285714286, "grad_norm": 1.1315840482711792, "learning_rate": 8.755279592562735e-06, "loss": 0.485, "step": 20243 }, { "epoch": 0.5535987748851455, "grad_norm": 1.254129409790039, "learning_rate": 8.754400781305556e-06, "loss": 0.5064, "step": 20244 }, { "epoch": 0.5536261211988623, "grad_norm": 1.0948894023895264, "learning_rate": 8.753521979819752e-06, "loss": 0.4936, "step": 20245 }, { "epoch": 0.5536534675125793, "grad_norm": 1.24018132686615, "learning_rate": 8.752643188112217e-06, "loss": 0.4656, "step": 20246 }, { "epoch": 0.5536808138262962, "grad_norm": 2.049196720123291, "learning_rate": 8.751764406189843e-06, "loss": 0.3576, "step": 20247 }, { "epoch": 0.5537081601400131, "grad_norm": 1.2302647829055786, "learning_rate": 8.75088563405953e-06, "loss": 0.5101, "step": 20248 }, { "epoch": 0.55373550645373, "grad_norm": 1.516239047050476, "learning_rate": 8.750006871728165e-06, "loss": 0.3901, "step": 20249 }, { "epoch": 0.553762852767447, "grad_norm": 1.1991851329803467, "learning_rate": 8.749128119202644e-06, "loss": 0.4824, "step": 20250 }, { "epoch": 0.5537901990811639, "grad_norm": 1.3936489820480347, "learning_rate": 8.748249376489863e-06, "loss": 0.4828, "step": 20251 }, { "epoch": 0.5538175453948808, "grad_norm": 1.2451707124710083, "learning_rate": 8.74737064359671e-06, "loss": 0.4763, "step": 20252 }, { "epoch": 0.5538448917085976, "grad_norm": 1.3842542171478271, "learning_rate": 8.746491920530086e-06, "loss": 0.4093, "step": 20253 }, { "epoch": 0.5538722380223146, "grad_norm": 1.3072153329849243, "learning_rate": 8.745613207296877e-06, "loss": 0.5199, "step": 20254 }, { "epoch": 0.5538995843360315, "grad_norm": 1.451209545135498, "learning_rate": 8.74473450390398e-06, "loss": 0.4092, "step": 20255 }, { "epoch": 0.5539269306497484, "grad_norm": 1.3389142751693726, "learning_rate": 8.743855810358289e-06, "loss": 0.4933, "step": 20256 }, { "epoch": 0.5539542769634653, "grad_norm": 1.2506285905838013, "learning_rate": 8.742977126666694e-06, "loss": 0.4941, "step": 20257 }, { "epoch": 0.5539816232771823, "grad_norm": 1.815636157989502, "learning_rate": 8.742098452836089e-06, "loss": 0.533, "step": 20258 }, { "epoch": 0.5540089695908992, "grad_norm": 1.5646346807479858, "learning_rate": 8.741219788873372e-06, "loss": 0.4, "step": 20259 }, { "epoch": 0.5540363159046161, "grad_norm": 1.2760913372039795, "learning_rate": 8.740341134785431e-06, "loss": 0.5348, "step": 20260 }, { "epoch": 0.5540636622183329, "grad_norm": 1.543696403503418, "learning_rate": 8.739462490579156e-06, "loss": 0.5049, "step": 20261 }, { "epoch": 0.5540910085320498, "grad_norm": 1.197255253791809, "learning_rate": 8.738583856261446e-06, "loss": 0.4974, "step": 20262 }, { "epoch": 0.5541183548457668, "grad_norm": 1.1813488006591797, "learning_rate": 8.737705231839187e-06, "loss": 0.5206, "step": 20263 }, { "epoch": 0.5541457011594837, "grad_norm": 1.1831990480422974, "learning_rate": 8.736826617319276e-06, "loss": 0.4506, "step": 20264 }, { "epoch": 0.5541730474732006, "grad_norm": 1.5295369625091553, "learning_rate": 8.735948012708606e-06, "loss": 0.4731, "step": 20265 }, { "epoch": 0.5542003937869175, "grad_norm": 1.0858639478683472, "learning_rate": 8.735069418014067e-06, "loss": 0.4549, "step": 20266 }, { "epoch": 0.5542277401006345, "grad_norm": 1.1926690340042114, "learning_rate": 8.734190833242552e-06, "loss": 0.5038, "step": 20267 }, { "epoch": 0.5542550864143514, "grad_norm": 1.374697208404541, "learning_rate": 8.733312258400956e-06, "loss": 0.5174, "step": 20268 }, { "epoch": 0.5542824327280682, "grad_norm": 1.8590046167373657, "learning_rate": 8.732433693496167e-06, "loss": 0.513, "step": 20269 }, { "epoch": 0.5543097790417851, "grad_norm": 1.5588511228561401, "learning_rate": 8.731555138535081e-06, "loss": 0.4018, "step": 20270 }, { "epoch": 0.5543371253555021, "grad_norm": 1.2978005409240723, "learning_rate": 8.730676593524586e-06, "loss": 0.7828, "step": 20271 }, { "epoch": 0.554364471669219, "grad_norm": 1.1825523376464844, "learning_rate": 8.729798058471574e-06, "loss": 0.3658, "step": 20272 }, { "epoch": 0.5543918179829359, "grad_norm": 1.5120153427124023, "learning_rate": 8.728919533382943e-06, "loss": 0.4753, "step": 20273 }, { "epoch": 0.5544191642966528, "grad_norm": 1.3194900751113892, "learning_rate": 8.728041018265578e-06, "loss": 0.4072, "step": 20274 }, { "epoch": 0.5544465106103698, "grad_norm": 1.226264476776123, "learning_rate": 8.727162513126376e-06, "loss": 0.4712, "step": 20275 }, { "epoch": 0.5544738569240867, "grad_norm": 1.369146466255188, "learning_rate": 8.726284017972222e-06, "loss": 0.5185, "step": 20276 }, { "epoch": 0.5545012032378035, "grad_norm": 0.9928213953971863, "learning_rate": 8.725405532810012e-06, "loss": 0.3733, "step": 20277 }, { "epoch": 0.5545285495515204, "grad_norm": 1.2659324407577515, "learning_rate": 8.72452705764664e-06, "loss": 0.4794, "step": 20278 }, { "epoch": 0.5545558958652373, "grad_norm": 1.520581841468811, "learning_rate": 8.723648592488991e-06, "loss": 0.3846, "step": 20279 }, { "epoch": 0.5545832421789543, "grad_norm": 1.3832039833068848, "learning_rate": 8.722770137343962e-06, "loss": 0.3632, "step": 20280 }, { "epoch": 0.5546105884926712, "grad_norm": 1.002220869064331, "learning_rate": 8.72189169221844e-06, "loss": 0.4878, "step": 20281 }, { "epoch": 0.5546379348063881, "grad_norm": 1.5494027137756348, "learning_rate": 8.721013257119318e-06, "loss": 0.4918, "step": 20282 }, { "epoch": 0.554665281120105, "grad_norm": 1.4383941888809204, "learning_rate": 8.72013483205349e-06, "loss": 0.5193, "step": 20283 }, { "epoch": 0.554692627433822, "grad_norm": 1.2138493061065674, "learning_rate": 8.71925641702784e-06, "loss": 0.4949, "step": 20284 }, { "epoch": 0.5547199737475388, "grad_norm": 1.2600276470184326, "learning_rate": 8.718378012049265e-06, "loss": 0.5128, "step": 20285 }, { "epoch": 0.5547473200612557, "grad_norm": 1.437293529510498, "learning_rate": 8.717499617124656e-06, "loss": 0.7584, "step": 20286 }, { "epoch": 0.5547746663749726, "grad_norm": 1.3801835775375366, "learning_rate": 8.716621232260898e-06, "loss": 0.4903, "step": 20287 }, { "epoch": 0.5548020126886896, "grad_norm": 1.2042202949523926, "learning_rate": 8.715742857464887e-06, "loss": 0.4965, "step": 20288 }, { "epoch": 0.5548293590024065, "grad_norm": 1.5819106101989746, "learning_rate": 8.71486449274351e-06, "loss": 0.5221, "step": 20289 }, { "epoch": 0.5548567053161234, "grad_norm": 1.2309221029281616, "learning_rate": 8.71398613810366e-06, "loss": 0.4018, "step": 20290 }, { "epoch": 0.5548840516298403, "grad_norm": 1.4634202718734741, "learning_rate": 8.71310779355223e-06, "loss": 0.4612, "step": 20291 }, { "epoch": 0.5549113979435573, "grad_norm": 1.5183207988739014, "learning_rate": 8.712229459096103e-06, "loss": 0.4969, "step": 20292 }, { "epoch": 0.5549387442572741, "grad_norm": 1.1559183597564697, "learning_rate": 8.711351134742175e-06, "loss": 0.4768, "step": 20293 }, { "epoch": 0.554966090570991, "grad_norm": 1.2410764694213867, "learning_rate": 8.710472820497335e-06, "loss": 0.499, "step": 20294 }, { "epoch": 0.5549934368847079, "grad_norm": 1.490003228187561, "learning_rate": 8.709594516368477e-06, "loss": 0.4742, "step": 20295 }, { "epoch": 0.5550207831984248, "grad_norm": 1.3697917461395264, "learning_rate": 8.708716222362482e-06, "loss": 0.5062, "step": 20296 }, { "epoch": 0.5550481295121418, "grad_norm": 2.2039783000946045, "learning_rate": 8.707837938486247e-06, "loss": 0.5158, "step": 20297 }, { "epoch": 0.5550754758258587, "grad_norm": 1.4899042844772339, "learning_rate": 8.706959664746657e-06, "loss": 0.4084, "step": 20298 }, { "epoch": 0.5551028221395756, "grad_norm": 1.0374462604522705, "learning_rate": 8.706081401150605e-06, "loss": 0.3337, "step": 20299 }, { "epoch": 0.5551301684532925, "grad_norm": 1.297368049621582, "learning_rate": 8.705203147704983e-06, "loss": 0.4624, "step": 20300 }, { "epoch": 0.5551575147670094, "grad_norm": 1.9576247930526733, "learning_rate": 8.704324904416675e-06, "loss": 0.7612, "step": 20301 }, { "epoch": 0.5551848610807263, "grad_norm": 1.271236538887024, "learning_rate": 8.703446671292572e-06, "loss": 0.4525, "step": 20302 }, { "epoch": 0.5552122073944432, "grad_norm": 1.3021583557128906, "learning_rate": 8.70256844833957e-06, "loss": 0.4683, "step": 20303 }, { "epoch": 0.5552395537081601, "grad_norm": 1.3522104024887085, "learning_rate": 8.701690235564551e-06, "loss": 0.4706, "step": 20304 }, { "epoch": 0.5552669000218771, "grad_norm": 1.270440697669983, "learning_rate": 8.700812032974406e-06, "loss": 0.5104, "step": 20305 }, { "epoch": 0.555294246335594, "grad_norm": 1.2167487144470215, "learning_rate": 8.699933840576025e-06, "loss": 0.5278, "step": 20306 }, { "epoch": 0.5553215926493109, "grad_norm": 1.273540735244751, "learning_rate": 8.699055658376297e-06, "loss": 0.4949, "step": 20307 }, { "epoch": 0.5553489389630278, "grad_norm": 1.127710223197937, "learning_rate": 8.698177486382113e-06, "loss": 0.4825, "step": 20308 }, { "epoch": 0.5553762852767447, "grad_norm": 1.4886488914489746, "learning_rate": 8.697299324600359e-06, "loss": 0.4165, "step": 20309 }, { "epoch": 0.5554036315904616, "grad_norm": 1.5561789274215698, "learning_rate": 8.696421173037928e-06, "loss": 0.5205, "step": 20310 }, { "epoch": 0.5554309779041785, "grad_norm": 1.199210286140442, "learning_rate": 8.695543031701702e-06, "loss": 0.4903, "step": 20311 }, { "epoch": 0.5554583242178954, "grad_norm": 1.243680715560913, "learning_rate": 8.694664900598572e-06, "loss": 0.4783, "step": 20312 }, { "epoch": 0.5554856705316124, "grad_norm": 1.2087076902389526, "learning_rate": 8.693786779735434e-06, "loss": 0.5234, "step": 20313 }, { "epoch": 0.5555130168453293, "grad_norm": 1.2006014585494995, "learning_rate": 8.692908669119168e-06, "loss": 0.4954, "step": 20314 }, { "epoch": 0.5555403631590462, "grad_norm": 1.1520473957061768, "learning_rate": 8.692030568756667e-06, "loss": 0.3348, "step": 20315 }, { "epoch": 0.555567709472763, "grad_norm": 1.227543830871582, "learning_rate": 8.691152478654818e-06, "loss": 0.4678, "step": 20316 }, { "epoch": 0.5555950557864799, "grad_norm": 1.4733389616012573, "learning_rate": 8.690274398820507e-06, "loss": 0.5045, "step": 20317 }, { "epoch": 0.5556224021001969, "grad_norm": 1.4058643579483032, "learning_rate": 8.689396329260629e-06, "loss": 0.3814, "step": 20318 }, { "epoch": 0.5556497484139138, "grad_norm": 1.2418805360794067, "learning_rate": 8.688518269982064e-06, "loss": 0.4996, "step": 20319 }, { "epoch": 0.5556770947276307, "grad_norm": 1.1981067657470703, "learning_rate": 8.687640220991705e-06, "loss": 0.5024, "step": 20320 }, { "epoch": 0.5557044410413476, "grad_norm": 1.4274588823318481, "learning_rate": 8.686762182296443e-06, "loss": 0.5121, "step": 20321 }, { "epoch": 0.5557317873550646, "grad_norm": 1.1265414953231812, "learning_rate": 8.685884153903156e-06, "loss": 0.5053, "step": 20322 }, { "epoch": 0.5557591336687815, "grad_norm": 1.0570399761199951, "learning_rate": 8.68500613581874e-06, "loss": 0.4778, "step": 20323 }, { "epoch": 0.5557864799824983, "grad_norm": 1.2842799425125122, "learning_rate": 8.684128128050084e-06, "loss": 0.4702, "step": 20324 }, { "epoch": 0.5558138262962152, "grad_norm": 1.695328712463379, "learning_rate": 8.68325013060407e-06, "loss": 0.4789, "step": 20325 }, { "epoch": 0.5558411726099322, "grad_norm": 1.169935941696167, "learning_rate": 8.68237214348759e-06, "loss": 0.4759, "step": 20326 }, { "epoch": 0.5558685189236491, "grad_norm": 1.491835355758667, "learning_rate": 8.681494166707527e-06, "loss": 0.503, "step": 20327 }, { "epoch": 0.555895865237366, "grad_norm": 1.169466257095337, "learning_rate": 8.680616200270771e-06, "loss": 0.4771, "step": 20328 }, { "epoch": 0.5559232115510829, "grad_norm": 1.369964838027954, "learning_rate": 8.679738244184213e-06, "loss": 0.4837, "step": 20329 }, { "epoch": 0.5559505578647999, "grad_norm": 1.3102872371673584, "learning_rate": 8.678860298454739e-06, "loss": 0.4664, "step": 20330 }, { "epoch": 0.5559779041785168, "grad_norm": 1.261674165725708, "learning_rate": 8.67798236308923e-06, "loss": 0.7911, "step": 20331 }, { "epoch": 0.5560052504922336, "grad_norm": 1.189694881439209, "learning_rate": 8.677104438094578e-06, "loss": 0.7572, "step": 20332 }, { "epoch": 0.5560325968059505, "grad_norm": 1.4539473056793213, "learning_rate": 8.676226523477667e-06, "loss": 0.4298, "step": 20333 }, { "epoch": 0.5560599431196674, "grad_norm": 1.1272028684616089, "learning_rate": 8.675348619245387e-06, "loss": 0.4942, "step": 20334 }, { "epoch": 0.5560872894333844, "grad_norm": 1.2913190126419067, "learning_rate": 8.674470725404628e-06, "loss": 0.5288, "step": 20335 }, { "epoch": 0.5561146357471013, "grad_norm": 1.0506196022033691, "learning_rate": 8.67359284196227e-06, "loss": 0.4741, "step": 20336 }, { "epoch": 0.5561419820608182, "grad_norm": 1.331151008605957, "learning_rate": 8.672714968925202e-06, "loss": 0.4906, "step": 20337 }, { "epoch": 0.5561693283745351, "grad_norm": 1.1930816173553467, "learning_rate": 8.671837106300314e-06, "loss": 0.4756, "step": 20338 }, { "epoch": 0.5561966746882521, "grad_norm": 1.179757833480835, "learning_rate": 8.670959254094489e-06, "loss": 0.5001, "step": 20339 }, { "epoch": 0.5562240210019689, "grad_norm": 1.1479164361953735, "learning_rate": 8.670081412314615e-06, "loss": 0.4926, "step": 20340 }, { "epoch": 0.5562513673156858, "grad_norm": 1.4045034646987915, "learning_rate": 8.669203580967577e-06, "loss": 0.4929, "step": 20341 }, { "epoch": 0.5562787136294027, "grad_norm": 1.3183701038360596, "learning_rate": 8.668325760060263e-06, "loss": 0.7684, "step": 20342 }, { "epoch": 0.5563060599431197, "grad_norm": 1.4852592945098877, "learning_rate": 8.66744794959956e-06, "loss": 0.4057, "step": 20343 }, { "epoch": 0.5563334062568366, "grad_norm": 1.5991727113723755, "learning_rate": 8.66657014959235e-06, "loss": 0.5116, "step": 20344 }, { "epoch": 0.5563607525705535, "grad_norm": 1.1793848276138306, "learning_rate": 8.665692360045524e-06, "loss": 0.517, "step": 20345 }, { "epoch": 0.5563880988842704, "grad_norm": 1.5730879306793213, "learning_rate": 8.664814580965967e-06, "loss": 0.3997, "step": 20346 }, { "epoch": 0.5564154451979874, "grad_norm": 1.3410964012145996, "learning_rate": 8.66393681236056e-06, "loss": 0.5252, "step": 20347 }, { "epoch": 0.5564427915117042, "grad_norm": 1.5398156642913818, "learning_rate": 8.663059054236199e-06, "loss": 0.5027, "step": 20348 }, { "epoch": 0.5564701378254211, "grad_norm": 1.3903435468673706, "learning_rate": 8.662181306599757e-06, "loss": 0.4968, "step": 20349 }, { "epoch": 0.556497484139138, "grad_norm": 1.4494380950927734, "learning_rate": 8.661303569458129e-06, "loss": 0.4912, "step": 20350 }, { "epoch": 0.556524830452855, "grad_norm": 3.8236680030822754, "learning_rate": 8.660425842818197e-06, "loss": 0.3919, "step": 20351 }, { "epoch": 0.5565521767665719, "grad_norm": 1.242109775543213, "learning_rate": 8.659548126686847e-06, "loss": 0.4961, "step": 20352 }, { "epoch": 0.5565795230802888, "grad_norm": 1.6354334354400635, "learning_rate": 8.658670421070968e-06, "loss": 0.5036, "step": 20353 }, { "epoch": 0.5566068693940057, "grad_norm": 1.2878520488739014, "learning_rate": 8.657792725977438e-06, "loss": 0.4945, "step": 20354 }, { "epoch": 0.5566342157077226, "grad_norm": 1.301775336265564, "learning_rate": 8.656915041413148e-06, "loss": 0.5056, "step": 20355 }, { "epoch": 0.5566615620214395, "grad_norm": 1.3330988883972168, "learning_rate": 8.656037367384982e-06, "loss": 0.4839, "step": 20356 }, { "epoch": 0.5566889083351564, "grad_norm": 1.5950183868408203, "learning_rate": 8.655159703899823e-06, "loss": 0.4552, "step": 20357 }, { "epoch": 0.5567162546488733, "grad_norm": 1.1496706008911133, "learning_rate": 8.654282050964558e-06, "loss": 0.4839, "step": 20358 }, { "epoch": 0.5567436009625902, "grad_norm": 1.318800449371338, "learning_rate": 8.653404408586073e-06, "loss": 0.4794, "step": 20359 }, { "epoch": 0.5567709472763072, "grad_norm": 1.2685078382492065, "learning_rate": 8.652526776771251e-06, "loss": 0.51, "step": 20360 }, { "epoch": 0.5567982935900241, "grad_norm": 1.0711071491241455, "learning_rate": 8.651649155526978e-06, "loss": 0.5018, "step": 20361 }, { "epoch": 0.556825639903741, "grad_norm": 1.2880206108093262, "learning_rate": 8.650771544860136e-06, "loss": 0.4792, "step": 20362 }, { "epoch": 0.5568529862174579, "grad_norm": 1.2443968057632446, "learning_rate": 8.649893944777611e-06, "loss": 0.7533, "step": 20363 }, { "epoch": 0.5568803325311747, "grad_norm": 1.327608585357666, "learning_rate": 8.649016355286291e-06, "loss": 0.5122, "step": 20364 }, { "epoch": 0.5569076788448917, "grad_norm": 1.6632407903671265, "learning_rate": 8.64813877639306e-06, "loss": 0.4908, "step": 20365 }, { "epoch": 0.5569350251586086, "grad_norm": 1.421974539756775, "learning_rate": 8.647261208104793e-06, "loss": 0.4774, "step": 20366 }, { "epoch": 0.5569623714723255, "grad_norm": 1.2159452438354492, "learning_rate": 8.646383650428383e-06, "loss": 0.5006, "step": 20367 }, { "epoch": 0.5569897177860424, "grad_norm": 1.3115875720977783, "learning_rate": 8.645506103370715e-06, "loss": 0.7222, "step": 20368 }, { "epoch": 0.5570170640997594, "grad_norm": 1.4408085346221924, "learning_rate": 8.644628566938669e-06, "loss": 0.5056, "step": 20369 }, { "epoch": 0.5570444104134763, "grad_norm": 1.1665314435958862, "learning_rate": 8.643751041139131e-06, "loss": 0.4823, "step": 20370 }, { "epoch": 0.5570717567271932, "grad_norm": 1.4574027061462402, "learning_rate": 8.642873525978982e-06, "loss": 0.4811, "step": 20371 }, { "epoch": 0.55709910304091, "grad_norm": 1.069926381111145, "learning_rate": 8.64199602146511e-06, "loss": 0.526, "step": 20372 }, { "epoch": 0.557126449354627, "grad_norm": 1.200468897819519, "learning_rate": 8.641118527604397e-06, "loss": 0.4905, "step": 20373 }, { "epoch": 0.5571537956683439, "grad_norm": 1.155733585357666, "learning_rate": 8.640241044403727e-06, "loss": 0.5134, "step": 20374 }, { "epoch": 0.5571811419820608, "grad_norm": 1.2156888246536255, "learning_rate": 8.639363571869982e-06, "loss": 0.4749, "step": 20375 }, { "epoch": 0.5572084882957777, "grad_norm": 1.180197834968567, "learning_rate": 8.638486110010048e-06, "loss": 0.4708, "step": 20376 }, { "epoch": 0.5572358346094947, "grad_norm": 1.1527132987976074, "learning_rate": 8.637608658830805e-06, "loss": 0.4925, "step": 20377 }, { "epoch": 0.5572631809232116, "grad_norm": 1.4342625141143799, "learning_rate": 8.636731218339144e-06, "loss": 0.4806, "step": 20378 }, { "epoch": 0.5572905272369285, "grad_norm": 1.3277697563171387, "learning_rate": 8.635853788541939e-06, "loss": 0.4948, "step": 20379 }, { "epoch": 0.5573178735506453, "grad_norm": 1.1195793151855469, "learning_rate": 8.634976369446077e-06, "loss": 0.4791, "step": 20380 }, { "epoch": 0.5573452198643623, "grad_norm": 1.3356223106384277, "learning_rate": 8.634098961058443e-06, "loss": 0.3699, "step": 20381 }, { "epoch": 0.5573725661780792, "grad_norm": 1.3089896440505981, "learning_rate": 8.633221563385916e-06, "loss": 0.5091, "step": 20382 }, { "epoch": 0.5573999124917961, "grad_norm": 1.2242778539657593, "learning_rate": 8.632344176435383e-06, "loss": 0.7968, "step": 20383 }, { "epoch": 0.557427258805513, "grad_norm": 1.3450173139572144, "learning_rate": 8.631466800213723e-06, "loss": 0.5054, "step": 20384 }, { "epoch": 0.55745460511923, "grad_norm": 1.3726000785827637, "learning_rate": 8.630589434727824e-06, "loss": 0.451, "step": 20385 }, { "epoch": 0.5574819514329469, "grad_norm": 1.5614652633666992, "learning_rate": 8.629712079984563e-06, "loss": 0.5004, "step": 20386 }, { "epoch": 0.5575092977466638, "grad_norm": 1.3805763721466064, "learning_rate": 8.628834735990825e-06, "loss": 0.5185, "step": 20387 }, { "epoch": 0.5575366440603806, "grad_norm": 1.1326173543930054, "learning_rate": 8.627957402753492e-06, "loss": 0.4781, "step": 20388 }, { "epoch": 0.5575639903740975, "grad_norm": 1.3196560144424438, "learning_rate": 8.627080080279451e-06, "loss": 0.5387, "step": 20389 }, { "epoch": 0.5575913366878145, "grad_norm": 1.185416340827942, "learning_rate": 8.626202768575578e-06, "loss": 0.5132, "step": 20390 }, { "epoch": 0.5576186830015314, "grad_norm": 1.121806025505066, "learning_rate": 8.625325467648759e-06, "loss": 0.4893, "step": 20391 }, { "epoch": 0.5576460293152483, "grad_norm": 1.0993270874023438, "learning_rate": 8.624448177505872e-06, "loss": 0.4807, "step": 20392 }, { "epoch": 0.5576733756289652, "grad_norm": 1.145739197731018, "learning_rate": 8.623570898153803e-06, "loss": 0.4928, "step": 20393 }, { "epoch": 0.5577007219426822, "grad_norm": 1.350216269493103, "learning_rate": 8.622693629599434e-06, "loss": 0.5339, "step": 20394 }, { "epoch": 0.5577280682563991, "grad_norm": 1.362094759941101, "learning_rate": 8.621816371849646e-06, "loss": 0.5149, "step": 20395 }, { "epoch": 0.5577554145701159, "grad_norm": 2.1603167057037354, "learning_rate": 8.62093912491132e-06, "loss": 0.4894, "step": 20396 }, { "epoch": 0.5577827608838328, "grad_norm": 1.4956421852111816, "learning_rate": 8.620061888791337e-06, "loss": 0.4961, "step": 20397 }, { "epoch": 0.5578101071975498, "grad_norm": 1.581681728363037, "learning_rate": 8.619184663496581e-06, "loss": 0.3651, "step": 20398 }, { "epoch": 0.5578374535112667, "grad_norm": 1.2364537715911865, "learning_rate": 8.618307449033935e-06, "loss": 0.3896, "step": 20399 }, { "epoch": 0.5578647998249836, "grad_norm": 1.2016146183013916, "learning_rate": 8.617430245410278e-06, "loss": 0.5147, "step": 20400 }, { "epoch": 0.5578921461387005, "grad_norm": 1.1239666938781738, "learning_rate": 8.61655305263249e-06, "loss": 0.7668, "step": 20401 }, { "epoch": 0.5579194924524175, "grad_norm": 1.3665540218353271, "learning_rate": 8.615675870707453e-06, "loss": 0.5027, "step": 20402 }, { "epoch": 0.5579468387661344, "grad_norm": 1.2508858442306519, "learning_rate": 8.614798699642052e-06, "loss": 0.4657, "step": 20403 }, { "epoch": 0.5579741850798512, "grad_norm": 1.2715587615966797, "learning_rate": 8.613921539443163e-06, "loss": 0.5039, "step": 20404 }, { "epoch": 0.5580015313935681, "grad_norm": 1.3661092519760132, "learning_rate": 8.61304439011767e-06, "loss": 0.5044, "step": 20405 }, { "epoch": 0.558028877707285, "grad_norm": 2.127656936645508, "learning_rate": 8.612167251672454e-06, "loss": 0.4971, "step": 20406 }, { "epoch": 0.558056224021002, "grad_norm": 1.2025072574615479, "learning_rate": 8.611290124114394e-06, "loss": 0.5326, "step": 20407 }, { "epoch": 0.5580835703347189, "grad_norm": 1.079784631729126, "learning_rate": 8.610413007450375e-06, "loss": 0.4839, "step": 20408 }, { "epoch": 0.5581109166484358, "grad_norm": 1.3541741371154785, "learning_rate": 8.60953590168727e-06, "loss": 0.3358, "step": 20409 }, { "epoch": 0.5581382629621527, "grad_norm": 1.3283015489578247, "learning_rate": 8.608658806831967e-06, "loss": 0.4692, "step": 20410 }, { "epoch": 0.5581656092758697, "grad_norm": 1.2876675128936768, "learning_rate": 8.607781722891347e-06, "loss": 0.4738, "step": 20411 }, { "epoch": 0.5581929555895865, "grad_norm": 1.2257329225540161, "learning_rate": 8.606904649872285e-06, "loss": 0.5206, "step": 20412 }, { "epoch": 0.5582203019033034, "grad_norm": 1.16550612449646, "learning_rate": 8.606027587781665e-06, "loss": 0.3419, "step": 20413 }, { "epoch": 0.5582476482170203, "grad_norm": 1.2514511346817017, "learning_rate": 8.605150536626365e-06, "loss": 0.7568, "step": 20414 }, { "epoch": 0.5582749945307373, "grad_norm": 1.9965609312057495, "learning_rate": 8.604273496413265e-06, "loss": 0.4768, "step": 20415 }, { "epoch": 0.5583023408444542, "grad_norm": 1.4842990636825562, "learning_rate": 8.60339646714925e-06, "loss": 0.5031, "step": 20416 }, { "epoch": 0.5583296871581711, "grad_norm": 1.3555855751037598, "learning_rate": 8.602519448841195e-06, "loss": 0.463, "step": 20417 }, { "epoch": 0.558357033471888, "grad_norm": 1.1273292303085327, "learning_rate": 8.601642441495982e-06, "loss": 0.4754, "step": 20418 }, { "epoch": 0.5583843797856048, "grad_norm": 1.1105204820632935, "learning_rate": 8.600765445120491e-06, "loss": 0.3734, "step": 20419 }, { "epoch": 0.5584117260993218, "grad_norm": 1.39280366897583, "learning_rate": 8.599888459721599e-06, "loss": 0.3696, "step": 20420 }, { "epoch": 0.5584390724130387, "grad_norm": 1.4659240245819092, "learning_rate": 8.59901148530619e-06, "loss": 0.5246, "step": 20421 }, { "epoch": 0.5584664187267556, "grad_norm": 1.2631206512451172, "learning_rate": 8.59813452188114e-06, "loss": 0.5222, "step": 20422 }, { "epoch": 0.5584937650404725, "grad_norm": 1.4344737529754639, "learning_rate": 8.597257569453332e-06, "loss": 0.5181, "step": 20423 }, { "epoch": 0.5585211113541895, "grad_norm": 2.0072922706604004, "learning_rate": 8.596380628029643e-06, "loss": 0.3539, "step": 20424 }, { "epoch": 0.5585484576679064, "grad_norm": 1.125037670135498, "learning_rate": 8.595503697616955e-06, "loss": 0.4599, "step": 20425 }, { "epoch": 0.5585758039816233, "grad_norm": 1.1907280683517456, "learning_rate": 8.594626778222142e-06, "loss": 0.4844, "step": 20426 }, { "epoch": 0.5586031502953401, "grad_norm": 1.528730869293213, "learning_rate": 8.593749869852087e-06, "loss": 0.5024, "step": 20427 }, { "epoch": 0.5586304966090571, "grad_norm": 1.226297378540039, "learning_rate": 8.592872972513669e-06, "loss": 0.5204, "step": 20428 }, { "epoch": 0.558657842922774, "grad_norm": 1.3048185110092163, "learning_rate": 8.591996086213767e-06, "loss": 0.3986, "step": 20429 }, { "epoch": 0.5586851892364909, "grad_norm": 1.323087453842163, "learning_rate": 8.59111921095926e-06, "loss": 0.4733, "step": 20430 }, { "epoch": 0.5587125355502078, "grad_norm": 1.4783657789230347, "learning_rate": 8.590242346757024e-06, "loss": 0.5151, "step": 20431 }, { "epoch": 0.5587398818639248, "grad_norm": 1.2727159261703491, "learning_rate": 8.589365493613943e-06, "loss": 0.4971, "step": 20432 }, { "epoch": 0.5587672281776417, "grad_norm": 1.1707544326782227, "learning_rate": 8.588488651536889e-06, "loss": 0.7398, "step": 20433 }, { "epoch": 0.5587945744913586, "grad_norm": 1.5454895496368408, "learning_rate": 8.587611820532748e-06, "loss": 0.4944, "step": 20434 }, { "epoch": 0.5588219208050754, "grad_norm": 1.267324686050415, "learning_rate": 8.586735000608393e-06, "loss": 0.5025, "step": 20435 }, { "epoch": 0.5588492671187923, "grad_norm": 1.3551464080810547, "learning_rate": 8.58585819177071e-06, "loss": 0.3391, "step": 20436 }, { "epoch": 0.5588766134325093, "grad_norm": 1.3931735754013062, "learning_rate": 8.584981394026564e-06, "loss": 0.4809, "step": 20437 }, { "epoch": 0.5589039597462262, "grad_norm": 1.272423505783081, "learning_rate": 8.584104607382844e-06, "loss": 0.4955, "step": 20438 }, { "epoch": 0.5589313060599431, "grad_norm": 1.3852043151855469, "learning_rate": 8.58322783184642e-06, "loss": 0.4803, "step": 20439 }, { "epoch": 0.55895865237366, "grad_norm": 1.7774758338928223, "learning_rate": 8.582351067424182e-06, "loss": 0.3923, "step": 20440 }, { "epoch": 0.558985998687377, "grad_norm": 1.500754475593567, "learning_rate": 8.581474314122997e-06, "loss": 0.5039, "step": 20441 }, { "epoch": 0.5590133450010939, "grad_norm": 1.347301721572876, "learning_rate": 8.580597571949745e-06, "loss": 0.495, "step": 20442 }, { "epoch": 0.5590406913148107, "grad_norm": 1.2989957332611084, "learning_rate": 8.57972084091131e-06, "loss": 0.5026, "step": 20443 }, { "epoch": 0.5590680376285276, "grad_norm": 1.2279794216156006, "learning_rate": 8.578844121014562e-06, "loss": 0.4846, "step": 20444 }, { "epoch": 0.5590953839422446, "grad_norm": 1.2129806280136108, "learning_rate": 8.577967412266383e-06, "loss": 0.7467, "step": 20445 }, { "epoch": 0.5591227302559615, "grad_norm": 1.3379158973693848, "learning_rate": 8.57709071467365e-06, "loss": 0.5296, "step": 20446 }, { "epoch": 0.5591500765696784, "grad_norm": 1.3339413404464722, "learning_rate": 8.576214028243239e-06, "loss": 0.5069, "step": 20447 }, { "epoch": 0.5591774228833953, "grad_norm": 1.426934003829956, "learning_rate": 8.57533735298203e-06, "loss": 0.3944, "step": 20448 }, { "epoch": 0.5592047691971123, "grad_norm": 1.109666109085083, "learning_rate": 8.574460688896896e-06, "loss": 0.4802, "step": 20449 }, { "epoch": 0.5592321155108292, "grad_norm": 1.7570933103561401, "learning_rate": 8.573584035994718e-06, "loss": 0.5311, "step": 20450 }, { "epoch": 0.559259461824546, "grad_norm": 1.5426350831985474, "learning_rate": 8.572707394282372e-06, "loss": 0.4952, "step": 20451 }, { "epoch": 0.5592868081382629, "grad_norm": 1.1668907403945923, "learning_rate": 8.571830763766734e-06, "loss": 0.4631, "step": 20452 }, { "epoch": 0.5593141544519799, "grad_norm": 1.2531794309616089, "learning_rate": 8.570954144454682e-06, "loss": 0.4944, "step": 20453 }, { "epoch": 0.5593415007656968, "grad_norm": 1.389996886253357, "learning_rate": 8.570077536353093e-06, "loss": 0.4608, "step": 20454 }, { "epoch": 0.5593688470794137, "grad_norm": 1.2929863929748535, "learning_rate": 8.569200939468844e-06, "loss": 0.4503, "step": 20455 }, { "epoch": 0.5593961933931306, "grad_norm": 1.1711695194244385, "learning_rate": 8.568324353808811e-06, "loss": 0.4752, "step": 20456 }, { "epoch": 0.5594235397068476, "grad_norm": 1.2408291101455688, "learning_rate": 8.567447779379869e-06, "loss": 0.4507, "step": 20457 }, { "epoch": 0.5594508860205645, "grad_norm": 1.1384756565093994, "learning_rate": 8.566571216188896e-06, "loss": 0.342, "step": 20458 }, { "epoch": 0.5594782323342813, "grad_norm": 1.2993296384811401, "learning_rate": 8.56569466424277e-06, "loss": 0.4972, "step": 20459 }, { "epoch": 0.5595055786479982, "grad_norm": 2.7870912551879883, "learning_rate": 8.564818123548364e-06, "loss": 0.5031, "step": 20460 }, { "epoch": 0.5595329249617151, "grad_norm": 1.5187100172042847, "learning_rate": 8.563941594112558e-06, "loss": 0.4844, "step": 20461 }, { "epoch": 0.5595602712754321, "grad_norm": 1.394585132598877, "learning_rate": 8.563065075942223e-06, "loss": 0.4648, "step": 20462 }, { "epoch": 0.559587617589149, "grad_norm": 1.112126111984253, "learning_rate": 8.56218856904424e-06, "loss": 0.4852, "step": 20463 }, { "epoch": 0.5596149639028659, "grad_norm": 1.2308573722839355, "learning_rate": 8.561312073425485e-06, "loss": 0.488, "step": 20464 }, { "epoch": 0.5596423102165828, "grad_norm": 1.5198103189468384, "learning_rate": 8.560435589092829e-06, "loss": 0.3613, "step": 20465 }, { "epoch": 0.5596696565302998, "grad_norm": 1.2735087871551514, "learning_rate": 8.55955911605315e-06, "loss": 0.4788, "step": 20466 }, { "epoch": 0.5596970028440166, "grad_norm": 1.7157692909240723, "learning_rate": 8.558682654313327e-06, "loss": 0.416, "step": 20467 }, { "epoch": 0.5597243491577335, "grad_norm": 1.1361806392669678, "learning_rate": 8.557806203880232e-06, "loss": 0.4788, "step": 20468 }, { "epoch": 0.5597516954714504, "grad_norm": 1.3453682661056519, "learning_rate": 8.556929764760742e-06, "loss": 0.4748, "step": 20469 }, { "epoch": 0.5597790417851674, "grad_norm": 1.323565125465393, "learning_rate": 8.55605333696173e-06, "loss": 0.4927, "step": 20470 }, { "epoch": 0.5598063880988843, "grad_norm": 1.4063720703125, "learning_rate": 8.555176920490079e-06, "loss": 0.4665, "step": 20471 }, { "epoch": 0.5598337344126012, "grad_norm": 1.4950759410858154, "learning_rate": 8.554300515352652e-06, "loss": 0.4834, "step": 20472 }, { "epoch": 0.5598610807263181, "grad_norm": 1.326176643371582, "learning_rate": 8.553424121556334e-06, "loss": 0.7821, "step": 20473 }, { "epoch": 0.559888427040035, "grad_norm": 1.222926378250122, "learning_rate": 8.552547739107995e-06, "loss": 0.7878, "step": 20474 }, { "epoch": 0.5599157733537519, "grad_norm": 1.5402017831802368, "learning_rate": 8.55167136801451e-06, "loss": 0.3976, "step": 20475 }, { "epoch": 0.5599431196674688, "grad_norm": 1.1849308013916016, "learning_rate": 8.550795008282758e-06, "loss": 0.4581, "step": 20476 }, { "epoch": 0.5599704659811857, "grad_norm": 1.0544438362121582, "learning_rate": 8.54991865991961e-06, "loss": 0.4923, "step": 20477 }, { "epoch": 0.5599978122949026, "grad_norm": 1.1080451011657715, "learning_rate": 8.549042322931944e-06, "loss": 0.4647, "step": 20478 }, { "epoch": 0.5600251586086196, "grad_norm": 1.561373233795166, "learning_rate": 8.548165997326631e-06, "loss": 0.5309, "step": 20479 }, { "epoch": 0.5600525049223365, "grad_norm": 1.0704835653305054, "learning_rate": 8.547289683110547e-06, "loss": 0.7416, "step": 20480 }, { "epoch": 0.5600798512360534, "grad_norm": 1.51676344871521, "learning_rate": 8.546413380290569e-06, "loss": 0.471, "step": 20481 }, { "epoch": 0.5601071975497703, "grad_norm": 1.1317991018295288, "learning_rate": 8.545537088873564e-06, "loss": 0.4818, "step": 20482 }, { "epoch": 0.5601345438634872, "grad_norm": 2.274278163909912, "learning_rate": 8.544660808866415e-06, "loss": 0.3749, "step": 20483 }, { "epoch": 0.5601618901772041, "grad_norm": 1.3528072834014893, "learning_rate": 8.543784540275992e-06, "loss": 0.4933, "step": 20484 }, { "epoch": 0.560189236490921, "grad_norm": 1.2719470262527466, "learning_rate": 8.542908283109168e-06, "loss": 0.4894, "step": 20485 }, { "epoch": 0.5602165828046379, "grad_norm": 1.3398449420928955, "learning_rate": 8.542032037372821e-06, "loss": 0.4887, "step": 20486 }, { "epoch": 0.5602439291183549, "grad_norm": 1.4265111684799194, "learning_rate": 8.54115580307382e-06, "loss": 0.776, "step": 20487 }, { "epoch": 0.5602712754320718, "grad_norm": 1.4068502187728882, "learning_rate": 8.540279580219042e-06, "loss": 0.4868, "step": 20488 }, { "epoch": 0.5602986217457887, "grad_norm": 1.4825917482376099, "learning_rate": 8.539403368815362e-06, "loss": 0.3847, "step": 20489 }, { "epoch": 0.5603259680595056, "grad_norm": 1.1008535623550415, "learning_rate": 8.538527168869647e-06, "loss": 0.3658, "step": 20490 }, { "epoch": 0.5603533143732224, "grad_norm": 1.1263381242752075, "learning_rate": 8.537650980388779e-06, "loss": 0.4896, "step": 20491 }, { "epoch": 0.5603806606869394, "grad_norm": 1.3532850742340088, "learning_rate": 8.536774803379625e-06, "loss": 0.704, "step": 20492 }, { "epoch": 0.5604080070006563, "grad_norm": 1.5232568979263306, "learning_rate": 8.535898637849062e-06, "loss": 0.4895, "step": 20493 }, { "epoch": 0.5604353533143732, "grad_norm": 1.1505316495895386, "learning_rate": 8.535022483803963e-06, "loss": 0.5037, "step": 20494 }, { "epoch": 0.5604626996280901, "grad_norm": 1.705378532409668, "learning_rate": 8.534146341251198e-06, "loss": 0.3577, "step": 20495 }, { "epoch": 0.5604900459418071, "grad_norm": 1.0294103622436523, "learning_rate": 8.533270210197646e-06, "loss": 0.4621, "step": 20496 }, { "epoch": 0.560517392255524, "grad_norm": 2.1841540336608887, "learning_rate": 8.532394090650174e-06, "loss": 0.3172, "step": 20497 }, { "epoch": 0.5605447385692409, "grad_norm": 1.1817288398742676, "learning_rate": 8.531517982615655e-06, "loss": 0.4769, "step": 20498 }, { "epoch": 0.5605720848829577, "grad_norm": 1.9985876083374023, "learning_rate": 8.53064188610097e-06, "loss": 0.34, "step": 20499 }, { "epoch": 0.5605994311966747, "grad_norm": 1.2752578258514404, "learning_rate": 8.52976580111298e-06, "loss": 0.4927, "step": 20500 }, { "epoch": 0.5606267775103916, "grad_norm": 1.1109079122543335, "learning_rate": 8.528889727658565e-06, "loss": 0.477, "step": 20501 }, { "epoch": 0.5606541238241085, "grad_norm": 1.5908899307250977, "learning_rate": 8.528013665744599e-06, "loss": 0.3625, "step": 20502 }, { "epoch": 0.5606814701378254, "grad_norm": 1.3188300132751465, "learning_rate": 8.52713761537795e-06, "loss": 0.4642, "step": 20503 }, { "epoch": 0.5607088164515424, "grad_norm": 1.2811731100082397, "learning_rate": 8.526261576565493e-06, "loss": 0.509, "step": 20504 }, { "epoch": 0.5607361627652593, "grad_norm": 1.2867915630340576, "learning_rate": 8.525385549314097e-06, "loss": 0.3196, "step": 20505 }, { "epoch": 0.5607635090789762, "grad_norm": 1.2769865989685059, "learning_rate": 8.52450953363064e-06, "loss": 0.5116, "step": 20506 }, { "epoch": 0.560790855392693, "grad_norm": 1.264207363128662, "learning_rate": 8.523633529521987e-06, "loss": 0.7998, "step": 20507 }, { "epoch": 0.56081820170641, "grad_norm": 1.1421457529067993, "learning_rate": 8.522757536995016e-06, "loss": 0.4821, "step": 20508 }, { "epoch": 0.5608455480201269, "grad_norm": 1.405609369277954, "learning_rate": 8.521881556056593e-06, "loss": 0.3842, "step": 20509 }, { "epoch": 0.5608728943338438, "grad_norm": 1.1575522422790527, "learning_rate": 8.521005586713594e-06, "loss": 0.5082, "step": 20510 }, { "epoch": 0.5609002406475607, "grad_norm": 1.208592176437378, "learning_rate": 8.520129628972893e-06, "loss": 0.5032, "step": 20511 }, { "epoch": 0.5609275869612776, "grad_norm": 1.5381309986114502, "learning_rate": 8.519253682841355e-06, "loss": 0.5215, "step": 20512 }, { "epoch": 0.5609549332749946, "grad_norm": 1.2873196601867676, "learning_rate": 8.518377748325858e-06, "loss": 0.4929, "step": 20513 }, { "epoch": 0.5609822795887115, "grad_norm": 2.0186691284179688, "learning_rate": 8.517501825433269e-06, "loss": 0.4649, "step": 20514 }, { "epoch": 0.5610096259024283, "grad_norm": 1.4260509014129639, "learning_rate": 8.51662591417046e-06, "loss": 0.4874, "step": 20515 }, { "epoch": 0.5610369722161452, "grad_norm": 1.3166395425796509, "learning_rate": 8.515750014544305e-06, "loss": 0.3595, "step": 20516 }, { "epoch": 0.5610643185298622, "grad_norm": 1.1937997341156006, "learning_rate": 8.514874126561672e-06, "loss": 0.4814, "step": 20517 }, { "epoch": 0.5610916648435791, "grad_norm": 1.3595867156982422, "learning_rate": 8.513998250229434e-06, "loss": 0.4653, "step": 20518 }, { "epoch": 0.561119011157296, "grad_norm": 1.101621150970459, "learning_rate": 8.513122385554463e-06, "loss": 0.4833, "step": 20519 }, { "epoch": 0.5611463574710129, "grad_norm": 1.3949073553085327, "learning_rate": 8.512246532543626e-06, "loss": 0.4889, "step": 20520 }, { "epoch": 0.5611737037847299, "grad_norm": 1.0170514583587646, "learning_rate": 8.511370691203799e-06, "loss": 0.4771, "step": 20521 }, { "epoch": 0.5612010500984467, "grad_norm": 1.420359492301941, "learning_rate": 8.510494861541847e-06, "loss": 0.4995, "step": 20522 }, { "epoch": 0.5612283964121636, "grad_norm": 1.2582390308380127, "learning_rate": 8.509619043564645e-06, "loss": 0.7559, "step": 20523 }, { "epoch": 0.5612557427258805, "grad_norm": 1.183424472808838, "learning_rate": 8.508743237279063e-06, "loss": 0.4904, "step": 20524 }, { "epoch": 0.5612830890395974, "grad_norm": 1.5225324630737305, "learning_rate": 8.50786744269197e-06, "loss": 0.404, "step": 20525 }, { "epoch": 0.5613104353533144, "grad_norm": 1.2209150791168213, "learning_rate": 8.506991659810238e-06, "loss": 0.4563, "step": 20526 }, { "epoch": 0.5613377816670313, "grad_norm": 1.1430933475494385, "learning_rate": 8.506115888640733e-06, "loss": 0.5012, "step": 20527 }, { "epoch": 0.5613651279807482, "grad_norm": 1.2755088806152344, "learning_rate": 8.505240129190331e-06, "loss": 0.7543, "step": 20528 }, { "epoch": 0.5613924742944651, "grad_norm": 1.4086467027664185, "learning_rate": 8.5043643814659e-06, "loss": 0.3659, "step": 20529 }, { "epoch": 0.561419820608182, "grad_norm": 1.5931965112686157, "learning_rate": 8.503488645474308e-06, "loss": 0.4011, "step": 20530 }, { "epoch": 0.5614471669218989, "grad_norm": 1.2505460977554321, "learning_rate": 8.502612921222427e-06, "loss": 0.5154, "step": 20531 }, { "epoch": 0.5614745132356158, "grad_norm": 1.5266424417495728, "learning_rate": 8.501737208717127e-06, "loss": 0.4924, "step": 20532 }, { "epoch": 0.5615018595493327, "grad_norm": 1.150517463684082, "learning_rate": 8.500861507965276e-06, "loss": 0.3263, "step": 20533 }, { "epoch": 0.5615292058630497, "grad_norm": 1.1547998189926147, "learning_rate": 8.499985818973746e-06, "loss": 0.471, "step": 20534 }, { "epoch": 0.5615565521767666, "grad_norm": 1.4908525943756104, "learning_rate": 8.499110141749402e-06, "loss": 0.374, "step": 20535 }, { "epoch": 0.5615838984904835, "grad_norm": 1.2766202688217163, "learning_rate": 8.498234476299119e-06, "loss": 0.3669, "step": 20536 }, { "epoch": 0.5616112448042004, "grad_norm": 1.2332502603530884, "learning_rate": 8.497358822629765e-06, "loss": 0.5008, "step": 20537 }, { "epoch": 0.5616385911179173, "grad_norm": 2.3285598754882812, "learning_rate": 8.496483180748206e-06, "loss": 0.781, "step": 20538 }, { "epoch": 0.5616659374316342, "grad_norm": 1.3496248722076416, "learning_rate": 8.495607550661313e-06, "loss": 0.5098, "step": 20539 }, { "epoch": 0.5616932837453511, "grad_norm": 1.4972025156021118, "learning_rate": 8.494731932375955e-06, "loss": 0.4889, "step": 20540 }, { "epoch": 0.561720630059068, "grad_norm": 1.190728783607483, "learning_rate": 8.493856325899008e-06, "loss": 0.499, "step": 20541 }, { "epoch": 0.561747976372785, "grad_norm": 1.1504120826721191, "learning_rate": 8.492980731237328e-06, "loss": 0.5017, "step": 20542 }, { "epoch": 0.5617753226865019, "grad_norm": 1.6759841442108154, "learning_rate": 8.49210514839779e-06, "loss": 0.3973, "step": 20543 }, { "epoch": 0.5618026690002188, "grad_norm": 1.2252531051635742, "learning_rate": 8.491229577387264e-06, "loss": 0.7509, "step": 20544 }, { "epoch": 0.5618300153139357, "grad_norm": 1.3723785877227783, "learning_rate": 8.490354018212616e-06, "loss": 0.4838, "step": 20545 }, { "epoch": 0.5618573616276525, "grad_norm": 1.2499127388000488, "learning_rate": 8.489478470880718e-06, "loss": 0.5236, "step": 20546 }, { "epoch": 0.5618847079413695, "grad_norm": 1.3921235799789429, "learning_rate": 8.488602935398435e-06, "loss": 0.4984, "step": 20547 }, { "epoch": 0.5619120542550864, "grad_norm": 1.6463861465454102, "learning_rate": 8.487727411772639e-06, "loss": 0.7201, "step": 20548 }, { "epoch": 0.5619394005688033, "grad_norm": 1.3660390377044678, "learning_rate": 8.486851900010192e-06, "loss": 0.483, "step": 20549 }, { "epoch": 0.5619667468825202, "grad_norm": 1.3999700546264648, "learning_rate": 8.485976400117968e-06, "loss": 0.5195, "step": 20550 }, { "epoch": 0.5619940931962372, "grad_norm": 1.3477030992507935, "learning_rate": 8.485100912102834e-06, "loss": 0.4942, "step": 20551 }, { "epoch": 0.5620214395099541, "grad_norm": 1.230649709701538, "learning_rate": 8.484225435971655e-06, "loss": 0.4798, "step": 20552 }, { "epoch": 0.562048785823671, "grad_norm": 1.1384690999984741, "learning_rate": 8.483349971731301e-06, "loss": 0.4968, "step": 20553 }, { "epoch": 0.5620761321373878, "grad_norm": 1.3502238988876343, "learning_rate": 8.48247451938864e-06, "loss": 0.4785, "step": 20554 }, { "epoch": 0.5621034784511048, "grad_norm": 1.1558778285980225, "learning_rate": 8.48159907895054e-06, "loss": 0.5134, "step": 20555 }, { "epoch": 0.5621308247648217, "grad_norm": 1.3378517627716064, "learning_rate": 8.480723650423869e-06, "loss": 0.4888, "step": 20556 }, { "epoch": 0.5621581710785386, "grad_norm": 1.1754435300827026, "learning_rate": 8.47984823381549e-06, "loss": 0.4749, "step": 20557 }, { "epoch": 0.5621855173922555, "grad_norm": 1.2946404218673706, "learning_rate": 8.478972829132276e-06, "loss": 0.7652, "step": 20558 }, { "epoch": 0.5622128637059725, "grad_norm": 1.511832594871521, "learning_rate": 8.478097436381093e-06, "loss": 0.3769, "step": 20559 }, { "epoch": 0.5622402100196894, "grad_norm": 1.1496186256408691, "learning_rate": 8.477222055568806e-06, "loss": 0.496, "step": 20560 }, { "epoch": 0.5622675563334063, "grad_norm": 1.4646707773208618, "learning_rate": 8.476346686702283e-06, "loss": 0.7265, "step": 20561 }, { "epoch": 0.5622949026471231, "grad_norm": 1.1959952116012573, "learning_rate": 8.475471329788393e-06, "loss": 0.7741, "step": 20562 }, { "epoch": 0.56232224896084, "grad_norm": 1.1401634216308594, "learning_rate": 8.474595984833998e-06, "loss": 0.501, "step": 20563 }, { "epoch": 0.562349595274557, "grad_norm": 1.275262713432312, "learning_rate": 8.473720651845973e-06, "loss": 0.5084, "step": 20564 }, { "epoch": 0.5623769415882739, "grad_norm": 1.3778797388076782, "learning_rate": 8.472845330831177e-06, "loss": 0.4826, "step": 20565 }, { "epoch": 0.5624042879019908, "grad_norm": 1.4587467908859253, "learning_rate": 8.471970021796478e-06, "loss": 0.3991, "step": 20566 }, { "epoch": 0.5624316342157077, "grad_norm": 1.328397512435913, "learning_rate": 8.471094724748748e-06, "loss": 0.4957, "step": 20567 }, { "epoch": 0.5624589805294247, "grad_norm": 1.3423891067504883, "learning_rate": 8.470219439694848e-06, "loss": 0.4817, "step": 20568 }, { "epoch": 0.5624863268431416, "grad_norm": 1.485635757446289, "learning_rate": 8.469344166641648e-06, "loss": 0.4112, "step": 20569 }, { "epoch": 0.5625136731568584, "grad_norm": 1.2990305423736572, "learning_rate": 8.468468905596008e-06, "loss": 0.4863, "step": 20570 }, { "epoch": 0.5625410194705753, "grad_norm": 1.3298758268356323, "learning_rate": 8.467593656564801e-06, "loss": 0.7672, "step": 20571 }, { "epoch": 0.5625683657842923, "grad_norm": 1.3913041353225708, "learning_rate": 8.466718419554891e-06, "loss": 0.5153, "step": 20572 }, { "epoch": 0.5625957120980092, "grad_norm": 1.4048407077789307, "learning_rate": 8.465843194573142e-06, "loss": 0.4544, "step": 20573 }, { "epoch": 0.5626230584117261, "grad_norm": 1.2150930166244507, "learning_rate": 8.46496798162642e-06, "loss": 0.4847, "step": 20574 }, { "epoch": 0.562650404725443, "grad_norm": 1.1771299839019775, "learning_rate": 8.464092780721596e-06, "loss": 0.7251, "step": 20575 }, { "epoch": 0.56267775103916, "grad_norm": 1.352912187576294, "learning_rate": 8.463217591865533e-06, "loss": 0.5115, "step": 20576 }, { "epoch": 0.5627050973528769, "grad_norm": 1.1741238832473755, "learning_rate": 8.462342415065092e-06, "loss": 0.7469, "step": 20577 }, { "epoch": 0.5627324436665937, "grad_norm": 1.3374046087265015, "learning_rate": 8.461467250327145e-06, "loss": 0.4734, "step": 20578 }, { "epoch": 0.5627597899803106, "grad_norm": 1.278799295425415, "learning_rate": 8.460592097658552e-06, "loss": 0.5334, "step": 20579 }, { "epoch": 0.5627871362940275, "grad_norm": 1.2599923610687256, "learning_rate": 8.459716957066178e-06, "loss": 0.7404, "step": 20580 }, { "epoch": 0.5628144826077445, "grad_norm": 1.6866979598999023, "learning_rate": 8.458841828556897e-06, "loss": 0.4057, "step": 20581 }, { "epoch": 0.5628418289214614, "grad_norm": 1.3101083040237427, "learning_rate": 8.457966712137563e-06, "loss": 0.3356, "step": 20582 }, { "epoch": 0.5628691752351783, "grad_norm": 1.3328672647476196, "learning_rate": 8.45709160781505e-06, "loss": 0.4949, "step": 20583 }, { "epoch": 0.5628965215488952, "grad_norm": 1.2841700315475464, "learning_rate": 8.456216515596217e-06, "loss": 0.3756, "step": 20584 }, { "epoch": 0.5629238678626122, "grad_norm": 1.1216058731079102, "learning_rate": 8.45534143548793e-06, "loss": 0.4952, "step": 20585 }, { "epoch": 0.562951214176329, "grad_norm": 1.3765403032302856, "learning_rate": 8.454466367497056e-06, "loss": 0.4902, "step": 20586 }, { "epoch": 0.5629785604900459, "grad_norm": 1.2867343425750732, "learning_rate": 8.453591311630459e-06, "loss": 0.4975, "step": 20587 }, { "epoch": 0.5630059068037628, "grad_norm": 1.130692481994629, "learning_rate": 8.452716267895e-06, "loss": 0.5129, "step": 20588 }, { "epoch": 0.5630332531174798, "grad_norm": 1.1604812145233154, "learning_rate": 8.451841236297552e-06, "loss": 0.4956, "step": 20589 }, { "epoch": 0.5630605994311967, "grad_norm": 1.3553003072738647, "learning_rate": 8.450966216844969e-06, "loss": 0.5121, "step": 20590 }, { "epoch": 0.5630879457449136, "grad_norm": 1.3951706886291504, "learning_rate": 8.450091209544123e-06, "loss": 0.4858, "step": 20591 }, { "epoch": 0.5631152920586305, "grad_norm": 1.7280794382095337, "learning_rate": 8.449216214401873e-06, "loss": 0.5038, "step": 20592 }, { "epoch": 0.5631426383723475, "grad_norm": 1.2332412004470825, "learning_rate": 8.448341231425085e-06, "loss": 0.7951, "step": 20593 }, { "epoch": 0.5631699846860643, "grad_norm": 1.6037969589233398, "learning_rate": 8.447466260620625e-06, "loss": 0.4316, "step": 20594 }, { "epoch": 0.5631973309997812, "grad_norm": 1.307550311088562, "learning_rate": 8.446591301995355e-06, "loss": 0.7618, "step": 20595 }, { "epoch": 0.5632246773134981, "grad_norm": 1.2760961055755615, "learning_rate": 8.445716355556139e-06, "loss": 0.476, "step": 20596 }, { "epoch": 0.563252023627215, "grad_norm": 1.0683789253234863, "learning_rate": 8.444841421309842e-06, "loss": 0.7468, "step": 20597 }, { "epoch": 0.563279369940932, "grad_norm": 1.5235693454742432, "learning_rate": 8.443966499263324e-06, "loss": 0.4567, "step": 20598 }, { "epoch": 0.5633067162546489, "grad_norm": 1.4840196371078491, "learning_rate": 8.443091589423453e-06, "loss": 0.4736, "step": 20599 }, { "epoch": 0.5633340625683658, "grad_norm": 1.2308803796768188, "learning_rate": 8.44221669179709e-06, "loss": 0.5155, "step": 20600 }, { "epoch": 0.5633614088820827, "grad_norm": 1.1638617515563965, "learning_rate": 8.441341806391097e-06, "loss": 0.5001, "step": 20601 }, { "epoch": 0.5633887551957996, "grad_norm": 1.1388331651687622, "learning_rate": 8.44046693321234e-06, "loss": 0.4684, "step": 20602 }, { "epoch": 0.5634161015095165, "grad_norm": 1.6484438180923462, "learning_rate": 8.43959207226768e-06, "loss": 0.4811, "step": 20603 }, { "epoch": 0.5634434478232334, "grad_norm": 1.9452319145202637, "learning_rate": 8.438717223563983e-06, "loss": 0.4834, "step": 20604 }, { "epoch": 0.5634707941369503, "grad_norm": 1.277248740196228, "learning_rate": 8.43784238710811e-06, "loss": 0.4911, "step": 20605 }, { "epoch": 0.5634981404506673, "grad_norm": 1.1838369369506836, "learning_rate": 8.436967562906921e-06, "loss": 0.5072, "step": 20606 }, { "epoch": 0.5635254867643842, "grad_norm": 1.5787711143493652, "learning_rate": 8.436092750967285e-06, "loss": 0.4678, "step": 20607 }, { "epoch": 0.5635528330781011, "grad_norm": 1.3588656187057495, "learning_rate": 8.435217951296059e-06, "loss": 0.3449, "step": 20608 }, { "epoch": 0.563580179391818, "grad_norm": 1.2726116180419922, "learning_rate": 8.434343163900107e-06, "loss": 0.4952, "step": 20609 }, { "epoch": 0.5636075257055349, "grad_norm": 1.1587661504745483, "learning_rate": 8.433468388786296e-06, "loss": 0.5255, "step": 20610 }, { "epoch": 0.5636348720192518, "grad_norm": 1.4741268157958984, "learning_rate": 8.432593625961482e-06, "loss": 0.4177, "step": 20611 }, { "epoch": 0.5636622183329687, "grad_norm": 1.4000614881515503, "learning_rate": 8.431718875432532e-06, "loss": 0.4409, "step": 20612 }, { "epoch": 0.5636895646466856, "grad_norm": 1.3926948308944702, "learning_rate": 8.430844137206306e-06, "loss": 0.5034, "step": 20613 }, { "epoch": 0.5637169109604026, "grad_norm": 1.4548330307006836, "learning_rate": 8.429969411289664e-06, "loss": 0.4765, "step": 20614 }, { "epoch": 0.5637442572741195, "grad_norm": 1.473030686378479, "learning_rate": 8.42909469768947e-06, "loss": 0.428, "step": 20615 }, { "epoch": 0.5637716035878364, "grad_norm": 1.3110755681991577, "learning_rate": 8.428219996412588e-06, "loss": 0.4789, "step": 20616 }, { "epoch": 0.5637989499015533, "grad_norm": 1.2382299900054932, "learning_rate": 8.427345307465876e-06, "loss": 0.4902, "step": 20617 }, { "epoch": 0.5638262962152701, "grad_norm": 1.1583220958709717, "learning_rate": 8.426470630856198e-06, "loss": 0.4749, "step": 20618 }, { "epoch": 0.5638536425289871, "grad_norm": 1.1451480388641357, "learning_rate": 8.425595966590415e-06, "loss": 0.4457, "step": 20619 }, { "epoch": 0.563880988842704, "grad_norm": 1.3009284734725952, "learning_rate": 8.424721314675388e-06, "loss": 0.4924, "step": 20620 }, { "epoch": 0.5639083351564209, "grad_norm": 1.164759635925293, "learning_rate": 8.423846675117981e-06, "loss": 0.4705, "step": 20621 }, { "epoch": 0.5639356814701378, "grad_norm": 1.4759351015090942, "learning_rate": 8.42297204792505e-06, "loss": 0.456, "step": 20622 }, { "epoch": 0.5639630277838548, "grad_norm": 1.200683832168579, "learning_rate": 8.42209743310346e-06, "loss": 0.3526, "step": 20623 }, { "epoch": 0.5639903740975717, "grad_norm": 1.4097908735275269, "learning_rate": 8.421222830660077e-06, "loss": 0.369, "step": 20624 }, { "epoch": 0.5640177204112885, "grad_norm": 1.2058515548706055, "learning_rate": 8.42034824060175e-06, "loss": 0.5152, "step": 20625 }, { "epoch": 0.5640450667250054, "grad_norm": 1.4331413507461548, "learning_rate": 8.41947366293535e-06, "loss": 0.512, "step": 20626 }, { "epoch": 0.5640724130387224, "grad_norm": 1.5105646848678589, "learning_rate": 8.418599097667732e-06, "loss": 0.527, "step": 20627 }, { "epoch": 0.5640997593524393, "grad_norm": 1.3436369895935059, "learning_rate": 8.41772454480576e-06, "loss": 0.4939, "step": 20628 }, { "epoch": 0.5641271056661562, "grad_norm": 1.2508174180984497, "learning_rate": 8.416850004356296e-06, "loss": 0.3292, "step": 20629 }, { "epoch": 0.5641544519798731, "grad_norm": 1.6674301624298096, "learning_rate": 8.415975476326195e-06, "loss": 0.7565, "step": 20630 }, { "epoch": 0.56418179829359, "grad_norm": 1.2522209882736206, "learning_rate": 8.415100960722321e-06, "loss": 0.4857, "step": 20631 }, { "epoch": 0.564209144607307, "grad_norm": 1.2858773469924927, "learning_rate": 8.414226457551536e-06, "loss": 0.3698, "step": 20632 }, { "epoch": 0.5642364909210238, "grad_norm": 1.3688822984695435, "learning_rate": 8.413351966820695e-06, "loss": 0.382, "step": 20633 }, { "epoch": 0.5642638372347407, "grad_norm": 1.3278623819351196, "learning_rate": 8.412477488536666e-06, "loss": 0.4854, "step": 20634 }, { "epoch": 0.5642911835484576, "grad_norm": 1.4109714031219482, "learning_rate": 8.4116030227063e-06, "loss": 0.5072, "step": 20635 }, { "epoch": 0.5643185298621746, "grad_norm": 1.4048231840133667, "learning_rate": 8.410728569336462e-06, "loss": 0.3889, "step": 20636 }, { "epoch": 0.5643458761758915, "grad_norm": 1.364945650100708, "learning_rate": 8.409854128434013e-06, "loss": 0.4939, "step": 20637 }, { "epoch": 0.5643732224896084, "grad_norm": 1.5254756212234497, "learning_rate": 8.408979700005808e-06, "loss": 0.3928, "step": 20638 }, { "epoch": 0.5644005688033253, "grad_norm": 3.005439043045044, "learning_rate": 8.408105284058711e-06, "loss": 0.3522, "step": 20639 }, { "epoch": 0.5644279151170423, "grad_norm": 1.8235032558441162, "learning_rate": 8.407230880599582e-06, "loss": 0.4598, "step": 20640 }, { "epoch": 0.5644552614307591, "grad_norm": 1.711428165435791, "learning_rate": 8.406356489635275e-06, "loss": 0.4989, "step": 20641 }, { "epoch": 0.564482607744476, "grad_norm": 1.1454476118087769, "learning_rate": 8.405482111172656e-06, "loss": 0.4942, "step": 20642 }, { "epoch": 0.5645099540581929, "grad_norm": 1.152646780014038, "learning_rate": 8.404607745218579e-06, "loss": 0.4943, "step": 20643 }, { "epoch": 0.5645373003719099, "grad_norm": 1.7277096509933472, "learning_rate": 8.403733391779905e-06, "loss": 0.7554, "step": 20644 }, { "epoch": 0.5645646466856268, "grad_norm": 1.4439979791641235, "learning_rate": 8.402859050863497e-06, "loss": 0.4926, "step": 20645 }, { "epoch": 0.5645919929993437, "grad_norm": 1.4304285049438477, "learning_rate": 8.401984722476207e-06, "loss": 0.3821, "step": 20646 }, { "epoch": 0.5646193393130606, "grad_norm": 1.1783186197280884, "learning_rate": 8.4011104066249e-06, "loss": 0.4856, "step": 20647 }, { "epoch": 0.5646466856267776, "grad_norm": 1.2666881084442139, "learning_rate": 8.40023610331643e-06, "loss": 0.4846, "step": 20648 }, { "epoch": 0.5646740319404944, "grad_norm": 1.1673905849456787, "learning_rate": 8.399361812557657e-06, "loss": 0.3314, "step": 20649 }, { "epoch": 0.5647013782542113, "grad_norm": 1.3768256902694702, "learning_rate": 8.39848753435544e-06, "loss": 0.4966, "step": 20650 }, { "epoch": 0.5647287245679282, "grad_norm": 1.4846550226211548, "learning_rate": 8.397613268716638e-06, "loss": 0.525, "step": 20651 }, { "epoch": 0.5647560708816451, "grad_norm": 1.2103445529937744, "learning_rate": 8.396739015648109e-06, "loss": 0.4508, "step": 20652 }, { "epoch": 0.5647834171953621, "grad_norm": 1.1691572666168213, "learning_rate": 8.39586477515671e-06, "loss": 0.5085, "step": 20653 }, { "epoch": 0.564810763509079, "grad_norm": 1.200170874595642, "learning_rate": 8.394990547249303e-06, "loss": 0.4815, "step": 20654 }, { "epoch": 0.5648381098227959, "grad_norm": 1.6626505851745605, "learning_rate": 8.39411633193274e-06, "loss": 0.4158, "step": 20655 }, { "epoch": 0.5648654561365128, "grad_norm": 1.1319957971572876, "learning_rate": 8.393242129213886e-06, "loss": 0.4884, "step": 20656 }, { "epoch": 0.5648928024502297, "grad_norm": 1.227001667022705, "learning_rate": 8.392367939099593e-06, "loss": 0.4434, "step": 20657 }, { "epoch": 0.5649201487639466, "grad_norm": 1.5429410934448242, "learning_rate": 8.39149376159672e-06, "loss": 0.4113, "step": 20658 }, { "epoch": 0.5649474950776635, "grad_norm": 1.3499804735183716, "learning_rate": 8.39061959671213e-06, "loss": 0.5069, "step": 20659 }, { "epoch": 0.5649748413913804, "grad_norm": 1.2381620407104492, "learning_rate": 8.389745444452673e-06, "loss": 0.7934, "step": 20660 }, { "epoch": 0.5650021877050974, "grad_norm": 1.5818406343460083, "learning_rate": 8.38887130482521e-06, "loss": 0.4983, "step": 20661 }, { "epoch": 0.5650295340188143, "grad_norm": 1.670989990234375, "learning_rate": 8.3879971778366e-06, "loss": 0.3682, "step": 20662 }, { "epoch": 0.5650568803325312, "grad_norm": 1.263159990310669, "learning_rate": 8.387123063493696e-06, "loss": 0.4558, "step": 20663 }, { "epoch": 0.5650842266462481, "grad_norm": 1.3852174282073975, "learning_rate": 8.386248961803361e-06, "loss": 0.4747, "step": 20664 }, { "epoch": 0.565111572959965, "grad_norm": 1.7883678674697876, "learning_rate": 8.385374872772445e-06, "loss": 0.4951, "step": 20665 }, { "epoch": 0.5651389192736819, "grad_norm": 1.1950416564941406, "learning_rate": 8.384500796407811e-06, "loss": 0.7824, "step": 20666 }, { "epoch": 0.5651662655873988, "grad_norm": 1.5643508434295654, "learning_rate": 8.383626732716313e-06, "loss": 0.503, "step": 20667 }, { "epoch": 0.5651936119011157, "grad_norm": 1.4087358713150024, "learning_rate": 8.382752681704809e-06, "loss": 0.4827, "step": 20668 }, { "epoch": 0.5652209582148326, "grad_norm": 1.5273587703704834, "learning_rate": 8.381878643380155e-06, "loss": 0.4094, "step": 20669 }, { "epoch": 0.5652483045285496, "grad_norm": 8.274579048156738, "learning_rate": 8.381004617749207e-06, "loss": 0.7384, "step": 20670 }, { "epoch": 0.5652756508422665, "grad_norm": 1.1335864067077637, "learning_rate": 8.380130604818821e-06, "loss": 0.4655, "step": 20671 }, { "epoch": 0.5653029971559834, "grad_norm": 1.4256304502487183, "learning_rate": 8.379256604595857e-06, "loss": 0.4987, "step": 20672 }, { "epoch": 0.5653303434697002, "grad_norm": 1.602401614189148, "learning_rate": 8.378382617087168e-06, "loss": 0.3785, "step": 20673 }, { "epoch": 0.5653576897834172, "grad_norm": 2.142566442489624, "learning_rate": 8.37750864229961e-06, "loss": 0.3672, "step": 20674 }, { "epoch": 0.5653850360971341, "grad_norm": 1.2232633829116821, "learning_rate": 8.37663468024004e-06, "loss": 0.4996, "step": 20675 }, { "epoch": 0.565412382410851, "grad_norm": 1.3339275121688843, "learning_rate": 8.375760730915316e-06, "loss": 0.4609, "step": 20676 }, { "epoch": 0.5654397287245679, "grad_norm": 1.3422495126724243, "learning_rate": 8.374886794332292e-06, "loss": 0.4678, "step": 20677 }, { "epoch": 0.5654670750382849, "grad_norm": 1.1693601608276367, "learning_rate": 8.374012870497823e-06, "loss": 0.4897, "step": 20678 }, { "epoch": 0.5654944213520018, "grad_norm": 1.1470954418182373, "learning_rate": 8.373138959418763e-06, "loss": 0.4871, "step": 20679 }, { "epoch": 0.5655217676657187, "grad_norm": 1.2968978881835938, "learning_rate": 8.372265061101972e-06, "loss": 0.5022, "step": 20680 }, { "epoch": 0.5655491139794355, "grad_norm": 1.4815155267715454, "learning_rate": 8.371391175554305e-06, "loss": 0.4891, "step": 20681 }, { "epoch": 0.5655764602931524, "grad_norm": 0.9338472485542297, "learning_rate": 8.370517302782617e-06, "loss": 0.3247, "step": 20682 }, { "epoch": 0.5656038066068694, "grad_norm": 1.2496857643127441, "learning_rate": 8.369643442793759e-06, "loss": 0.4935, "step": 20683 }, { "epoch": 0.5656311529205863, "grad_norm": 1.3852053880691528, "learning_rate": 8.368769595594592e-06, "loss": 0.5022, "step": 20684 }, { "epoch": 0.5656584992343032, "grad_norm": 1.218174695968628, "learning_rate": 8.367895761191965e-06, "loss": 0.4935, "step": 20685 }, { "epoch": 0.5656858455480201, "grad_norm": 1.2786948680877686, "learning_rate": 8.367021939592739e-06, "loss": 0.5027, "step": 20686 }, { "epoch": 0.5657131918617371, "grad_norm": 1.0867037773132324, "learning_rate": 8.366148130803763e-06, "loss": 0.4623, "step": 20687 }, { "epoch": 0.565740538175454, "grad_norm": 1.3724253177642822, "learning_rate": 8.365274334831899e-06, "loss": 0.4662, "step": 20688 }, { "epoch": 0.5657678844891708, "grad_norm": 1.3136305809020996, "learning_rate": 8.364400551683996e-06, "loss": 0.5083, "step": 20689 }, { "epoch": 0.5657952308028877, "grad_norm": 1.315393090248108, "learning_rate": 8.363526781366911e-06, "loss": 0.7642, "step": 20690 }, { "epoch": 0.5658225771166047, "grad_norm": 1.1285018920898438, "learning_rate": 8.362653023887498e-06, "loss": 0.4956, "step": 20691 }, { "epoch": 0.5658499234303216, "grad_norm": 1.2442843914031982, "learning_rate": 8.36177927925261e-06, "loss": 0.4688, "step": 20692 }, { "epoch": 0.5658772697440385, "grad_norm": 1.667708396911621, "learning_rate": 8.360905547469102e-06, "loss": 0.3515, "step": 20693 }, { "epoch": 0.5659046160577554, "grad_norm": 1.4102420806884766, "learning_rate": 8.36003182854383e-06, "loss": 0.4028, "step": 20694 }, { "epoch": 0.5659319623714724, "grad_norm": 1.3657941818237305, "learning_rate": 8.359158122483647e-06, "loss": 0.7635, "step": 20695 }, { "epoch": 0.5659593086851893, "grad_norm": 1.7232112884521484, "learning_rate": 8.358284429295406e-06, "loss": 0.3757, "step": 20696 }, { "epoch": 0.5659866549989061, "grad_norm": 1.4247952699661255, "learning_rate": 8.357410748985963e-06, "loss": 0.4956, "step": 20697 }, { "epoch": 0.566014001312623, "grad_norm": 1.4740161895751953, "learning_rate": 8.356537081562168e-06, "loss": 0.7859, "step": 20698 }, { "epoch": 0.56604134762634, "grad_norm": 1.6710964441299438, "learning_rate": 8.35566342703088e-06, "loss": 0.7472, "step": 20699 }, { "epoch": 0.5660686939400569, "grad_norm": 1.2499268054962158, "learning_rate": 8.354789785398947e-06, "loss": 0.4961, "step": 20700 }, { "epoch": 0.5660960402537738, "grad_norm": 1.1362248659133911, "learning_rate": 8.353916156673226e-06, "loss": 0.7606, "step": 20701 }, { "epoch": 0.5661233865674907, "grad_norm": 1.5729669332504272, "learning_rate": 8.353042540860572e-06, "loss": 0.5021, "step": 20702 }, { "epoch": 0.5661507328812077, "grad_norm": 1.4788450002670288, "learning_rate": 8.352168937967833e-06, "loss": 0.4612, "step": 20703 }, { "epoch": 0.5661780791949246, "grad_norm": 1.6283912658691406, "learning_rate": 8.351295348001864e-06, "loss": 0.5005, "step": 20704 }, { "epoch": 0.5662054255086414, "grad_norm": 1.4000617265701294, "learning_rate": 8.350421770969522e-06, "loss": 0.4797, "step": 20705 }, { "epoch": 0.5662327718223583, "grad_norm": 1.4157620668411255, "learning_rate": 8.349548206877655e-06, "loss": 0.4874, "step": 20706 }, { "epoch": 0.5662601181360752, "grad_norm": 1.2146834135055542, "learning_rate": 8.34867465573312e-06, "loss": 0.5137, "step": 20707 }, { "epoch": 0.5662874644497922, "grad_norm": 1.339977741241455, "learning_rate": 8.347801117542765e-06, "loss": 0.5093, "step": 20708 }, { "epoch": 0.5663148107635091, "grad_norm": 1.8604131937026978, "learning_rate": 8.346927592313445e-06, "loss": 0.5204, "step": 20709 }, { "epoch": 0.566342157077226, "grad_norm": 1.28927743434906, "learning_rate": 8.346054080052015e-06, "loss": 0.4596, "step": 20710 }, { "epoch": 0.5663695033909429, "grad_norm": 1.528306245803833, "learning_rate": 8.345180580765324e-06, "loss": 0.3959, "step": 20711 }, { "epoch": 0.5663968497046599, "grad_norm": 1.5531443357467651, "learning_rate": 8.344307094460226e-06, "loss": 0.3715, "step": 20712 }, { "epoch": 0.5664241960183767, "grad_norm": 1.4370934963226318, "learning_rate": 8.343433621143572e-06, "loss": 0.7725, "step": 20713 }, { "epoch": 0.5664515423320936, "grad_norm": 1.1520437002182007, "learning_rate": 8.342560160822216e-06, "loss": 0.5074, "step": 20714 }, { "epoch": 0.5664788886458105, "grad_norm": 1.0424678325653076, "learning_rate": 8.34168671350301e-06, "loss": 0.4859, "step": 20715 }, { "epoch": 0.5665062349595275, "grad_norm": 1.2409124374389648, "learning_rate": 8.340813279192804e-06, "loss": 0.5148, "step": 20716 }, { "epoch": 0.5665335812732444, "grad_norm": 1.1473772525787354, "learning_rate": 8.339939857898453e-06, "loss": 0.4814, "step": 20717 }, { "epoch": 0.5665609275869613, "grad_norm": 1.19115149974823, "learning_rate": 8.339066449626804e-06, "loss": 0.5221, "step": 20718 }, { "epoch": 0.5665882739006782, "grad_norm": 1.3217486143112183, "learning_rate": 8.338193054384713e-06, "loss": 0.4823, "step": 20719 }, { "epoch": 0.5666156202143952, "grad_norm": 1.414686918258667, "learning_rate": 8.337319672179029e-06, "loss": 0.5061, "step": 20720 }, { "epoch": 0.566642966528112, "grad_norm": 1.173350214958191, "learning_rate": 8.336446303016606e-06, "loss": 0.4859, "step": 20721 }, { "epoch": 0.5666703128418289, "grad_norm": 1.3069496154785156, "learning_rate": 8.33557294690429e-06, "loss": 0.4534, "step": 20722 }, { "epoch": 0.5666976591555458, "grad_norm": 1.4607363939285278, "learning_rate": 8.334699603848937e-06, "loss": 0.4612, "step": 20723 }, { "epoch": 0.5667250054692627, "grad_norm": 1.537569284439087, "learning_rate": 8.333826273857398e-06, "loss": 0.4755, "step": 20724 }, { "epoch": 0.5667523517829797, "grad_norm": 1.248262882232666, "learning_rate": 8.33295295693652e-06, "loss": 0.4675, "step": 20725 }, { "epoch": 0.5667796980966966, "grad_norm": 1.2598906755447388, "learning_rate": 8.332079653093158e-06, "loss": 0.4893, "step": 20726 }, { "epoch": 0.5668070444104135, "grad_norm": 1.528090000152588, "learning_rate": 8.331206362334166e-06, "loss": 0.391, "step": 20727 }, { "epoch": 0.5668343907241303, "grad_norm": 1.5334497690200806, "learning_rate": 8.330333084666387e-06, "loss": 0.4997, "step": 20728 }, { "epoch": 0.5668617370378473, "grad_norm": 1.9308785200119019, "learning_rate": 8.329459820096676e-06, "loss": 0.5373, "step": 20729 }, { "epoch": 0.5668890833515642, "grad_norm": 1.4578572511672974, "learning_rate": 8.328586568631881e-06, "loss": 0.4997, "step": 20730 }, { "epoch": 0.5669164296652811, "grad_norm": 1.3212246894836426, "learning_rate": 8.327713330278855e-06, "loss": 0.5327, "step": 20731 }, { "epoch": 0.566943775978998, "grad_norm": 1.2706313133239746, "learning_rate": 8.326840105044448e-06, "loss": 0.4736, "step": 20732 }, { "epoch": 0.566971122292715, "grad_norm": 1.279632329940796, "learning_rate": 8.325966892935507e-06, "loss": 0.4885, "step": 20733 }, { "epoch": 0.5669984686064319, "grad_norm": 1.3816055059432983, "learning_rate": 8.32509369395889e-06, "loss": 0.4939, "step": 20734 }, { "epoch": 0.5670258149201488, "grad_norm": 1.1838266849517822, "learning_rate": 8.324220508121435e-06, "loss": 0.482, "step": 20735 }, { "epoch": 0.5670531612338656, "grad_norm": 1.5637702941894531, "learning_rate": 8.323347335430001e-06, "loss": 0.4971, "step": 20736 }, { "epoch": 0.5670805075475825, "grad_norm": 1.5000773668289185, "learning_rate": 8.322474175891437e-06, "loss": 0.3613, "step": 20737 }, { "epoch": 0.5671078538612995, "grad_norm": 1.5245627164840698, "learning_rate": 8.321601029512589e-06, "loss": 0.539, "step": 20738 }, { "epoch": 0.5671352001750164, "grad_norm": 1.0828135013580322, "learning_rate": 8.320727896300309e-06, "loss": 0.5133, "step": 20739 }, { "epoch": 0.5671625464887333, "grad_norm": 1.4112985134124756, "learning_rate": 8.319854776261448e-06, "loss": 0.4703, "step": 20740 }, { "epoch": 0.5671898928024502, "grad_norm": 1.2882452011108398, "learning_rate": 8.31898166940285e-06, "loss": 0.5062, "step": 20741 }, { "epoch": 0.5672172391161672, "grad_norm": 1.1171073913574219, "learning_rate": 8.318108575731372e-06, "loss": 0.5173, "step": 20742 }, { "epoch": 0.5672445854298841, "grad_norm": 1.3747543096542358, "learning_rate": 8.317235495253855e-06, "loss": 0.5178, "step": 20743 }, { "epoch": 0.5672719317436009, "grad_norm": 1.3696240186691284, "learning_rate": 8.316362427977154e-06, "loss": 0.4584, "step": 20744 }, { "epoch": 0.5672992780573178, "grad_norm": 1.1692359447479248, "learning_rate": 8.315489373908117e-06, "loss": 0.4721, "step": 20745 }, { "epoch": 0.5673266243710348, "grad_norm": 1.5454344749450684, "learning_rate": 8.314616333053589e-06, "loss": 0.54, "step": 20746 }, { "epoch": 0.5673539706847517, "grad_norm": 1.523600697517395, "learning_rate": 8.313743305420424e-06, "loss": 0.4794, "step": 20747 }, { "epoch": 0.5673813169984686, "grad_norm": 1.2745628356933594, "learning_rate": 8.312870291015466e-06, "loss": 0.4806, "step": 20748 }, { "epoch": 0.5674086633121855, "grad_norm": 1.3523292541503906, "learning_rate": 8.311997289845565e-06, "loss": 0.4659, "step": 20749 }, { "epoch": 0.5674360096259025, "grad_norm": 1.0282539129257202, "learning_rate": 8.311124301917574e-06, "loss": 0.4757, "step": 20750 }, { "epoch": 0.5674633559396194, "grad_norm": 1.5421977043151855, "learning_rate": 8.310251327238335e-06, "loss": 0.3686, "step": 20751 }, { "epoch": 0.5674907022533362, "grad_norm": 1.2040241956710815, "learning_rate": 8.309378365814702e-06, "loss": 0.454, "step": 20752 }, { "epoch": 0.5675180485670531, "grad_norm": 1.542640209197998, "learning_rate": 8.308505417653517e-06, "loss": 0.4618, "step": 20753 }, { "epoch": 0.56754539488077, "grad_norm": 1.2514920234680176, "learning_rate": 8.307632482761633e-06, "loss": 0.4767, "step": 20754 }, { "epoch": 0.567572741194487, "grad_norm": 3.303980588912964, "learning_rate": 8.306759561145893e-06, "loss": 0.7605, "step": 20755 }, { "epoch": 0.5676000875082039, "grad_norm": 1.2737547159194946, "learning_rate": 8.30588665281315e-06, "loss": 0.5024, "step": 20756 }, { "epoch": 0.5676274338219208, "grad_norm": 1.2586450576782227, "learning_rate": 8.305013757770247e-06, "loss": 0.5164, "step": 20757 }, { "epoch": 0.5676547801356377, "grad_norm": 1.259508490562439, "learning_rate": 8.304140876024036e-06, "loss": 0.4929, "step": 20758 }, { "epoch": 0.5676821264493547, "grad_norm": 1.4900062084197998, "learning_rate": 8.303268007581361e-06, "loss": 0.5159, "step": 20759 }, { "epoch": 0.5677094727630715, "grad_norm": 1.0820869207382202, "learning_rate": 8.302395152449073e-06, "loss": 0.4849, "step": 20760 }, { "epoch": 0.5677368190767884, "grad_norm": 1.185031533241272, "learning_rate": 8.301522310634014e-06, "loss": 0.4874, "step": 20761 }, { "epoch": 0.5677641653905053, "grad_norm": 1.267401099205017, "learning_rate": 8.30064948214304e-06, "loss": 0.4706, "step": 20762 }, { "epoch": 0.5677915117042223, "grad_norm": 1.1608028411865234, "learning_rate": 8.299776666982986e-06, "loss": 0.7824, "step": 20763 }, { "epoch": 0.5678188580179392, "grad_norm": 1.25423264503479, "learning_rate": 8.298903865160712e-06, "loss": 0.4992, "step": 20764 }, { "epoch": 0.5678462043316561, "grad_norm": 1.321565866470337, "learning_rate": 8.298031076683054e-06, "loss": 0.5214, "step": 20765 }, { "epoch": 0.567873550645373, "grad_norm": 1.2265477180480957, "learning_rate": 8.297158301556864e-06, "loss": 0.7597, "step": 20766 }, { "epoch": 0.56790089695909, "grad_norm": 1.370048999786377, "learning_rate": 8.296285539788989e-06, "loss": 0.3749, "step": 20767 }, { "epoch": 0.5679282432728068, "grad_norm": 1.1890863180160522, "learning_rate": 8.295412791386274e-06, "loss": 0.4901, "step": 20768 }, { "epoch": 0.5679555895865237, "grad_norm": 2.0946967601776123, "learning_rate": 8.294540056355565e-06, "loss": 0.7601, "step": 20769 }, { "epoch": 0.5679829359002406, "grad_norm": 1.3840277194976807, "learning_rate": 8.293667334703712e-06, "loss": 0.4786, "step": 20770 }, { "epoch": 0.5680102822139576, "grad_norm": 1.2522703409194946, "learning_rate": 8.292794626437555e-06, "loss": 0.4839, "step": 20771 }, { "epoch": 0.5680376285276745, "grad_norm": 1.3399156332015991, "learning_rate": 8.291921931563947e-06, "loss": 0.5453, "step": 20772 }, { "epoch": 0.5680649748413914, "grad_norm": 1.648380994796753, "learning_rate": 8.291049250089728e-06, "loss": 0.3873, "step": 20773 }, { "epoch": 0.5680923211551083, "grad_norm": 1.221323013305664, "learning_rate": 8.290176582021747e-06, "loss": 0.489, "step": 20774 }, { "epoch": 0.5681196674688253, "grad_norm": 1.3315937519073486, "learning_rate": 8.289303927366852e-06, "loss": 0.375, "step": 20775 }, { "epoch": 0.5681470137825421, "grad_norm": 1.5800321102142334, "learning_rate": 8.288431286131883e-06, "loss": 0.3702, "step": 20776 }, { "epoch": 0.568174360096259, "grad_norm": 1.7099320888519287, "learning_rate": 8.287558658323692e-06, "loss": 0.4016, "step": 20777 }, { "epoch": 0.5682017064099759, "grad_norm": 1.09832763671875, "learning_rate": 8.286686043949119e-06, "loss": 0.49, "step": 20778 }, { "epoch": 0.5682290527236928, "grad_norm": 2.0133321285247803, "learning_rate": 8.285813443015011e-06, "loss": 0.5125, "step": 20779 }, { "epoch": 0.5682563990374098, "grad_norm": 3.889315128326416, "learning_rate": 8.284940855528219e-06, "loss": 0.7519, "step": 20780 }, { "epoch": 0.5682837453511267, "grad_norm": 1.473070740699768, "learning_rate": 8.284068281495579e-06, "loss": 0.3847, "step": 20781 }, { "epoch": 0.5683110916648436, "grad_norm": 1.1379190683364868, "learning_rate": 8.28319572092394e-06, "loss": 0.478, "step": 20782 }, { "epoch": 0.5683384379785605, "grad_norm": 1.3132133483886719, "learning_rate": 8.282323173820151e-06, "loss": 0.3614, "step": 20783 }, { "epoch": 0.5683657842922774, "grad_norm": 1.2148489952087402, "learning_rate": 8.28145064019105e-06, "loss": 0.7947, "step": 20784 }, { "epoch": 0.5683931306059943, "grad_norm": 1.2825524806976318, "learning_rate": 8.280578120043488e-06, "loss": 0.4927, "step": 20785 }, { "epoch": 0.5684204769197112, "grad_norm": 1.2387583255767822, "learning_rate": 8.279705613384303e-06, "loss": 0.4755, "step": 20786 }, { "epoch": 0.5684478232334281, "grad_norm": 1.403064250946045, "learning_rate": 8.27883312022035e-06, "loss": 0.4501, "step": 20787 }, { "epoch": 0.568475169547145, "grad_norm": 1.3065394163131714, "learning_rate": 8.277960640558461e-06, "loss": 0.5032, "step": 20788 }, { "epoch": 0.568502515860862, "grad_norm": 1.1052407026290894, "learning_rate": 8.27708817440549e-06, "loss": 0.4777, "step": 20789 }, { "epoch": 0.5685298621745789, "grad_norm": 1.1543035507202148, "learning_rate": 8.276215721768272e-06, "loss": 0.5184, "step": 20790 }, { "epoch": 0.5685572084882958, "grad_norm": 1.2624022960662842, "learning_rate": 8.275343282653659e-06, "loss": 0.4746, "step": 20791 }, { "epoch": 0.5685845548020126, "grad_norm": 1.1415977478027344, "learning_rate": 8.274470857068495e-06, "loss": 0.4838, "step": 20792 }, { "epoch": 0.5686119011157296, "grad_norm": 1.3150001764297485, "learning_rate": 8.273598445019619e-06, "loss": 0.5101, "step": 20793 }, { "epoch": 0.5686392474294465, "grad_norm": 1.3074324131011963, "learning_rate": 8.27272604651388e-06, "loss": 0.4744, "step": 20794 }, { "epoch": 0.5686665937431634, "grad_norm": 1.2920795679092407, "learning_rate": 8.271853661558115e-06, "loss": 0.4963, "step": 20795 }, { "epoch": 0.5686939400568803, "grad_norm": 1.2529386281967163, "learning_rate": 8.270981290159172e-06, "loss": 0.7347, "step": 20796 }, { "epoch": 0.5687212863705973, "grad_norm": 1.3937275409698486, "learning_rate": 8.270108932323897e-06, "loss": 0.5, "step": 20797 }, { "epoch": 0.5687486326843142, "grad_norm": 1.3780938386917114, "learning_rate": 8.26923658805913e-06, "loss": 0.4799, "step": 20798 }, { "epoch": 0.5687759789980311, "grad_norm": 1.3154844045639038, "learning_rate": 8.268364257371715e-06, "loss": 0.5067, "step": 20799 }, { "epoch": 0.5688033253117479, "grad_norm": 1.5173310041427612, "learning_rate": 8.267491940268493e-06, "loss": 0.3794, "step": 20800 }, { "epoch": 0.5688306716254649, "grad_norm": 1.1705474853515625, "learning_rate": 8.26661963675631e-06, "loss": 0.4935, "step": 20801 }, { "epoch": 0.5688580179391818, "grad_norm": 1.0999397039413452, "learning_rate": 8.26574734684201e-06, "loss": 0.4763, "step": 20802 }, { "epoch": 0.5688853642528987, "grad_norm": 1.1453392505645752, "learning_rate": 8.264875070532433e-06, "loss": 0.3576, "step": 20803 }, { "epoch": 0.5689127105666156, "grad_norm": 1.4076974391937256, "learning_rate": 8.26400280783442e-06, "loss": 0.4567, "step": 20804 }, { "epoch": 0.5689400568803326, "grad_norm": 1.5902448892593384, "learning_rate": 8.26313055875482e-06, "loss": 0.4949, "step": 20805 }, { "epoch": 0.5689674031940495, "grad_norm": 1.3312751054763794, "learning_rate": 8.26225832330047e-06, "loss": 0.5278, "step": 20806 }, { "epoch": 0.5689947495077664, "grad_norm": 1.0953439474105835, "learning_rate": 8.261386101478217e-06, "loss": 0.4911, "step": 20807 }, { "epoch": 0.5690220958214832, "grad_norm": 1.3557729721069336, "learning_rate": 8.260513893294897e-06, "loss": 0.5036, "step": 20808 }, { "epoch": 0.5690494421352001, "grad_norm": 1.068679690361023, "learning_rate": 8.259641698757358e-06, "loss": 0.4405, "step": 20809 }, { "epoch": 0.5690767884489171, "grad_norm": 1.2822707891464233, "learning_rate": 8.258769517872442e-06, "loss": 0.4784, "step": 20810 }, { "epoch": 0.569104134762634, "grad_norm": 1.6454628705978394, "learning_rate": 8.257897350646986e-06, "loss": 0.7421, "step": 20811 }, { "epoch": 0.5691314810763509, "grad_norm": 1.145569920539856, "learning_rate": 8.257025197087839e-06, "loss": 0.4939, "step": 20812 }, { "epoch": 0.5691588273900678, "grad_norm": 1.209626317024231, "learning_rate": 8.256153057201835e-06, "loss": 0.4801, "step": 20813 }, { "epoch": 0.5691861737037848, "grad_norm": 1.3884533643722534, "learning_rate": 8.255280930995819e-06, "loss": 0.5151, "step": 20814 }, { "epoch": 0.5692135200175017, "grad_norm": 2.7407469749450684, "learning_rate": 8.254408818476636e-06, "loss": 0.3821, "step": 20815 }, { "epoch": 0.5692408663312185, "grad_norm": 1.1320369243621826, "learning_rate": 8.253536719651122e-06, "loss": 0.4556, "step": 20816 }, { "epoch": 0.5692682126449354, "grad_norm": 1.1887717247009277, "learning_rate": 8.252664634526121e-06, "loss": 0.4882, "step": 20817 }, { "epoch": 0.5692955589586524, "grad_norm": 1.318518042564392, "learning_rate": 8.251792563108477e-06, "loss": 0.7258, "step": 20818 }, { "epoch": 0.5693229052723693, "grad_norm": 1.5481866598129272, "learning_rate": 8.250920505405024e-06, "loss": 0.4791, "step": 20819 }, { "epoch": 0.5693502515860862, "grad_norm": 1.3807590007781982, "learning_rate": 8.250048461422613e-06, "loss": 0.5056, "step": 20820 }, { "epoch": 0.5693775978998031, "grad_norm": 1.739893913269043, "learning_rate": 8.249176431168076e-06, "loss": 0.3974, "step": 20821 }, { "epoch": 0.5694049442135201, "grad_norm": 1.5806233882904053, "learning_rate": 8.248304414648257e-06, "loss": 0.3666, "step": 20822 }, { "epoch": 0.569432290527237, "grad_norm": 1.1428947448730469, "learning_rate": 8.24743241187e-06, "loss": 0.7556, "step": 20823 }, { "epoch": 0.5694596368409538, "grad_norm": 1.3374685049057007, "learning_rate": 8.24656042284014e-06, "loss": 0.4858, "step": 20824 }, { "epoch": 0.5694869831546707, "grad_norm": 1.262808918952942, "learning_rate": 8.24568844756552e-06, "loss": 0.4982, "step": 20825 }, { "epoch": 0.5695143294683876, "grad_norm": 1.3069937229156494, "learning_rate": 8.24481648605298e-06, "loss": 0.4805, "step": 20826 }, { "epoch": 0.5695416757821046, "grad_norm": 1.3001431226730347, "learning_rate": 8.243944538309361e-06, "loss": 0.4842, "step": 20827 }, { "epoch": 0.5695690220958215, "grad_norm": 1.3682111501693726, "learning_rate": 8.243072604341503e-06, "loss": 0.5224, "step": 20828 }, { "epoch": 0.5695963684095384, "grad_norm": 1.4077733755111694, "learning_rate": 8.242200684156247e-06, "loss": 0.4421, "step": 20829 }, { "epoch": 0.5696237147232553, "grad_norm": 1.444632887840271, "learning_rate": 8.241328777760431e-06, "loss": 0.3652, "step": 20830 }, { "epoch": 0.5696510610369722, "grad_norm": 1.1822166442871094, "learning_rate": 8.240456885160895e-06, "loss": 0.4857, "step": 20831 }, { "epoch": 0.5696784073506891, "grad_norm": 1.5304316282272339, "learning_rate": 8.23958500636448e-06, "loss": 0.7022, "step": 20832 }, { "epoch": 0.569705753664406, "grad_norm": 1.515309453010559, "learning_rate": 8.238713141378025e-06, "loss": 0.4113, "step": 20833 }, { "epoch": 0.5697330999781229, "grad_norm": 1.2964825630187988, "learning_rate": 8.23784129020837e-06, "loss": 0.4762, "step": 20834 }, { "epoch": 0.5697604462918399, "grad_norm": 1.5175201892852783, "learning_rate": 8.236969452862356e-06, "loss": 0.328, "step": 20835 }, { "epoch": 0.5697877926055568, "grad_norm": 1.3343169689178467, "learning_rate": 8.236097629346817e-06, "loss": 0.5035, "step": 20836 }, { "epoch": 0.5698151389192737, "grad_norm": 1.2157713174819946, "learning_rate": 8.2352258196686e-06, "loss": 0.5019, "step": 20837 }, { "epoch": 0.5698424852329906, "grad_norm": 1.1335541009902954, "learning_rate": 8.234354023834536e-06, "loss": 0.5212, "step": 20838 }, { "epoch": 0.5698698315467075, "grad_norm": 1.1423248052597046, "learning_rate": 8.23348224185147e-06, "loss": 0.4756, "step": 20839 }, { "epoch": 0.5698971778604244, "grad_norm": 1.148343801498413, "learning_rate": 8.232610473726238e-06, "loss": 0.4846, "step": 20840 }, { "epoch": 0.5699245241741413, "grad_norm": 1.3535621166229248, "learning_rate": 8.231738719465679e-06, "loss": 0.485, "step": 20841 }, { "epoch": 0.5699518704878582, "grad_norm": 1.2373932600021362, "learning_rate": 8.230866979076632e-06, "loss": 0.5178, "step": 20842 }, { "epoch": 0.5699792168015752, "grad_norm": 1.1144111156463623, "learning_rate": 8.229995252565937e-06, "loss": 0.4576, "step": 20843 }, { "epoch": 0.5700065631152921, "grad_norm": 1.476590871810913, "learning_rate": 8.229123539940429e-06, "loss": 0.3554, "step": 20844 }, { "epoch": 0.570033909429009, "grad_norm": 1.1808793544769287, "learning_rate": 8.22825184120695e-06, "loss": 0.5132, "step": 20845 }, { "epoch": 0.5700612557427259, "grad_norm": 1.2713866233825684, "learning_rate": 8.227380156372335e-06, "loss": 0.5303, "step": 20846 }, { "epoch": 0.5700886020564427, "grad_norm": 1.3296531438827515, "learning_rate": 8.226508485443425e-06, "loss": 0.4624, "step": 20847 }, { "epoch": 0.5701159483701597, "grad_norm": 1.2370911836624146, "learning_rate": 8.225636828427059e-06, "loss": 0.3517, "step": 20848 }, { "epoch": 0.5701432946838766, "grad_norm": 1.3697214126586914, "learning_rate": 8.224765185330069e-06, "loss": 0.3438, "step": 20849 }, { "epoch": 0.5701706409975935, "grad_norm": 1.6314451694488525, "learning_rate": 8.2238935561593e-06, "loss": 0.7527, "step": 20850 }, { "epoch": 0.5701979873113104, "grad_norm": 3.206437110900879, "learning_rate": 8.223021940921584e-06, "loss": 0.3599, "step": 20851 }, { "epoch": 0.5702253336250274, "grad_norm": 1.2993491888046265, "learning_rate": 8.22215033962376e-06, "loss": 0.5093, "step": 20852 }, { "epoch": 0.5702526799387443, "grad_norm": 1.1823731660842896, "learning_rate": 8.22127875227267e-06, "loss": 0.7627, "step": 20853 }, { "epoch": 0.5702800262524612, "grad_norm": 1.1464242935180664, "learning_rate": 8.220407178875143e-06, "loss": 0.4669, "step": 20854 }, { "epoch": 0.570307372566178, "grad_norm": 4.6702775955200195, "learning_rate": 8.219535619438024e-06, "loss": 0.4956, "step": 20855 }, { "epoch": 0.570334718879895, "grad_norm": 1.3131318092346191, "learning_rate": 8.218664073968146e-06, "loss": 0.4683, "step": 20856 }, { "epoch": 0.5703620651936119, "grad_norm": 1.671170711517334, "learning_rate": 8.217792542472345e-06, "loss": 0.5, "step": 20857 }, { "epoch": 0.5703894115073288, "grad_norm": 1.0508445501327515, "learning_rate": 8.216921024957467e-06, "loss": 0.4744, "step": 20858 }, { "epoch": 0.5704167578210457, "grad_norm": 1.4065394401550293, "learning_rate": 8.216049521430338e-06, "loss": 0.4869, "step": 20859 }, { "epoch": 0.5704441041347627, "grad_norm": 1.2912558317184448, "learning_rate": 8.215178031897794e-06, "loss": 0.5154, "step": 20860 }, { "epoch": 0.5704714504484796, "grad_norm": 1.258347988128662, "learning_rate": 8.21430655636668e-06, "loss": 0.4723, "step": 20861 }, { "epoch": 0.5704987967621965, "grad_norm": 1.4480254650115967, "learning_rate": 8.213435094843828e-06, "loss": 0.498, "step": 20862 }, { "epoch": 0.5705261430759133, "grad_norm": 1.5282913446426392, "learning_rate": 8.212563647336075e-06, "loss": 0.4865, "step": 20863 }, { "epoch": 0.5705534893896302, "grad_norm": 1.4455281496047974, "learning_rate": 8.211692213850257e-06, "loss": 0.3685, "step": 20864 }, { "epoch": 0.5705808357033472, "grad_norm": 1.250978708267212, "learning_rate": 8.210820794393211e-06, "loss": 0.4656, "step": 20865 }, { "epoch": 0.5706081820170641, "grad_norm": 1.224643588066101, "learning_rate": 8.20994938897177e-06, "loss": 0.4808, "step": 20866 }, { "epoch": 0.570635528330781, "grad_norm": 1.3159048557281494, "learning_rate": 8.209077997592775e-06, "loss": 0.5056, "step": 20867 }, { "epoch": 0.5706628746444979, "grad_norm": 1.1114225387573242, "learning_rate": 8.208206620263056e-06, "loss": 0.4665, "step": 20868 }, { "epoch": 0.5706902209582149, "grad_norm": 1.4678373336791992, "learning_rate": 8.207335256989453e-06, "loss": 0.3622, "step": 20869 }, { "epoch": 0.5707175672719318, "grad_norm": 1.6254017353057861, "learning_rate": 8.206463907778803e-06, "loss": 0.4015, "step": 20870 }, { "epoch": 0.5707449135856486, "grad_norm": 1.2431672811508179, "learning_rate": 8.205592572637937e-06, "loss": 0.5182, "step": 20871 }, { "epoch": 0.5707722598993655, "grad_norm": 1.1375718116760254, "learning_rate": 8.204721251573693e-06, "loss": 0.4669, "step": 20872 }, { "epoch": 0.5707996062130825, "grad_norm": 1.2958877086639404, "learning_rate": 8.203849944592903e-06, "loss": 0.7712, "step": 20873 }, { "epoch": 0.5708269525267994, "grad_norm": 1.056772232055664, "learning_rate": 8.202978651702405e-06, "loss": 0.4704, "step": 20874 }, { "epoch": 0.5708542988405163, "grad_norm": 1.2146269083023071, "learning_rate": 8.202107372909036e-06, "loss": 0.4938, "step": 20875 }, { "epoch": 0.5708816451542332, "grad_norm": 1.5462565422058105, "learning_rate": 8.201236108219628e-06, "loss": 0.4004, "step": 20876 }, { "epoch": 0.5709089914679502, "grad_norm": 1.1587612628936768, "learning_rate": 8.200364857641019e-06, "loss": 0.4804, "step": 20877 }, { "epoch": 0.5709363377816671, "grad_norm": 1.1594290733337402, "learning_rate": 8.199493621180037e-06, "loss": 0.7823, "step": 20878 }, { "epoch": 0.5709636840953839, "grad_norm": 1.1714982986450195, "learning_rate": 8.19862239884352e-06, "loss": 0.477, "step": 20879 }, { "epoch": 0.5709910304091008, "grad_norm": 1.429251790046692, "learning_rate": 8.19775119063831e-06, "loss": 0.3473, "step": 20880 }, { "epoch": 0.5710183767228177, "grad_norm": 1.2662055492401123, "learning_rate": 8.196879996571229e-06, "loss": 0.4508, "step": 20881 }, { "epoch": 0.5710457230365347, "grad_norm": 1.0815141201019287, "learning_rate": 8.196008816649118e-06, "loss": 0.4685, "step": 20882 }, { "epoch": 0.5710730693502516, "grad_norm": 1.4988429546356201, "learning_rate": 8.19513765087881e-06, "loss": 0.5198, "step": 20883 }, { "epoch": 0.5711004156639685, "grad_norm": 1.2111492156982422, "learning_rate": 8.19426649926714e-06, "loss": 0.498, "step": 20884 }, { "epoch": 0.5711277619776854, "grad_norm": 1.334533452987671, "learning_rate": 8.193395361820943e-06, "loss": 0.4824, "step": 20885 }, { "epoch": 0.5711551082914024, "grad_norm": 1.3941402435302734, "learning_rate": 8.192524238547049e-06, "loss": 0.3771, "step": 20886 }, { "epoch": 0.5711824546051192, "grad_norm": 1.459343671798706, "learning_rate": 8.191653129452292e-06, "loss": 0.3744, "step": 20887 }, { "epoch": 0.5712098009188361, "grad_norm": 1.4532476663589478, "learning_rate": 8.19078203454351e-06, "loss": 0.4906, "step": 20888 }, { "epoch": 0.571237147232553, "grad_norm": 1.2687121629714966, "learning_rate": 8.18991095382753e-06, "loss": 0.7619, "step": 20889 }, { "epoch": 0.57126449354627, "grad_norm": 1.566241979598999, "learning_rate": 8.189039887311193e-06, "loss": 0.3494, "step": 20890 }, { "epoch": 0.5712918398599869, "grad_norm": 1.1762168407440186, "learning_rate": 8.188168835001329e-06, "loss": 0.5342, "step": 20891 }, { "epoch": 0.5713191861737038, "grad_norm": 1.248785138130188, "learning_rate": 8.187297796904767e-06, "loss": 0.5001, "step": 20892 }, { "epoch": 0.5713465324874207, "grad_norm": 1.317750334739685, "learning_rate": 8.186426773028349e-06, "loss": 0.4681, "step": 20893 }, { "epoch": 0.5713738788011377, "grad_norm": 1.2113765478134155, "learning_rate": 8.185555763378901e-06, "loss": 0.7611, "step": 20894 }, { "epoch": 0.5714012251148545, "grad_norm": 1.2488090991973877, "learning_rate": 8.184684767963254e-06, "loss": 0.4675, "step": 20895 }, { "epoch": 0.5714285714285714, "grad_norm": 1.3881275653839111, "learning_rate": 8.183813786788246e-06, "loss": 0.4828, "step": 20896 }, { "epoch": 0.5714559177422883, "grad_norm": 1.448649525642395, "learning_rate": 8.182942819860709e-06, "loss": 0.3701, "step": 20897 }, { "epoch": 0.5714832640560052, "grad_norm": 1.2192386388778687, "learning_rate": 8.182071867187471e-06, "loss": 0.4858, "step": 20898 }, { "epoch": 0.5715106103697222, "grad_norm": 1.3342421054840088, "learning_rate": 8.18120092877537e-06, "loss": 0.4871, "step": 20899 }, { "epoch": 0.5715379566834391, "grad_norm": 1.15077805519104, "learning_rate": 8.180330004631235e-06, "loss": 0.4834, "step": 20900 }, { "epoch": 0.571565302997156, "grad_norm": 1.2372130155563354, "learning_rate": 8.179459094761897e-06, "loss": 0.5078, "step": 20901 }, { "epoch": 0.571592649310873, "grad_norm": 1.2271888256072998, "learning_rate": 8.178588199174194e-06, "loss": 0.5258, "step": 20902 }, { "epoch": 0.5716199956245898, "grad_norm": 1.4945125579833984, "learning_rate": 8.17771731787495e-06, "loss": 0.3754, "step": 20903 }, { "epoch": 0.5716473419383067, "grad_norm": 1.1991534233093262, "learning_rate": 8.176846450871003e-06, "loss": 0.7771, "step": 20904 }, { "epoch": 0.5716746882520236, "grad_norm": 1.5597248077392578, "learning_rate": 8.175975598169185e-06, "loss": 0.3971, "step": 20905 }, { "epoch": 0.5717020345657405, "grad_norm": 1.3306536674499512, "learning_rate": 8.17510475977632e-06, "loss": 0.4984, "step": 20906 }, { "epoch": 0.5717293808794575, "grad_norm": 1.3678311109542847, "learning_rate": 8.174233935699249e-06, "loss": 0.5106, "step": 20907 }, { "epoch": 0.5717567271931744, "grad_norm": 1.67290461063385, "learning_rate": 8.173363125944796e-06, "loss": 0.5149, "step": 20908 }, { "epoch": 0.5717840735068913, "grad_norm": 1.3088823556900024, "learning_rate": 8.172492330519796e-06, "loss": 0.5045, "step": 20909 }, { "epoch": 0.5718114198206082, "grad_norm": 1.241413950920105, "learning_rate": 8.171621549431081e-06, "loss": 0.4741, "step": 20910 }, { "epoch": 0.571838766134325, "grad_norm": 1.5902796983718872, "learning_rate": 8.170750782685479e-06, "loss": 0.4551, "step": 20911 }, { "epoch": 0.571866112448042, "grad_norm": 1.256354808807373, "learning_rate": 8.16988003028982e-06, "loss": 0.743, "step": 20912 }, { "epoch": 0.5718934587617589, "grad_norm": 1.3828585147857666, "learning_rate": 8.169009292250941e-06, "loss": 0.3636, "step": 20913 }, { "epoch": 0.5719208050754758, "grad_norm": 1.358441948890686, "learning_rate": 8.168138568575668e-06, "loss": 0.4953, "step": 20914 }, { "epoch": 0.5719481513891927, "grad_norm": 1.3654955625534058, "learning_rate": 8.167267859270832e-06, "loss": 0.3875, "step": 20915 }, { "epoch": 0.5719754977029097, "grad_norm": 1.2000445127487183, "learning_rate": 8.166397164343262e-06, "loss": 0.4918, "step": 20916 }, { "epoch": 0.5720028440166266, "grad_norm": 1.6051770448684692, "learning_rate": 8.165526483799791e-06, "loss": 0.4702, "step": 20917 }, { "epoch": 0.5720301903303435, "grad_norm": 1.3415958881378174, "learning_rate": 8.164655817647252e-06, "loss": 0.5017, "step": 20918 }, { "epoch": 0.5720575366440603, "grad_norm": 1.1996569633483887, "learning_rate": 8.163785165892467e-06, "loss": 0.3296, "step": 20919 }, { "epoch": 0.5720848829577773, "grad_norm": 1.3980929851531982, "learning_rate": 8.162914528542273e-06, "loss": 0.488, "step": 20920 }, { "epoch": 0.5721122292714942, "grad_norm": 1.314871072769165, "learning_rate": 8.162043905603497e-06, "loss": 0.5005, "step": 20921 }, { "epoch": 0.5721395755852111, "grad_norm": 1.2726236581802368, "learning_rate": 8.16117329708297e-06, "loss": 0.491, "step": 20922 }, { "epoch": 0.572166921898928, "grad_norm": 1.3348498344421387, "learning_rate": 8.16030270298752e-06, "loss": 0.4757, "step": 20923 }, { "epoch": 0.572194268212645, "grad_norm": 1.300818920135498, "learning_rate": 8.159432123323978e-06, "loss": 0.4889, "step": 20924 }, { "epoch": 0.5722216145263619, "grad_norm": 1.5693799257278442, "learning_rate": 8.158561558099172e-06, "loss": 0.4507, "step": 20925 }, { "epoch": 0.5722489608400787, "grad_norm": 1.1315698623657227, "learning_rate": 8.157691007319935e-06, "loss": 0.4929, "step": 20926 }, { "epoch": 0.5722763071537956, "grad_norm": 1.1047996282577515, "learning_rate": 8.15682047099309e-06, "loss": 0.36, "step": 20927 }, { "epoch": 0.5723036534675126, "grad_norm": 1.0902742147445679, "learning_rate": 8.155949949125474e-06, "loss": 0.4515, "step": 20928 }, { "epoch": 0.5723309997812295, "grad_norm": 1.2746288776397705, "learning_rate": 8.155079441723913e-06, "loss": 0.7595, "step": 20929 }, { "epoch": 0.5723583460949464, "grad_norm": 1.3119219541549683, "learning_rate": 8.154208948795229e-06, "loss": 0.5016, "step": 20930 }, { "epoch": 0.5723856924086633, "grad_norm": 1.313301920890808, "learning_rate": 8.153338470346256e-06, "loss": 0.4898, "step": 20931 }, { "epoch": 0.5724130387223803, "grad_norm": 2.82177734375, "learning_rate": 8.152468006383828e-06, "loss": 0.3762, "step": 20932 }, { "epoch": 0.5724403850360972, "grad_norm": 1.260823130607605, "learning_rate": 8.151597556914764e-06, "loss": 0.7505, "step": 20933 }, { "epoch": 0.572467731349814, "grad_norm": 1.2568080425262451, "learning_rate": 8.150727121945899e-06, "loss": 0.4703, "step": 20934 }, { "epoch": 0.5724950776635309, "grad_norm": 1.3879249095916748, "learning_rate": 8.149856701484058e-06, "loss": 0.4866, "step": 20935 }, { "epoch": 0.5725224239772478, "grad_norm": 1.2595735788345337, "learning_rate": 8.148986295536073e-06, "loss": 0.5198, "step": 20936 }, { "epoch": 0.5725497702909648, "grad_norm": 1.3245720863342285, "learning_rate": 8.148115904108768e-06, "loss": 0.3683, "step": 20937 }, { "epoch": 0.5725771166046817, "grad_norm": 1.508358359336853, "learning_rate": 8.147245527208972e-06, "loss": 0.4176, "step": 20938 }, { "epoch": 0.5726044629183986, "grad_norm": 1.2665246725082397, "learning_rate": 8.146375164843513e-06, "loss": 0.7738, "step": 20939 }, { "epoch": 0.5726318092321155, "grad_norm": 1.2007046937942505, "learning_rate": 8.145504817019222e-06, "loss": 0.5046, "step": 20940 }, { "epoch": 0.5726591555458325, "grad_norm": 1.609541893005371, "learning_rate": 8.144634483742921e-06, "loss": 0.3985, "step": 20941 }, { "epoch": 0.5726865018595493, "grad_norm": 1.5916671752929688, "learning_rate": 8.143764165021442e-06, "loss": 0.3891, "step": 20942 }, { "epoch": 0.5727138481732662, "grad_norm": 1.2714918851852417, "learning_rate": 8.14289386086161e-06, "loss": 0.5071, "step": 20943 }, { "epoch": 0.5727411944869831, "grad_norm": 1.440798282623291, "learning_rate": 8.142023571270252e-06, "loss": 0.3713, "step": 20944 }, { "epoch": 0.5727685408007, "grad_norm": 1.1561110019683838, "learning_rate": 8.141153296254199e-06, "loss": 0.4453, "step": 20945 }, { "epoch": 0.572795887114417, "grad_norm": 1.3178266286849976, "learning_rate": 8.140283035820272e-06, "loss": 0.5075, "step": 20946 }, { "epoch": 0.5728232334281339, "grad_norm": 1.5046817064285278, "learning_rate": 8.139412789975303e-06, "loss": 0.4753, "step": 20947 }, { "epoch": 0.5728505797418508, "grad_norm": 1.2049400806427002, "learning_rate": 8.138542558726119e-06, "loss": 0.495, "step": 20948 }, { "epoch": 0.5728779260555678, "grad_norm": 1.5034369230270386, "learning_rate": 8.137672342079541e-06, "loss": 0.7951, "step": 20949 }, { "epoch": 0.5729052723692846, "grad_norm": 1.4948738813400269, "learning_rate": 8.136802140042402e-06, "loss": 0.4997, "step": 20950 }, { "epoch": 0.5729326186830015, "grad_norm": 1.7628281116485596, "learning_rate": 8.135931952621524e-06, "loss": 0.4603, "step": 20951 }, { "epoch": 0.5729599649967184, "grad_norm": 1.1957865953445435, "learning_rate": 8.135061779823735e-06, "loss": 0.4829, "step": 20952 }, { "epoch": 0.5729873113104353, "grad_norm": 1.9730722904205322, "learning_rate": 8.134191621655864e-06, "loss": 0.4719, "step": 20953 }, { "epoch": 0.5730146576241523, "grad_norm": 1.3742340803146362, "learning_rate": 8.133321478124733e-06, "loss": 0.4829, "step": 20954 }, { "epoch": 0.5730420039378692, "grad_norm": 1.1763746738433838, "learning_rate": 8.132451349237168e-06, "loss": 0.5043, "step": 20955 }, { "epoch": 0.5730693502515861, "grad_norm": 1.4453272819519043, "learning_rate": 8.131581235000001e-06, "loss": 0.5187, "step": 20956 }, { "epoch": 0.573096696565303, "grad_norm": 1.2007824182510376, "learning_rate": 8.13071113542005e-06, "loss": 0.4795, "step": 20957 }, { "epoch": 0.5731240428790199, "grad_norm": 1.189276099205017, "learning_rate": 8.129841050504147e-06, "loss": 0.4639, "step": 20958 }, { "epoch": 0.5731513891927368, "grad_norm": 1.0561256408691406, "learning_rate": 8.12897098025911e-06, "loss": 0.5078, "step": 20959 }, { "epoch": 0.5731787355064537, "grad_norm": 1.2076992988586426, "learning_rate": 8.128100924691772e-06, "loss": 0.4812, "step": 20960 }, { "epoch": 0.5732060818201706, "grad_norm": 1.1052615642547607, "learning_rate": 8.127230883808956e-06, "loss": 0.495, "step": 20961 }, { "epoch": 0.5732334281338876, "grad_norm": 2.4769484996795654, "learning_rate": 8.126360857617484e-06, "loss": 0.3609, "step": 20962 }, { "epoch": 0.5732607744476045, "grad_norm": 1.5989474058151245, "learning_rate": 8.12549084612419e-06, "loss": 0.3923, "step": 20963 }, { "epoch": 0.5732881207613214, "grad_norm": 1.3818674087524414, "learning_rate": 8.124620849335889e-06, "loss": 0.3941, "step": 20964 }, { "epoch": 0.5733154670750383, "grad_norm": 1.2443368434906006, "learning_rate": 8.12375086725941e-06, "loss": 0.4919, "step": 20965 }, { "epoch": 0.5733428133887551, "grad_norm": 1.5533106327056885, "learning_rate": 8.122880899901576e-06, "loss": 0.4195, "step": 20966 }, { "epoch": 0.5733701597024721, "grad_norm": 1.2170348167419434, "learning_rate": 8.122010947269214e-06, "loss": 0.5088, "step": 20967 }, { "epoch": 0.573397506016189, "grad_norm": 1.072387456893921, "learning_rate": 8.121141009369146e-06, "loss": 0.4589, "step": 20968 }, { "epoch": 0.5734248523299059, "grad_norm": 1.4031951427459717, "learning_rate": 8.120271086208198e-06, "loss": 0.5243, "step": 20969 }, { "epoch": 0.5734521986436228, "grad_norm": 1.3048205375671387, "learning_rate": 8.119401177793196e-06, "loss": 0.5063, "step": 20970 }, { "epoch": 0.5734795449573398, "grad_norm": 1.2693040370941162, "learning_rate": 8.118531284130961e-06, "loss": 0.5032, "step": 20971 }, { "epoch": 0.5735068912710567, "grad_norm": 0.915620744228363, "learning_rate": 8.117661405228323e-06, "loss": 0.2912, "step": 20972 }, { "epoch": 0.5735342375847736, "grad_norm": 1.2969897985458374, "learning_rate": 8.116791541092098e-06, "loss": 0.4764, "step": 20973 }, { "epoch": 0.5735615838984904, "grad_norm": 1.1156965494155884, "learning_rate": 8.115921691729111e-06, "loss": 0.5103, "step": 20974 }, { "epoch": 0.5735889302122074, "grad_norm": 1.147828221321106, "learning_rate": 8.115051857146193e-06, "loss": 0.4967, "step": 20975 }, { "epoch": 0.5736162765259243, "grad_norm": 1.1142313480377197, "learning_rate": 8.114182037350158e-06, "loss": 0.4737, "step": 20976 }, { "epoch": 0.5736436228396412, "grad_norm": 1.1971668004989624, "learning_rate": 8.113312232347836e-06, "loss": 0.4582, "step": 20977 }, { "epoch": 0.5736709691533581, "grad_norm": 1.224520206451416, "learning_rate": 8.11244244214605e-06, "loss": 0.4954, "step": 20978 }, { "epoch": 0.5736983154670751, "grad_norm": 1.407914400100708, "learning_rate": 8.11157266675162e-06, "loss": 0.4921, "step": 20979 }, { "epoch": 0.573725661780792, "grad_norm": 1.532503366470337, "learning_rate": 8.110702906171375e-06, "loss": 0.5014, "step": 20980 }, { "epoch": 0.5737530080945089, "grad_norm": 1.2171932458877563, "learning_rate": 8.10983316041213e-06, "loss": 0.482, "step": 20981 }, { "epoch": 0.5737803544082257, "grad_norm": 1.2903416156768799, "learning_rate": 8.108963429480712e-06, "loss": 0.4861, "step": 20982 }, { "epoch": 0.5738077007219426, "grad_norm": 1.281036615371704, "learning_rate": 8.108093713383946e-06, "loss": 0.4716, "step": 20983 }, { "epoch": 0.5738350470356596, "grad_norm": 1.1849534511566162, "learning_rate": 8.10722401212865e-06, "loss": 0.5146, "step": 20984 }, { "epoch": 0.5738623933493765, "grad_norm": 1.4462084770202637, "learning_rate": 8.106354325721651e-06, "loss": 0.3932, "step": 20985 }, { "epoch": 0.5738897396630934, "grad_norm": 1.788793683052063, "learning_rate": 8.105484654169768e-06, "loss": 0.5035, "step": 20986 }, { "epoch": 0.5739170859768103, "grad_norm": 1.1308542490005493, "learning_rate": 8.104614997479824e-06, "loss": 0.3681, "step": 20987 }, { "epoch": 0.5739444322905273, "grad_norm": 1.3288848400115967, "learning_rate": 8.103745355658645e-06, "loss": 0.4966, "step": 20988 }, { "epoch": 0.5739717786042442, "grad_norm": 1.1710801124572754, "learning_rate": 8.102875728713046e-06, "loss": 0.7458, "step": 20989 }, { "epoch": 0.573999124917961, "grad_norm": 1.479966640472412, "learning_rate": 8.102006116649855e-06, "loss": 0.7531, "step": 20990 }, { "epoch": 0.5740264712316779, "grad_norm": 1.1797112226486206, "learning_rate": 8.101136519475892e-06, "loss": 0.4632, "step": 20991 }, { "epoch": 0.5740538175453949, "grad_norm": 3.3785500526428223, "learning_rate": 8.100266937197978e-06, "loss": 0.3824, "step": 20992 }, { "epoch": 0.5740811638591118, "grad_norm": 1.3375473022460938, "learning_rate": 8.099397369822937e-06, "loss": 0.4968, "step": 20993 }, { "epoch": 0.5741085101728287, "grad_norm": 1.2518765926361084, "learning_rate": 8.098527817357586e-06, "loss": 0.4522, "step": 20994 }, { "epoch": 0.5741358564865456, "grad_norm": 1.2887248992919922, "learning_rate": 8.09765827980875e-06, "loss": 0.7627, "step": 20995 }, { "epoch": 0.5741632028002626, "grad_norm": 1.2291672229766846, "learning_rate": 8.096788757183252e-06, "loss": 0.4784, "step": 20996 }, { "epoch": 0.5741905491139795, "grad_norm": 1.1086336374282837, "learning_rate": 8.095919249487908e-06, "loss": 0.4762, "step": 20997 }, { "epoch": 0.5742178954276963, "grad_norm": 1.19197678565979, "learning_rate": 8.095049756729544e-06, "loss": 0.4801, "step": 20998 }, { "epoch": 0.5742452417414132, "grad_norm": 1.2340487241744995, "learning_rate": 8.094180278914978e-06, "loss": 0.5013, "step": 20999 }, { "epoch": 0.5742725880551302, "grad_norm": 1.1836551427841187, "learning_rate": 8.093310816051032e-06, "loss": 0.4766, "step": 21000 }, { "epoch": 0.5742999343688471, "grad_norm": 1.2669728994369507, "learning_rate": 8.092441368144523e-06, "loss": 0.4631, "step": 21001 }, { "epoch": 0.574327280682564, "grad_norm": 1.3753705024719238, "learning_rate": 8.091571935202277e-06, "loss": 0.3983, "step": 21002 }, { "epoch": 0.5743546269962809, "grad_norm": 1.1757844686508179, "learning_rate": 8.09070251723111e-06, "loss": 0.4599, "step": 21003 }, { "epoch": 0.5743819733099979, "grad_norm": 1.3980271816253662, "learning_rate": 8.089833114237845e-06, "loss": 0.3666, "step": 21004 }, { "epoch": 0.5744093196237148, "grad_norm": 1.1918026208877563, "learning_rate": 8.088963726229304e-06, "loss": 0.4779, "step": 21005 }, { "epoch": 0.5744366659374316, "grad_norm": 1.1438653469085693, "learning_rate": 8.088094353212302e-06, "loss": 0.4888, "step": 21006 }, { "epoch": 0.5744640122511485, "grad_norm": 1.2150248289108276, "learning_rate": 8.087224995193663e-06, "loss": 0.5102, "step": 21007 }, { "epoch": 0.5744913585648654, "grad_norm": 1.9558850526809692, "learning_rate": 8.086355652180205e-06, "loss": 0.7457, "step": 21008 }, { "epoch": 0.5745187048785824, "grad_norm": 0.9746550917625427, "learning_rate": 8.08548632417875e-06, "loss": 0.3404, "step": 21009 }, { "epoch": 0.5745460511922993, "grad_norm": 1.5285993814468384, "learning_rate": 8.084617011196114e-06, "loss": 0.4786, "step": 21010 }, { "epoch": 0.5745733975060162, "grad_norm": 1.244596004486084, "learning_rate": 8.083747713239118e-06, "loss": 0.5173, "step": 21011 }, { "epoch": 0.5746007438197331, "grad_norm": 1.2704702615737915, "learning_rate": 8.082878430314582e-06, "loss": 0.7584, "step": 21012 }, { "epoch": 0.5746280901334501, "grad_norm": 1.2247017621994019, "learning_rate": 8.082009162429328e-06, "loss": 0.4148, "step": 21013 }, { "epoch": 0.5746554364471669, "grad_norm": 1.4974381923675537, "learning_rate": 8.08113990959017e-06, "loss": 0.4834, "step": 21014 }, { "epoch": 0.5746827827608838, "grad_norm": 1.3263804912567139, "learning_rate": 8.080270671803932e-06, "loss": 0.4833, "step": 21015 }, { "epoch": 0.5747101290746007, "grad_norm": 1.1277915239334106, "learning_rate": 8.079401449077426e-06, "loss": 0.4792, "step": 21016 }, { "epoch": 0.5747374753883177, "grad_norm": 1.3380842208862305, "learning_rate": 8.078532241417476e-06, "loss": 0.4898, "step": 21017 }, { "epoch": 0.5747648217020346, "grad_norm": 1.2787138223648071, "learning_rate": 8.077663048830903e-06, "loss": 0.4986, "step": 21018 }, { "epoch": 0.5747921680157515, "grad_norm": 1.3284955024719238, "learning_rate": 8.07679387132452e-06, "loss": 0.5338, "step": 21019 }, { "epoch": 0.5748195143294684, "grad_norm": 1.2263089418411255, "learning_rate": 8.075924708905147e-06, "loss": 0.4819, "step": 21020 }, { "epoch": 0.5748468606431854, "grad_norm": 1.1577751636505127, "learning_rate": 8.075055561579607e-06, "loss": 0.4748, "step": 21021 }, { "epoch": 0.5748742069569022, "grad_norm": 1.3668237924575806, "learning_rate": 8.07418642935471e-06, "loss": 0.5197, "step": 21022 }, { "epoch": 0.5749015532706191, "grad_norm": 1.2366801500320435, "learning_rate": 8.073317312237284e-06, "loss": 0.5089, "step": 21023 }, { "epoch": 0.574928899584336, "grad_norm": 1.233635663986206, "learning_rate": 8.072448210234135e-06, "loss": 0.5045, "step": 21024 }, { "epoch": 0.5749562458980529, "grad_norm": 1.188512921333313, "learning_rate": 8.07157912335209e-06, "loss": 0.4719, "step": 21025 }, { "epoch": 0.5749835922117699, "grad_norm": 1.17967689037323, "learning_rate": 8.070710051597967e-06, "loss": 0.4951, "step": 21026 }, { "epoch": 0.5750109385254868, "grad_norm": 1.2465789318084717, "learning_rate": 8.069840994978576e-06, "loss": 0.515, "step": 21027 }, { "epoch": 0.5750382848392037, "grad_norm": 1.041900634765625, "learning_rate": 8.068971953500743e-06, "loss": 0.3786, "step": 21028 }, { "epoch": 0.5750656311529205, "grad_norm": 1.274316430091858, "learning_rate": 8.068102927171279e-06, "loss": 0.4865, "step": 21029 }, { "epoch": 0.5750929774666375, "grad_norm": 1.318505883216858, "learning_rate": 8.067233915997004e-06, "loss": 0.5034, "step": 21030 }, { "epoch": 0.5751203237803544, "grad_norm": 1.3655333518981934, "learning_rate": 8.066364919984737e-06, "loss": 0.4885, "step": 21031 }, { "epoch": 0.5751476700940713, "grad_norm": 1.3539856672286987, "learning_rate": 8.065495939141291e-06, "loss": 0.3405, "step": 21032 }, { "epoch": 0.5751750164077882, "grad_norm": 1.0886220932006836, "learning_rate": 8.064626973473486e-06, "loss": 0.4819, "step": 21033 }, { "epoch": 0.5752023627215052, "grad_norm": 1.4697271585464478, "learning_rate": 8.06375802298814e-06, "loss": 0.4726, "step": 21034 }, { "epoch": 0.5752297090352221, "grad_norm": 1.2377068996429443, "learning_rate": 8.062889087692066e-06, "loss": 0.4717, "step": 21035 }, { "epoch": 0.575257055348939, "grad_norm": 1.1677998304367065, "learning_rate": 8.06202016759208e-06, "loss": 0.4506, "step": 21036 }, { "epoch": 0.5752844016626558, "grad_norm": 1.1261483430862427, "learning_rate": 8.061151262695001e-06, "loss": 0.5016, "step": 21037 }, { "epoch": 0.5753117479763727, "grad_norm": 1.3496748208999634, "learning_rate": 8.060282373007645e-06, "loss": 0.4933, "step": 21038 }, { "epoch": 0.5753390942900897, "grad_norm": 1.2655540704727173, "learning_rate": 8.059413498536828e-06, "loss": 0.4961, "step": 21039 }, { "epoch": 0.5753664406038066, "grad_norm": 1.2352731227874756, "learning_rate": 8.058544639289367e-06, "loss": 0.4907, "step": 21040 }, { "epoch": 0.5753937869175235, "grad_norm": 1.2794725894927979, "learning_rate": 8.057675795272074e-06, "loss": 0.4712, "step": 21041 }, { "epoch": 0.5754211332312404, "grad_norm": 1.2458189725875854, "learning_rate": 8.056806966491768e-06, "loss": 0.757, "step": 21042 }, { "epoch": 0.5754484795449574, "grad_norm": 1.2164874076843262, "learning_rate": 8.055938152955269e-06, "loss": 0.494, "step": 21043 }, { "epoch": 0.5754758258586743, "grad_norm": 1.3631601333618164, "learning_rate": 8.055069354669382e-06, "loss": 0.5101, "step": 21044 }, { "epoch": 0.5755031721723911, "grad_norm": 1.1082931756973267, "learning_rate": 8.054200571640935e-06, "loss": 0.4606, "step": 21045 }, { "epoch": 0.575530518486108, "grad_norm": 1.2730354070663452, "learning_rate": 8.053331803876732e-06, "loss": 0.4616, "step": 21046 }, { "epoch": 0.575557864799825, "grad_norm": 1.1216689348220825, "learning_rate": 8.052463051383594e-06, "loss": 0.507, "step": 21047 }, { "epoch": 0.5755852111135419, "grad_norm": 1.228914499282837, "learning_rate": 8.051594314168338e-06, "loss": 0.4919, "step": 21048 }, { "epoch": 0.5756125574272588, "grad_norm": 1.1734507083892822, "learning_rate": 8.050725592237772e-06, "loss": 0.5005, "step": 21049 }, { "epoch": 0.5756399037409757, "grad_norm": 2.01794171333313, "learning_rate": 8.049856885598718e-06, "loss": 0.3369, "step": 21050 }, { "epoch": 0.5756672500546927, "grad_norm": 1.0293858051300049, "learning_rate": 8.048988194257988e-06, "loss": 0.3869, "step": 21051 }, { "epoch": 0.5756945963684096, "grad_norm": 1.1980654001235962, "learning_rate": 8.048119518222394e-06, "loss": 0.4673, "step": 21052 }, { "epoch": 0.5757219426821264, "grad_norm": 1.1413718461990356, "learning_rate": 8.047250857498756e-06, "loss": 0.4911, "step": 21053 }, { "epoch": 0.5757492889958433, "grad_norm": 1.2005542516708374, "learning_rate": 8.046382212093884e-06, "loss": 0.4783, "step": 21054 }, { "epoch": 0.5757766353095602, "grad_norm": 1.528390645980835, "learning_rate": 8.045513582014593e-06, "loss": 0.3682, "step": 21055 }, { "epoch": 0.5758039816232772, "grad_norm": 1.45283043384552, "learning_rate": 8.044644967267702e-06, "loss": 0.485, "step": 21056 }, { "epoch": 0.5758313279369941, "grad_norm": 1.1216304302215576, "learning_rate": 8.043776367860017e-06, "loss": 0.4817, "step": 21057 }, { "epoch": 0.575858674250711, "grad_norm": 1.02716863155365, "learning_rate": 8.042907783798358e-06, "loss": 0.4894, "step": 21058 }, { "epoch": 0.575886020564428, "grad_norm": 1.4322245121002197, "learning_rate": 8.042039215089535e-06, "loss": 0.4132, "step": 21059 }, { "epoch": 0.5759133668781449, "grad_norm": 1.4123108386993408, "learning_rate": 8.041170661740361e-06, "loss": 0.5143, "step": 21060 }, { "epoch": 0.5759407131918617, "grad_norm": 1.2680416107177734, "learning_rate": 8.040302123757657e-06, "loss": 0.4847, "step": 21061 }, { "epoch": 0.5759680595055786, "grad_norm": 1.534533143043518, "learning_rate": 8.039433601148229e-06, "loss": 0.4028, "step": 21062 }, { "epoch": 0.5759954058192955, "grad_norm": 1.275527834892273, "learning_rate": 8.038565093918893e-06, "loss": 0.7832, "step": 21063 }, { "epoch": 0.5760227521330125, "grad_norm": 1.2680209875106812, "learning_rate": 8.037696602076462e-06, "loss": 0.4955, "step": 21064 }, { "epoch": 0.5760500984467294, "grad_norm": 1.4326080083847046, "learning_rate": 8.036828125627746e-06, "loss": 0.5092, "step": 21065 }, { "epoch": 0.5760774447604463, "grad_norm": 1.433119297027588, "learning_rate": 8.035959664579565e-06, "loss": 0.4843, "step": 21066 }, { "epoch": 0.5761047910741632, "grad_norm": 2.323972702026367, "learning_rate": 8.035091218938727e-06, "loss": 0.3783, "step": 21067 }, { "epoch": 0.5761321373878802, "grad_norm": 1.627984642982483, "learning_rate": 8.034222788712042e-06, "loss": 0.394, "step": 21068 }, { "epoch": 0.576159483701597, "grad_norm": 1.1709767580032349, "learning_rate": 8.033354373906332e-06, "loss": 0.5041, "step": 21069 }, { "epoch": 0.5761868300153139, "grad_norm": 1.2406595945358276, "learning_rate": 8.032485974528401e-06, "loss": 0.5291, "step": 21070 }, { "epoch": 0.5762141763290308, "grad_norm": 1.1008046865463257, "learning_rate": 8.031617590585062e-06, "loss": 0.3307, "step": 21071 }, { "epoch": 0.5762415226427477, "grad_norm": 1.2243236303329468, "learning_rate": 8.030749222083131e-06, "loss": 0.5149, "step": 21072 }, { "epoch": 0.5762688689564647, "grad_norm": 1.1532472372055054, "learning_rate": 8.029880869029417e-06, "loss": 0.4958, "step": 21073 }, { "epoch": 0.5762962152701816, "grad_norm": 1.0634634494781494, "learning_rate": 8.029012531430733e-06, "loss": 0.3454, "step": 21074 }, { "epoch": 0.5763235615838985, "grad_norm": 1.3608254194259644, "learning_rate": 8.028144209293892e-06, "loss": 0.7425, "step": 21075 }, { "epoch": 0.5763509078976154, "grad_norm": 1.0724616050720215, "learning_rate": 8.027275902625703e-06, "loss": 0.4939, "step": 21076 }, { "epoch": 0.5763782542113323, "grad_norm": 1.316065788269043, "learning_rate": 8.026407611432978e-06, "loss": 0.4555, "step": 21077 }, { "epoch": 0.5764056005250492, "grad_norm": 1.3213887214660645, "learning_rate": 8.025539335722535e-06, "loss": 0.4871, "step": 21078 }, { "epoch": 0.5764329468387661, "grad_norm": 1.0748686790466309, "learning_rate": 8.024671075501176e-06, "loss": 0.4901, "step": 21079 }, { "epoch": 0.576460293152483, "grad_norm": 1.1152116060256958, "learning_rate": 8.02380283077572e-06, "loss": 0.5015, "step": 21080 }, { "epoch": 0.5764876394662, "grad_norm": 1.2367746829986572, "learning_rate": 8.022934601552972e-06, "loss": 0.4806, "step": 21081 }, { "epoch": 0.5765149857799169, "grad_norm": 1.039092779159546, "learning_rate": 8.022066387839745e-06, "loss": 0.4851, "step": 21082 }, { "epoch": 0.5765423320936338, "grad_norm": 1.1205374002456665, "learning_rate": 8.021198189642852e-06, "loss": 0.4679, "step": 21083 }, { "epoch": 0.5765696784073507, "grad_norm": 1.1979563236236572, "learning_rate": 8.020330006969101e-06, "loss": 0.4905, "step": 21084 }, { "epoch": 0.5765970247210676, "grad_norm": 1.0995217561721802, "learning_rate": 8.019461839825305e-06, "loss": 0.4625, "step": 21085 }, { "epoch": 0.5766243710347845, "grad_norm": 1.1394672393798828, "learning_rate": 8.018593688218275e-06, "loss": 0.5095, "step": 21086 }, { "epoch": 0.5766517173485014, "grad_norm": 0.9028564095497131, "learning_rate": 8.017725552154818e-06, "loss": 0.3288, "step": 21087 }, { "epoch": 0.5766790636622183, "grad_norm": 1.1811907291412354, "learning_rate": 8.016857431641747e-06, "loss": 0.4845, "step": 21088 }, { "epoch": 0.5767064099759353, "grad_norm": 1.4595537185668945, "learning_rate": 8.015989326685872e-06, "loss": 0.3665, "step": 21089 }, { "epoch": 0.5767337562896522, "grad_norm": 1.207322597503662, "learning_rate": 8.015121237294e-06, "loss": 0.4898, "step": 21090 }, { "epoch": 0.5767611026033691, "grad_norm": 1.1920077800750732, "learning_rate": 8.014253163472944e-06, "loss": 0.5323, "step": 21091 }, { "epoch": 0.576788448917086, "grad_norm": 1.4081758260726929, "learning_rate": 8.013385105229514e-06, "loss": 0.4746, "step": 21092 }, { "epoch": 0.5768157952308028, "grad_norm": 1.1625279188156128, "learning_rate": 8.01251706257052e-06, "loss": 0.518, "step": 21093 }, { "epoch": 0.5768431415445198, "grad_norm": 1.249506950378418, "learning_rate": 8.011649035502768e-06, "loss": 0.4877, "step": 21094 }, { "epoch": 0.5768704878582367, "grad_norm": 1.1526707410812378, "learning_rate": 8.01078102403307e-06, "loss": 0.4666, "step": 21095 }, { "epoch": 0.5768978341719536, "grad_norm": 1.2541011571884155, "learning_rate": 8.009913028168236e-06, "loss": 0.4684, "step": 21096 }, { "epoch": 0.5769251804856705, "grad_norm": 1.2708961963653564, "learning_rate": 8.009045047915074e-06, "loss": 0.5029, "step": 21097 }, { "epoch": 0.5769525267993875, "grad_norm": 1.2194985151290894, "learning_rate": 8.008177083280392e-06, "loss": 0.4777, "step": 21098 }, { "epoch": 0.5769798731131044, "grad_norm": 1.6134287118911743, "learning_rate": 8.007309134271004e-06, "loss": 0.4688, "step": 21099 }, { "epoch": 0.5770072194268213, "grad_norm": 1.7173902988433838, "learning_rate": 8.006441200893712e-06, "loss": 0.3629, "step": 21100 }, { "epoch": 0.5770345657405381, "grad_norm": 1.2481629848480225, "learning_rate": 8.005573283155328e-06, "loss": 0.4977, "step": 21101 }, { "epoch": 0.577061912054255, "grad_norm": 1.3697240352630615, "learning_rate": 8.004705381062659e-06, "loss": 0.4433, "step": 21102 }, { "epoch": 0.577089258367972, "grad_norm": 1.2669384479522705, "learning_rate": 8.003837494622517e-06, "loss": 0.49, "step": 21103 }, { "epoch": 0.5771166046816889, "grad_norm": 1.356097936630249, "learning_rate": 8.00296962384171e-06, "loss": 0.5066, "step": 21104 }, { "epoch": 0.5771439509954058, "grad_norm": 1.3749390840530396, "learning_rate": 8.002101768727044e-06, "loss": 0.7438, "step": 21105 }, { "epoch": 0.5771712973091228, "grad_norm": 1.3642915487289429, "learning_rate": 8.001233929285323e-06, "loss": 0.5033, "step": 21106 }, { "epoch": 0.5771986436228397, "grad_norm": 1.2503604888916016, "learning_rate": 8.00036610552336e-06, "loss": 0.475, "step": 21107 }, { "epoch": 0.5772259899365566, "grad_norm": 1.2307543754577637, "learning_rate": 7.999498297447966e-06, "loss": 0.4771, "step": 21108 }, { "epoch": 0.5772533362502734, "grad_norm": 1.1993370056152344, "learning_rate": 7.998630505065941e-06, "loss": 0.4627, "step": 21109 }, { "epoch": 0.5772806825639903, "grad_norm": 1.3304990530014038, "learning_rate": 7.997762728384098e-06, "loss": 0.488, "step": 21110 }, { "epoch": 0.5773080288777073, "grad_norm": 1.1201022863388062, "learning_rate": 7.996894967409243e-06, "loss": 0.4908, "step": 21111 }, { "epoch": 0.5773353751914242, "grad_norm": 1.1842490434646606, "learning_rate": 7.996027222148182e-06, "loss": 0.5097, "step": 21112 }, { "epoch": 0.5773627215051411, "grad_norm": 1.2852625846862793, "learning_rate": 7.995159492607725e-06, "loss": 0.4984, "step": 21113 }, { "epoch": 0.577390067818858, "grad_norm": 1.2950160503387451, "learning_rate": 7.994291778794676e-06, "loss": 0.3503, "step": 21114 }, { "epoch": 0.577417414132575, "grad_norm": 1.1241885423660278, "learning_rate": 7.993424080715848e-06, "loss": 0.4874, "step": 21115 }, { "epoch": 0.5774447604462919, "grad_norm": 1.0485548973083496, "learning_rate": 7.992556398378037e-06, "loss": 0.5408, "step": 21116 }, { "epoch": 0.5774721067600087, "grad_norm": 1.1791431903839111, "learning_rate": 7.991688731788059e-06, "loss": 0.4877, "step": 21117 }, { "epoch": 0.5774994530737256, "grad_norm": 1.1584724187850952, "learning_rate": 7.990821080952718e-06, "loss": 0.4943, "step": 21118 }, { "epoch": 0.5775267993874426, "grad_norm": 1.4758861064910889, "learning_rate": 7.98995344587882e-06, "loss": 0.5398, "step": 21119 }, { "epoch": 0.5775541457011595, "grad_norm": 1.2422112226486206, "learning_rate": 7.989085826573171e-06, "loss": 0.4468, "step": 21120 }, { "epoch": 0.5775814920148764, "grad_norm": 1.658029556274414, "learning_rate": 7.98821822304258e-06, "loss": 0.5012, "step": 21121 }, { "epoch": 0.5776088383285933, "grad_norm": 1.3560129404067993, "learning_rate": 7.987350635293847e-06, "loss": 0.5143, "step": 21122 }, { "epoch": 0.5776361846423103, "grad_norm": 1.1815420389175415, "learning_rate": 7.986483063333786e-06, "loss": 0.4883, "step": 21123 }, { "epoch": 0.5776635309560272, "grad_norm": 1.4700082540512085, "learning_rate": 7.985615507169195e-06, "loss": 0.4935, "step": 21124 }, { "epoch": 0.577690877269744, "grad_norm": 1.4182573556900024, "learning_rate": 7.984747966806886e-06, "loss": 0.482, "step": 21125 }, { "epoch": 0.5777182235834609, "grad_norm": 1.3879642486572266, "learning_rate": 7.983880442253663e-06, "loss": 0.5149, "step": 21126 }, { "epoch": 0.5777455698971778, "grad_norm": 1.3242179155349731, "learning_rate": 7.983012933516327e-06, "loss": 0.4633, "step": 21127 }, { "epoch": 0.5777729162108948, "grad_norm": 1.4702818393707275, "learning_rate": 7.98214544060169e-06, "loss": 0.7557, "step": 21128 }, { "epoch": 0.5778002625246117, "grad_norm": 1.238812804222107, "learning_rate": 7.981277963516552e-06, "loss": 0.503, "step": 21129 }, { "epoch": 0.5778276088383286, "grad_norm": 1.224562406539917, "learning_rate": 7.98041050226772e-06, "loss": 0.4719, "step": 21130 }, { "epoch": 0.5778549551520455, "grad_norm": 1.1889454126358032, "learning_rate": 7.979543056862e-06, "loss": 0.4837, "step": 21131 }, { "epoch": 0.5778823014657624, "grad_norm": 1.238482117652893, "learning_rate": 7.978675627306197e-06, "loss": 0.5057, "step": 21132 }, { "epoch": 0.5779096477794793, "grad_norm": 1.4956060647964478, "learning_rate": 7.977808213607114e-06, "loss": 0.7979, "step": 21133 }, { "epoch": 0.5779369940931962, "grad_norm": 1.2095896005630493, "learning_rate": 7.976940815771557e-06, "loss": 0.7362, "step": 21134 }, { "epoch": 0.5779643404069131, "grad_norm": 1.750147819519043, "learning_rate": 7.976073433806329e-06, "loss": 0.7789, "step": 21135 }, { "epoch": 0.5779916867206301, "grad_norm": 1.2365034818649292, "learning_rate": 7.975206067718237e-06, "loss": 0.4935, "step": 21136 }, { "epoch": 0.578019033034347, "grad_norm": 1.1135863065719604, "learning_rate": 7.974338717514083e-06, "loss": 0.4771, "step": 21137 }, { "epoch": 0.5780463793480639, "grad_norm": 1.3609681129455566, "learning_rate": 7.97347138320067e-06, "loss": 0.4984, "step": 21138 }, { "epoch": 0.5780737256617808, "grad_norm": 1.3119977712631226, "learning_rate": 7.972604064784808e-06, "loss": 0.51, "step": 21139 }, { "epoch": 0.5781010719754976, "grad_norm": 1.0768553018569946, "learning_rate": 7.971736762273295e-06, "loss": 0.4837, "step": 21140 }, { "epoch": 0.5781284182892146, "grad_norm": 1.3394497632980347, "learning_rate": 7.970869475672934e-06, "loss": 0.523, "step": 21141 }, { "epoch": 0.5781557646029315, "grad_norm": 1.200838327407837, "learning_rate": 7.970002204990532e-06, "loss": 0.4784, "step": 21142 }, { "epoch": 0.5781831109166484, "grad_norm": 1.117382287979126, "learning_rate": 7.969134950232891e-06, "loss": 0.4836, "step": 21143 }, { "epoch": 0.5782104572303653, "grad_norm": 1.0589009523391724, "learning_rate": 7.968267711406815e-06, "loss": 0.4813, "step": 21144 }, { "epoch": 0.5782378035440823, "grad_norm": 1.4110482931137085, "learning_rate": 7.967400488519108e-06, "loss": 0.3552, "step": 21145 }, { "epoch": 0.5782651498577992, "grad_norm": 1.2582379579544067, "learning_rate": 7.96653328157657e-06, "loss": 0.5023, "step": 21146 }, { "epoch": 0.5782924961715161, "grad_norm": 1.4300462007522583, "learning_rate": 7.965666090586007e-06, "loss": 0.7435, "step": 21147 }, { "epoch": 0.5783198424852329, "grad_norm": 1.3411303758621216, "learning_rate": 7.964798915554223e-06, "loss": 0.511, "step": 21148 }, { "epoch": 0.5783471887989499, "grad_norm": 1.166536569595337, "learning_rate": 7.963931756488015e-06, "loss": 0.4776, "step": 21149 }, { "epoch": 0.5783745351126668, "grad_norm": 1.2493555545806885, "learning_rate": 7.963064613394194e-06, "loss": 0.4795, "step": 21150 }, { "epoch": 0.5784018814263837, "grad_norm": 1.2556051015853882, "learning_rate": 7.962197486279555e-06, "loss": 0.4966, "step": 21151 }, { "epoch": 0.5784292277401006, "grad_norm": 1.4758528470993042, "learning_rate": 7.961330375150902e-06, "loss": 0.517, "step": 21152 }, { "epoch": 0.5784565740538176, "grad_norm": 1.9721217155456543, "learning_rate": 7.960463280015042e-06, "loss": 0.4032, "step": 21153 }, { "epoch": 0.5784839203675345, "grad_norm": 1.2520215511322021, "learning_rate": 7.95959620087877e-06, "loss": 0.5296, "step": 21154 }, { "epoch": 0.5785112666812514, "grad_norm": 1.1198623180389404, "learning_rate": 7.958729137748892e-06, "loss": 0.4824, "step": 21155 }, { "epoch": 0.5785386129949682, "grad_norm": 1.3599004745483398, "learning_rate": 7.957862090632212e-06, "loss": 0.4971, "step": 21156 }, { "epoch": 0.5785659593086852, "grad_norm": 1.2834484577178955, "learning_rate": 7.956995059535527e-06, "loss": 0.7294, "step": 21157 }, { "epoch": 0.5785933056224021, "grad_norm": 1.212567925453186, "learning_rate": 7.956128044465643e-06, "loss": 0.4821, "step": 21158 }, { "epoch": 0.578620651936119, "grad_norm": 1.1874746084213257, "learning_rate": 7.955261045429357e-06, "loss": 0.3432, "step": 21159 }, { "epoch": 0.5786479982498359, "grad_norm": 1.3847789764404297, "learning_rate": 7.954394062433472e-06, "loss": 0.7552, "step": 21160 }, { "epoch": 0.5786753445635529, "grad_norm": 1.37106454372406, "learning_rate": 7.953527095484794e-06, "loss": 0.4666, "step": 21161 }, { "epoch": 0.5787026908772698, "grad_norm": 1.2220022678375244, "learning_rate": 7.952660144590116e-06, "loss": 0.5054, "step": 21162 }, { "epoch": 0.5787300371909867, "grad_norm": 1.06049382686615, "learning_rate": 7.951793209756244e-06, "loss": 0.4789, "step": 21163 }, { "epoch": 0.5787573835047035, "grad_norm": 1.1475403308868408, "learning_rate": 7.95092629098998e-06, "loss": 0.4672, "step": 21164 }, { "epoch": 0.5787847298184204, "grad_norm": 1.239696979522705, "learning_rate": 7.950059388298119e-06, "loss": 0.4592, "step": 21165 }, { "epoch": 0.5788120761321374, "grad_norm": 1.5214897394180298, "learning_rate": 7.949192501687469e-06, "loss": 0.4043, "step": 21166 }, { "epoch": 0.5788394224458543, "grad_norm": 1.2028099298477173, "learning_rate": 7.948325631164823e-06, "loss": 0.4339, "step": 21167 }, { "epoch": 0.5788667687595712, "grad_norm": 1.4165475368499756, "learning_rate": 7.947458776736987e-06, "loss": 0.3828, "step": 21168 }, { "epoch": 0.5788941150732881, "grad_norm": 1.3972203731536865, "learning_rate": 7.94659193841076e-06, "loss": 0.7022, "step": 21169 }, { "epoch": 0.5789214613870051, "grad_norm": 1.2020443677902222, "learning_rate": 7.94572511619294e-06, "loss": 0.3668, "step": 21170 }, { "epoch": 0.578948807700722, "grad_norm": 1.3141580820083618, "learning_rate": 7.94485831009033e-06, "loss": 0.8018, "step": 21171 }, { "epoch": 0.5789761540144388, "grad_norm": 1.8031623363494873, "learning_rate": 7.943991520109727e-06, "loss": 0.3769, "step": 21172 }, { "epoch": 0.5790035003281557, "grad_norm": 1.0746065378189087, "learning_rate": 7.94312474625793e-06, "loss": 0.4636, "step": 21173 }, { "epoch": 0.5790308466418727, "grad_norm": 1.2969603538513184, "learning_rate": 7.942257988541743e-06, "loss": 0.4826, "step": 21174 }, { "epoch": 0.5790581929555896, "grad_norm": 1.4378256797790527, "learning_rate": 7.941391246967967e-06, "loss": 0.4955, "step": 21175 }, { "epoch": 0.5790855392693065, "grad_norm": 1.3712254762649536, "learning_rate": 7.940524521543392e-06, "loss": 0.3476, "step": 21176 }, { "epoch": 0.5791128855830234, "grad_norm": 1.3683500289916992, "learning_rate": 7.939657812274822e-06, "loss": 0.463, "step": 21177 }, { "epoch": 0.5791402318967404, "grad_norm": 1.1420191526412964, "learning_rate": 7.93879111916906e-06, "loss": 0.4871, "step": 21178 }, { "epoch": 0.5791675782104573, "grad_norm": 1.207188367843628, "learning_rate": 7.937924442232898e-06, "loss": 0.7162, "step": 21179 }, { "epoch": 0.5791949245241741, "grad_norm": 1.0749448537826538, "learning_rate": 7.93705778147314e-06, "loss": 0.5008, "step": 21180 }, { "epoch": 0.579222270837891, "grad_norm": 1.3300670385360718, "learning_rate": 7.936191136896583e-06, "loss": 0.5016, "step": 21181 }, { "epoch": 0.5792496171516079, "grad_norm": 1.1949841976165771, "learning_rate": 7.935324508510025e-06, "loss": 0.4841, "step": 21182 }, { "epoch": 0.5792769634653249, "grad_norm": 1.770856261253357, "learning_rate": 7.934457896320267e-06, "loss": 0.3898, "step": 21183 }, { "epoch": 0.5793043097790418, "grad_norm": 1.1547869443893433, "learning_rate": 7.933591300334105e-06, "loss": 0.5228, "step": 21184 }, { "epoch": 0.5793316560927587, "grad_norm": 1.1496202945709229, "learning_rate": 7.932724720558336e-06, "loss": 0.4897, "step": 21185 }, { "epoch": 0.5793590024064756, "grad_norm": 1.1017144918441772, "learning_rate": 7.931858156999762e-06, "loss": 0.3591, "step": 21186 }, { "epoch": 0.5793863487201926, "grad_norm": 1.468813180923462, "learning_rate": 7.930991609665177e-06, "loss": 0.4811, "step": 21187 }, { "epoch": 0.5794136950339094, "grad_norm": 1.2350648641586304, "learning_rate": 7.930125078561383e-06, "loss": 0.5043, "step": 21188 }, { "epoch": 0.5794410413476263, "grad_norm": 1.1537110805511475, "learning_rate": 7.929258563695172e-06, "loss": 0.4671, "step": 21189 }, { "epoch": 0.5794683876613432, "grad_norm": 1.1318562030792236, "learning_rate": 7.928392065073345e-06, "loss": 0.4601, "step": 21190 }, { "epoch": 0.5794957339750602, "grad_norm": 1.5261609554290771, "learning_rate": 7.927525582702703e-06, "loss": 0.4836, "step": 21191 }, { "epoch": 0.5795230802887771, "grad_norm": 1.3722200393676758, "learning_rate": 7.926659116590036e-06, "loss": 0.3744, "step": 21192 }, { "epoch": 0.579550426602494, "grad_norm": 1.2093673944473267, "learning_rate": 7.925792666742146e-06, "loss": 0.4784, "step": 21193 }, { "epoch": 0.5795777729162109, "grad_norm": 1.2605888843536377, "learning_rate": 7.924926233165828e-06, "loss": 0.4722, "step": 21194 }, { "epoch": 0.5796051192299279, "grad_norm": 1.1833115816116333, "learning_rate": 7.92405981586788e-06, "loss": 0.5, "step": 21195 }, { "epoch": 0.5796324655436447, "grad_norm": 1.4098751544952393, "learning_rate": 7.9231934148551e-06, "loss": 0.5081, "step": 21196 }, { "epoch": 0.5796598118573616, "grad_norm": 1.1878690719604492, "learning_rate": 7.922327030134282e-06, "loss": 0.4621, "step": 21197 }, { "epoch": 0.5796871581710785, "grad_norm": 1.3834823369979858, "learning_rate": 7.921460661712222e-06, "loss": 0.4623, "step": 21198 }, { "epoch": 0.5797145044847954, "grad_norm": 1.1738618612289429, "learning_rate": 7.920594309595721e-06, "loss": 0.4969, "step": 21199 }, { "epoch": 0.5797418507985124, "grad_norm": 1.2487837076187134, "learning_rate": 7.91972797379157e-06, "loss": 0.4485, "step": 21200 }, { "epoch": 0.5797691971122293, "grad_norm": 1.6544119119644165, "learning_rate": 7.918861654306571e-06, "loss": 0.441, "step": 21201 }, { "epoch": 0.5797965434259462, "grad_norm": 1.2567481994628906, "learning_rate": 7.917995351147514e-06, "loss": 0.4662, "step": 21202 }, { "epoch": 0.5798238897396631, "grad_norm": 1.4541820287704468, "learning_rate": 7.917129064321199e-06, "loss": 0.3765, "step": 21203 }, { "epoch": 0.57985123605338, "grad_norm": 1.5564098358154297, "learning_rate": 7.91626279383442e-06, "loss": 0.3786, "step": 21204 }, { "epoch": 0.5798785823670969, "grad_norm": 1.139101505279541, "learning_rate": 7.91539653969397e-06, "loss": 0.4808, "step": 21205 }, { "epoch": 0.5799059286808138, "grad_norm": 1.4440844058990479, "learning_rate": 7.914530301906651e-06, "loss": 0.4897, "step": 21206 }, { "epoch": 0.5799332749945307, "grad_norm": 1.2541711330413818, "learning_rate": 7.913664080479255e-06, "loss": 0.4631, "step": 21207 }, { "epoch": 0.5799606213082477, "grad_norm": 3.25359845161438, "learning_rate": 7.912797875418575e-06, "loss": 0.8205, "step": 21208 }, { "epoch": 0.5799879676219646, "grad_norm": 1.3683874607086182, "learning_rate": 7.91193168673141e-06, "loss": 0.4884, "step": 21209 }, { "epoch": 0.5800153139356815, "grad_norm": 1.2077785730361938, "learning_rate": 7.911065514424556e-06, "loss": 0.5179, "step": 21210 }, { "epoch": 0.5800426602493984, "grad_norm": 1.206271767616272, "learning_rate": 7.910199358504803e-06, "loss": 0.4838, "step": 21211 }, { "epoch": 0.5800700065631152, "grad_norm": 1.1248420476913452, "learning_rate": 7.909333218978945e-06, "loss": 0.4694, "step": 21212 }, { "epoch": 0.5800973528768322, "grad_norm": 1.155281901359558, "learning_rate": 7.908467095853782e-06, "loss": 0.4866, "step": 21213 }, { "epoch": 0.5801246991905491, "grad_norm": 1.3279067277908325, "learning_rate": 7.907600989136104e-06, "loss": 0.5089, "step": 21214 }, { "epoch": 0.580152045504266, "grad_norm": 1.4800734519958496, "learning_rate": 7.90673489883271e-06, "loss": 0.4782, "step": 21215 }, { "epoch": 0.580179391817983, "grad_norm": 1.4748016595840454, "learning_rate": 7.905868824950389e-06, "loss": 0.4594, "step": 21216 }, { "epoch": 0.5802067381316999, "grad_norm": 1.3082921504974365, "learning_rate": 7.905002767495939e-06, "loss": 0.4899, "step": 21217 }, { "epoch": 0.5802340844454168, "grad_norm": 1.20845365524292, "learning_rate": 7.904136726476154e-06, "loss": 0.4908, "step": 21218 }, { "epoch": 0.5802614307591337, "grad_norm": 1.113618016242981, "learning_rate": 7.903270701897823e-06, "loss": 0.4983, "step": 21219 }, { "epoch": 0.5802887770728505, "grad_norm": 1.009089469909668, "learning_rate": 7.902404693767746e-06, "loss": 0.3912, "step": 21220 }, { "epoch": 0.5803161233865675, "grad_norm": 1.397261142730713, "learning_rate": 7.901538702092714e-06, "loss": 0.4513, "step": 21221 }, { "epoch": 0.5803434697002844, "grad_norm": 1.3681004047393799, "learning_rate": 7.900672726879518e-06, "loss": 0.3403, "step": 21222 }, { "epoch": 0.5803708160140013, "grad_norm": 1.3189237117767334, "learning_rate": 7.899806768134957e-06, "loss": 0.5317, "step": 21223 }, { "epoch": 0.5803981623277182, "grad_norm": 1.4821144342422485, "learning_rate": 7.89894082586582e-06, "loss": 0.3871, "step": 21224 }, { "epoch": 0.5804255086414352, "grad_norm": 1.1113874912261963, "learning_rate": 7.898074900078898e-06, "loss": 0.4812, "step": 21225 }, { "epoch": 0.5804528549551521, "grad_norm": 1.165265679359436, "learning_rate": 7.89720899078099e-06, "loss": 0.4706, "step": 21226 }, { "epoch": 0.580480201268869, "grad_norm": 1.3327823877334595, "learning_rate": 7.896343097978885e-06, "loss": 0.4681, "step": 21227 }, { "epoch": 0.5805075475825858, "grad_norm": 1.3094199895858765, "learning_rate": 7.895477221679376e-06, "loss": 0.4985, "step": 21228 }, { "epoch": 0.5805348938963028, "grad_norm": 1.27414870262146, "learning_rate": 7.894611361889256e-06, "loss": 0.4564, "step": 21229 }, { "epoch": 0.5805622402100197, "grad_norm": 1.2174128293991089, "learning_rate": 7.893745518615317e-06, "loss": 0.4834, "step": 21230 }, { "epoch": 0.5805895865237366, "grad_norm": 1.3807427883148193, "learning_rate": 7.892879691864353e-06, "loss": 0.5085, "step": 21231 }, { "epoch": 0.5806169328374535, "grad_norm": 1.5030146837234497, "learning_rate": 7.892013881643153e-06, "loss": 0.519, "step": 21232 }, { "epoch": 0.5806442791511705, "grad_norm": 1.4055755138397217, "learning_rate": 7.891148087958511e-06, "loss": 0.5092, "step": 21233 }, { "epoch": 0.5806716254648874, "grad_norm": 1.1934361457824707, "learning_rate": 7.89028231081722e-06, "loss": 0.5174, "step": 21234 }, { "epoch": 0.5806989717786042, "grad_norm": 1.3121193647384644, "learning_rate": 7.88941655022607e-06, "loss": 0.4821, "step": 21235 }, { "epoch": 0.5807263180923211, "grad_norm": 1.1612330675125122, "learning_rate": 7.888550806191855e-06, "loss": 0.4794, "step": 21236 }, { "epoch": 0.580753664406038, "grad_norm": 1.1639258861541748, "learning_rate": 7.887685078721361e-06, "loss": 0.4807, "step": 21237 }, { "epoch": 0.580781010719755, "grad_norm": 1.3745886087417603, "learning_rate": 7.886819367821385e-06, "loss": 0.4761, "step": 21238 }, { "epoch": 0.5808083570334719, "grad_norm": 1.3800725936889648, "learning_rate": 7.885953673498717e-06, "loss": 0.4869, "step": 21239 }, { "epoch": 0.5808357033471888, "grad_norm": 1.5416688919067383, "learning_rate": 7.885087995760145e-06, "loss": 0.529, "step": 21240 }, { "epoch": 0.5808630496609057, "grad_norm": 1.212199330329895, "learning_rate": 7.884222334612465e-06, "loss": 0.4989, "step": 21241 }, { "epoch": 0.5808903959746227, "grad_norm": 1.1676549911499023, "learning_rate": 7.883356690062466e-06, "loss": 0.4825, "step": 21242 }, { "epoch": 0.5809177422883395, "grad_norm": 1.2110382318496704, "learning_rate": 7.882491062116936e-06, "loss": 0.4558, "step": 21243 }, { "epoch": 0.5809450886020564, "grad_norm": 1.070266842842102, "learning_rate": 7.88162545078267e-06, "loss": 0.4483, "step": 21244 }, { "epoch": 0.5809724349157733, "grad_norm": 1.1630399227142334, "learning_rate": 7.880759856066457e-06, "loss": 0.5026, "step": 21245 }, { "epoch": 0.5809997812294903, "grad_norm": 1.2964448928833008, "learning_rate": 7.879894277975084e-06, "loss": 0.4909, "step": 21246 }, { "epoch": 0.5810271275432072, "grad_norm": 1.2272818088531494, "learning_rate": 7.879028716515341e-06, "loss": 0.5147, "step": 21247 }, { "epoch": 0.5810544738569241, "grad_norm": 1.2165379524230957, "learning_rate": 7.878163171694026e-06, "loss": 0.4785, "step": 21248 }, { "epoch": 0.581081820170641, "grad_norm": 1.709020733833313, "learning_rate": 7.877297643517922e-06, "loss": 0.5205, "step": 21249 }, { "epoch": 0.581109166484358, "grad_norm": 1.185694932937622, "learning_rate": 7.876432131993819e-06, "loss": 0.3693, "step": 21250 }, { "epoch": 0.5811365127980748, "grad_norm": 1.3090420961380005, "learning_rate": 7.875566637128511e-06, "loss": 0.5234, "step": 21251 }, { "epoch": 0.5811638591117917, "grad_norm": 1.5743739604949951, "learning_rate": 7.874701158928782e-06, "loss": 0.4715, "step": 21252 }, { "epoch": 0.5811912054255086, "grad_norm": 1.5012649297714233, "learning_rate": 7.873835697401428e-06, "loss": 0.7333, "step": 21253 }, { "epoch": 0.5812185517392255, "grad_norm": 1.560942530632019, "learning_rate": 7.872970252553231e-06, "loss": 0.3986, "step": 21254 }, { "epoch": 0.5812458980529425, "grad_norm": 1.3141735792160034, "learning_rate": 7.872104824390984e-06, "loss": 0.5001, "step": 21255 }, { "epoch": 0.5812732443666594, "grad_norm": 1.240995168685913, "learning_rate": 7.871239412921477e-06, "loss": 0.4909, "step": 21256 }, { "epoch": 0.5813005906803763, "grad_norm": 1.2953718900680542, "learning_rate": 7.870374018151498e-06, "loss": 0.3492, "step": 21257 }, { "epoch": 0.5813279369940932, "grad_norm": 1.1494368314743042, "learning_rate": 7.869508640087835e-06, "loss": 0.5014, "step": 21258 }, { "epoch": 0.5813552833078101, "grad_norm": 1.0998388528823853, "learning_rate": 7.868643278737276e-06, "loss": 0.4854, "step": 21259 }, { "epoch": 0.581382629621527, "grad_norm": 1.1854609251022339, "learning_rate": 7.867777934106611e-06, "loss": 0.3564, "step": 21260 }, { "epoch": 0.5814099759352439, "grad_norm": 1.3895087242126465, "learning_rate": 7.86691260620263e-06, "loss": 0.476, "step": 21261 }, { "epoch": 0.5814373222489608, "grad_norm": 1.2714877128601074, "learning_rate": 7.866047295032119e-06, "loss": 0.4952, "step": 21262 }, { "epoch": 0.5814646685626778, "grad_norm": 1.1640421152114868, "learning_rate": 7.865182000601864e-06, "loss": 0.496, "step": 21263 }, { "epoch": 0.5814920148763947, "grad_norm": 1.2907785177230835, "learning_rate": 7.864316722918659e-06, "loss": 0.5164, "step": 21264 }, { "epoch": 0.5815193611901116, "grad_norm": 1.385427474975586, "learning_rate": 7.863451461989284e-06, "loss": 0.508, "step": 21265 }, { "epoch": 0.5815467075038285, "grad_norm": 1.4322516918182373, "learning_rate": 7.862586217820536e-06, "loss": 0.4885, "step": 21266 }, { "epoch": 0.5815740538175453, "grad_norm": 1.0579007863998413, "learning_rate": 7.861720990419194e-06, "loss": 0.4577, "step": 21267 }, { "epoch": 0.5816014001312623, "grad_norm": 1.4076619148254395, "learning_rate": 7.86085577979205e-06, "loss": 0.4996, "step": 21268 }, { "epoch": 0.5816287464449792, "grad_norm": 1.3651829957962036, "learning_rate": 7.85999058594589e-06, "loss": 0.349, "step": 21269 }, { "epoch": 0.5816560927586961, "grad_norm": 1.3093091249465942, "learning_rate": 7.8591254088875e-06, "loss": 0.4761, "step": 21270 }, { "epoch": 0.581683439072413, "grad_norm": 1.177939772605896, "learning_rate": 7.858260248623672e-06, "loss": 0.4864, "step": 21271 }, { "epoch": 0.58171078538613, "grad_norm": 1.2666600942611694, "learning_rate": 7.857395105161189e-06, "loss": 0.4769, "step": 21272 }, { "epoch": 0.5817381316998469, "grad_norm": 1.195433259010315, "learning_rate": 7.856529978506838e-06, "loss": 0.4566, "step": 21273 }, { "epoch": 0.5817654780135638, "grad_norm": 1.2790391445159912, "learning_rate": 7.855664868667405e-06, "loss": 0.5376, "step": 21274 }, { "epoch": 0.5817928243272806, "grad_norm": 1.2318904399871826, "learning_rate": 7.854799775649678e-06, "loss": 0.4798, "step": 21275 }, { "epoch": 0.5818201706409976, "grad_norm": 1.335437297821045, "learning_rate": 7.853934699460442e-06, "loss": 0.5108, "step": 21276 }, { "epoch": 0.5818475169547145, "grad_norm": 1.3399111032485962, "learning_rate": 7.853069640106488e-06, "loss": 0.5084, "step": 21277 }, { "epoch": 0.5818748632684314, "grad_norm": 1.4364876747131348, "learning_rate": 7.852204597594596e-06, "loss": 0.4738, "step": 21278 }, { "epoch": 0.5819022095821483, "grad_norm": 1.6276648044586182, "learning_rate": 7.851339571931556e-06, "loss": 0.4549, "step": 21279 }, { "epoch": 0.5819295558958653, "grad_norm": 1.129238247871399, "learning_rate": 7.850474563124154e-06, "loss": 0.3482, "step": 21280 }, { "epoch": 0.5819569022095822, "grad_norm": 1.2435797452926636, "learning_rate": 7.849609571179171e-06, "loss": 0.794, "step": 21281 }, { "epoch": 0.5819842485232991, "grad_norm": 1.1581714153289795, "learning_rate": 7.848744596103394e-06, "loss": 0.7526, "step": 21282 }, { "epoch": 0.5820115948370159, "grad_norm": 1.2007498741149902, "learning_rate": 7.847879637903613e-06, "loss": 0.4763, "step": 21283 }, { "epoch": 0.5820389411507328, "grad_norm": 1.2810159921646118, "learning_rate": 7.84701469658661e-06, "loss": 0.4712, "step": 21284 }, { "epoch": 0.5820662874644498, "grad_norm": 1.4230364561080933, "learning_rate": 7.84614977215917e-06, "loss": 0.4832, "step": 21285 }, { "epoch": 0.5820936337781667, "grad_norm": 1.2429858446121216, "learning_rate": 7.84528486462808e-06, "loss": 0.4653, "step": 21286 }, { "epoch": 0.5821209800918836, "grad_norm": 1.234649658203125, "learning_rate": 7.844419974000122e-06, "loss": 0.7578, "step": 21287 }, { "epoch": 0.5821483264056005, "grad_norm": 1.081817865371704, "learning_rate": 7.843555100282086e-06, "loss": 0.4926, "step": 21288 }, { "epoch": 0.5821756727193175, "grad_norm": 1.284328579902649, "learning_rate": 7.84269024348075e-06, "loss": 0.4792, "step": 21289 }, { "epoch": 0.5822030190330344, "grad_norm": 1.3084276914596558, "learning_rate": 7.841825403602904e-06, "loss": 0.4719, "step": 21290 }, { "epoch": 0.5822303653467512, "grad_norm": 1.2416675090789795, "learning_rate": 7.84096058065533e-06, "loss": 0.5055, "step": 21291 }, { "epoch": 0.5822577116604681, "grad_norm": 1.2568622827529907, "learning_rate": 7.840095774644811e-06, "loss": 0.5101, "step": 21292 }, { "epoch": 0.5822850579741851, "grad_norm": 1.2772809267044067, "learning_rate": 7.839230985578133e-06, "loss": 0.7352, "step": 21293 }, { "epoch": 0.582312404287902, "grad_norm": 1.4574331045150757, "learning_rate": 7.838366213462082e-06, "loss": 0.3576, "step": 21294 }, { "epoch": 0.5823397506016189, "grad_norm": 1.4881794452667236, "learning_rate": 7.837501458303438e-06, "loss": 0.5233, "step": 21295 }, { "epoch": 0.5823670969153358, "grad_norm": 1.2081431150436401, "learning_rate": 7.83663672010899e-06, "loss": 0.4913, "step": 21296 }, { "epoch": 0.5823944432290528, "grad_norm": 1.4720903635025024, "learning_rate": 7.835771998885514e-06, "loss": 0.5175, "step": 21297 }, { "epoch": 0.5824217895427697, "grad_norm": 1.2119640111923218, "learning_rate": 7.834907294639797e-06, "loss": 0.4838, "step": 21298 }, { "epoch": 0.5824491358564865, "grad_norm": 1.3743764162063599, "learning_rate": 7.834042607378627e-06, "loss": 0.4635, "step": 21299 }, { "epoch": 0.5824764821702034, "grad_norm": 1.200880765914917, "learning_rate": 7.83317793710878e-06, "loss": 0.4805, "step": 21300 }, { "epoch": 0.5825038284839203, "grad_norm": 1.2666075229644775, "learning_rate": 7.832313283837044e-06, "loss": 0.4863, "step": 21301 }, { "epoch": 0.5825311747976373, "grad_norm": 1.207269310951233, "learning_rate": 7.831448647570199e-06, "loss": 0.5124, "step": 21302 }, { "epoch": 0.5825585211113542, "grad_norm": 1.465134620666504, "learning_rate": 7.830584028315028e-06, "loss": 0.4863, "step": 21303 }, { "epoch": 0.5825858674250711, "grad_norm": 1.1709822416305542, "learning_rate": 7.829719426078319e-06, "loss": 0.4773, "step": 21304 }, { "epoch": 0.582613213738788, "grad_norm": 1.3983601331710815, "learning_rate": 7.828854840866847e-06, "loss": 0.481, "step": 21305 }, { "epoch": 0.582640560052505, "grad_norm": 1.5679410696029663, "learning_rate": 7.827990272687397e-06, "loss": 0.3897, "step": 21306 }, { "epoch": 0.5826679063662218, "grad_norm": 1.3104424476623535, "learning_rate": 7.827125721546754e-06, "loss": 0.7557, "step": 21307 }, { "epoch": 0.5826952526799387, "grad_norm": 1.0656330585479736, "learning_rate": 7.826261187451696e-06, "loss": 0.3557, "step": 21308 }, { "epoch": 0.5827225989936556, "grad_norm": 1.2697747945785522, "learning_rate": 7.82539667040901e-06, "loss": 0.49, "step": 21309 }, { "epoch": 0.5827499453073726, "grad_norm": 1.4519392251968384, "learning_rate": 7.824532170425474e-06, "loss": 0.4878, "step": 21310 }, { "epoch": 0.5827772916210895, "grad_norm": 1.143531322479248, "learning_rate": 7.82366768750787e-06, "loss": 0.4818, "step": 21311 }, { "epoch": 0.5828046379348064, "grad_norm": 1.3846144676208496, "learning_rate": 7.822803221662983e-06, "loss": 0.4988, "step": 21312 }, { "epoch": 0.5828319842485233, "grad_norm": 1.5450754165649414, "learning_rate": 7.82193877289759e-06, "loss": 0.4828, "step": 21313 }, { "epoch": 0.5828593305622403, "grad_norm": 1.9659796953201294, "learning_rate": 7.821074341218476e-06, "loss": 0.7504, "step": 21314 }, { "epoch": 0.5828866768759571, "grad_norm": 1.4097552299499512, "learning_rate": 7.820209926632421e-06, "loss": 0.3404, "step": 21315 }, { "epoch": 0.582914023189674, "grad_norm": 1.2195994853973389, "learning_rate": 7.819345529146205e-06, "loss": 0.4629, "step": 21316 }, { "epoch": 0.5829413695033909, "grad_norm": 1.2290891408920288, "learning_rate": 7.818481148766608e-06, "loss": 0.5178, "step": 21317 }, { "epoch": 0.5829687158171079, "grad_norm": 1.3079965114593506, "learning_rate": 7.817616785500415e-06, "loss": 0.4888, "step": 21318 }, { "epoch": 0.5829960621308248, "grad_norm": 1.3547874689102173, "learning_rate": 7.816752439354402e-06, "loss": 0.474, "step": 21319 }, { "epoch": 0.5830234084445417, "grad_norm": 1.3110610246658325, "learning_rate": 7.81588811033535e-06, "loss": 0.5, "step": 21320 }, { "epoch": 0.5830507547582586, "grad_norm": 1.3900152444839478, "learning_rate": 7.815023798450046e-06, "loss": 0.3748, "step": 21321 }, { "epoch": 0.5830781010719756, "grad_norm": 1.419230580329895, "learning_rate": 7.81415950370526e-06, "loss": 0.4849, "step": 21322 }, { "epoch": 0.5831054473856924, "grad_norm": 1.2039662599563599, "learning_rate": 7.813295226107783e-06, "loss": 0.4806, "step": 21323 }, { "epoch": 0.5831327936994093, "grad_norm": 1.1664904356002808, "learning_rate": 7.812430965664384e-06, "loss": 0.511, "step": 21324 }, { "epoch": 0.5831601400131262, "grad_norm": 1.2433892488479614, "learning_rate": 7.811566722381851e-06, "loss": 0.4626, "step": 21325 }, { "epoch": 0.5831874863268431, "grad_norm": 1.24541437625885, "learning_rate": 7.810702496266963e-06, "loss": 0.5146, "step": 21326 }, { "epoch": 0.5832148326405601, "grad_norm": 1.1039890050888062, "learning_rate": 7.809838287326496e-06, "loss": 0.4669, "step": 21327 }, { "epoch": 0.583242178954277, "grad_norm": 1.334831714630127, "learning_rate": 7.80897409556723e-06, "loss": 0.5152, "step": 21328 }, { "epoch": 0.5832695252679939, "grad_norm": 1.242383599281311, "learning_rate": 7.808109920995948e-06, "loss": 0.4913, "step": 21329 }, { "epoch": 0.5832968715817108, "grad_norm": 1.24199640750885, "learning_rate": 7.807245763619425e-06, "loss": 0.744, "step": 21330 }, { "epoch": 0.5833242178954277, "grad_norm": 1.4587150812149048, "learning_rate": 7.806381623444444e-06, "loss": 0.3616, "step": 21331 }, { "epoch": 0.5833515642091446, "grad_norm": 1.3409881591796875, "learning_rate": 7.805517500477779e-06, "loss": 0.4687, "step": 21332 }, { "epoch": 0.5833789105228615, "grad_norm": 1.162116527557373, "learning_rate": 7.804653394726213e-06, "loss": 0.332, "step": 21333 }, { "epoch": 0.5834062568365784, "grad_norm": 1.8308563232421875, "learning_rate": 7.803789306196525e-06, "loss": 0.5021, "step": 21334 }, { "epoch": 0.5834336031502954, "grad_norm": 1.2231042385101318, "learning_rate": 7.802925234895488e-06, "loss": 0.4765, "step": 21335 }, { "epoch": 0.5834609494640123, "grad_norm": 1.513201355934143, "learning_rate": 7.802061180829887e-06, "loss": 0.4891, "step": 21336 }, { "epoch": 0.5834882957777292, "grad_norm": 1.2119132280349731, "learning_rate": 7.801197144006498e-06, "loss": 0.4721, "step": 21337 }, { "epoch": 0.583515642091446, "grad_norm": 1.1792850494384766, "learning_rate": 7.800333124432097e-06, "loss": 0.4992, "step": 21338 }, { "epoch": 0.5835429884051629, "grad_norm": 1.2188167572021484, "learning_rate": 7.799469122113466e-06, "loss": 0.49, "step": 21339 }, { "epoch": 0.5835703347188799, "grad_norm": 1.2101271152496338, "learning_rate": 7.798605137057377e-06, "loss": 0.4869, "step": 21340 }, { "epoch": 0.5835976810325968, "grad_norm": 1.1640279293060303, "learning_rate": 7.797741169270612e-06, "loss": 0.4583, "step": 21341 }, { "epoch": 0.5836250273463137, "grad_norm": 1.6000185012817383, "learning_rate": 7.79687721875995e-06, "loss": 0.4948, "step": 21342 }, { "epoch": 0.5836523736600306, "grad_norm": 1.4681833982467651, "learning_rate": 7.796013285532164e-06, "loss": 0.487, "step": 21343 }, { "epoch": 0.5836797199737476, "grad_norm": 1.2302237749099731, "learning_rate": 7.795149369594034e-06, "loss": 0.4884, "step": 21344 }, { "epoch": 0.5837070662874645, "grad_norm": 1.304590106010437, "learning_rate": 7.794285470952338e-06, "loss": 0.4701, "step": 21345 }, { "epoch": 0.5837344126011813, "grad_norm": 1.4539252519607544, "learning_rate": 7.793421589613849e-06, "loss": 0.4464, "step": 21346 }, { "epoch": 0.5837617589148982, "grad_norm": 1.3441823720932007, "learning_rate": 7.792557725585351e-06, "loss": 0.495, "step": 21347 }, { "epoch": 0.5837891052286152, "grad_norm": 1.5768104791641235, "learning_rate": 7.791693878873611e-06, "loss": 0.474, "step": 21348 }, { "epoch": 0.5838164515423321, "grad_norm": 1.376373052597046, "learning_rate": 7.790830049485413e-06, "loss": 0.4691, "step": 21349 }, { "epoch": 0.583843797856049, "grad_norm": 1.4133167266845703, "learning_rate": 7.789966237427537e-06, "loss": 0.4992, "step": 21350 }, { "epoch": 0.5838711441697659, "grad_norm": 1.3424127101898193, "learning_rate": 7.789102442706749e-06, "loss": 0.36, "step": 21351 }, { "epoch": 0.5838984904834829, "grad_norm": 1.3474653959274292, "learning_rate": 7.78823866532983e-06, "loss": 0.5013, "step": 21352 }, { "epoch": 0.5839258367971998, "grad_norm": 1.416588306427002, "learning_rate": 7.787374905303559e-06, "loss": 0.4785, "step": 21353 }, { "epoch": 0.5839531831109166, "grad_norm": 1.2144466638565063, "learning_rate": 7.786511162634706e-06, "loss": 0.4739, "step": 21354 }, { "epoch": 0.5839805294246335, "grad_norm": 1.2328046560287476, "learning_rate": 7.78564743733005e-06, "loss": 0.4647, "step": 21355 }, { "epoch": 0.5840078757383504, "grad_norm": 1.6119221448898315, "learning_rate": 7.784783729396368e-06, "loss": 0.5127, "step": 21356 }, { "epoch": 0.5840352220520674, "grad_norm": 1.0743839740753174, "learning_rate": 7.783920038840433e-06, "loss": 0.324, "step": 21357 }, { "epoch": 0.5840625683657843, "grad_norm": 1.2684266567230225, "learning_rate": 7.783056365669022e-06, "loss": 0.47, "step": 21358 }, { "epoch": 0.5840899146795012, "grad_norm": 1.429880976676941, "learning_rate": 7.782192709888912e-06, "loss": 0.7428, "step": 21359 }, { "epoch": 0.5841172609932181, "grad_norm": 1.5599567890167236, "learning_rate": 7.781329071506873e-06, "loss": 0.5127, "step": 21360 }, { "epoch": 0.5841446073069351, "grad_norm": 1.6258357763290405, "learning_rate": 7.780465450529685e-06, "loss": 0.506, "step": 21361 }, { "epoch": 0.5841719536206519, "grad_norm": 1.156884789466858, "learning_rate": 7.77960184696412e-06, "loss": 0.4595, "step": 21362 }, { "epoch": 0.5841992999343688, "grad_norm": 1.1517996788024902, "learning_rate": 7.778738260816953e-06, "loss": 0.4583, "step": 21363 }, { "epoch": 0.5842266462480857, "grad_norm": 1.177224040031433, "learning_rate": 7.77787469209496e-06, "loss": 0.4775, "step": 21364 }, { "epoch": 0.5842539925618027, "grad_norm": 1.4383805990219116, "learning_rate": 7.777011140804915e-06, "loss": 0.3897, "step": 21365 }, { "epoch": 0.5842813388755196, "grad_norm": 1.0150229930877686, "learning_rate": 7.776147606953593e-06, "loss": 0.4644, "step": 21366 }, { "epoch": 0.5843086851892365, "grad_norm": 1.309212327003479, "learning_rate": 7.775284090547766e-06, "loss": 0.7582, "step": 21367 }, { "epoch": 0.5843360315029534, "grad_norm": 1.094102144241333, "learning_rate": 7.774420591594208e-06, "loss": 0.4932, "step": 21368 }, { "epoch": 0.5843633778166704, "grad_norm": 1.1610409021377563, "learning_rate": 7.773557110099696e-06, "loss": 0.5031, "step": 21369 }, { "epoch": 0.5843907241303872, "grad_norm": 1.303686261177063, "learning_rate": 7.772693646071001e-06, "loss": 0.4862, "step": 21370 }, { "epoch": 0.5844180704441041, "grad_norm": 1.5049735307693481, "learning_rate": 7.771830199514897e-06, "loss": 0.5006, "step": 21371 }, { "epoch": 0.584445416757821, "grad_norm": 1.3485801219940186, "learning_rate": 7.77096677043816e-06, "loss": 0.4775, "step": 21372 }, { "epoch": 0.584472763071538, "grad_norm": 1.2380672693252563, "learning_rate": 7.77010335884756e-06, "loss": 0.4808, "step": 21373 }, { "epoch": 0.5845001093852549, "grad_norm": 1.1962826251983643, "learning_rate": 7.769239964749873e-06, "loss": 0.5047, "step": 21374 }, { "epoch": 0.5845274556989718, "grad_norm": 1.3536431789398193, "learning_rate": 7.768376588151868e-06, "loss": 0.3617, "step": 21375 }, { "epoch": 0.5845548020126887, "grad_norm": 1.2731611728668213, "learning_rate": 7.767513229060322e-06, "loss": 0.4935, "step": 21376 }, { "epoch": 0.5845821483264056, "grad_norm": 1.1172246932983398, "learning_rate": 7.766649887482008e-06, "loss": 0.5028, "step": 21377 }, { "epoch": 0.5846094946401225, "grad_norm": 1.1313048601150513, "learning_rate": 7.765786563423693e-06, "loss": 0.3956, "step": 21378 }, { "epoch": 0.5846368409538394, "grad_norm": 1.3367677927017212, "learning_rate": 7.764923256892159e-06, "loss": 0.5015, "step": 21379 }, { "epoch": 0.5846641872675563, "grad_norm": 1.5455495119094849, "learning_rate": 7.764059967894168e-06, "loss": 0.3434, "step": 21380 }, { "epoch": 0.5846915335812732, "grad_norm": 1.4056673049926758, "learning_rate": 7.763196696436501e-06, "loss": 0.4996, "step": 21381 }, { "epoch": 0.5847188798949902, "grad_norm": 1.47469162940979, "learning_rate": 7.762333442525926e-06, "loss": 0.5141, "step": 21382 }, { "epoch": 0.5847462262087071, "grad_norm": 1.2307660579681396, "learning_rate": 7.761470206169213e-06, "loss": 0.495, "step": 21383 }, { "epoch": 0.584773572522424, "grad_norm": 1.3229836225509644, "learning_rate": 7.760606987373137e-06, "loss": 0.5275, "step": 21384 }, { "epoch": 0.5848009188361409, "grad_norm": 1.167313575744629, "learning_rate": 7.75974378614447e-06, "loss": 0.4556, "step": 21385 }, { "epoch": 0.5848282651498578, "grad_norm": 1.2796056270599365, "learning_rate": 7.758880602489987e-06, "loss": 0.5103, "step": 21386 }, { "epoch": 0.5848556114635747, "grad_norm": 1.2313990592956543, "learning_rate": 7.758017436416449e-06, "loss": 0.4936, "step": 21387 }, { "epoch": 0.5848829577772916, "grad_norm": 1.1769064664840698, "learning_rate": 7.757154287930634e-06, "loss": 0.462, "step": 21388 }, { "epoch": 0.5849103040910085, "grad_norm": 1.4137604236602783, "learning_rate": 7.756291157039315e-06, "loss": 0.497, "step": 21389 }, { "epoch": 0.5849376504047255, "grad_norm": 1.3132210969924927, "learning_rate": 7.755428043749255e-06, "loss": 0.3622, "step": 21390 }, { "epoch": 0.5849649967184424, "grad_norm": 1.569137692451477, "learning_rate": 7.754564948067236e-06, "loss": 0.7644, "step": 21391 }, { "epoch": 0.5849923430321593, "grad_norm": 1.531461238861084, "learning_rate": 7.75370187000002e-06, "loss": 0.3835, "step": 21392 }, { "epoch": 0.5850196893458762, "grad_norm": 1.1249364614486694, "learning_rate": 7.75283880955438e-06, "loss": 0.4835, "step": 21393 }, { "epoch": 0.585047035659593, "grad_norm": 1.2375177145004272, "learning_rate": 7.75197576673709e-06, "loss": 0.4999, "step": 21394 }, { "epoch": 0.58507438197331, "grad_norm": 1.351144790649414, "learning_rate": 7.751112741554916e-06, "loss": 0.49, "step": 21395 }, { "epoch": 0.5851017282870269, "grad_norm": 1.4150558710098267, "learning_rate": 7.75024973401463e-06, "loss": 0.5079, "step": 21396 }, { "epoch": 0.5851290746007438, "grad_norm": 1.10752272605896, "learning_rate": 7.749386744123e-06, "loss": 0.4877, "step": 21397 }, { "epoch": 0.5851564209144607, "grad_norm": 1.1035006046295166, "learning_rate": 7.748523771886798e-06, "loss": 0.4769, "step": 21398 }, { "epoch": 0.5851837672281777, "grad_norm": 1.297804355621338, "learning_rate": 7.747660817312797e-06, "loss": 0.3507, "step": 21399 }, { "epoch": 0.5852111135418946, "grad_norm": 1.5039970874786377, "learning_rate": 7.74679788040776e-06, "loss": 0.3612, "step": 21400 }, { "epoch": 0.5852384598556115, "grad_norm": 1.1739062070846558, "learning_rate": 7.745934961178458e-06, "loss": 0.484, "step": 21401 }, { "epoch": 0.5852658061693283, "grad_norm": 1.3552742004394531, "learning_rate": 7.745072059631665e-06, "loss": 0.4611, "step": 21402 }, { "epoch": 0.5852931524830453, "grad_norm": 1.6694918870925903, "learning_rate": 7.744209175774145e-06, "loss": 0.519, "step": 21403 }, { "epoch": 0.5853204987967622, "grad_norm": 1.0987695455551147, "learning_rate": 7.743346309612672e-06, "loss": 0.4761, "step": 21404 }, { "epoch": 0.5853478451104791, "grad_norm": 1.3952631950378418, "learning_rate": 7.74248346115401e-06, "loss": 0.357, "step": 21405 }, { "epoch": 0.585375191424196, "grad_norm": 1.5174654722213745, "learning_rate": 7.741620630404929e-06, "loss": 0.4079, "step": 21406 }, { "epoch": 0.585402537737913, "grad_norm": 1.2735713720321655, "learning_rate": 7.7407578173722e-06, "loss": 0.4916, "step": 21407 }, { "epoch": 0.5854298840516299, "grad_norm": 1.2558656930923462, "learning_rate": 7.739895022062589e-06, "loss": 0.5008, "step": 21408 }, { "epoch": 0.5854572303653468, "grad_norm": 1.1180447340011597, "learning_rate": 7.739032244482868e-06, "loss": 0.5118, "step": 21409 }, { "epoch": 0.5854845766790636, "grad_norm": 1.3780378103256226, "learning_rate": 7.7381694846398e-06, "loss": 0.5067, "step": 21410 }, { "epoch": 0.5855119229927805, "grad_norm": 1.528396725654602, "learning_rate": 7.737306742540157e-06, "loss": 0.5055, "step": 21411 }, { "epoch": 0.5855392693064975, "grad_norm": 1.198299527168274, "learning_rate": 7.736444018190705e-06, "loss": 0.473, "step": 21412 }, { "epoch": 0.5855666156202144, "grad_norm": 1.3899084329605103, "learning_rate": 7.735581311598214e-06, "loss": 0.4922, "step": 21413 }, { "epoch": 0.5855939619339313, "grad_norm": 1.2458127737045288, "learning_rate": 7.734718622769448e-06, "loss": 0.5008, "step": 21414 }, { "epoch": 0.5856213082476482, "grad_norm": 1.2107917070388794, "learning_rate": 7.73385595171118e-06, "loss": 0.4871, "step": 21415 }, { "epoch": 0.5856486545613652, "grad_norm": 1.4484761953353882, "learning_rate": 7.732993298430173e-06, "loss": 0.3659, "step": 21416 }, { "epoch": 0.5856760008750821, "grad_norm": 1.3276675939559937, "learning_rate": 7.732130662933196e-06, "loss": 0.469, "step": 21417 }, { "epoch": 0.5857033471887989, "grad_norm": 1.1846463680267334, "learning_rate": 7.731268045227013e-06, "loss": 0.4851, "step": 21418 }, { "epoch": 0.5857306935025158, "grad_norm": 1.324537754058838, "learning_rate": 7.730405445318397e-06, "loss": 0.4806, "step": 21419 }, { "epoch": 0.5857580398162328, "grad_norm": 1.3996286392211914, "learning_rate": 7.729542863214112e-06, "loss": 0.3392, "step": 21420 }, { "epoch": 0.5857853861299497, "grad_norm": 1.236168384552002, "learning_rate": 7.728680298920925e-06, "loss": 0.4831, "step": 21421 }, { "epoch": 0.5858127324436666, "grad_norm": 1.1285171508789062, "learning_rate": 7.7278177524456e-06, "loss": 0.7619, "step": 21422 }, { "epoch": 0.5858400787573835, "grad_norm": 1.4068578481674194, "learning_rate": 7.726955223794903e-06, "loss": 0.5098, "step": 21423 }, { "epoch": 0.5858674250711005, "grad_norm": 1.1941441297531128, "learning_rate": 7.726092712975606e-06, "loss": 0.469, "step": 21424 }, { "epoch": 0.5858947713848174, "grad_norm": 1.2566386461257935, "learning_rate": 7.725230219994469e-06, "loss": 0.4604, "step": 21425 }, { "epoch": 0.5859221176985342, "grad_norm": 1.0422751903533936, "learning_rate": 7.724367744858265e-06, "loss": 0.5048, "step": 21426 }, { "epoch": 0.5859494640122511, "grad_norm": 1.1464425325393677, "learning_rate": 7.723505287573752e-06, "loss": 0.484, "step": 21427 }, { "epoch": 0.585976810325968, "grad_norm": 1.4788000583648682, "learning_rate": 7.722642848147698e-06, "loss": 0.4845, "step": 21428 }, { "epoch": 0.586004156639685, "grad_norm": 1.2610766887664795, "learning_rate": 7.721780426586873e-06, "loss": 0.5008, "step": 21429 }, { "epoch": 0.5860315029534019, "grad_norm": 1.1639596223831177, "learning_rate": 7.720918022898038e-06, "loss": 0.474, "step": 21430 }, { "epoch": 0.5860588492671188, "grad_norm": 1.872093915939331, "learning_rate": 7.720055637087962e-06, "loss": 0.3713, "step": 21431 }, { "epoch": 0.5860861955808357, "grad_norm": 1.2191942930221558, "learning_rate": 7.719193269163404e-06, "loss": 0.4722, "step": 21432 }, { "epoch": 0.5861135418945526, "grad_norm": 2.408966064453125, "learning_rate": 7.718330919131134e-06, "loss": 0.4732, "step": 21433 }, { "epoch": 0.5861408882082695, "grad_norm": 1.0508301258087158, "learning_rate": 7.717468586997918e-06, "loss": 0.4954, "step": 21434 }, { "epoch": 0.5861682345219864, "grad_norm": 1.4844297170639038, "learning_rate": 7.716606272770518e-06, "loss": 0.53, "step": 21435 }, { "epoch": 0.5861955808357033, "grad_norm": 1.283806562423706, "learning_rate": 7.715743976455696e-06, "loss": 0.7215, "step": 21436 }, { "epoch": 0.5862229271494203, "grad_norm": 1.0956599712371826, "learning_rate": 7.714881698060224e-06, "loss": 0.4992, "step": 21437 }, { "epoch": 0.5862502734631372, "grad_norm": 1.3336352109909058, "learning_rate": 7.714019437590859e-06, "loss": 0.5146, "step": 21438 }, { "epoch": 0.5862776197768541, "grad_norm": 1.246904969215393, "learning_rate": 7.713157195054371e-06, "loss": 0.4586, "step": 21439 }, { "epoch": 0.586304966090571, "grad_norm": 3.5474069118499756, "learning_rate": 7.712294970457519e-06, "loss": 0.3576, "step": 21440 }, { "epoch": 0.5863323124042878, "grad_norm": 1.3346291780471802, "learning_rate": 7.711432763807069e-06, "loss": 0.4871, "step": 21441 }, { "epoch": 0.5863596587180048, "grad_norm": 1.6514511108398438, "learning_rate": 7.710570575109786e-06, "loss": 0.3884, "step": 21442 }, { "epoch": 0.5863870050317217, "grad_norm": 1.5117958784103394, "learning_rate": 7.709708404372432e-06, "loss": 0.5321, "step": 21443 }, { "epoch": 0.5864143513454386, "grad_norm": 1.2467790842056274, "learning_rate": 7.708846251601771e-06, "loss": 0.4987, "step": 21444 }, { "epoch": 0.5864416976591555, "grad_norm": 1.1425663232803345, "learning_rate": 7.707984116804567e-06, "loss": 0.4743, "step": 21445 }, { "epoch": 0.5864690439728725, "grad_norm": 1.2966265678405762, "learning_rate": 7.70712199998758e-06, "loss": 0.4967, "step": 21446 }, { "epoch": 0.5864963902865894, "grad_norm": 1.26006019115448, "learning_rate": 7.70625990115758e-06, "loss": 0.4812, "step": 21447 }, { "epoch": 0.5865237366003063, "grad_norm": 1.117268443107605, "learning_rate": 7.705397820321322e-06, "loss": 0.3341, "step": 21448 }, { "epoch": 0.5865510829140231, "grad_norm": 1.1448180675506592, "learning_rate": 7.704535757485572e-06, "loss": 0.7561, "step": 21449 }, { "epoch": 0.5865784292277401, "grad_norm": 1.4071632623672485, "learning_rate": 7.703673712657098e-06, "loss": 0.4666, "step": 21450 }, { "epoch": 0.586605775541457, "grad_norm": 1.1979814767837524, "learning_rate": 7.702811685842652e-06, "loss": 0.7512, "step": 21451 }, { "epoch": 0.5866331218551739, "grad_norm": 1.5247608423233032, "learning_rate": 7.701949677049006e-06, "loss": 0.5118, "step": 21452 }, { "epoch": 0.5866604681688908, "grad_norm": 1.2379553318023682, "learning_rate": 7.701087686282914e-06, "loss": 0.4752, "step": 21453 }, { "epoch": 0.5866878144826078, "grad_norm": 1.324080228805542, "learning_rate": 7.700225713551144e-06, "loss": 0.4781, "step": 21454 }, { "epoch": 0.5867151607963247, "grad_norm": 1.1629562377929688, "learning_rate": 7.699363758860458e-06, "loss": 0.4844, "step": 21455 }, { "epoch": 0.5867425071100416, "grad_norm": 1.5781002044677734, "learning_rate": 7.698501822217616e-06, "loss": 0.5, "step": 21456 }, { "epoch": 0.5867698534237584, "grad_norm": 1.2935707569122314, "learning_rate": 7.697639903629377e-06, "loss": 0.5042, "step": 21457 }, { "epoch": 0.5867971997374754, "grad_norm": 1.2795908451080322, "learning_rate": 7.696778003102506e-06, "loss": 0.5008, "step": 21458 }, { "epoch": 0.5868245460511923, "grad_norm": 1.2062568664550781, "learning_rate": 7.695916120643762e-06, "loss": 0.4753, "step": 21459 }, { "epoch": 0.5868518923649092, "grad_norm": 1.2992756366729736, "learning_rate": 7.695054256259909e-06, "loss": 0.4833, "step": 21460 }, { "epoch": 0.5868792386786261, "grad_norm": 1.4999226331710815, "learning_rate": 7.694192409957709e-06, "loss": 0.4774, "step": 21461 }, { "epoch": 0.586906584992343, "grad_norm": 1.215495228767395, "learning_rate": 7.693330581743916e-06, "loss": 0.4632, "step": 21462 }, { "epoch": 0.58693393130606, "grad_norm": 1.365370750427246, "learning_rate": 7.692468771625296e-06, "loss": 0.4659, "step": 21463 }, { "epoch": 0.5869612776197769, "grad_norm": 1.1155613660812378, "learning_rate": 7.691606979608612e-06, "loss": 0.4863, "step": 21464 }, { "epoch": 0.5869886239334937, "grad_norm": 1.3800603151321411, "learning_rate": 7.690745205700619e-06, "loss": 0.7618, "step": 21465 }, { "epoch": 0.5870159702472106, "grad_norm": 1.4927005767822266, "learning_rate": 7.689883449908081e-06, "loss": 0.3691, "step": 21466 }, { "epoch": 0.5870433165609276, "grad_norm": 1.5653878450393677, "learning_rate": 7.689021712237758e-06, "loss": 0.372, "step": 21467 }, { "epoch": 0.5870706628746445, "grad_norm": 1.3106105327606201, "learning_rate": 7.688159992696407e-06, "loss": 0.4688, "step": 21468 }, { "epoch": 0.5870980091883614, "grad_norm": 1.209276556968689, "learning_rate": 7.687298291290793e-06, "loss": 0.4804, "step": 21469 }, { "epoch": 0.5871253555020783, "grad_norm": 1.1877866983413696, "learning_rate": 7.686436608027671e-06, "loss": 0.4812, "step": 21470 }, { "epoch": 0.5871527018157953, "grad_norm": 1.234237790107727, "learning_rate": 7.685574942913803e-06, "loss": 0.4597, "step": 21471 }, { "epoch": 0.5871800481295122, "grad_norm": 1.3252085447311401, "learning_rate": 7.68471329595595e-06, "loss": 0.4867, "step": 21472 }, { "epoch": 0.587207394443229, "grad_norm": 1.2788522243499756, "learning_rate": 7.683851667160868e-06, "loss": 0.462, "step": 21473 }, { "epoch": 0.5872347407569459, "grad_norm": 1.4222545623779297, "learning_rate": 7.68299005653532e-06, "loss": 0.3629, "step": 21474 }, { "epoch": 0.5872620870706629, "grad_norm": 1.424990177154541, "learning_rate": 7.682128464086059e-06, "loss": 0.7159, "step": 21475 }, { "epoch": 0.5872894333843798, "grad_norm": 1.3288012742996216, "learning_rate": 7.68126688981985e-06, "loss": 0.4584, "step": 21476 }, { "epoch": 0.5873167796980967, "grad_norm": 1.1748028993606567, "learning_rate": 7.68040533374345e-06, "loss": 0.7088, "step": 21477 }, { "epoch": 0.5873441260118136, "grad_norm": 1.416727066040039, "learning_rate": 7.679543795863618e-06, "loss": 0.5114, "step": 21478 }, { "epoch": 0.5873714723255306, "grad_norm": 1.4621095657348633, "learning_rate": 7.678682276187109e-06, "loss": 0.4748, "step": 21479 }, { "epoch": 0.5873988186392475, "grad_norm": 1.1249576807022095, "learning_rate": 7.677820774720688e-06, "loss": 0.4668, "step": 21480 }, { "epoch": 0.5874261649529643, "grad_norm": 1.5407228469848633, "learning_rate": 7.676959291471107e-06, "loss": 0.5038, "step": 21481 }, { "epoch": 0.5874535112666812, "grad_norm": 1.2732728719711304, "learning_rate": 7.676097826445128e-06, "loss": 0.4732, "step": 21482 }, { "epoch": 0.5874808575803981, "grad_norm": 1.2697781324386597, "learning_rate": 7.675236379649507e-06, "loss": 0.4921, "step": 21483 }, { "epoch": 0.5875082038941151, "grad_norm": 1.499355673789978, "learning_rate": 7.674374951091002e-06, "loss": 0.3865, "step": 21484 }, { "epoch": 0.587535550207832, "grad_norm": 1.1292458772659302, "learning_rate": 7.673513540776372e-06, "loss": 0.4586, "step": 21485 }, { "epoch": 0.5875628965215489, "grad_norm": 1.5661166906356812, "learning_rate": 7.672652148712372e-06, "loss": 0.3791, "step": 21486 }, { "epoch": 0.5875902428352658, "grad_norm": 2.132646322250366, "learning_rate": 7.671790774905763e-06, "loss": 0.7101, "step": 21487 }, { "epoch": 0.5876175891489828, "grad_norm": 1.5760126113891602, "learning_rate": 7.670929419363297e-06, "loss": 0.3522, "step": 21488 }, { "epoch": 0.5876449354626996, "grad_norm": 1.3357185125350952, "learning_rate": 7.670068082091737e-06, "loss": 0.5162, "step": 21489 }, { "epoch": 0.5876722817764165, "grad_norm": 1.1756577491760254, "learning_rate": 7.669206763097836e-06, "loss": 0.7154, "step": 21490 }, { "epoch": 0.5876996280901334, "grad_norm": 1.4063475131988525, "learning_rate": 7.668345462388358e-06, "loss": 0.4891, "step": 21491 }, { "epoch": 0.5877269744038504, "grad_norm": 1.2419993877410889, "learning_rate": 7.667484179970046e-06, "loss": 0.4644, "step": 21492 }, { "epoch": 0.5877543207175673, "grad_norm": 1.4841161966323853, "learning_rate": 7.666622915849665e-06, "loss": 0.4653, "step": 21493 }, { "epoch": 0.5877816670312842, "grad_norm": 1.188982605934143, "learning_rate": 7.665761670033973e-06, "loss": 0.4734, "step": 21494 }, { "epoch": 0.5878090133450011, "grad_norm": 1.3732264041900635, "learning_rate": 7.664900442529722e-06, "loss": 0.4796, "step": 21495 }, { "epoch": 0.587836359658718, "grad_norm": 1.2433569431304932, "learning_rate": 7.664039233343672e-06, "loss": 0.5038, "step": 21496 }, { "epoch": 0.5878637059724349, "grad_norm": 1.5886168479919434, "learning_rate": 7.663178042482575e-06, "loss": 0.42, "step": 21497 }, { "epoch": 0.5878910522861518, "grad_norm": 1.2313876152038574, "learning_rate": 7.662316869953186e-06, "loss": 0.4901, "step": 21498 }, { "epoch": 0.5879183985998687, "grad_norm": 1.3079334497451782, "learning_rate": 7.66145571576227e-06, "loss": 0.4755, "step": 21499 }, { "epoch": 0.5879457449135856, "grad_norm": 1.219828486442566, "learning_rate": 7.66059457991657e-06, "loss": 0.4865, "step": 21500 }, { "epoch": 0.5879730912273026, "grad_norm": 1.719375729560852, "learning_rate": 7.659733462422847e-06, "loss": 0.4075, "step": 21501 }, { "epoch": 0.5880004375410195, "grad_norm": 1.173412561416626, "learning_rate": 7.658872363287861e-06, "loss": 0.346, "step": 21502 }, { "epoch": 0.5880277838547364, "grad_norm": 1.3393481969833374, "learning_rate": 7.658011282518358e-06, "loss": 0.7249, "step": 21503 }, { "epoch": 0.5880551301684533, "grad_norm": 1.1731452941894531, "learning_rate": 7.657150220121102e-06, "loss": 0.47, "step": 21504 }, { "epoch": 0.5880824764821702, "grad_norm": 1.119469404220581, "learning_rate": 7.656289176102839e-06, "loss": 0.4959, "step": 21505 }, { "epoch": 0.5881098227958871, "grad_norm": 1.386288046836853, "learning_rate": 7.655428150470329e-06, "loss": 0.5118, "step": 21506 }, { "epoch": 0.588137169109604, "grad_norm": 1.3135755062103271, "learning_rate": 7.654567143230326e-06, "loss": 0.5116, "step": 21507 }, { "epoch": 0.5881645154233209, "grad_norm": 1.4988584518432617, "learning_rate": 7.653706154389584e-06, "loss": 0.3764, "step": 21508 }, { "epoch": 0.5881918617370379, "grad_norm": 1.4254590272903442, "learning_rate": 7.652845183954857e-06, "loss": 0.5172, "step": 21509 }, { "epoch": 0.5882192080507548, "grad_norm": 1.4128986597061157, "learning_rate": 7.651984231932898e-06, "loss": 0.5051, "step": 21510 }, { "epoch": 0.5882465543644717, "grad_norm": 1.2946155071258545, "learning_rate": 7.65112329833046e-06, "loss": 0.5347, "step": 21511 }, { "epoch": 0.5882739006781886, "grad_norm": 1.2198190689086914, "learning_rate": 7.650262383154303e-06, "loss": 0.752, "step": 21512 }, { "epoch": 0.5883012469919054, "grad_norm": 1.2953481674194336, "learning_rate": 7.649401486411173e-06, "loss": 0.4836, "step": 21513 }, { "epoch": 0.5883285933056224, "grad_norm": 1.8526616096496582, "learning_rate": 7.648540608107827e-06, "loss": 0.4984, "step": 21514 }, { "epoch": 0.5883559396193393, "grad_norm": 1.6522537469863892, "learning_rate": 7.647679748251022e-06, "loss": 0.7307, "step": 21515 }, { "epoch": 0.5883832859330562, "grad_norm": 1.5483590364456177, "learning_rate": 7.646818906847502e-06, "loss": 0.3802, "step": 21516 }, { "epoch": 0.5884106322467731, "grad_norm": 1.2279331684112549, "learning_rate": 7.64595808390403e-06, "loss": 0.4425, "step": 21517 }, { "epoch": 0.5884379785604901, "grad_norm": 1.3401639461517334, "learning_rate": 7.64509727942735e-06, "loss": 0.4743, "step": 21518 }, { "epoch": 0.588465324874207, "grad_norm": 1.1930469274520874, "learning_rate": 7.644236493424223e-06, "loss": 0.5238, "step": 21519 }, { "epoch": 0.5884926711879239, "grad_norm": 1.198922038078308, "learning_rate": 7.643375725901397e-06, "loss": 0.4764, "step": 21520 }, { "epoch": 0.5885200175016407, "grad_norm": 1.2473539113998413, "learning_rate": 7.642514976865624e-06, "loss": 0.4928, "step": 21521 }, { "epoch": 0.5885473638153577, "grad_norm": 2.0951271057128906, "learning_rate": 7.641654246323657e-06, "loss": 0.4758, "step": 21522 }, { "epoch": 0.5885747101290746, "grad_norm": 1.4505051374435425, "learning_rate": 7.640793534282251e-06, "loss": 0.7427, "step": 21523 }, { "epoch": 0.5886020564427915, "grad_norm": 1.1406337022781372, "learning_rate": 7.639932840748153e-06, "loss": 0.4806, "step": 21524 }, { "epoch": 0.5886294027565084, "grad_norm": 1.186295509338379, "learning_rate": 7.639072165728121e-06, "loss": 0.4899, "step": 21525 }, { "epoch": 0.5886567490702254, "grad_norm": 1.1832610368728638, "learning_rate": 7.638211509228906e-06, "loss": 0.465, "step": 21526 }, { "epoch": 0.5886840953839423, "grad_norm": 1.0424612760543823, "learning_rate": 7.637350871257252e-06, "loss": 0.4426, "step": 21527 }, { "epoch": 0.5887114416976592, "grad_norm": 1.3837276697158813, "learning_rate": 7.636490251819915e-06, "loss": 0.484, "step": 21528 }, { "epoch": 0.588738788011376, "grad_norm": 1.7121096849441528, "learning_rate": 7.635629650923649e-06, "loss": 0.4804, "step": 21529 }, { "epoch": 0.588766134325093, "grad_norm": 1.246527910232544, "learning_rate": 7.6347690685752e-06, "loss": 0.5306, "step": 21530 }, { "epoch": 0.5887934806388099, "grad_norm": 1.3427519798278809, "learning_rate": 7.633908504781326e-06, "loss": 0.5008, "step": 21531 }, { "epoch": 0.5888208269525268, "grad_norm": 1.3747485876083374, "learning_rate": 7.633047959548772e-06, "loss": 0.4826, "step": 21532 }, { "epoch": 0.5888481732662437, "grad_norm": 1.222452163696289, "learning_rate": 7.63218743288429e-06, "loss": 0.5215, "step": 21533 }, { "epoch": 0.5888755195799606, "grad_norm": 1.1606080532073975, "learning_rate": 7.631326924794631e-06, "loss": 0.4571, "step": 21534 }, { "epoch": 0.5889028658936776, "grad_norm": 1.2385752201080322, "learning_rate": 7.630466435286547e-06, "loss": 0.7578, "step": 21535 }, { "epoch": 0.5889302122073944, "grad_norm": 1.6653074026107788, "learning_rate": 7.629605964366785e-06, "loss": 0.4901, "step": 21536 }, { "epoch": 0.5889575585211113, "grad_norm": 1.22427499294281, "learning_rate": 7.6287455120421e-06, "loss": 0.4847, "step": 21537 }, { "epoch": 0.5889849048348282, "grad_norm": 1.1768380403518677, "learning_rate": 7.627885078319237e-06, "loss": 0.4665, "step": 21538 }, { "epoch": 0.5890122511485452, "grad_norm": 1.9592726230621338, "learning_rate": 7.627024663204951e-06, "loss": 0.4771, "step": 21539 }, { "epoch": 0.5890395974622621, "grad_norm": 1.5366796255111694, "learning_rate": 7.626164266705985e-06, "loss": 0.3977, "step": 21540 }, { "epoch": 0.589066943775979, "grad_norm": 1.4022769927978516, "learning_rate": 7.625303888829092e-06, "loss": 0.3759, "step": 21541 }, { "epoch": 0.5890942900896959, "grad_norm": 1.5754039287567139, "learning_rate": 7.624443529581025e-06, "loss": 0.3605, "step": 21542 }, { "epoch": 0.5891216364034129, "grad_norm": 1.2850492000579834, "learning_rate": 7.623583188968528e-06, "loss": 0.7417, "step": 21543 }, { "epoch": 0.5891489827171297, "grad_norm": 1.5292043685913086, "learning_rate": 7.622722866998351e-06, "loss": 0.3712, "step": 21544 }, { "epoch": 0.5891763290308466, "grad_norm": 1.4072563648223877, "learning_rate": 7.621862563677247e-06, "loss": 0.4949, "step": 21545 }, { "epoch": 0.5892036753445635, "grad_norm": 1.2954057455062866, "learning_rate": 7.62100227901196e-06, "loss": 0.4923, "step": 21546 }, { "epoch": 0.5892310216582805, "grad_norm": 1.179157018661499, "learning_rate": 7.620142013009242e-06, "loss": 0.3004, "step": 21547 }, { "epoch": 0.5892583679719974, "grad_norm": 1.4649968147277832, "learning_rate": 7.619281765675839e-06, "loss": 0.4994, "step": 21548 }, { "epoch": 0.5892857142857143, "grad_norm": 1.398427128791809, "learning_rate": 7.6184215370185e-06, "loss": 0.4327, "step": 21549 }, { "epoch": 0.5893130605994312, "grad_norm": 1.463494062423706, "learning_rate": 7.617561327043975e-06, "loss": 0.48, "step": 21550 }, { "epoch": 0.5893404069131482, "grad_norm": 1.4223226308822632, "learning_rate": 7.61670113575901e-06, "loss": 0.4328, "step": 21551 }, { "epoch": 0.589367753226865, "grad_norm": 1.2173243761062622, "learning_rate": 7.615840963170356e-06, "loss": 0.4886, "step": 21552 }, { "epoch": 0.5893950995405819, "grad_norm": 1.598675012588501, "learning_rate": 7.614980809284756e-06, "loss": 0.4883, "step": 21553 }, { "epoch": 0.5894224458542988, "grad_norm": 1.1663538217544556, "learning_rate": 7.6141206741089605e-06, "loss": 0.5048, "step": 21554 }, { "epoch": 0.5894497921680157, "grad_norm": 1.2990623712539673, "learning_rate": 7.6132605576497196e-06, "loss": 0.743, "step": 21555 }, { "epoch": 0.5894771384817327, "grad_norm": 1.7286518812179565, "learning_rate": 7.612400459913774e-06, "loss": 0.4815, "step": 21556 }, { "epoch": 0.5895044847954496, "grad_norm": 1.3183941841125488, "learning_rate": 7.611540380907877e-06, "loss": 0.494, "step": 21557 }, { "epoch": 0.5895318311091665, "grad_norm": 1.6115816831588745, "learning_rate": 7.610680320638775e-06, "loss": 0.4982, "step": 21558 }, { "epoch": 0.5895591774228834, "grad_norm": 1.1153112649917603, "learning_rate": 7.609820279113212e-06, "loss": 0.5056, "step": 21559 }, { "epoch": 0.5895865237366003, "grad_norm": 1.3477672338485718, "learning_rate": 7.608960256337936e-06, "loss": 0.4954, "step": 21560 }, { "epoch": 0.5896138700503172, "grad_norm": 1.5981628894805908, "learning_rate": 7.6081002523196945e-06, "loss": 0.3676, "step": 21561 }, { "epoch": 0.5896412163640341, "grad_norm": 1.2479416131973267, "learning_rate": 7.607240267065235e-06, "loss": 0.5257, "step": 21562 }, { "epoch": 0.589668562677751, "grad_norm": 1.4774084091186523, "learning_rate": 7.6063803005813e-06, "loss": 0.4896, "step": 21563 }, { "epoch": 0.589695908991468, "grad_norm": 1.430767297744751, "learning_rate": 7.60552035287464e-06, "loss": 0.3799, "step": 21564 }, { "epoch": 0.5897232553051849, "grad_norm": 1.3459469079971313, "learning_rate": 7.604660423951996e-06, "loss": 0.5135, "step": 21565 }, { "epoch": 0.5897506016189018, "grad_norm": 0.9706268906593323, "learning_rate": 7.603800513820118e-06, "loss": 0.353, "step": 21566 }, { "epoch": 0.5897779479326187, "grad_norm": 1.333768606185913, "learning_rate": 7.602940622485751e-06, "loss": 0.5044, "step": 21567 }, { "epoch": 0.5898052942463355, "grad_norm": 1.5237572193145752, "learning_rate": 7.6020807499556395e-06, "loss": 0.4787, "step": 21568 }, { "epoch": 0.5898326405600525, "grad_norm": 3.1362907886505127, "learning_rate": 7.601220896236531e-06, "loss": 0.5065, "step": 21569 }, { "epoch": 0.5898599868737694, "grad_norm": 1.5630828142166138, "learning_rate": 7.600361061335168e-06, "loss": 0.4973, "step": 21570 }, { "epoch": 0.5898873331874863, "grad_norm": 1.4145445823669434, "learning_rate": 7.599501245258296e-06, "loss": 0.4875, "step": 21571 }, { "epoch": 0.5899146795012032, "grad_norm": 1.1776903867721558, "learning_rate": 7.598641448012665e-06, "loss": 0.4864, "step": 21572 }, { "epoch": 0.5899420258149202, "grad_norm": 1.2032585144042969, "learning_rate": 7.597781669605013e-06, "loss": 0.4931, "step": 21573 }, { "epoch": 0.5899693721286371, "grad_norm": 1.1169050931930542, "learning_rate": 7.59692191004209e-06, "loss": 0.4746, "step": 21574 }, { "epoch": 0.589996718442354, "grad_norm": 1.1504030227661133, "learning_rate": 7.596062169330635e-06, "loss": 0.5011, "step": 21575 }, { "epoch": 0.5900240647560708, "grad_norm": 1.133358120918274, "learning_rate": 7.595202447477398e-06, "loss": 0.4875, "step": 21576 }, { "epoch": 0.5900514110697878, "grad_norm": 1.1091524362564087, "learning_rate": 7.594342744489121e-06, "loss": 0.4981, "step": 21577 }, { "epoch": 0.5900787573835047, "grad_norm": 1.3908967971801758, "learning_rate": 7.593483060372548e-06, "loss": 0.4669, "step": 21578 }, { "epoch": 0.5901061036972216, "grad_norm": 1.2096660137176514, "learning_rate": 7.592623395134422e-06, "loss": 0.7404, "step": 21579 }, { "epoch": 0.5901334500109385, "grad_norm": 1.372939109802246, "learning_rate": 7.591763748781491e-06, "loss": 0.464, "step": 21580 }, { "epoch": 0.5901607963246555, "grad_norm": 1.2611945867538452, "learning_rate": 7.590904121320492e-06, "loss": 0.4797, "step": 21581 }, { "epoch": 0.5901881426383724, "grad_norm": 1.1713855266571045, "learning_rate": 7.590044512758176e-06, "loss": 0.4635, "step": 21582 }, { "epoch": 0.5902154889520893, "grad_norm": 1.164083480834961, "learning_rate": 7.5891849231012795e-06, "loss": 0.4744, "step": 21583 }, { "epoch": 0.5902428352658061, "grad_norm": 1.2016963958740234, "learning_rate": 7.5883253523565495e-06, "loss": 0.4948, "step": 21584 }, { "epoch": 0.590270181579523, "grad_norm": 1.1932501792907715, "learning_rate": 7.587465800530729e-06, "loss": 0.4815, "step": 21585 }, { "epoch": 0.59029752789324, "grad_norm": 1.3962830305099487, "learning_rate": 7.58660626763056e-06, "loss": 0.4827, "step": 21586 }, { "epoch": 0.5903248742069569, "grad_norm": 1.5225844383239746, "learning_rate": 7.5857467536627835e-06, "loss": 0.4939, "step": 21587 }, { "epoch": 0.5903522205206738, "grad_norm": 1.527616024017334, "learning_rate": 7.584887258634148e-06, "loss": 0.4681, "step": 21588 }, { "epoch": 0.5903795668343907, "grad_norm": 1.3881196975708008, "learning_rate": 7.58402778255139e-06, "loss": 0.4892, "step": 21589 }, { "epoch": 0.5904069131481077, "grad_norm": 1.131843090057373, "learning_rate": 7.5831683254212564e-06, "loss": 0.5153, "step": 21590 }, { "epoch": 0.5904342594618246, "grad_norm": 1.3419240713119507, "learning_rate": 7.5823088872504845e-06, "loss": 0.4991, "step": 21591 }, { "epoch": 0.5904616057755414, "grad_norm": 1.6186928749084473, "learning_rate": 7.58144946804582e-06, "loss": 0.4292, "step": 21592 }, { "epoch": 0.5904889520892583, "grad_norm": 1.135157585144043, "learning_rate": 7.580590067814004e-06, "loss": 0.4614, "step": 21593 }, { "epoch": 0.5905162984029753, "grad_norm": 1.327328085899353, "learning_rate": 7.579730686561778e-06, "loss": 0.4769, "step": 21594 }, { "epoch": 0.5905436447166922, "grad_norm": 1.18318772315979, "learning_rate": 7.578871324295885e-06, "loss": 0.4772, "step": 21595 }, { "epoch": 0.5905709910304091, "grad_norm": 1.3619661331176758, "learning_rate": 7.5780119810230624e-06, "loss": 0.5177, "step": 21596 }, { "epoch": 0.590598337344126, "grad_norm": 1.227076530456543, "learning_rate": 7.57715265675006e-06, "loss": 0.4749, "step": 21597 }, { "epoch": 0.590625683657843, "grad_norm": 1.3266339302062988, "learning_rate": 7.576293351483607e-06, "loss": 0.4732, "step": 21598 }, { "epoch": 0.5906530299715599, "grad_norm": 1.263109803199768, "learning_rate": 7.575434065230454e-06, "loss": 0.482, "step": 21599 }, { "epoch": 0.5906803762852767, "grad_norm": 1.1505261659622192, "learning_rate": 7.574574797997335e-06, "loss": 0.5027, "step": 21600 }, { "epoch": 0.5907077225989936, "grad_norm": 1.577228307723999, "learning_rate": 7.573715549790995e-06, "loss": 0.5182, "step": 21601 }, { "epoch": 0.5907350689127105, "grad_norm": 1.1911227703094482, "learning_rate": 7.572856320618176e-06, "loss": 0.4772, "step": 21602 }, { "epoch": 0.5907624152264275, "grad_norm": 1.4542725086212158, "learning_rate": 7.571997110485614e-06, "loss": 0.4771, "step": 21603 }, { "epoch": 0.5907897615401444, "grad_norm": 1.1707502603530884, "learning_rate": 7.571137919400053e-06, "loss": 0.4923, "step": 21604 }, { "epoch": 0.5908171078538613, "grad_norm": 1.2657564878463745, "learning_rate": 7.570278747368229e-06, "loss": 0.5006, "step": 21605 }, { "epoch": 0.5908444541675782, "grad_norm": 1.1290920972824097, "learning_rate": 7.569419594396885e-06, "loss": 0.4756, "step": 21606 }, { "epoch": 0.5908718004812952, "grad_norm": 1.3795030117034912, "learning_rate": 7.568560460492762e-06, "loss": 0.4774, "step": 21607 }, { "epoch": 0.590899146795012, "grad_norm": 1.2178720235824585, "learning_rate": 7.567701345662597e-06, "loss": 0.4886, "step": 21608 }, { "epoch": 0.5909264931087289, "grad_norm": 4.012772083282471, "learning_rate": 7.566842249913129e-06, "loss": 0.7508, "step": 21609 }, { "epoch": 0.5909538394224458, "grad_norm": 1.271911382675171, "learning_rate": 7.565983173251103e-06, "loss": 0.4934, "step": 21610 }, { "epoch": 0.5909811857361628, "grad_norm": 1.1020047664642334, "learning_rate": 7.565124115683249e-06, "loss": 0.4928, "step": 21611 }, { "epoch": 0.5910085320498797, "grad_norm": 1.0865085124969482, "learning_rate": 7.564265077216315e-06, "loss": 0.4777, "step": 21612 }, { "epoch": 0.5910358783635966, "grad_norm": 1.3343960046768188, "learning_rate": 7.563406057857034e-06, "loss": 0.4403, "step": 21613 }, { "epoch": 0.5910632246773135, "grad_norm": 1.1257362365722656, "learning_rate": 7.562547057612146e-06, "loss": 0.4929, "step": 21614 }, { "epoch": 0.5910905709910305, "grad_norm": 1.2980822324752808, "learning_rate": 7.561688076488393e-06, "loss": 0.475, "step": 21615 }, { "epoch": 0.5911179173047473, "grad_norm": 1.2122926712036133, "learning_rate": 7.560829114492508e-06, "loss": 0.4736, "step": 21616 }, { "epoch": 0.5911452636184642, "grad_norm": 1.1082613468170166, "learning_rate": 7.559970171631234e-06, "loss": 0.4924, "step": 21617 }, { "epoch": 0.5911726099321811, "grad_norm": 1.3291981220245361, "learning_rate": 7.5591112479113075e-06, "loss": 0.5203, "step": 21618 }, { "epoch": 0.591199956245898, "grad_norm": 1.1826785802841187, "learning_rate": 7.558252343339464e-06, "loss": 0.4907, "step": 21619 }, { "epoch": 0.591227302559615, "grad_norm": 1.1013903617858887, "learning_rate": 7.557393457922446e-06, "loss": 0.5016, "step": 21620 }, { "epoch": 0.5912546488733319, "grad_norm": 1.4428025484085083, "learning_rate": 7.556534591666989e-06, "loss": 0.5011, "step": 21621 }, { "epoch": 0.5912819951870488, "grad_norm": 1.2713907957077026, "learning_rate": 7.5556757445798275e-06, "loss": 0.4644, "step": 21622 }, { "epoch": 0.5913093415007658, "grad_norm": 1.1320949792861938, "learning_rate": 7.554816916667707e-06, "loss": 0.3274, "step": 21623 }, { "epoch": 0.5913366878144826, "grad_norm": 1.160965085029602, "learning_rate": 7.553958107937354e-06, "loss": 0.4761, "step": 21624 }, { "epoch": 0.5913640341281995, "grad_norm": 1.2301676273345947, "learning_rate": 7.553099318395516e-06, "loss": 0.5152, "step": 21625 }, { "epoch": 0.5913913804419164, "grad_norm": 2.6612131595611572, "learning_rate": 7.5522405480489225e-06, "loss": 0.3639, "step": 21626 }, { "epoch": 0.5914187267556333, "grad_norm": 1.1915706396102905, "learning_rate": 7.551381796904313e-06, "loss": 0.4694, "step": 21627 }, { "epoch": 0.5914460730693503, "grad_norm": 1.260454535484314, "learning_rate": 7.550523064968427e-06, "loss": 0.4537, "step": 21628 }, { "epoch": 0.5914734193830672, "grad_norm": 1.1737202405929565, "learning_rate": 7.549664352247995e-06, "loss": 0.4574, "step": 21629 }, { "epoch": 0.5915007656967841, "grad_norm": 1.1995670795440674, "learning_rate": 7.5488056587497584e-06, "loss": 0.4712, "step": 21630 }, { "epoch": 0.591528112010501, "grad_norm": 1.7333344221115112, "learning_rate": 7.547946984480451e-06, "loss": 0.3563, "step": 21631 }, { "epoch": 0.5915554583242179, "grad_norm": 1.4716445207595825, "learning_rate": 7.547088329446812e-06, "loss": 0.7449, "step": 21632 }, { "epoch": 0.5915828046379348, "grad_norm": 1.1640857458114624, "learning_rate": 7.546229693655571e-06, "loss": 0.477, "step": 21633 }, { "epoch": 0.5916101509516517, "grad_norm": 1.1785755157470703, "learning_rate": 7.54537107711347e-06, "loss": 0.4976, "step": 21634 }, { "epoch": 0.5916374972653686, "grad_norm": 1.1264339685440063, "learning_rate": 7.544512479827241e-06, "loss": 0.4928, "step": 21635 }, { "epoch": 0.5916648435790856, "grad_norm": 1.2718762159347534, "learning_rate": 7.543653901803619e-06, "loss": 0.4775, "step": 21636 }, { "epoch": 0.5916921898928025, "grad_norm": 1.2586082220077515, "learning_rate": 7.542795343049344e-06, "loss": 0.7222, "step": 21637 }, { "epoch": 0.5917195362065194, "grad_norm": 1.0834987163543701, "learning_rate": 7.5419368035711445e-06, "loss": 0.5289, "step": 21638 }, { "epoch": 0.5917468825202362, "grad_norm": 1.204244613647461, "learning_rate": 7.541078283375762e-06, "loss": 0.5002, "step": 21639 }, { "epoch": 0.5917742288339531, "grad_norm": 1.5504003763198853, "learning_rate": 7.5402197824699265e-06, "loss": 0.4611, "step": 21640 }, { "epoch": 0.5918015751476701, "grad_norm": 1.103484034538269, "learning_rate": 7.539361300860375e-06, "loss": 0.4544, "step": 21641 }, { "epoch": 0.591828921461387, "grad_norm": 1.228279709815979, "learning_rate": 7.538502838553843e-06, "loss": 0.4708, "step": 21642 }, { "epoch": 0.5918562677751039, "grad_norm": 1.7633212804794312, "learning_rate": 7.537644395557061e-06, "loss": 0.4742, "step": 21643 }, { "epoch": 0.5918836140888208, "grad_norm": 1.2399033308029175, "learning_rate": 7.536785971876767e-06, "loss": 0.7367, "step": 21644 }, { "epoch": 0.5919109604025378, "grad_norm": 1.2569259405136108, "learning_rate": 7.535927567519694e-06, "loss": 0.4806, "step": 21645 }, { "epoch": 0.5919383067162547, "grad_norm": 1.7646571397781372, "learning_rate": 7.535069182492576e-06, "loss": 0.3635, "step": 21646 }, { "epoch": 0.5919656530299715, "grad_norm": 1.730865716934204, "learning_rate": 7.534210816802146e-06, "loss": 0.4934, "step": 21647 }, { "epoch": 0.5919929993436884, "grad_norm": 1.2502678632736206, "learning_rate": 7.533352470455138e-06, "loss": 0.4996, "step": 21648 }, { "epoch": 0.5920203456574054, "grad_norm": 1.2110278606414795, "learning_rate": 7.532494143458285e-06, "loss": 0.4681, "step": 21649 }, { "epoch": 0.5920476919711223, "grad_norm": 1.1057190895080566, "learning_rate": 7.531635835818324e-06, "loss": 0.5004, "step": 21650 }, { "epoch": 0.5920750382848392, "grad_norm": 1.1237949132919312, "learning_rate": 7.530777547541983e-06, "loss": 0.4758, "step": 21651 }, { "epoch": 0.5921023845985561, "grad_norm": 1.462523341178894, "learning_rate": 7.529919278635997e-06, "loss": 0.4759, "step": 21652 }, { "epoch": 0.5921297309122731, "grad_norm": 1.1359715461730957, "learning_rate": 7.529061029107101e-06, "loss": 0.5247, "step": 21653 }, { "epoch": 0.59215707722599, "grad_norm": 1.0806496143341064, "learning_rate": 7.528202798962023e-06, "loss": 0.4611, "step": 21654 }, { "epoch": 0.5921844235397068, "grad_norm": 1.2642459869384766, "learning_rate": 7.527344588207501e-06, "loss": 0.4792, "step": 21655 }, { "epoch": 0.5922117698534237, "grad_norm": 1.4223216772079468, "learning_rate": 7.526486396850264e-06, "loss": 0.4787, "step": 21656 }, { "epoch": 0.5922391161671406, "grad_norm": 1.196440577507019, "learning_rate": 7.525628224897044e-06, "loss": 0.4858, "step": 21657 }, { "epoch": 0.5922664624808576, "grad_norm": 1.4360781908035278, "learning_rate": 7.524770072354576e-06, "loss": 0.4782, "step": 21658 }, { "epoch": 0.5922938087945745, "grad_norm": 1.3455394506454468, "learning_rate": 7.5239119392295886e-06, "loss": 0.4967, "step": 21659 }, { "epoch": 0.5923211551082914, "grad_norm": 1.1894280910491943, "learning_rate": 7.523053825528818e-06, "loss": 0.4665, "step": 21660 }, { "epoch": 0.5923485014220083, "grad_norm": 1.3174697160720825, "learning_rate": 7.52219573125899e-06, "loss": 0.5115, "step": 21661 }, { "epoch": 0.5923758477357253, "grad_norm": 1.2183972597122192, "learning_rate": 7.521337656426839e-06, "loss": 0.4689, "step": 21662 }, { "epoch": 0.5924031940494421, "grad_norm": 1.1898118257522583, "learning_rate": 7.520479601039099e-06, "loss": 0.4957, "step": 21663 }, { "epoch": 0.592430540363159, "grad_norm": 1.4208849668502808, "learning_rate": 7.519621565102496e-06, "loss": 0.5058, "step": 21664 }, { "epoch": 0.5924578866768759, "grad_norm": 1.2281745672225952, "learning_rate": 7.518763548623765e-06, "loss": 0.4939, "step": 21665 }, { "epoch": 0.5924852329905929, "grad_norm": 1.081068515777588, "learning_rate": 7.517905551609637e-06, "loss": 0.4901, "step": 21666 }, { "epoch": 0.5925125793043098, "grad_norm": 1.4160676002502441, "learning_rate": 7.517047574066844e-06, "loss": 0.4506, "step": 21667 }, { "epoch": 0.5925399256180267, "grad_norm": 1.6038647890090942, "learning_rate": 7.51618961600211e-06, "loss": 0.352, "step": 21668 }, { "epoch": 0.5925672719317436, "grad_norm": 1.519894003868103, "learning_rate": 7.515331677422171e-06, "loss": 0.4704, "step": 21669 }, { "epoch": 0.5925946182454606, "grad_norm": 1.0579744577407837, "learning_rate": 7.5144737583337535e-06, "loss": 0.4994, "step": 21670 }, { "epoch": 0.5926219645591774, "grad_norm": 1.1551432609558105, "learning_rate": 7.51361585874359e-06, "loss": 0.4875, "step": 21671 }, { "epoch": 0.5926493108728943, "grad_norm": 1.5332980155944824, "learning_rate": 7.512757978658412e-06, "loss": 0.4009, "step": 21672 }, { "epoch": 0.5926766571866112, "grad_norm": 1.5710781812667847, "learning_rate": 7.511900118084947e-06, "loss": 0.3733, "step": 21673 }, { "epoch": 0.5927040035003281, "grad_norm": 1.2079252004623413, "learning_rate": 7.511042277029924e-06, "loss": 0.779, "step": 21674 }, { "epoch": 0.5927313498140451, "grad_norm": 1.328121304512024, "learning_rate": 7.510184455500077e-06, "loss": 0.506, "step": 21675 }, { "epoch": 0.592758696127762, "grad_norm": 12.888870239257812, "learning_rate": 7.509326653502129e-06, "loss": 0.7613, "step": 21676 }, { "epoch": 0.5927860424414789, "grad_norm": 1.4360979795455933, "learning_rate": 7.5084688710428154e-06, "loss": 0.5192, "step": 21677 }, { "epoch": 0.5928133887551958, "grad_norm": 1.1909925937652588, "learning_rate": 7.507611108128861e-06, "loss": 0.4887, "step": 21678 }, { "epoch": 0.5928407350689127, "grad_norm": 1.7349921464920044, "learning_rate": 7.506753364766994e-06, "loss": 0.741, "step": 21679 }, { "epoch": 0.5928680813826296, "grad_norm": 1.2049567699432373, "learning_rate": 7.50589564096395e-06, "loss": 0.4707, "step": 21680 }, { "epoch": 0.5928954276963465, "grad_norm": 1.0713849067687988, "learning_rate": 7.505037936726448e-06, "loss": 0.4912, "step": 21681 }, { "epoch": 0.5929227740100634, "grad_norm": 1.2819902896881104, "learning_rate": 7.5041802520612236e-06, "loss": 0.4842, "step": 21682 }, { "epoch": 0.5929501203237804, "grad_norm": 1.771337866783142, "learning_rate": 7.503322586975003e-06, "loss": 0.4818, "step": 21683 }, { "epoch": 0.5929774666374973, "grad_norm": 1.4280706644058228, "learning_rate": 7.5024649414745125e-06, "loss": 0.3808, "step": 21684 }, { "epoch": 0.5930048129512142, "grad_norm": 1.2262094020843506, "learning_rate": 7.501607315566484e-06, "loss": 0.5101, "step": 21685 }, { "epoch": 0.5930321592649311, "grad_norm": 1.313603401184082, "learning_rate": 7.50074970925764e-06, "loss": 0.324, "step": 21686 }, { "epoch": 0.593059505578648, "grad_norm": 1.1784796714782715, "learning_rate": 7.499892122554712e-06, "loss": 0.5031, "step": 21687 }, { "epoch": 0.5930868518923649, "grad_norm": 1.1657748222351074, "learning_rate": 7.499034555464429e-06, "loss": 0.5268, "step": 21688 }, { "epoch": 0.5931141982060818, "grad_norm": 1.5419261455535889, "learning_rate": 7.498177007993513e-06, "loss": 0.4659, "step": 21689 }, { "epoch": 0.5931415445197987, "grad_norm": 1.2174663543701172, "learning_rate": 7.497319480148697e-06, "loss": 0.5056, "step": 21690 }, { "epoch": 0.5931688908335156, "grad_norm": 1.3094619512557983, "learning_rate": 7.496461971936704e-06, "loss": 0.5132, "step": 21691 }, { "epoch": 0.5931962371472326, "grad_norm": 1.2098839282989502, "learning_rate": 7.495604483364261e-06, "loss": 0.4853, "step": 21692 }, { "epoch": 0.5932235834609495, "grad_norm": 1.2802873849868774, "learning_rate": 7.4947470144381e-06, "loss": 0.4526, "step": 21693 }, { "epoch": 0.5932509297746664, "grad_norm": 1.4884459972381592, "learning_rate": 7.493889565164939e-06, "loss": 0.4558, "step": 21694 }, { "epoch": 0.5932782760883832, "grad_norm": 1.3428488969802856, "learning_rate": 7.493032135551512e-06, "loss": 0.5195, "step": 21695 }, { "epoch": 0.5933056224021002, "grad_norm": 1.2663514614105225, "learning_rate": 7.4921747256045415e-06, "loss": 0.4854, "step": 21696 }, { "epoch": 0.5933329687158171, "grad_norm": 1.1831550598144531, "learning_rate": 7.491317335330754e-06, "loss": 0.4752, "step": 21697 }, { "epoch": 0.593360315029534, "grad_norm": 1.1435227394104004, "learning_rate": 7.490459964736878e-06, "loss": 0.366, "step": 21698 }, { "epoch": 0.5933876613432509, "grad_norm": 1.2562317848205566, "learning_rate": 7.489602613829634e-06, "loss": 0.4987, "step": 21699 }, { "epoch": 0.5934150076569679, "grad_norm": 1.2804423570632935, "learning_rate": 7.488745282615753e-06, "loss": 0.7752, "step": 21700 }, { "epoch": 0.5934423539706848, "grad_norm": 1.3705918788909912, "learning_rate": 7.4878879711019595e-06, "loss": 0.5113, "step": 21701 }, { "epoch": 0.5934697002844017, "grad_norm": 1.1401582956314087, "learning_rate": 7.48703067929498e-06, "loss": 0.4931, "step": 21702 }, { "epoch": 0.5934970465981185, "grad_norm": 1.4460960626602173, "learning_rate": 7.486173407201534e-06, "loss": 0.3858, "step": 21703 }, { "epoch": 0.5935243929118355, "grad_norm": 1.2501674890518188, "learning_rate": 7.485316154828352e-06, "loss": 0.7376, "step": 21704 }, { "epoch": 0.5935517392255524, "grad_norm": 1.4816884994506836, "learning_rate": 7.484458922182154e-06, "loss": 0.3928, "step": 21705 }, { "epoch": 0.5935790855392693, "grad_norm": 1.5486876964569092, "learning_rate": 7.48360170926967e-06, "loss": 0.4848, "step": 21706 }, { "epoch": 0.5936064318529862, "grad_norm": 1.246896743774414, "learning_rate": 7.482744516097622e-06, "loss": 0.4861, "step": 21707 }, { "epoch": 0.5936337781667032, "grad_norm": 1.1406468152999878, "learning_rate": 7.481887342672734e-06, "loss": 0.4859, "step": 21708 }, { "epoch": 0.5936611244804201, "grad_norm": 2.3008153438568115, "learning_rate": 7.481030189001732e-06, "loss": 0.779, "step": 21709 }, { "epoch": 0.593688470794137, "grad_norm": 1.1161518096923828, "learning_rate": 7.48017305509134e-06, "loss": 0.7514, "step": 21710 }, { "epoch": 0.5937158171078538, "grad_norm": 1.2700111865997314, "learning_rate": 7.47931594094828e-06, "loss": 0.7507, "step": 21711 }, { "epoch": 0.5937431634215707, "grad_norm": 1.4154298305511475, "learning_rate": 7.478458846579279e-06, "loss": 0.4558, "step": 21712 }, { "epoch": 0.5937705097352877, "grad_norm": 1.3225736618041992, "learning_rate": 7.4776017719910565e-06, "loss": 0.3458, "step": 21713 }, { "epoch": 0.5937978560490046, "grad_norm": 1.2702559232711792, "learning_rate": 7.476744717190337e-06, "loss": 0.4904, "step": 21714 }, { "epoch": 0.5938252023627215, "grad_norm": 1.1365771293640137, "learning_rate": 7.475887682183849e-06, "loss": 0.3564, "step": 21715 }, { "epoch": 0.5938525486764384, "grad_norm": 1.401542067527771, "learning_rate": 7.475030666978309e-06, "loss": 0.3484, "step": 21716 }, { "epoch": 0.5938798949901554, "grad_norm": 1.2843315601348877, "learning_rate": 7.474173671580445e-06, "loss": 0.5015, "step": 21717 }, { "epoch": 0.5939072413038723, "grad_norm": 1.5431151390075684, "learning_rate": 7.473316695996975e-06, "loss": 0.4769, "step": 21718 }, { "epoch": 0.5939345876175891, "grad_norm": 1.1849573850631714, "learning_rate": 7.472459740234626e-06, "loss": 0.5043, "step": 21719 }, { "epoch": 0.593961933931306, "grad_norm": 1.2656888961791992, "learning_rate": 7.47160280430012e-06, "loss": 0.4893, "step": 21720 }, { "epoch": 0.593989280245023, "grad_norm": 1.3735949993133545, "learning_rate": 7.470745888200176e-06, "loss": 0.4639, "step": 21721 }, { "epoch": 0.5940166265587399, "grad_norm": 1.3265783786773682, "learning_rate": 7.4698889919415184e-06, "loss": 0.4956, "step": 21722 }, { "epoch": 0.5940439728724568, "grad_norm": 1.210752010345459, "learning_rate": 7.469032115530873e-06, "loss": 0.4686, "step": 21723 }, { "epoch": 0.5940713191861737, "grad_norm": 1.2922825813293457, "learning_rate": 7.468175258974957e-06, "loss": 0.5044, "step": 21724 }, { "epoch": 0.5940986654998907, "grad_norm": 1.2614343166351318, "learning_rate": 7.467318422280494e-06, "loss": 0.4937, "step": 21725 }, { "epoch": 0.5941260118136076, "grad_norm": 2.8147895336151123, "learning_rate": 7.466461605454203e-06, "loss": 0.4906, "step": 21726 }, { "epoch": 0.5941533581273244, "grad_norm": 1.1998125314712524, "learning_rate": 7.465604808502809e-06, "loss": 0.4639, "step": 21727 }, { "epoch": 0.5941807044410413, "grad_norm": 1.514519214630127, "learning_rate": 7.464748031433033e-06, "loss": 0.474, "step": 21728 }, { "epoch": 0.5942080507547582, "grad_norm": 1.2285521030426025, "learning_rate": 7.4638912742515935e-06, "loss": 0.483, "step": 21729 }, { "epoch": 0.5942353970684752, "grad_norm": 1.1028205156326294, "learning_rate": 7.463034536965212e-06, "loss": 0.4808, "step": 21730 }, { "epoch": 0.5942627433821921, "grad_norm": 1.2130991220474243, "learning_rate": 7.462177819580615e-06, "loss": 0.5056, "step": 21731 }, { "epoch": 0.594290089695909, "grad_norm": 1.3366223573684692, "learning_rate": 7.461321122104516e-06, "loss": 0.7539, "step": 21732 }, { "epoch": 0.5943174360096259, "grad_norm": 1.661741018295288, "learning_rate": 7.46046444454364e-06, "loss": 0.7516, "step": 21733 }, { "epoch": 0.5943447823233429, "grad_norm": 1.0492249727249146, "learning_rate": 7.459607786904704e-06, "loss": 0.423, "step": 21734 }, { "epoch": 0.5943721286370597, "grad_norm": 1.1480528116226196, "learning_rate": 7.45875114919443e-06, "loss": 0.5031, "step": 21735 }, { "epoch": 0.5943994749507766, "grad_norm": 1.1734102964401245, "learning_rate": 7.45789453141954e-06, "loss": 0.3475, "step": 21736 }, { "epoch": 0.5944268212644935, "grad_norm": 1.5999618768692017, "learning_rate": 7.457037933586748e-06, "loss": 0.3984, "step": 21737 }, { "epoch": 0.5944541675782105, "grad_norm": 1.2914236783981323, "learning_rate": 7.456181355702785e-06, "loss": 0.5197, "step": 21738 }, { "epoch": 0.5944815138919274, "grad_norm": 1.1552895307540894, "learning_rate": 7.455324797774357e-06, "loss": 0.5144, "step": 21739 }, { "epoch": 0.5945088602056443, "grad_norm": 1.158247470855713, "learning_rate": 7.4544682598081945e-06, "loss": 0.4768, "step": 21740 }, { "epoch": 0.5945362065193612, "grad_norm": 1.357839822769165, "learning_rate": 7.453611741811008e-06, "loss": 0.4974, "step": 21741 }, { "epoch": 0.594563552833078, "grad_norm": 1.1328305006027222, "learning_rate": 7.452755243789522e-06, "loss": 0.5063, "step": 21742 }, { "epoch": 0.594590899146795, "grad_norm": 1.2404836416244507, "learning_rate": 7.451898765750454e-06, "loss": 0.483, "step": 21743 }, { "epoch": 0.5946182454605119, "grad_norm": 1.1064337491989136, "learning_rate": 7.451042307700522e-06, "loss": 0.4657, "step": 21744 }, { "epoch": 0.5946455917742288, "grad_norm": 1.17026948928833, "learning_rate": 7.450185869646449e-06, "loss": 0.7743, "step": 21745 }, { "epoch": 0.5946729380879457, "grad_norm": 1.2085824012756348, "learning_rate": 7.449329451594948e-06, "loss": 0.4583, "step": 21746 }, { "epoch": 0.5947002844016627, "grad_norm": 1.2090113162994385, "learning_rate": 7.44847305355274e-06, "loss": 0.7628, "step": 21747 }, { "epoch": 0.5947276307153796, "grad_norm": 1.3222986459732056, "learning_rate": 7.447616675526544e-06, "loss": 0.5102, "step": 21748 }, { "epoch": 0.5947549770290965, "grad_norm": 1.144407033920288, "learning_rate": 7.446760317523074e-06, "loss": 0.4986, "step": 21749 }, { "epoch": 0.5947823233428133, "grad_norm": 1.3910746574401855, "learning_rate": 7.445903979549054e-06, "loss": 0.5214, "step": 21750 }, { "epoch": 0.5948096696565303, "grad_norm": 1.1904499530792236, "learning_rate": 7.445047661611195e-06, "loss": 0.4856, "step": 21751 }, { "epoch": 0.5948370159702472, "grad_norm": 1.1592597961425781, "learning_rate": 7.44419136371622e-06, "loss": 0.4726, "step": 21752 }, { "epoch": 0.5948643622839641, "grad_norm": 1.2098227739334106, "learning_rate": 7.443335085870846e-06, "loss": 0.4814, "step": 21753 }, { "epoch": 0.594891708597681, "grad_norm": 1.111253023147583, "learning_rate": 7.442478828081786e-06, "loss": 0.4705, "step": 21754 }, { "epoch": 0.594919054911398, "grad_norm": 1.5114643573760986, "learning_rate": 7.441622590355762e-06, "loss": 0.3878, "step": 21755 }, { "epoch": 0.5949464012251149, "grad_norm": 1.3262265920639038, "learning_rate": 7.440766372699487e-06, "loss": 0.3487, "step": 21756 }, { "epoch": 0.5949737475388318, "grad_norm": 1.271421194076538, "learning_rate": 7.43991017511968e-06, "loss": 0.7584, "step": 21757 }, { "epoch": 0.5950010938525486, "grad_norm": 1.3700472116470337, "learning_rate": 7.439053997623059e-06, "loss": 0.3464, "step": 21758 }, { "epoch": 0.5950284401662655, "grad_norm": 1.5926711559295654, "learning_rate": 7.4381978402163364e-06, "loss": 0.433, "step": 21759 }, { "epoch": 0.5950557864799825, "grad_norm": 1.3130327463150024, "learning_rate": 7.4373417029062336e-06, "loss": 0.4912, "step": 21760 }, { "epoch": 0.5950831327936994, "grad_norm": 1.1865426301956177, "learning_rate": 7.436485585699461e-06, "loss": 0.4646, "step": 21761 }, { "epoch": 0.5951104791074163, "grad_norm": 1.2245558500289917, "learning_rate": 7.435629488602738e-06, "loss": 0.4841, "step": 21762 }, { "epoch": 0.5951378254211332, "grad_norm": 1.242245078086853, "learning_rate": 7.434773411622782e-06, "loss": 0.5081, "step": 21763 }, { "epoch": 0.5951651717348502, "grad_norm": 1.517493724822998, "learning_rate": 7.433917354766305e-06, "loss": 0.502, "step": 21764 }, { "epoch": 0.5951925180485671, "grad_norm": 1.382216215133667, "learning_rate": 7.433061318040022e-06, "loss": 0.4584, "step": 21765 }, { "epoch": 0.5952198643622839, "grad_norm": 1.120792269706726, "learning_rate": 7.432205301450655e-06, "loss": 0.5049, "step": 21766 }, { "epoch": 0.5952472106760008, "grad_norm": 1.0814613103866577, "learning_rate": 7.431349305004911e-06, "loss": 0.4742, "step": 21767 }, { "epoch": 0.5952745569897178, "grad_norm": 1.184434175491333, "learning_rate": 7.430493328709513e-06, "loss": 0.4771, "step": 21768 }, { "epoch": 0.5953019033034347, "grad_norm": 1.2273730039596558, "learning_rate": 7.429637372571168e-06, "loss": 0.491, "step": 21769 }, { "epoch": 0.5953292496171516, "grad_norm": 1.8078978061676025, "learning_rate": 7.428781436596595e-06, "loss": 0.3713, "step": 21770 }, { "epoch": 0.5953565959308685, "grad_norm": 1.0828826427459717, "learning_rate": 7.42792552079251e-06, "loss": 0.4759, "step": 21771 }, { "epoch": 0.5953839422445855, "grad_norm": 1.1222425699234009, "learning_rate": 7.4270696251656236e-06, "loss": 0.3519, "step": 21772 }, { "epoch": 0.5954112885583024, "grad_norm": 1.1061041355133057, "learning_rate": 7.426213749722656e-06, "loss": 0.4807, "step": 21773 }, { "epoch": 0.5954386348720192, "grad_norm": 1.2150992155075073, "learning_rate": 7.425357894470313e-06, "loss": 0.489, "step": 21774 }, { "epoch": 0.5954659811857361, "grad_norm": 1.1763540506362915, "learning_rate": 7.424502059415315e-06, "loss": 0.4838, "step": 21775 }, { "epoch": 0.595493327499453, "grad_norm": 1.1845530271530151, "learning_rate": 7.423646244564371e-06, "loss": 0.4622, "step": 21776 }, { "epoch": 0.59552067381317, "grad_norm": 1.4351975917816162, "learning_rate": 7.4227904499242e-06, "loss": 0.4961, "step": 21777 }, { "epoch": 0.5955480201268869, "grad_norm": 1.0574744939804077, "learning_rate": 7.42193467550151e-06, "loss": 0.4732, "step": 21778 }, { "epoch": 0.5955753664406038, "grad_norm": 1.4315603971481323, "learning_rate": 7.421078921303017e-06, "loss": 0.4874, "step": 21779 }, { "epoch": 0.5956027127543208, "grad_norm": 1.32333505153656, "learning_rate": 7.420223187335436e-06, "loss": 0.483, "step": 21780 }, { "epoch": 0.5956300590680377, "grad_norm": 1.3180292844772339, "learning_rate": 7.419367473605477e-06, "loss": 0.4998, "step": 21781 }, { "epoch": 0.5956574053817545, "grad_norm": 1.542122721672058, "learning_rate": 7.4185117801198545e-06, "loss": 0.4775, "step": 21782 }, { "epoch": 0.5956847516954714, "grad_norm": 1.1775567531585693, "learning_rate": 7.417656106885278e-06, "loss": 0.5068, "step": 21783 }, { "epoch": 0.5957120980091883, "grad_norm": 1.1818095445632935, "learning_rate": 7.4168004539084646e-06, "loss": 0.4762, "step": 21784 }, { "epoch": 0.5957394443229053, "grad_norm": 1.204642415046692, "learning_rate": 7.415944821196126e-06, "loss": 0.3607, "step": 21785 }, { "epoch": 0.5957667906366222, "grad_norm": 1.1217360496520996, "learning_rate": 7.415089208754971e-06, "loss": 0.4842, "step": 21786 }, { "epoch": 0.5957941369503391, "grad_norm": 1.1263279914855957, "learning_rate": 7.414233616591713e-06, "loss": 0.4656, "step": 21787 }, { "epoch": 0.595821483264056, "grad_norm": 1.2423758506774902, "learning_rate": 7.4133780447130675e-06, "loss": 0.4698, "step": 21788 }, { "epoch": 0.595848829577773, "grad_norm": 1.5313059091567993, "learning_rate": 7.412522493125741e-06, "loss": 0.3824, "step": 21789 }, { "epoch": 0.5958761758914898, "grad_norm": 1.2575252056121826, "learning_rate": 7.41166696183645e-06, "loss": 0.4577, "step": 21790 }, { "epoch": 0.5959035222052067, "grad_norm": 1.4415839910507202, "learning_rate": 7.4108114508519e-06, "loss": 0.4911, "step": 21791 }, { "epoch": 0.5959308685189236, "grad_norm": 1.4712811708450317, "learning_rate": 7.409955960178807e-06, "loss": 0.3449, "step": 21792 }, { "epoch": 0.5959582148326406, "grad_norm": 1.286616325378418, "learning_rate": 7.409100489823881e-06, "loss": 0.4646, "step": 21793 }, { "epoch": 0.5959855611463575, "grad_norm": 1.5200588703155518, "learning_rate": 7.408245039793831e-06, "loss": 0.5094, "step": 21794 }, { "epoch": 0.5960129074600744, "grad_norm": 1.1119210720062256, "learning_rate": 7.40738961009537e-06, "loss": 0.4764, "step": 21795 }, { "epoch": 0.5960402537737913, "grad_norm": 1.8380160331726074, "learning_rate": 7.406534200735209e-06, "loss": 0.3593, "step": 21796 }, { "epoch": 0.5960676000875083, "grad_norm": 1.2125831842422485, "learning_rate": 7.405678811720056e-06, "loss": 0.4741, "step": 21797 }, { "epoch": 0.5960949464012251, "grad_norm": 1.567299485206604, "learning_rate": 7.404823443056624e-06, "loss": 0.3432, "step": 21798 }, { "epoch": 0.596122292714942, "grad_norm": 1.0819706916809082, "learning_rate": 7.40396809475162e-06, "loss": 0.3608, "step": 21799 }, { "epoch": 0.5961496390286589, "grad_norm": 1.3356850147247314, "learning_rate": 7.403112766811756e-06, "loss": 0.4956, "step": 21800 }, { "epoch": 0.5961769853423758, "grad_norm": 1.223006248474121, "learning_rate": 7.402257459243743e-06, "loss": 0.4899, "step": 21801 }, { "epoch": 0.5962043316560928, "grad_norm": 1.5202275514602661, "learning_rate": 7.401402172054289e-06, "loss": 0.4188, "step": 21802 }, { "epoch": 0.5962316779698097, "grad_norm": 1.2134231328964233, "learning_rate": 7.400546905250103e-06, "loss": 0.4815, "step": 21803 }, { "epoch": 0.5962590242835266, "grad_norm": 1.2879760265350342, "learning_rate": 7.399691658837894e-06, "loss": 0.4722, "step": 21804 }, { "epoch": 0.5962863705972435, "grad_norm": 1.3466925621032715, "learning_rate": 7.3988364328243725e-06, "loss": 0.4894, "step": 21805 }, { "epoch": 0.5963137169109604, "grad_norm": 1.2182836532592773, "learning_rate": 7.397981227216249e-06, "loss": 0.4732, "step": 21806 }, { "epoch": 0.5963410632246773, "grad_norm": 1.0875881910324097, "learning_rate": 7.397126042020228e-06, "loss": 0.4678, "step": 21807 }, { "epoch": 0.5963684095383942, "grad_norm": 1.9532482624053955, "learning_rate": 7.3962708772430256e-06, "loss": 0.3889, "step": 21808 }, { "epoch": 0.5963957558521111, "grad_norm": 1.2518892288208008, "learning_rate": 7.3954157328913415e-06, "loss": 0.4756, "step": 21809 }, { "epoch": 0.5964231021658281, "grad_norm": 1.2394869327545166, "learning_rate": 7.394560608971889e-06, "loss": 0.5054, "step": 21810 }, { "epoch": 0.596450448479545, "grad_norm": 1.5004531145095825, "learning_rate": 7.393705505491373e-06, "loss": 0.3794, "step": 21811 }, { "epoch": 0.5964777947932619, "grad_norm": 1.4481711387634277, "learning_rate": 7.3928504224565075e-06, "loss": 0.3597, "step": 21812 }, { "epoch": 0.5965051411069788, "grad_norm": 1.5222865343093872, "learning_rate": 7.3919953598739935e-06, "loss": 0.3637, "step": 21813 }, { "epoch": 0.5965324874206956, "grad_norm": 1.134932518005371, "learning_rate": 7.391140317750542e-06, "loss": 0.4538, "step": 21814 }, { "epoch": 0.5965598337344126, "grad_norm": 1.2377318143844604, "learning_rate": 7.390285296092863e-06, "loss": 0.4781, "step": 21815 }, { "epoch": 0.5965871800481295, "grad_norm": 1.2100040912628174, "learning_rate": 7.389430294907659e-06, "loss": 0.5136, "step": 21816 }, { "epoch": 0.5966145263618464, "grad_norm": 1.3810627460479736, "learning_rate": 7.38857531420164e-06, "loss": 0.4804, "step": 21817 }, { "epoch": 0.5966418726755633, "grad_norm": 1.2373355627059937, "learning_rate": 7.387720353981515e-06, "loss": 0.511, "step": 21818 }, { "epoch": 0.5966692189892803, "grad_norm": 1.259104609489441, "learning_rate": 7.386865414253985e-06, "loss": 0.3506, "step": 21819 }, { "epoch": 0.5966965653029972, "grad_norm": 4.561131954193115, "learning_rate": 7.386010495025764e-06, "loss": 0.495, "step": 21820 }, { "epoch": 0.5967239116167141, "grad_norm": 1.2793900966644287, "learning_rate": 7.385155596303552e-06, "loss": 0.7433, "step": 21821 }, { "epoch": 0.5967512579304309, "grad_norm": 1.2583640813827515, "learning_rate": 7.38430071809406e-06, "loss": 0.4757, "step": 21822 }, { "epoch": 0.5967786042441479, "grad_norm": 1.4798320531845093, "learning_rate": 7.3834458604039934e-06, "loss": 0.3478, "step": 21823 }, { "epoch": 0.5968059505578648, "grad_norm": 1.1855437755584717, "learning_rate": 7.382591023240056e-06, "loss": 0.4848, "step": 21824 }, { "epoch": 0.5968332968715817, "grad_norm": 1.6519187688827515, "learning_rate": 7.3817362066089585e-06, "loss": 0.4819, "step": 21825 }, { "epoch": 0.5968606431852986, "grad_norm": 1.2038136720657349, "learning_rate": 7.3808814105174e-06, "loss": 0.5009, "step": 21826 }, { "epoch": 0.5968879894990156, "grad_norm": 1.3293615579605103, "learning_rate": 7.380026634972091e-06, "loss": 0.4892, "step": 21827 }, { "epoch": 0.5969153358127325, "grad_norm": 1.2051012516021729, "learning_rate": 7.3791718799797365e-06, "loss": 0.4725, "step": 21828 }, { "epoch": 0.5969426821264494, "grad_norm": 1.338653802871704, "learning_rate": 7.37831714554704e-06, "loss": 0.3501, "step": 21829 }, { "epoch": 0.5969700284401662, "grad_norm": 1.1315598487854004, "learning_rate": 7.377462431680706e-06, "loss": 0.452, "step": 21830 }, { "epoch": 0.5969973747538831, "grad_norm": 1.2305866479873657, "learning_rate": 7.376607738387445e-06, "loss": 0.4599, "step": 21831 }, { "epoch": 0.5970247210676001, "grad_norm": 1.2822610139846802, "learning_rate": 7.375753065673956e-06, "loss": 0.7349, "step": 21832 }, { "epoch": 0.597052067381317, "grad_norm": 1.4672551155090332, "learning_rate": 7.374898413546947e-06, "loss": 0.3479, "step": 21833 }, { "epoch": 0.5970794136950339, "grad_norm": 1.1876121759414673, "learning_rate": 7.37404378201312e-06, "loss": 0.4717, "step": 21834 }, { "epoch": 0.5971067600087508, "grad_norm": 1.2488354444503784, "learning_rate": 7.37318917107918e-06, "loss": 0.4839, "step": 21835 }, { "epoch": 0.5971341063224678, "grad_norm": 1.5608980655670166, "learning_rate": 7.372334580751833e-06, "loss": 0.4954, "step": 21836 }, { "epoch": 0.5971614526361847, "grad_norm": 1.2671492099761963, "learning_rate": 7.37148001103778e-06, "loss": 0.4703, "step": 21837 }, { "epoch": 0.5971887989499015, "grad_norm": 3.092146635055542, "learning_rate": 7.370625461943728e-06, "loss": 0.7489, "step": 21838 }, { "epoch": 0.5972161452636184, "grad_norm": 1.1661125421524048, "learning_rate": 7.369770933476379e-06, "loss": 0.5257, "step": 21839 }, { "epoch": 0.5972434915773354, "grad_norm": 1.2477452754974365, "learning_rate": 7.368916425642436e-06, "loss": 0.4882, "step": 21840 }, { "epoch": 0.5972708378910523, "grad_norm": 1.3639189004898071, "learning_rate": 7.368061938448605e-06, "loss": 0.4781, "step": 21841 }, { "epoch": 0.5972981842047692, "grad_norm": 1.2304139137268066, "learning_rate": 7.367207471901587e-06, "loss": 0.4717, "step": 21842 }, { "epoch": 0.5973255305184861, "grad_norm": 1.189005732536316, "learning_rate": 7.366353026008088e-06, "loss": 0.4763, "step": 21843 }, { "epoch": 0.5973528768322031, "grad_norm": 1.163340449333191, "learning_rate": 7.365498600774804e-06, "loss": 0.5188, "step": 21844 }, { "epoch": 0.5973802231459199, "grad_norm": 1.294501543045044, "learning_rate": 7.364644196208445e-06, "loss": 0.4896, "step": 21845 }, { "epoch": 0.5974075694596368, "grad_norm": 1.1607367992401123, "learning_rate": 7.363789812315707e-06, "loss": 0.5001, "step": 21846 }, { "epoch": 0.5974349157733537, "grad_norm": 2.3277089595794678, "learning_rate": 7.3629354491033e-06, "loss": 0.5117, "step": 21847 }, { "epoch": 0.5974622620870707, "grad_norm": 1.122920274734497, "learning_rate": 7.362081106577919e-06, "loss": 0.4474, "step": 21848 }, { "epoch": 0.5974896084007876, "grad_norm": 1.3397573232650757, "learning_rate": 7.36122678474627e-06, "loss": 0.4477, "step": 21849 }, { "epoch": 0.5975169547145045, "grad_norm": 1.2385027408599854, "learning_rate": 7.360372483615056e-06, "loss": 0.4637, "step": 21850 }, { "epoch": 0.5975443010282214, "grad_norm": 1.435800552368164, "learning_rate": 7.359518203190975e-06, "loss": 0.465, "step": 21851 }, { "epoch": 0.5975716473419384, "grad_norm": 1.1347705125808716, "learning_rate": 7.358663943480731e-06, "loss": 0.467, "step": 21852 }, { "epoch": 0.5975989936556552, "grad_norm": 1.5322507619857788, "learning_rate": 7.3578097044910256e-06, "loss": 0.7991, "step": 21853 }, { "epoch": 0.5976263399693721, "grad_norm": 1.0821608304977417, "learning_rate": 7.356955486228559e-06, "loss": 0.484, "step": 21854 }, { "epoch": 0.597653686283089, "grad_norm": 8.21945571899414, "learning_rate": 7.356101288700035e-06, "loss": 0.3471, "step": 21855 }, { "epoch": 0.5976810325968059, "grad_norm": 1.2109215259552002, "learning_rate": 7.355247111912149e-06, "loss": 0.4755, "step": 21856 }, { "epoch": 0.5977083789105229, "grad_norm": 1.1110864877700806, "learning_rate": 7.354392955871606e-06, "loss": 0.4555, "step": 21857 }, { "epoch": 0.5977357252242398, "grad_norm": 1.3075860738754272, "learning_rate": 7.353538820585106e-06, "loss": 0.4934, "step": 21858 }, { "epoch": 0.5977630715379567, "grad_norm": 1.4102078676223755, "learning_rate": 7.35268470605935e-06, "loss": 0.4958, "step": 21859 }, { "epoch": 0.5977904178516736, "grad_norm": 1.6652361154556274, "learning_rate": 7.3518306123010365e-06, "loss": 0.2986, "step": 21860 }, { "epoch": 0.5978177641653905, "grad_norm": 1.3901656866073608, "learning_rate": 7.3509765393168675e-06, "loss": 0.4948, "step": 21861 }, { "epoch": 0.5978451104791074, "grad_norm": 1.1862126588821411, "learning_rate": 7.3501224871135415e-06, "loss": 0.4788, "step": 21862 }, { "epoch": 0.5978724567928243, "grad_norm": 1.2297441959381104, "learning_rate": 7.34926845569776e-06, "loss": 0.4616, "step": 21863 }, { "epoch": 0.5978998031065412, "grad_norm": 1.4143248796463013, "learning_rate": 7.34841444507622e-06, "loss": 0.4711, "step": 21864 }, { "epoch": 0.5979271494202582, "grad_norm": 1.1523022651672363, "learning_rate": 7.347560455255621e-06, "loss": 0.4984, "step": 21865 }, { "epoch": 0.5979544957339751, "grad_norm": 1.3136475086212158, "learning_rate": 7.346706486242668e-06, "loss": 0.4688, "step": 21866 }, { "epoch": 0.597981842047692, "grad_norm": 1.3372931480407715, "learning_rate": 7.3458525380440515e-06, "loss": 0.4817, "step": 21867 }, { "epoch": 0.5980091883614089, "grad_norm": 1.2601152658462524, "learning_rate": 7.344998610666478e-06, "loss": 0.4949, "step": 21868 }, { "epoch": 0.5980365346751257, "grad_norm": 2.4086496829986572, "learning_rate": 7.3441447041166424e-06, "loss": 0.4955, "step": 21869 }, { "epoch": 0.5980638809888427, "grad_norm": 1.2597143650054932, "learning_rate": 7.343290818401243e-06, "loss": 0.4975, "step": 21870 }, { "epoch": 0.5980912273025596, "grad_norm": 1.286152720451355, "learning_rate": 7.3424369535269814e-06, "loss": 0.4731, "step": 21871 }, { "epoch": 0.5981185736162765, "grad_norm": 1.4530526399612427, "learning_rate": 7.341583109500552e-06, "loss": 0.4927, "step": 21872 }, { "epoch": 0.5981459199299934, "grad_norm": 1.6135883331298828, "learning_rate": 7.340729286328655e-06, "loss": 0.4973, "step": 21873 }, { "epoch": 0.5981732662437104, "grad_norm": 1.4941521883010864, "learning_rate": 7.339875484017991e-06, "loss": 0.7553, "step": 21874 }, { "epoch": 0.5982006125574273, "grad_norm": 1.708512783050537, "learning_rate": 7.339021702575252e-06, "loss": 0.3585, "step": 21875 }, { "epoch": 0.5982279588711442, "grad_norm": 1.7191764116287231, "learning_rate": 7.338167942007142e-06, "loss": 0.4936, "step": 21876 }, { "epoch": 0.598255305184861, "grad_norm": 1.5170669555664062, "learning_rate": 7.337314202320353e-06, "loss": 0.3578, "step": 21877 }, { "epoch": 0.598282651498578, "grad_norm": 1.4883652925491333, "learning_rate": 7.336460483521588e-06, "loss": 0.7877, "step": 21878 }, { "epoch": 0.5983099978122949, "grad_norm": 1.273573398590088, "learning_rate": 7.335606785617539e-06, "loss": 0.4825, "step": 21879 }, { "epoch": 0.5983373441260118, "grad_norm": 1.6398545503616333, "learning_rate": 7.334753108614905e-06, "loss": 0.4836, "step": 21880 }, { "epoch": 0.5983646904397287, "grad_norm": 1.5507982969284058, "learning_rate": 7.333899452520381e-06, "loss": 0.5033, "step": 21881 }, { "epoch": 0.5983920367534457, "grad_norm": 1.4060810804367065, "learning_rate": 7.333045817340666e-06, "loss": 0.3831, "step": 21882 }, { "epoch": 0.5984193830671626, "grad_norm": 1.350699782371521, "learning_rate": 7.3321922030824575e-06, "loss": 0.7176, "step": 21883 }, { "epoch": 0.5984467293808795, "grad_norm": 1.2535779476165771, "learning_rate": 7.33133860975245e-06, "loss": 0.7331, "step": 21884 }, { "epoch": 0.5984740756945963, "grad_norm": 1.4307326078414917, "learning_rate": 7.33048503735734e-06, "loss": 0.473, "step": 21885 }, { "epoch": 0.5985014220083132, "grad_norm": 1.1470470428466797, "learning_rate": 7.329631485903823e-06, "loss": 0.4724, "step": 21886 }, { "epoch": 0.5985287683220302, "grad_norm": 1.2032402753829956, "learning_rate": 7.328777955398594e-06, "loss": 0.5107, "step": 21887 }, { "epoch": 0.5985561146357471, "grad_norm": 1.2816165685653687, "learning_rate": 7.327924445848354e-06, "loss": 0.4653, "step": 21888 }, { "epoch": 0.598583460949464, "grad_norm": 1.5260231494903564, "learning_rate": 7.327070957259792e-06, "loss": 0.3646, "step": 21889 }, { "epoch": 0.5986108072631809, "grad_norm": 1.2470738887786865, "learning_rate": 7.326217489639608e-06, "loss": 0.4867, "step": 21890 }, { "epoch": 0.5986381535768979, "grad_norm": 1.2611953020095825, "learning_rate": 7.325364042994493e-06, "loss": 0.4655, "step": 21891 }, { "epoch": 0.5986654998906148, "grad_norm": 1.2020758390426636, "learning_rate": 7.324510617331144e-06, "loss": 0.4912, "step": 21892 }, { "epoch": 0.5986928462043316, "grad_norm": 1.4089679718017578, "learning_rate": 7.323657212656259e-06, "loss": 0.3708, "step": 21893 }, { "epoch": 0.5987201925180485, "grad_norm": 1.1309385299682617, "learning_rate": 7.322803828976528e-06, "loss": 0.4506, "step": 21894 }, { "epoch": 0.5987475388317655, "grad_norm": 1.416799545288086, "learning_rate": 7.321950466298649e-06, "loss": 0.4826, "step": 21895 }, { "epoch": 0.5987748851454824, "grad_norm": 1.1663504838943481, "learning_rate": 7.3210971246293135e-06, "loss": 0.468, "step": 21896 }, { "epoch": 0.5988022314591993, "grad_norm": 1.2375473976135254, "learning_rate": 7.320243803975217e-06, "loss": 0.7408, "step": 21897 }, { "epoch": 0.5988295777729162, "grad_norm": 1.4234447479248047, "learning_rate": 7.319390504343056e-06, "loss": 0.4854, "step": 21898 }, { "epoch": 0.5988569240866332, "grad_norm": 1.1758272647857666, "learning_rate": 7.31853722573952e-06, "loss": 0.4376, "step": 21899 }, { "epoch": 0.5988842704003501, "grad_norm": 1.9765052795410156, "learning_rate": 7.317683968171305e-06, "loss": 0.3729, "step": 21900 }, { "epoch": 0.5989116167140669, "grad_norm": 1.279615044593811, "learning_rate": 7.316830731645106e-06, "loss": 0.4268, "step": 21901 }, { "epoch": 0.5989389630277838, "grad_norm": 1.225282907485962, "learning_rate": 7.315977516167613e-06, "loss": 0.5058, "step": 21902 }, { "epoch": 0.5989663093415007, "grad_norm": 1.1521166563034058, "learning_rate": 7.315124321745521e-06, "loss": 0.4796, "step": 21903 }, { "epoch": 0.5989936556552177, "grad_norm": 1.3386439085006714, "learning_rate": 7.314271148385526e-06, "loss": 0.4745, "step": 21904 }, { "epoch": 0.5990210019689346, "grad_norm": 1.1894432306289673, "learning_rate": 7.313417996094314e-06, "loss": 0.4852, "step": 21905 }, { "epoch": 0.5990483482826515, "grad_norm": 1.7341411113739014, "learning_rate": 7.312564864878584e-06, "loss": 0.3757, "step": 21906 }, { "epoch": 0.5990756945963684, "grad_norm": 1.2476478815078735, "learning_rate": 7.3117117547450255e-06, "loss": 0.5015, "step": 21907 }, { "epoch": 0.5991030409100854, "grad_norm": 1.4323867559432983, "learning_rate": 7.310858665700331e-06, "loss": 0.7768, "step": 21908 }, { "epoch": 0.5991303872238022, "grad_norm": 1.398560643196106, "learning_rate": 7.3100055977511964e-06, "loss": 0.4452, "step": 21909 }, { "epoch": 0.5991577335375191, "grad_norm": 1.4397578239440918, "learning_rate": 7.309152550904308e-06, "loss": 0.4712, "step": 21910 }, { "epoch": 0.599185079851236, "grad_norm": 1.1660524606704712, "learning_rate": 7.308299525166362e-06, "loss": 0.7525, "step": 21911 }, { "epoch": 0.599212426164953, "grad_norm": 1.4773958921432495, "learning_rate": 7.307446520544048e-06, "loss": 0.5067, "step": 21912 }, { "epoch": 0.5992397724786699, "grad_norm": 1.249650239944458, "learning_rate": 7.306593537044062e-06, "loss": 0.4479, "step": 21913 }, { "epoch": 0.5992671187923868, "grad_norm": 1.3082542419433594, "learning_rate": 7.305740574673088e-06, "loss": 0.4885, "step": 21914 }, { "epoch": 0.5992944651061037, "grad_norm": 1.208650827407837, "learning_rate": 7.3048876334378225e-06, "loss": 0.4486, "step": 21915 }, { "epoch": 0.5993218114198207, "grad_norm": 1.6667400598526, "learning_rate": 7.304034713344954e-06, "loss": 0.4736, "step": 21916 }, { "epoch": 0.5993491577335375, "grad_norm": 1.1569942235946655, "learning_rate": 7.303181814401173e-06, "loss": 0.5186, "step": 21917 }, { "epoch": 0.5993765040472544, "grad_norm": 1.3952082395553589, "learning_rate": 7.302328936613174e-06, "loss": 0.4707, "step": 21918 }, { "epoch": 0.5994038503609713, "grad_norm": 1.3817428350448608, "learning_rate": 7.301476079987644e-06, "loss": 0.4484, "step": 21919 }, { "epoch": 0.5994311966746882, "grad_norm": 1.5822360515594482, "learning_rate": 7.300623244531276e-06, "loss": 0.3615, "step": 21920 }, { "epoch": 0.5994585429884052, "grad_norm": 1.4313453435897827, "learning_rate": 7.299770430250757e-06, "loss": 0.4901, "step": 21921 }, { "epoch": 0.5994858893021221, "grad_norm": 1.2345632314682007, "learning_rate": 7.29891763715278e-06, "loss": 0.4919, "step": 21922 }, { "epoch": 0.599513235615839, "grad_norm": 1.2601211071014404, "learning_rate": 7.298064865244035e-06, "loss": 0.5273, "step": 21923 }, { "epoch": 0.599540581929556, "grad_norm": 1.504263997077942, "learning_rate": 7.297212114531209e-06, "loss": 0.3987, "step": 21924 }, { "epoch": 0.5995679282432728, "grad_norm": 1.7282183170318604, "learning_rate": 7.296359385020993e-06, "loss": 0.5172, "step": 21925 }, { "epoch": 0.5995952745569897, "grad_norm": 1.3891922235488892, "learning_rate": 7.295506676720079e-06, "loss": 0.4178, "step": 21926 }, { "epoch": 0.5996226208707066, "grad_norm": 1.2791881561279297, "learning_rate": 7.294653989635152e-06, "loss": 0.4904, "step": 21927 }, { "epoch": 0.5996499671844235, "grad_norm": 1.6755585670471191, "learning_rate": 7.293801323772907e-06, "loss": 0.3632, "step": 21928 }, { "epoch": 0.5996773134981405, "grad_norm": 2.33907151222229, "learning_rate": 7.292948679140024e-06, "loss": 0.7354, "step": 21929 }, { "epoch": 0.5997046598118574, "grad_norm": 1.1789497137069702, "learning_rate": 7.292096055743198e-06, "loss": 0.7526, "step": 21930 }, { "epoch": 0.5997320061255743, "grad_norm": 1.5698940753936768, "learning_rate": 7.291243453589119e-06, "loss": 0.5031, "step": 21931 }, { "epoch": 0.5997593524392912, "grad_norm": 1.2025585174560547, "learning_rate": 7.29039087268447e-06, "loss": 0.4858, "step": 21932 }, { "epoch": 0.599786698753008, "grad_norm": 1.242129921913147, "learning_rate": 7.2895383130359445e-06, "loss": 0.4825, "step": 21933 }, { "epoch": 0.599814045066725, "grad_norm": 1.3287748098373413, "learning_rate": 7.288685774650226e-06, "loss": 0.4527, "step": 21934 }, { "epoch": 0.5998413913804419, "grad_norm": 1.2944958209991455, "learning_rate": 7.287833257534004e-06, "loss": 0.5147, "step": 21935 }, { "epoch": 0.5998687376941588, "grad_norm": 1.48931086063385, "learning_rate": 7.286980761693969e-06, "loss": 0.4843, "step": 21936 }, { "epoch": 0.5998960840078758, "grad_norm": 2.2119874954223633, "learning_rate": 7.2861282871368045e-06, "loss": 0.494, "step": 21937 }, { "epoch": 0.5999234303215927, "grad_norm": 1.1936486959457397, "learning_rate": 7.285275833869201e-06, "loss": 0.5022, "step": 21938 }, { "epoch": 0.5999507766353096, "grad_norm": 1.1895606517791748, "learning_rate": 7.284423401897845e-06, "loss": 0.5153, "step": 21939 }, { "epoch": 0.5999781229490265, "grad_norm": 1.304519534111023, "learning_rate": 7.283570991229423e-06, "loss": 0.4827, "step": 21940 }, { "epoch": 0.6000054692627433, "grad_norm": 1.2022424936294556, "learning_rate": 7.282718601870622e-06, "loss": 0.4796, "step": 21941 }, { "epoch": 0.6000328155764603, "grad_norm": 2.3253660202026367, "learning_rate": 7.2818662338281275e-06, "loss": 0.7612, "step": 21942 }, { "epoch": 0.6000601618901772, "grad_norm": 1.1578426361083984, "learning_rate": 7.281013887108628e-06, "loss": 0.4895, "step": 21943 }, { "epoch": 0.6000875082038941, "grad_norm": 1.1149489879608154, "learning_rate": 7.280161561718812e-06, "loss": 0.5103, "step": 21944 }, { "epoch": 0.600114854517611, "grad_norm": 1.4353240728378296, "learning_rate": 7.279309257665359e-06, "loss": 0.7508, "step": 21945 }, { "epoch": 0.600142200831328, "grad_norm": 1.1951202154159546, "learning_rate": 7.2784569749549626e-06, "loss": 0.4526, "step": 21946 }, { "epoch": 0.6001695471450449, "grad_norm": 1.497329831123352, "learning_rate": 7.277604713594304e-06, "loss": 0.3691, "step": 21947 }, { "epoch": 0.6001968934587617, "grad_norm": 1.3342312574386597, "learning_rate": 7.27675247359007e-06, "loss": 0.5067, "step": 21948 }, { "epoch": 0.6002242397724786, "grad_norm": 1.1655330657958984, "learning_rate": 7.275900254948949e-06, "loss": 0.4932, "step": 21949 }, { "epoch": 0.6002515860861956, "grad_norm": 1.112231731414795, "learning_rate": 7.275048057677623e-06, "loss": 0.4588, "step": 21950 }, { "epoch": 0.6002789323999125, "grad_norm": 1.405924677848816, "learning_rate": 7.274195881782775e-06, "loss": 0.5081, "step": 21951 }, { "epoch": 0.6003062787136294, "grad_norm": 1.6448826789855957, "learning_rate": 7.273343727271095e-06, "loss": 0.4159, "step": 21952 }, { "epoch": 0.6003336250273463, "grad_norm": 1.3022300004959106, "learning_rate": 7.272491594149267e-06, "loss": 0.4993, "step": 21953 }, { "epoch": 0.6003609713410633, "grad_norm": 1.515600323677063, "learning_rate": 7.271639482423974e-06, "loss": 0.3678, "step": 21954 }, { "epoch": 0.6003883176547802, "grad_norm": 1.629089593887329, "learning_rate": 7.270787392101901e-06, "loss": 0.3695, "step": 21955 }, { "epoch": 0.600415663968497, "grad_norm": 1.216654658317566, "learning_rate": 7.269935323189733e-06, "loss": 0.364, "step": 21956 }, { "epoch": 0.6004430102822139, "grad_norm": 1.2266806364059448, "learning_rate": 7.269083275694153e-06, "loss": 0.4958, "step": 21957 }, { "epoch": 0.6004703565959308, "grad_norm": 1.2976495027542114, "learning_rate": 7.268231249621849e-06, "loss": 0.764, "step": 21958 }, { "epoch": 0.6004977029096478, "grad_norm": 1.2615152597427368, "learning_rate": 7.2673792449795e-06, "loss": 0.4772, "step": 21959 }, { "epoch": 0.6005250492233647, "grad_norm": 1.5130059719085693, "learning_rate": 7.26652726177379e-06, "loss": 0.4637, "step": 21960 }, { "epoch": 0.6005523955370816, "grad_norm": 1.0828604698181152, "learning_rate": 7.265675300011409e-06, "loss": 0.4511, "step": 21961 }, { "epoch": 0.6005797418507985, "grad_norm": 1.1928890943527222, "learning_rate": 7.264823359699032e-06, "loss": 0.4502, "step": 21962 }, { "epoch": 0.6006070881645155, "grad_norm": 1.332204818725586, "learning_rate": 7.263971440843349e-06, "loss": 0.5075, "step": 21963 }, { "epoch": 0.6006344344782323, "grad_norm": 1.1632143259048462, "learning_rate": 7.263119543451037e-06, "loss": 0.4664, "step": 21964 }, { "epoch": 0.6006617807919492, "grad_norm": 1.1693989038467407, "learning_rate": 7.262267667528783e-06, "loss": 0.3499, "step": 21965 }, { "epoch": 0.6006891271056661, "grad_norm": 1.0920215845108032, "learning_rate": 7.26141581308327e-06, "loss": 0.4812, "step": 21966 }, { "epoch": 0.6007164734193831, "grad_norm": 1.4115025997161865, "learning_rate": 7.260563980121178e-06, "loss": 0.4886, "step": 21967 }, { "epoch": 0.6007438197331, "grad_norm": 1.1511892080307007, "learning_rate": 7.259712168649189e-06, "loss": 0.4805, "step": 21968 }, { "epoch": 0.6007711660468169, "grad_norm": 1.5218791961669922, "learning_rate": 7.258860378673991e-06, "loss": 0.3502, "step": 21969 }, { "epoch": 0.6007985123605338, "grad_norm": 1.2934800386428833, "learning_rate": 7.258008610202258e-06, "loss": 0.4809, "step": 21970 }, { "epoch": 0.6008258586742508, "grad_norm": 1.0391273498535156, "learning_rate": 7.25715686324068e-06, "loss": 0.3368, "step": 21971 }, { "epoch": 0.6008532049879676, "grad_norm": 1.3948496580123901, "learning_rate": 7.25630513779593e-06, "loss": 0.518, "step": 21972 }, { "epoch": 0.6008805513016845, "grad_norm": 1.152146816253662, "learning_rate": 7.2554534338746955e-06, "loss": 0.4965, "step": 21973 }, { "epoch": 0.6009078976154014, "grad_norm": 1.1388877630233765, "learning_rate": 7.254601751483658e-06, "loss": 0.5003, "step": 21974 }, { "epoch": 0.6009352439291183, "grad_norm": 1.3384755849838257, "learning_rate": 7.253750090629495e-06, "loss": 0.3519, "step": 21975 }, { "epoch": 0.6009625902428353, "grad_norm": 1.6047465801239014, "learning_rate": 7.252898451318893e-06, "loss": 0.4707, "step": 21976 }, { "epoch": 0.6009899365565522, "grad_norm": 1.5154259204864502, "learning_rate": 7.252046833558526e-06, "loss": 0.468, "step": 21977 }, { "epoch": 0.6010172828702691, "grad_norm": 1.246225118637085, "learning_rate": 7.25119523735508e-06, "loss": 0.4934, "step": 21978 }, { "epoch": 0.601044629183986, "grad_norm": 1.468937873840332, "learning_rate": 7.250343662715234e-06, "loss": 0.6804, "step": 21979 }, { "epoch": 0.6010719754977029, "grad_norm": 1.6156290769577026, "learning_rate": 7.249492109645666e-06, "loss": 0.4075, "step": 21980 }, { "epoch": 0.6010993218114198, "grad_norm": 1.287510871887207, "learning_rate": 7.248640578153059e-06, "loss": 0.5064, "step": 21981 }, { "epoch": 0.6011266681251367, "grad_norm": 1.2008168697357178, "learning_rate": 7.2477890682440954e-06, "loss": 0.4812, "step": 21982 }, { "epoch": 0.6011540144388536, "grad_norm": 1.6675106287002563, "learning_rate": 7.246937579925448e-06, "loss": 0.4757, "step": 21983 }, { "epoch": 0.6011813607525706, "grad_norm": 1.3887040615081787, "learning_rate": 7.246086113203806e-06, "loss": 0.4804, "step": 21984 }, { "epoch": 0.6012087070662875, "grad_norm": 1.085916519165039, "learning_rate": 7.245234668085841e-06, "loss": 0.4622, "step": 21985 }, { "epoch": 0.6012360533800044, "grad_norm": 1.430762767791748, "learning_rate": 7.244383244578232e-06, "loss": 0.4516, "step": 21986 }, { "epoch": 0.6012633996937213, "grad_norm": 1.099545955657959, "learning_rate": 7.243531842687663e-06, "loss": 0.3612, "step": 21987 }, { "epoch": 0.6012907460074381, "grad_norm": 1.4159473180770874, "learning_rate": 7.242680462420812e-06, "loss": 0.3676, "step": 21988 }, { "epoch": 0.6013180923211551, "grad_norm": 1.10719633102417, "learning_rate": 7.2418291037843534e-06, "loss": 0.4855, "step": 21989 }, { "epoch": 0.601345438634872, "grad_norm": 1.1798144578933716, "learning_rate": 7.2409777667849715e-06, "loss": 0.4584, "step": 21990 }, { "epoch": 0.6013727849485889, "grad_norm": 1.3375746011734009, "learning_rate": 7.240126451429342e-06, "loss": 0.4699, "step": 21991 }, { "epoch": 0.6014001312623058, "grad_norm": 1.2288941144943237, "learning_rate": 7.2392751577241436e-06, "loss": 0.494, "step": 21992 }, { "epoch": 0.6014274775760228, "grad_norm": 1.4637209177017212, "learning_rate": 7.238423885676056e-06, "loss": 0.4902, "step": 21993 }, { "epoch": 0.6014548238897397, "grad_norm": 1.2923555374145508, "learning_rate": 7.2375726352917545e-06, "loss": 0.7588, "step": 21994 }, { "epoch": 0.6014821702034566, "grad_norm": 1.5318636894226074, "learning_rate": 7.2367214065779175e-06, "loss": 0.5078, "step": 21995 }, { "epoch": 0.6015095165171734, "grad_norm": 1.5933287143707275, "learning_rate": 7.2358701995412266e-06, "loss": 0.3701, "step": 21996 }, { "epoch": 0.6015368628308904, "grad_norm": 1.3082873821258545, "learning_rate": 7.235019014188352e-06, "loss": 0.5292, "step": 21997 }, { "epoch": 0.6015642091446073, "grad_norm": 1.1758791208267212, "learning_rate": 7.23416785052598e-06, "loss": 0.3408, "step": 21998 }, { "epoch": 0.6015915554583242, "grad_norm": 1.417046308517456, "learning_rate": 7.233316708560778e-06, "loss": 0.506, "step": 21999 }, { "epoch": 0.6016189017720411, "grad_norm": 1.2415449619293213, "learning_rate": 7.232465588299429e-06, "loss": 0.4726, "step": 22000 }, { "epoch": 0.6016462480857581, "grad_norm": 1.5635342597961426, "learning_rate": 7.231614489748611e-06, "loss": 0.4898, "step": 22001 }, { "epoch": 0.601673594399475, "grad_norm": 1.3901680707931519, "learning_rate": 7.230763412914995e-06, "loss": 0.4871, "step": 22002 }, { "epoch": 0.6017009407131919, "grad_norm": 1.2863144874572754, "learning_rate": 7.229912357805262e-06, "loss": 0.5089, "step": 22003 }, { "epoch": 0.6017282870269087, "grad_norm": 1.14243745803833, "learning_rate": 7.229061324426088e-06, "loss": 0.4709, "step": 22004 }, { "epoch": 0.6017556333406257, "grad_norm": 1.3908952474594116, "learning_rate": 7.228210312784147e-06, "loss": 0.5147, "step": 22005 }, { "epoch": 0.6017829796543426, "grad_norm": 1.3441798686981201, "learning_rate": 7.227359322886117e-06, "loss": 0.498, "step": 22006 }, { "epoch": 0.6018103259680595, "grad_norm": 1.2994537353515625, "learning_rate": 7.226508354738672e-06, "loss": 0.4572, "step": 22007 }, { "epoch": 0.6018376722817764, "grad_norm": 1.3916916847229004, "learning_rate": 7.225657408348488e-06, "loss": 0.5025, "step": 22008 }, { "epoch": 0.6018650185954934, "grad_norm": 1.2725211381912231, "learning_rate": 7.224806483722242e-06, "loss": 0.7619, "step": 22009 }, { "epoch": 0.6018923649092103, "grad_norm": 1.462515950202942, "learning_rate": 7.223955580866607e-06, "loss": 0.495, "step": 22010 }, { "epoch": 0.6019197112229272, "grad_norm": 1.720858097076416, "learning_rate": 7.223104699788261e-06, "loss": 0.3776, "step": 22011 }, { "epoch": 0.601947057536644, "grad_norm": 1.2607858180999756, "learning_rate": 7.222253840493876e-06, "loss": 0.7666, "step": 22012 }, { "epoch": 0.6019744038503609, "grad_norm": 1.3853222131729126, "learning_rate": 7.221403002990126e-06, "loss": 0.4912, "step": 22013 }, { "epoch": 0.6020017501640779, "grad_norm": 1.1622896194458008, "learning_rate": 7.220552187283691e-06, "loss": 0.486, "step": 22014 }, { "epoch": 0.6020290964777948, "grad_norm": 1.129483699798584, "learning_rate": 7.21970139338124e-06, "loss": 0.5137, "step": 22015 }, { "epoch": 0.6020564427915117, "grad_norm": 1.379072666168213, "learning_rate": 7.218850621289448e-06, "loss": 0.5008, "step": 22016 }, { "epoch": 0.6020837891052286, "grad_norm": 1.3180278539657593, "learning_rate": 7.2179998710149936e-06, "loss": 0.5251, "step": 22017 }, { "epoch": 0.6021111354189456, "grad_norm": 1.3830991983413696, "learning_rate": 7.217149142564544e-06, "loss": 0.3713, "step": 22018 }, { "epoch": 0.6021384817326625, "grad_norm": 1.1035621166229248, "learning_rate": 7.216298435944782e-06, "loss": 0.4776, "step": 22019 }, { "epoch": 0.6021658280463793, "grad_norm": 1.1303610801696777, "learning_rate": 7.2154477511623725e-06, "loss": 0.4686, "step": 22020 }, { "epoch": 0.6021931743600962, "grad_norm": 1.2795966863632202, "learning_rate": 7.214597088223988e-06, "loss": 0.4775, "step": 22021 }, { "epoch": 0.6022205206738132, "grad_norm": 1.4840829372406006, "learning_rate": 7.213746447136308e-06, "loss": 0.4659, "step": 22022 }, { "epoch": 0.6022478669875301, "grad_norm": 1.1473995447158813, "learning_rate": 7.212895827906004e-06, "loss": 0.4942, "step": 22023 }, { "epoch": 0.602275213301247, "grad_norm": 1.1776224374771118, "learning_rate": 7.2120452305397435e-06, "loss": 0.4725, "step": 22024 }, { "epoch": 0.6023025596149639, "grad_norm": 1.313850998878479, "learning_rate": 7.211194655044207e-06, "loss": 0.4644, "step": 22025 }, { "epoch": 0.6023299059286809, "grad_norm": 1.9433478116989136, "learning_rate": 7.2103441014260635e-06, "loss": 0.3702, "step": 22026 }, { "epoch": 0.6023572522423978, "grad_norm": 1.397343397140503, "learning_rate": 7.209493569691985e-06, "loss": 0.4742, "step": 22027 }, { "epoch": 0.6023845985561146, "grad_norm": 1.3086642026901245, "learning_rate": 7.208643059848646e-06, "loss": 0.4658, "step": 22028 }, { "epoch": 0.6024119448698315, "grad_norm": 1.2231279611587524, "learning_rate": 7.207792571902713e-06, "loss": 0.5055, "step": 22029 }, { "epoch": 0.6024392911835484, "grad_norm": 1.3657962083816528, "learning_rate": 7.2069421058608634e-06, "loss": 0.4878, "step": 22030 }, { "epoch": 0.6024666374972654, "grad_norm": 1.2649096250534058, "learning_rate": 7.2060916617297685e-06, "loss": 0.4851, "step": 22031 }, { "epoch": 0.6024939838109823, "grad_norm": 1.207000970840454, "learning_rate": 7.205241239516094e-06, "loss": 0.5131, "step": 22032 }, { "epoch": 0.6025213301246992, "grad_norm": 1.5645239353179932, "learning_rate": 7.204390839226521e-06, "loss": 0.4458, "step": 22033 }, { "epoch": 0.6025486764384161, "grad_norm": 1.123547077178955, "learning_rate": 7.20354046086771e-06, "loss": 0.4528, "step": 22034 }, { "epoch": 0.6025760227521331, "grad_norm": 1.2338072061538696, "learning_rate": 7.202690104446339e-06, "loss": 0.498, "step": 22035 }, { "epoch": 0.6026033690658499, "grad_norm": 1.3595988750457764, "learning_rate": 7.201839769969079e-06, "loss": 0.472, "step": 22036 }, { "epoch": 0.6026307153795668, "grad_norm": 1.301343560218811, "learning_rate": 7.200989457442595e-06, "loss": 0.7693, "step": 22037 }, { "epoch": 0.6026580616932837, "grad_norm": 1.5269577503204346, "learning_rate": 7.200139166873562e-06, "loss": 0.4849, "step": 22038 }, { "epoch": 0.6026854080070007, "grad_norm": 1.5231441259384155, "learning_rate": 7.199288898268652e-06, "loss": 0.3655, "step": 22039 }, { "epoch": 0.6027127543207176, "grad_norm": 1.1440255641937256, "learning_rate": 7.1984386516345294e-06, "loss": 0.4585, "step": 22040 }, { "epoch": 0.6027401006344345, "grad_norm": 1.3694676160812378, "learning_rate": 7.197588426977869e-06, "loss": 0.498, "step": 22041 }, { "epoch": 0.6027674469481514, "grad_norm": 1.2234396934509277, "learning_rate": 7.196738224305337e-06, "loss": 0.4903, "step": 22042 }, { "epoch": 0.6027947932618682, "grad_norm": 1.1508610248565674, "learning_rate": 7.195888043623604e-06, "loss": 0.5182, "step": 22043 }, { "epoch": 0.6028221395755852, "grad_norm": 1.2421271800994873, "learning_rate": 7.195037884939345e-06, "loss": 0.4493, "step": 22044 }, { "epoch": 0.6028494858893021, "grad_norm": 1.303175449371338, "learning_rate": 7.194187748259219e-06, "loss": 0.4843, "step": 22045 }, { "epoch": 0.602876832203019, "grad_norm": 1.252052664756775, "learning_rate": 7.1933376335899015e-06, "loss": 0.3812, "step": 22046 }, { "epoch": 0.602904178516736, "grad_norm": 1.3202954530715942, "learning_rate": 7.192487540938064e-06, "loss": 0.485, "step": 22047 }, { "epoch": 0.6029315248304529, "grad_norm": 1.1174638271331787, "learning_rate": 7.191637470310368e-06, "loss": 0.4759, "step": 22048 }, { "epoch": 0.6029588711441698, "grad_norm": 1.5685070753097534, "learning_rate": 7.190787421713488e-06, "loss": 0.3499, "step": 22049 }, { "epoch": 0.6029862174578867, "grad_norm": 1.2025054693222046, "learning_rate": 7.189937395154088e-06, "loss": 0.4934, "step": 22050 }, { "epoch": 0.6030135637716035, "grad_norm": 1.141908884048462, "learning_rate": 7.189087390638838e-06, "loss": 0.7166, "step": 22051 }, { "epoch": 0.6030409100853205, "grad_norm": 1.4296410083770752, "learning_rate": 7.18823740817441e-06, "loss": 0.5002, "step": 22052 }, { "epoch": 0.6030682563990374, "grad_norm": 1.0809541940689087, "learning_rate": 7.187387447767464e-06, "loss": 0.48, "step": 22053 }, { "epoch": 0.6030956027127543, "grad_norm": 1.0451784133911133, "learning_rate": 7.186537509424678e-06, "loss": 0.464, "step": 22054 }, { "epoch": 0.6031229490264712, "grad_norm": 1.2595984935760498, "learning_rate": 7.185687593152708e-06, "loss": 0.5068, "step": 22055 }, { "epoch": 0.6031502953401882, "grad_norm": 1.2866628170013428, "learning_rate": 7.184837698958229e-06, "loss": 0.4884, "step": 22056 }, { "epoch": 0.6031776416539051, "grad_norm": 1.237532138824463, "learning_rate": 7.183987826847904e-06, "loss": 0.5058, "step": 22057 }, { "epoch": 0.603204987967622, "grad_norm": 1.3526690006256104, "learning_rate": 7.183137976828405e-06, "loss": 0.3279, "step": 22058 }, { "epoch": 0.6032323342813388, "grad_norm": 1.4455102682113647, "learning_rate": 7.1822881489063935e-06, "loss": 0.485, "step": 22059 }, { "epoch": 0.6032596805950557, "grad_norm": 1.0982003211975098, "learning_rate": 7.181438343088538e-06, "loss": 0.5093, "step": 22060 }, { "epoch": 0.6032870269087727, "grad_norm": 1.339563012123108, "learning_rate": 7.180588559381507e-06, "loss": 0.7723, "step": 22061 }, { "epoch": 0.6033143732224896, "grad_norm": 1.1536558866500854, "learning_rate": 7.179738797791964e-06, "loss": 0.4714, "step": 22062 }, { "epoch": 0.6033417195362065, "grad_norm": 1.1511586904525757, "learning_rate": 7.178889058326579e-06, "loss": 0.4847, "step": 22063 }, { "epoch": 0.6033690658499234, "grad_norm": 1.5088492631912231, "learning_rate": 7.178039340992013e-06, "loss": 0.4709, "step": 22064 }, { "epoch": 0.6033964121636404, "grad_norm": 1.481430172920227, "learning_rate": 7.177189645794934e-06, "loss": 0.3638, "step": 22065 }, { "epoch": 0.6034237584773573, "grad_norm": 1.2936152219772339, "learning_rate": 7.17633997274201e-06, "loss": 0.4557, "step": 22066 }, { "epoch": 0.6034511047910741, "grad_norm": 1.5763695240020752, "learning_rate": 7.175490321839902e-06, "loss": 0.4664, "step": 22067 }, { "epoch": 0.603478451104791, "grad_norm": 1.100092887878418, "learning_rate": 7.174640693095277e-06, "loss": 0.492, "step": 22068 }, { "epoch": 0.603505797418508, "grad_norm": 1.646442174911499, "learning_rate": 7.173791086514803e-06, "loss": 0.6913, "step": 22069 }, { "epoch": 0.6035331437322249, "grad_norm": 1.336073875427246, "learning_rate": 7.172941502105142e-06, "loss": 0.464, "step": 22070 }, { "epoch": 0.6035604900459418, "grad_norm": 1.5264687538146973, "learning_rate": 7.172091939872961e-06, "loss": 0.3305, "step": 22071 }, { "epoch": 0.6035878363596587, "grad_norm": 1.5119264125823975, "learning_rate": 7.171242399824918e-06, "loss": 0.3814, "step": 22072 }, { "epoch": 0.6036151826733757, "grad_norm": 1.1494179964065552, "learning_rate": 7.170392881967685e-06, "loss": 0.4729, "step": 22073 }, { "epoch": 0.6036425289870926, "grad_norm": 1.3703742027282715, "learning_rate": 7.169543386307925e-06, "loss": 0.7473, "step": 22074 }, { "epoch": 0.6036698753008094, "grad_norm": 1.1600661277770996, "learning_rate": 7.168693912852298e-06, "loss": 0.4689, "step": 22075 }, { "epoch": 0.6036972216145263, "grad_norm": 1.191465139389038, "learning_rate": 7.167844461607474e-06, "loss": 0.4153, "step": 22076 }, { "epoch": 0.6037245679282432, "grad_norm": 1.1564875841140747, "learning_rate": 7.166995032580108e-06, "loss": 0.4849, "step": 22077 }, { "epoch": 0.6037519142419602, "grad_norm": 1.379490613937378, "learning_rate": 7.166145625776872e-06, "loss": 0.4661, "step": 22078 }, { "epoch": 0.6037792605556771, "grad_norm": 1.214545726776123, "learning_rate": 7.165296241204427e-06, "loss": 0.4748, "step": 22079 }, { "epoch": 0.603806606869394, "grad_norm": 1.2842782735824585, "learning_rate": 7.164446878869433e-06, "loss": 0.7648, "step": 22080 }, { "epoch": 0.603833953183111, "grad_norm": 1.234749674797058, "learning_rate": 7.163597538778555e-06, "loss": 0.4839, "step": 22081 }, { "epoch": 0.6038612994968279, "grad_norm": 1.2641559839248657, "learning_rate": 7.162748220938458e-06, "loss": 0.4853, "step": 22082 }, { "epoch": 0.6038886458105447, "grad_norm": 1.5262304544448853, "learning_rate": 7.1618989253558015e-06, "loss": 0.5167, "step": 22083 }, { "epoch": 0.6039159921242616, "grad_norm": 2.1735615730285645, "learning_rate": 7.161049652037251e-06, "loss": 0.3444, "step": 22084 }, { "epoch": 0.6039433384379785, "grad_norm": 1.4213203191757202, "learning_rate": 7.160200400989464e-06, "loss": 0.4904, "step": 22085 }, { "epoch": 0.6039706847516955, "grad_norm": 1.218800663948059, "learning_rate": 7.159351172219107e-06, "loss": 0.5152, "step": 22086 }, { "epoch": 0.6039980310654124, "grad_norm": 1.1728789806365967, "learning_rate": 7.158501965732842e-06, "loss": 0.5083, "step": 22087 }, { "epoch": 0.6040253773791293, "grad_norm": 1.464433193206787, "learning_rate": 7.157652781537328e-06, "loss": 0.3793, "step": 22088 }, { "epoch": 0.6040527236928462, "grad_norm": 1.2327415943145752, "learning_rate": 7.156803619639232e-06, "loss": 0.479, "step": 22089 }, { "epoch": 0.6040800700065632, "grad_norm": 1.3491109609603882, "learning_rate": 7.155954480045206e-06, "loss": 0.4956, "step": 22090 }, { "epoch": 0.60410741632028, "grad_norm": 1.560253381729126, "learning_rate": 7.155105362761921e-06, "loss": 0.7274, "step": 22091 }, { "epoch": 0.6041347626339969, "grad_norm": 1.4026505947113037, "learning_rate": 7.154256267796029e-06, "loss": 0.3865, "step": 22092 }, { "epoch": 0.6041621089477138, "grad_norm": 1.438241958618164, "learning_rate": 7.153407195154201e-06, "loss": 0.4899, "step": 22093 }, { "epoch": 0.6041894552614308, "grad_norm": 1.4851950407028198, "learning_rate": 7.152558144843087e-06, "loss": 0.3809, "step": 22094 }, { "epoch": 0.6042168015751477, "grad_norm": 1.2915023565292358, "learning_rate": 7.151709116869355e-06, "loss": 0.5106, "step": 22095 }, { "epoch": 0.6042441478888646, "grad_norm": 1.4392822980880737, "learning_rate": 7.150860111239666e-06, "loss": 0.4749, "step": 22096 }, { "epoch": 0.6042714942025815, "grad_norm": 1.251416563987732, "learning_rate": 7.150011127960675e-06, "loss": 0.4838, "step": 22097 }, { "epoch": 0.6042988405162985, "grad_norm": 1.3768408298492432, "learning_rate": 7.1491621670390454e-06, "loss": 0.4858, "step": 22098 }, { "epoch": 0.6043261868300153, "grad_norm": 1.1004419326782227, "learning_rate": 7.148313228481436e-06, "loss": 0.5082, "step": 22099 }, { "epoch": 0.6043535331437322, "grad_norm": 1.3034656047821045, "learning_rate": 7.147464312294506e-06, "loss": 0.7607, "step": 22100 }, { "epoch": 0.6043808794574491, "grad_norm": 1.188918948173523, "learning_rate": 7.146615418484917e-06, "loss": 0.3575, "step": 22101 }, { "epoch": 0.604408225771166, "grad_norm": 1.080602765083313, "learning_rate": 7.1457665470593255e-06, "loss": 0.4661, "step": 22102 }, { "epoch": 0.604435572084883, "grad_norm": 1.0930320024490356, "learning_rate": 7.1449176980243924e-06, "loss": 0.4573, "step": 22103 }, { "epoch": 0.6044629183985999, "grad_norm": 1.526740550994873, "learning_rate": 7.144068871386779e-06, "loss": 0.4602, "step": 22104 }, { "epoch": 0.6044902647123168, "grad_norm": 1.204337239265442, "learning_rate": 7.143220067153139e-06, "loss": 0.4878, "step": 22105 }, { "epoch": 0.6045176110260337, "grad_norm": 1.211525321006775, "learning_rate": 7.1423712853301365e-06, "loss": 0.5163, "step": 22106 }, { "epoch": 0.6045449573397506, "grad_norm": 1.4896061420440674, "learning_rate": 7.141522525924423e-06, "loss": 0.4846, "step": 22107 }, { "epoch": 0.6045723036534675, "grad_norm": 1.4276005029678345, "learning_rate": 7.140673788942662e-06, "loss": 0.503, "step": 22108 }, { "epoch": 0.6045996499671844, "grad_norm": 1.2176995277404785, "learning_rate": 7.139825074391513e-06, "loss": 0.4623, "step": 22109 }, { "epoch": 0.6046269962809013, "grad_norm": 1.2980096340179443, "learning_rate": 7.138976382277629e-06, "loss": 0.4716, "step": 22110 }, { "epoch": 0.6046543425946183, "grad_norm": 1.4960843324661255, "learning_rate": 7.13812771260767e-06, "loss": 0.4836, "step": 22111 }, { "epoch": 0.6046816889083352, "grad_norm": 1.935919165611267, "learning_rate": 7.137279065388296e-06, "loss": 0.4881, "step": 22112 }, { "epoch": 0.6047090352220521, "grad_norm": 1.2530691623687744, "learning_rate": 7.13643044062616e-06, "loss": 0.4729, "step": 22113 }, { "epoch": 0.604736381535769, "grad_norm": 1.2666218280792236, "learning_rate": 7.135581838327923e-06, "loss": 0.4872, "step": 22114 }, { "epoch": 0.6047637278494858, "grad_norm": 1.2024105787277222, "learning_rate": 7.13473325850024e-06, "loss": 0.4738, "step": 22115 }, { "epoch": 0.6047910741632028, "grad_norm": 1.2435894012451172, "learning_rate": 7.133884701149765e-06, "loss": 0.7493, "step": 22116 }, { "epoch": 0.6048184204769197, "grad_norm": 1.1857854127883911, "learning_rate": 7.1330361662831625e-06, "loss": 0.4999, "step": 22117 }, { "epoch": 0.6048457667906366, "grad_norm": 1.3311688899993896, "learning_rate": 7.132187653907081e-06, "loss": 0.4653, "step": 22118 }, { "epoch": 0.6048731131043535, "grad_norm": 1.4105243682861328, "learning_rate": 7.131339164028183e-06, "loss": 0.4675, "step": 22119 }, { "epoch": 0.6049004594180705, "grad_norm": 1.4907065629959106, "learning_rate": 7.130490696653121e-06, "loss": 0.5103, "step": 22120 }, { "epoch": 0.6049278057317874, "grad_norm": 1.377467155456543, "learning_rate": 7.129642251788549e-06, "loss": 0.4634, "step": 22121 }, { "epoch": 0.6049551520455043, "grad_norm": 1.3740135431289673, "learning_rate": 7.1287938294411295e-06, "loss": 0.4918, "step": 22122 }, { "epoch": 0.6049824983592211, "grad_norm": 2.299285888671875, "learning_rate": 7.127945429617514e-06, "loss": 0.3468, "step": 22123 }, { "epoch": 0.6050098446729381, "grad_norm": 1.3364311456680298, "learning_rate": 7.127097052324357e-06, "loss": 0.4954, "step": 22124 }, { "epoch": 0.605037190986655, "grad_norm": 1.2672103643417358, "learning_rate": 7.126248697568318e-06, "loss": 0.4823, "step": 22125 }, { "epoch": 0.6050645373003719, "grad_norm": 1.5745675563812256, "learning_rate": 7.125400365356048e-06, "loss": 0.4595, "step": 22126 }, { "epoch": 0.6050918836140888, "grad_norm": 1.1513187885284424, "learning_rate": 7.124552055694201e-06, "loss": 0.3384, "step": 22127 }, { "epoch": 0.6051192299278058, "grad_norm": 1.2478716373443604, "learning_rate": 7.123703768589436e-06, "loss": 0.4797, "step": 22128 }, { "epoch": 0.6051465762415227, "grad_norm": 0.9799845814704895, "learning_rate": 7.122855504048404e-06, "loss": 0.3623, "step": 22129 }, { "epoch": 0.6051739225552396, "grad_norm": 1.3592963218688965, "learning_rate": 7.12200726207776e-06, "loss": 0.5091, "step": 22130 }, { "epoch": 0.6052012688689564, "grad_norm": 1.1982730627059937, "learning_rate": 7.121159042684163e-06, "loss": 0.4751, "step": 22131 }, { "epoch": 0.6052286151826733, "grad_norm": 1.9209007024765015, "learning_rate": 7.120310845874259e-06, "loss": 0.3581, "step": 22132 }, { "epoch": 0.6052559614963903, "grad_norm": 1.3043761253356934, "learning_rate": 7.1194626716547065e-06, "loss": 0.7644, "step": 22133 }, { "epoch": 0.6052833078101072, "grad_norm": 1.3449596166610718, "learning_rate": 7.118614520032161e-06, "loss": 0.4767, "step": 22134 }, { "epoch": 0.6053106541238241, "grad_norm": 1.2179100513458252, "learning_rate": 7.117766391013272e-06, "loss": 0.4956, "step": 22135 }, { "epoch": 0.605338000437541, "grad_norm": 1.2698192596435547, "learning_rate": 7.116918284604696e-06, "loss": 0.4758, "step": 22136 }, { "epoch": 0.605365346751258, "grad_norm": 1.153162956237793, "learning_rate": 7.116070200813082e-06, "loss": 0.5436, "step": 22137 }, { "epoch": 0.6053926930649749, "grad_norm": 1.1858201026916504, "learning_rate": 7.115222139645087e-06, "loss": 0.4914, "step": 22138 }, { "epoch": 0.6054200393786917, "grad_norm": 1.3236095905303955, "learning_rate": 7.114374101107364e-06, "loss": 0.4881, "step": 22139 }, { "epoch": 0.6054473856924086, "grad_norm": 1.3923295736312866, "learning_rate": 7.113526085206561e-06, "loss": 0.4726, "step": 22140 }, { "epoch": 0.6054747320061256, "grad_norm": 1.4152377843856812, "learning_rate": 7.1126780919493375e-06, "loss": 0.4906, "step": 22141 }, { "epoch": 0.6055020783198425, "grad_norm": 1.0863823890686035, "learning_rate": 7.111830121342339e-06, "loss": 0.4703, "step": 22142 }, { "epoch": 0.6055294246335594, "grad_norm": 1.5143089294433594, "learning_rate": 7.11098217339222e-06, "loss": 0.4773, "step": 22143 }, { "epoch": 0.6055567709472763, "grad_norm": 1.0847697257995605, "learning_rate": 7.110134248105636e-06, "loss": 0.4622, "step": 22144 }, { "epoch": 0.6055841172609933, "grad_norm": 1.2366375923156738, "learning_rate": 7.1092863454892324e-06, "loss": 0.4719, "step": 22145 }, { "epoch": 0.6056114635747101, "grad_norm": 1.4879664182662964, "learning_rate": 7.108438465549663e-06, "loss": 0.4934, "step": 22146 }, { "epoch": 0.605638809888427, "grad_norm": 1.1298733949661255, "learning_rate": 7.107590608293584e-06, "loss": 0.4868, "step": 22147 }, { "epoch": 0.6056661562021439, "grad_norm": 1.4211771488189697, "learning_rate": 7.1067427737276395e-06, "loss": 0.4776, "step": 22148 }, { "epoch": 0.6056935025158608, "grad_norm": 1.9915189743041992, "learning_rate": 7.105894961858486e-06, "loss": 0.3467, "step": 22149 }, { "epoch": 0.6057208488295778, "grad_norm": 1.2909340858459473, "learning_rate": 7.105047172692771e-06, "loss": 0.482, "step": 22150 }, { "epoch": 0.6057481951432947, "grad_norm": 1.3490169048309326, "learning_rate": 7.104199406237144e-06, "loss": 0.4673, "step": 22151 }, { "epoch": 0.6057755414570116, "grad_norm": 1.157284140586853, "learning_rate": 7.103351662498263e-06, "loss": 0.4901, "step": 22152 }, { "epoch": 0.6058028877707285, "grad_norm": 1.1861882209777832, "learning_rate": 7.102503941482768e-06, "loss": 0.4899, "step": 22153 }, { "epoch": 0.6058302340844454, "grad_norm": 1.2196464538574219, "learning_rate": 7.1016562431973145e-06, "loss": 0.4983, "step": 22154 }, { "epoch": 0.6058575803981623, "grad_norm": 1.259275197982788, "learning_rate": 7.100808567648556e-06, "loss": 0.4851, "step": 22155 }, { "epoch": 0.6058849267118792, "grad_norm": 1.489476203918457, "learning_rate": 7.099960914843136e-06, "loss": 0.7446, "step": 22156 }, { "epoch": 0.6059122730255961, "grad_norm": 1.677141785621643, "learning_rate": 7.099113284787707e-06, "loss": 0.4854, "step": 22157 }, { "epoch": 0.6059396193393131, "grad_norm": 2.824861526489258, "learning_rate": 7.098265677488916e-06, "loss": 0.3621, "step": 22158 }, { "epoch": 0.60596696565303, "grad_norm": 1.2147444486618042, "learning_rate": 7.097418092953415e-06, "loss": 0.4959, "step": 22159 }, { "epoch": 0.6059943119667469, "grad_norm": 1.3456063270568848, "learning_rate": 7.0965705311878565e-06, "loss": 0.3612, "step": 22160 }, { "epoch": 0.6060216582804638, "grad_norm": 1.5130199193954468, "learning_rate": 7.0957229921988826e-06, "loss": 0.4859, "step": 22161 }, { "epoch": 0.6060490045941807, "grad_norm": 1.2140827178955078, "learning_rate": 7.0948754759931424e-06, "loss": 0.4405, "step": 22162 }, { "epoch": 0.6060763509078976, "grad_norm": 1.485012412071228, "learning_rate": 7.094027982577289e-06, "loss": 0.5089, "step": 22163 }, { "epoch": 0.6061036972216145, "grad_norm": 1.267099380493164, "learning_rate": 7.093180511957966e-06, "loss": 0.5008, "step": 22164 }, { "epoch": 0.6061310435353314, "grad_norm": 1.3767364025115967, "learning_rate": 7.092333064141823e-06, "loss": 0.5156, "step": 22165 }, { "epoch": 0.6061583898490484, "grad_norm": 1.3785645961761475, "learning_rate": 7.091485639135512e-06, "loss": 0.5103, "step": 22166 }, { "epoch": 0.6061857361627653, "grad_norm": 1.36644446849823, "learning_rate": 7.090638236945675e-06, "loss": 0.4648, "step": 22167 }, { "epoch": 0.6062130824764822, "grad_norm": 1.1966710090637207, "learning_rate": 7.089790857578962e-06, "loss": 0.4895, "step": 22168 }, { "epoch": 0.6062404287901991, "grad_norm": 1.282050371170044, "learning_rate": 7.088943501042022e-06, "loss": 0.467, "step": 22169 }, { "epoch": 0.6062677751039159, "grad_norm": 1.6036033630371094, "learning_rate": 7.088096167341502e-06, "loss": 0.3642, "step": 22170 }, { "epoch": 0.6062951214176329, "grad_norm": 1.2702962160110474, "learning_rate": 7.087248856484048e-06, "loss": 0.4796, "step": 22171 }, { "epoch": 0.6063224677313498, "grad_norm": 1.3655633926391602, "learning_rate": 7.086401568476305e-06, "loss": 0.4785, "step": 22172 }, { "epoch": 0.6063498140450667, "grad_norm": 1.3177542686462402, "learning_rate": 7.085554303324922e-06, "loss": 0.4736, "step": 22173 }, { "epoch": 0.6063771603587836, "grad_norm": 1.0464189052581787, "learning_rate": 7.084707061036546e-06, "loss": 0.4583, "step": 22174 }, { "epoch": 0.6064045066725006, "grad_norm": 1.389841914176941, "learning_rate": 7.083859841617823e-06, "loss": 0.4729, "step": 22175 }, { "epoch": 0.6064318529862175, "grad_norm": 1.4628663063049316, "learning_rate": 7.083012645075398e-06, "loss": 0.4665, "step": 22176 }, { "epoch": 0.6064591992999344, "grad_norm": 1.2717114686965942, "learning_rate": 7.08216547141592e-06, "loss": 0.4852, "step": 22177 }, { "epoch": 0.6064865456136512, "grad_norm": 1.3430548906326294, "learning_rate": 7.081318320646029e-06, "loss": 0.4823, "step": 22178 }, { "epoch": 0.6065138919273682, "grad_norm": 1.1133726835250854, "learning_rate": 7.080471192772378e-06, "loss": 0.4626, "step": 22179 }, { "epoch": 0.6065412382410851, "grad_norm": 1.181012749671936, "learning_rate": 7.079624087801606e-06, "loss": 0.5091, "step": 22180 }, { "epoch": 0.606568584554802, "grad_norm": 1.2168121337890625, "learning_rate": 7.078777005740363e-06, "loss": 0.5098, "step": 22181 }, { "epoch": 0.6065959308685189, "grad_norm": 1.2409913539886475, "learning_rate": 7.0779299465952935e-06, "loss": 0.339, "step": 22182 }, { "epoch": 0.6066232771822359, "grad_norm": 1.495834469795227, "learning_rate": 7.077082910373039e-06, "loss": 0.3404, "step": 22183 }, { "epoch": 0.6066506234959528, "grad_norm": 1.0529723167419434, "learning_rate": 7.076235897080248e-06, "loss": 0.4629, "step": 22184 }, { "epoch": 0.6066779698096697, "grad_norm": 1.3373963832855225, "learning_rate": 7.075388906723561e-06, "loss": 0.4829, "step": 22185 }, { "epoch": 0.6067053161233865, "grad_norm": 1.3481839895248413, "learning_rate": 7.074541939309627e-06, "loss": 0.466, "step": 22186 }, { "epoch": 0.6067326624371034, "grad_norm": 1.2844740152359009, "learning_rate": 7.073694994845088e-06, "loss": 0.4822, "step": 22187 }, { "epoch": 0.6067600087508204, "grad_norm": 1.1171871423721313, "learning_rate": 7.072848073336588e-06, "loss": 0.4679, "step": 22188 }, { "epoch": 0.6067873550645373, "grad_norm": 1.1138335466384888, "learning_rate": 7.07200117479077e-06, "loss": 0.4831, "step": 22189 }, { "epoch": 0.6068147013782542, "grad_norm": 1.1424518823623657, "learning_rate": 7.071154299214281e-06, "loss": 0.4988, "step": 22190 }, { "epoch": 0.6068420476919711, "grad_norm": 1.2437779903411865, "learning_rate": 7.07030744661376e-06, "loss": 0.5189, "step": 22191 }, { "epoch": 0.6068693940056881, "grad_norm": 1.257262945175171, "learning_rate": 7.069460616995856e-06, "loss": 0.4992, "step": 22192 }, { "epoch": 0.606896740319405, "grad_norm": 1.1096712350845337, "learning_rate": 7.068613810367205e-06, "loss": 0.4813, "step": 22193 }, { "epoch": 0.6069240866331218, "grad_norm": 1.2336915731430054, "learning_rate": 7.0677670267344554e-06, "loss": 0.4702, "step": 22194 }, { "epoch": 0.6069514329468387, "grad_norm": 1.3129085302352905, "learning_rate": 7.066920266104252e-06, "loss": 0.4851, "step": 22195 }, { "epoch": 0.6069787792605557, "grad_norm": 1.4217578172683716, "learning_rate": 7.06607352848323e-06, "loss": 0.4728, "step": 22196 }, { "epoch": 0.6070061255742726, "grad_norm": 1.1900694370269775, "learning_rate": 7.065226813878035e-06, "loss": 0.4497, "step": 22197 }, { "epoch": 0.6070334718879895, "grad_norm": 1.1106961965560913, "learning_rate": 7.064380122295309e-06, "loss": 0.4837, "step": 22198 }, { "epoch": 0.6070608182017064, "grad_norm": 1.3248602151870728, "learning_rate": 7.063533453741696e-06, "loss": 0.4753, "step": 22199 }, { "epoch": 0.6070881645154234, "grad_norm": 1.1864585876464844, "learning_rate": 7.062686808223837e-06, "loss": 0.4843, "step": 22200 }, { "epoch": 0.6071155108291403, "grad_norm": 1.1487538814544678, "learning_rate": 7.061840185748374e-06, "loss": 0.4838, "step": 22201 }, { "epoch": 0.6071428571428571, "grad_norm": 1.0782201290130615, "learning_rate": 7.0609935863219446e-06, "loss": 0.4798, "step": 22202 }, { "epoch": 0.607170203456574, "grad_norm": 1.1268390417099, "learning_rate": 7.060147009951195e-06, "loss": 0.4971, "step": 22203 }, { "epoch": 0.607197549770291, "grad_norm": 1.2034999132156372, "learning_rate": 7.059300456642767e-06, "loss": 0.5107, "step": 22204 }, { "epoch": 0.6072248960840079, "grad_norm": 1.3947843313217163, "learning_rate": 7.058453926403297e-06, "loss": 0.4962, "step": 22205 }, { "epoch": 0.6072522423977248, "grad_norm": 1.1670796871185303, "learning_rate": 7.05760741923943e-06, "loss": 0.5087, "step": 22206 }, { "epoch": 0.6072795887114417, "grad_norm": 1.2648255825042725, "learning_rate": 7.0567609351578024e-06, "loss": 0.4769, "step": 22207 }, { "epoch": 0.6073069350251586, "grad_norm": 1.1513850688934326, "learning_rate": 7.055914474165057e-06, "loss": 0.4772, "step": 22208 }, { "epoch": 0.6073342813388756, "grad_norm": 1.1593034267425537, "learning_rate": 7.055068036267836e-06, "loss": 0.4758, "step": 22209 }, { "epoch": 0.6073616276525924, "grad_norm": 2.075277805328369, "learning_rate": 7.054221621472776e-06, "loss": 0.4798, "step": 22210 }, { "epoch": 0.6073889739663093, "grad_norm": 1.2267707586288452, "learning_rate": 7.053375229786519e-06, "loss": 0.4775, "step": 22211 }, { "epoch": 0.6074163202800262, "grad_norm": 1.4229321479797363, "learning_rate": 7.052528861215704e-06, "loss": 0.4474, "step": 22212 }, { "epoch": 0.6074436665937432, "grad_norm": 1.7128355503082275, "learning_rate": 7.05168251576697e-06, "loss": 0.4728, "step": 22213 }, { "epoch": 0.6074710129074601, "grad_norm": 1.2688610553741455, "learning_rate": 7.05083619344696e-06, "loss": 0.5061, "step": 22214 }, { "epoch": 0.607498359221177, "grad_norm": 1.1283023357391357, "learning_rate": 7.049989894262306e-06, "loss": 0.4821, "step": 22215 }, { "epoch": 0.6075257055348939, "grad_norm": 1.1961930990219116, "learning_rate": 7.049143618219651e-06, "loss": 0.5013, "step": 22216 }, { "epoch": 0.6075530518486109, "grad_norm": 1.3705382347106934, "learning_rate": 7.048297365325637e-06, "loss": 0.7578, "step": 22217 }, { "epoch": 0.6075803981623277, "grad_norm": 1.2407809495925903, "learning_rate": 7.047451135586897e-06, "loss": 0.4832, "step": 22218 }, { "epoch": 0.6076077444760446, "grad_norm": 1.6700043678283691, "learning_rate": 7.046604929010072e-06, "loss": 0.4837, "step": 22219 }, { "epoch": 0.6076350907897615, "grad_norm": 1.4291439056396484, "learning_rate": 7.045758745601803e-06, "loss": 0.4971, "step": 22220 }, { "epoch": 0.6076624371034784, "grad_norm": 1.6075750589370728, "learning_rate": 7.044912585368721e-06, "loss": 0.3707, "step": 22221 }, { "epoch": 0.6076897834171954, "grad_norm": 1.4581478834152222, "learning_rate": 7.0440664483174725e-06, "loss": 0.3701, "step": 22222 }, { "epoch": 0.6077171297309123, "grad_norm": 1.2777879238128662, "learning_rate": 7.043220334454688e-06, "loss": 0.5116, "step": 22223 }, { "epoch": 0.6077444760446292, "grad_norm": 1.746947169303894, "learning_rate": 7.042374243787008e-06, "loss": 0.3918, "step": 22224 }, { "epoch": 0.6077718223583461, "grad_norm": 1.3711581230163574, "learning_rate": 7.041528176321072e-06, "loss": 0.5018, "step": 22225 }, { "epoch": 0.607799168672063, "grad_norm": 1.1306475400924683, "learning_rate": 7.040682132063512e-06, "loss": 0.4921, "step": 22226 }, { "epoch": 0.6078265149857799, "grad_norm": 1.367374062538147, "learning_rate": 7.03983611102097e-06, "loss": 0.5312, "step": 22227 }, { "epoch": 0.6078538612994968, "grad_norm": 1.4164633750915527, "learning_rate": 7.038990113200079e-06, "loss": 0.4611, "step": 22228 }, { "epoch": 0.6078812076132137, "grad_norm": 1.2537904977798462, "learning_rate": 7.038144138607478e-06, "loss": 0.4526, "step": 22229 }, { "epoch": 0.6079085539269307, "grad_norm": 1.1794625520706177, "learning_rate": 7.037298187249806e-06, "loss": 0.5108, "step": 22230 }, { "epoch": 0.6079359002406476, "grad_norm": 1.3226113319396973, "learning_rate": 7.036452259133694e-06, "loss": 0.4619, "step": 22231 }, { "epoch": 0.6079632465543645, "grad_norm": 1.294796109199524, "learning_rate": 7.035606354265779e-06, "loss": 0.5058, "step": 22232 }, { "epoch": 0.6079905928680814, "grad_norm": 1.191745400428772, "learning_rate": 7.034760472652695e-06, "loss": 0.4689, "step": 22233 }, { "epoch": 0.6080179391817983, "grad_norm": 1.0552403926849365, "learning_rate": 7.033914614301085e-06, "loss": 0.4643, "step": 22234 }, { "epoch": 0.6080452854955152, "grad_norm": 2.0326879024505615, "learning_rate": 7.033068779217578e-06, "loss": 0.3844, "step": 22235 }, { "epoch": 0.6080726318092321, "grad_norm": 1.3232998847961426, "learning_rate": 7.032222967408813e-06, "loss": 0.4553, "step": 22236 }, { "epoch": 0.608099978122949, "grad_norm": 1.2182694673538208, "learning_rate": 7.03137717888142e-06, "loss": 0.4658, "step": 22237 }, { "epoch": 0.608127324436666, "grad_norm": 1.4000358581542969, "learning_rate": 7.03053141364204e-06, "loss": 0.4578, "step": 22238 }, { "epoch": 0.6081546707503829, "grad_norm": 1.158124566078186, "learning_rate": 7.029685671697306e-06, "loss": 0.498, "step": 22239 }, { "epoch": 0.6081820170640998, "grad_norm": 1.0739226341247559, "learning_rate": 7.028839953053851e-06, "loss": 0.5025, "step": 22240 }, { "epoch": 0.6082093633778167, "grad_norm": 1.230352759361267, "learning_rate": 7.0279942577183095e-06, "loss": 0.4902, "step": 22241 }, { "epoch": 0.6082367096915335, "grad_norm": 1.4519423246383667, "learning_rate": 7.027148585697319e-06, "loss": 0.7746, "step": 22242 }, { "epoch": 0.6082640560052505, "grad_norm": 1.1043241024017334, "learning_rate": 7.02630293699751e-06, "loss": 0.5123, "step": 22243 }, { "epoch": 0.6082914023189674, "grad_norm": 1.358400821685791, "learning_rate": 7.025457311625518e-06, "loss": 0.4871, "step": 22244 }, { "epoch": 0.6083187486326843, "grad_norm": 1.5328165292739868, "learning_rate": 7.024611709587976e-06, "loss": 0.777, "step": 22245 }, { "epoch": 0.6083460949464012, "grad_norm": 1.3738964796066284, "learning_rate": 7.023766130891516e-06, "loss": 0.4677, "step": 22246 }, { "epoch": 0.6083734412601182, "grad_norm": 1.2356525659561157, "learning_rate": 7.022920575542778e-06, "loss": 0.4837, "step": 22247 }, { "epoch": 0.6084007875738351, "grad_norm": 1.3063123226165771, "learning_rate": 7.0220750435483855e-06, "loss": 0.5082, "step": 22248 }, { "epoch": 0.6084281338875519, "grad_norm": 1.2797956466674805, "learning_rate": 7.02122953491498e-06, "loss": 0.4569, "step": 22249 }, { "epoch": 0.6084554802012688, "grad_norm": 1.5607672929763794, "learning_rate": 7.020384049649187e-06, "loss": 0.4585, "step": 22250 }, { "epoch": 0.6084828265149858, "grad_norm": 1.1253012418746948, "learning_rate": 7.019538587757643e-06, "loss": 0.779, "step": 22251 }, { "epoch": 0.6085101728287027, "grad_norm": 1.320561408996582, "learning_rate": 7.018693149246982e-06, "loss": 0.4873, "step": 22252 }, { "epoch": 0.6085375191424196, "grad_norm": 1.4409054517745972, "learning_rate": 7.0178477341238325e-06, "loss": 0.5058, "step": 22253 }, { "epoch": 0.6085648654561365, "grad_norm": 1.0804182291030884, "learning_rate": 7.017002342394828e-06, "loss": 0.5071, "step": 22254 }, { "epoch": 0.6085922117698535, "grad_norm": 1.3016059398651123, "learning_rate": 7.016156974066603e-06, "loss": 0.7686, "step": 22255 }, { "epoch": 0.6086195580835704, "grad_norm": 1.1414647102355957, "learning_rate": 7.015311629145785e-06, "loss": 0.4685, "step": 22256 }, { "epoch": 0.6086469043972872, "grad_norm": 1.1092109680175781, "learning_rate": 7.014466307639009e-06, "loss": 0.5027, "step": 22257 }, { "epoch": 0.6086742507110041, "grad_norm": 1.4688640832901, "learning_rate": 7.013621009552902e-06, "loss": 0.3625, "step": 22258 }, { "epoch": 0.608701597024721, "grad_norm": 1.3581687211990356, "learning_rate": 7.012775734894097e-06, "loss": 0.4927, "step": 22259 }, { "epoch": 0.608728943338438, "grad_norm": 1.2740352153778076, "learning_rate": 7.011930483669229e-06, "loss": 0.482, "step": 22260 }, { "epoch": 0.6087562896521549, "grad_norm": 1.1137596368789673, "learning_rate": 7.011085255884924e-06, "loss": 0.5095, "step": 22261 }, { "epoch": 0.6087836359658718, "grad_norm": 1.2180439233779907, "learning_rate": 7.010240051547817e-06, "loss": 0.4701, "step": 22262 }, { "epoch": 0.6088109822795887, "grad_norm": 1.2637094259262085, "learning_rate": 7.00939487066453e-06, "loss": 0.4783, "step": 22263 }, { "epoch": 0.6088383285933057, "grad_norm": 1.2714718580245972, "learning_rate": 7.008549713241701e-06, "loss": 0.4759, "step": 22264 }, { "epoch": 0.6088656749070225, "grad_norm": 1.2615894079208374, "learning_rate": 7.007704579285961e-06, "loss": 0.744, "step": 22265 }, { "epoch": 0.6088930212207394, "grad_norm": 1.231817364692688, "learning_rate": 7.006859468803935e-06, "loss": 0.4651, "step": 22266 }, { "epoch": 0.6089203675344563, "grad_norm": 1.1375329494476318, "learning_rate": 7.006014381802251e-06, "loss": 0.4699, "step": 22267 }, { "epoch": 0.6089477138481733, "grad_norm": 1.505855679512024, "learning_rate": 7.005169318287543e-06, "loss": 0.4925, "step": 22268 }, { "epoch": 0.6089750601618902, "grad_norm": 1.2484301328659058, "learning_rate": 7.00432427826644e-06, "loss": 0.7204, "step": 22269 }, { "epoch": 0.6090024064756071, "grad_norm": 1.351676106452942, "learning_rate": 7.0034792617455686e-06, "loss": 0.5324, "step": 22270 }, { "epoch": 0.609029752789324, "grad_norm": 1.1766105890274048, "learning_rate": 7.002634268731561e-06, "loss": 0.522, "step": 22271 }, { "epoch": 0.609057099103041, "grad_norm": 1.0834420919418335, "learning_rate": 7.001789299231041e-06, "loss": 0.4607, "step": 22272 }, { "epoch": 0.6090844454167578, "grad_norm": 1.267337441444397, "learning_rate": 7.000944353250641e-06, "loss": 0.4884, "step": 22273 }, { "epoch": 0.6091117917304747, "grad_norm": 1.3548917770385742, "learning_rate": 7.000099430796989e-06, "loss": 0.7418, "step": 22274 }, { "epoch": 0.6091391380441916, "grad_norm": 1.2719484567642212, "learning_rate": 6.999254531876713e-06, "loss": 0.4842, "step": 22275 }, { "epoch": 0.6091664843579085, "grad_norm": 1.3183387517929077, "learning_rate": 6.99840965649644e-06, "loss": 0.4701, "step": 22276 }, { "epoch": 0.6091938306716255, "grad_norm": 1.2540491819381714, "learning_rate": 6.997564804662799e-06, "loss": 0.4465, "step": 22277 }, { "epoch": 0.6092211769853424, "grad_norm": 1.3635199069976807, "learning_rate": 6.9967199763824175e-06, "loss": 0.4883, "step": 22278 }, { "epoch": 0.6092485232990593, "grad_norm": 1.4835093021392822, "learning_rate": 6.995875171661923e-06, "loss": 0.3805, "step": 22279 }, { "epoch": 0.6092758696127762, "grad_norm": 1.3736621141433716, "learning_rate": 6.995030390507942e-06, "loss": 0.526, "step": 22280 }, { "epoch": 0.6093032159264931, "grad_norm": 1.2680180072784424, "learning_rate": 6.9941856329271e-06, "loss": 0.4791, "step": 22281 }, { "epoch": 0.60933056224021, "grad_norm": 1.385873794555664, "learning_rate": 6.9933408989260285e-06, "loss": 0.4769, "step": 22282 }, { "epoch": 0.6093579085539269, "grad_norm": 1.4543139934539795, "learning_rate": 6.992496188511351e-06, "loss": 0.5238, "step": 22283 }, { "epoch": 0.6093852548676438, "grad_norm": 1.1073877811431885, "learning_rate": 6.991651501689694e-06, "loss": 0.332, "step": 22284 }, { "epoch": 0.6094126011813608, "grad_norm": 1.3140949010849, "learning_rate": 6.990806838467683e-06, "loss": 0.7817, "step": 22285 }, { "epoch": 0.6094399474950777, "grad_norm": 1.2169506549835205, "learning_rate": 6.989962198851947e-06, "loss": 0.4542, "step": 22286 }, { "epoch": 0.6094672938087946, "grad_norm": 1.1267807483673096, "learning_rate": 6.989117582849112e-06, "loss": 0.4701, "step": 22287 }, { "epoch": 0.6094946401225115, "grad_norm": 1.3012586832046509, "learning_rate": 6.988272990465799e-06, "loss": 0.5031, "step": 22288 }, { "epoch": 0.6095219864362283, "grad_norm": 1.1395281553268433, "learning_rate": 6.987428421708638e-06, "loss": 0.4859, "step": 22289 }, { "epoch": 0.6095493327499453, "grad_norm": 1.6201170682907104, "learning_rate": 6.986583876584254e-06, "loss": 0.3689, "step": 22290 }, { "epoch": 0.6095766790636622, "grad_norm": 1.3715823888778687, "learning_rate": 6.98573935509927e-06, "loss": 0.5017, "step": 22291 }, { "epoch": 0.6096040253773791, "grad_norm": 1.295290470123291, "learning_rate": 6.9848948572603145e-06, "loss": 0.7606, "step": 22292 }, { "epoch": 0.609631371691096, "grad_norm": 1.239505648612976, "learning_rate": 6.984050383074008e-06, "loss": 0.4543, "step": 22293 }, { "epoch": 0.609658718004813, "grad_norm": 2.810864210128784, "learning_rate": 6.983205932546979e-06, "loss": 0.7248, "step": 22294 }, { "epoch": 0.6096860643185299, "grad_norm": 1.3135161399841309, "learning_rate": 6.982361505685851e-06, "loss": 0.4851, "step": 22295 }, { "epoch": 0.6097134106322468, "grad_norm": 1.3144614696502686, "learning_rate": 6.981517102497246e-06, "loss": 0.4719, "step": 22296 }, { "epoch": 0.6097407569459636, "grad_norm": 1.2007215023040771, "learning_rate": 6.98067272298779e-06, "loss": 0.4876, "step": 22297 }, { "epoch": 0.6097681032596806, "grad_norm": 1.702925682067871, "learning_rate": 6.979828367164109e-06, "loss": 0.7602, "step": 22298 }, { "epoch": 0.6097954495733975, "grad_norm": 1.5027157068252563, "learning_rate": 6.978984035032822e-06, "loss": 0.3818, "step": 22299 }, { "epoch": 0.6098227958871144, "grad_norm": 1.4207056760787964, "learning_rate": 6.978139726600557e-06, "loss": 0.529, "step": 22300 }, { "epoch": 0.6098501422008313, "grad_norm": 1.3384809494018555, "learning_rate": 6.977295441873939e-06, "loss": 0.5196, "step": 22301 }, { "epoch": 0.6098774885145483, "grad_norm": 1.285094141960144, "learning_rate": 6.976451180859581e-06, "loss": 0.5046, "step": 22302 }, { "epoch": 0.6099048348282652, "grad_norm": 1.1914293766021729, "learning_rate": 6.975606943564115e-06, "loss": 0.4709, "step": 22303 }, { "epoch": 0.6099321811419821, "grad_norm": 1.2461001873016357, "learning_rate": 6.974762729994162e-06, "loss": 0.5218, "step": 22304 }, { "epoch": 0.6099595274556989, "grad_norm": 1.2688703536987305, "learning_rate": 6.973918540156341e-06, "loss": 0.4872, "step": 22305 }, { "epoch": 0.6099868737694158, "grad_norm": 1.1088221073150635, "learning_rate": 6.97307437405728e-06, "loss": 0.486, "step": 22306 }, { "epoch": 0.6100142200831328, "grad_norm": 1.2341097593307495, "learning_rate": 6.9722302317036e-06, "loss": 0.4633, "step": 22307 }, { "epoch": 0.6100415663968497, "grad_norm": 0.9820544719696045, "learning_rate": 6.971386113101919e-06, "loss": 0.3473, "step": 22308 }, { "epoch": 0.6100689127105666, "grad_norm": 1.279526948928833, "learning_rate": 6.970542018258864e-06, "loss": 0.4549, "step": 22309 }, { "epoch": 0.6100962590242835, "grad_norm": 1.4450546503067017, "learning_rate": 6.969697947181051e-06, "loss": 0.4633, "step": 22310 }, { "epoch": 0.6101236053380005, "grad_norm": 1.2819194793701172, "learning_rate": 6.968853899875106e-06, "loss": 0.5146, "step": 22311 }, { "epoch": 0.6101509516517174, "grad_norm": 1.4846673011779785, "learning_rate": 6.968009876347652e-06, "loss": 0.5101, "step": 22312 }, { "epoch": 0.6101782979654342, "grad_norm": 1.365628957748413, "learning_rate": 6.967165876605304e-06, "loss": 0.7384, "step": 22313 }, { "epoch": 0.6102056442791511, "grad_norm": 1.240318775177002, "learning_rate": 6.966321900654689e-06, "loss": 0.5063, "step": 22314 }, { "epoch": 0.6102329905928681, "grad_norm": 1.3728020191192627, "learning_rate": 6.9654779485024215e-06, "loss": 0.3736, "step": 22315 }, { "epoch": 0.610260336906585, "grad_norm": 1.364697813987732, "learning_rate": 6.964634020155127e-06, "loss": 0.4673, "step": 22316 }, { "epoch": 0.6102876832203019, "grad_norm": 1.1082007884979248, "learning_rate": 6.963790115619426e-06, "loss": 0.4814, "step": 22317 }, { "epoch": 0.6103150295340188, "grad_norm": 1.3450454473495483, "learning_rate": 6.962946234901934e-06, "loss": 0.4712, "step": 22318 }, { "epoch": 0.6103423758477358, "grad_norm": 1.2175167798995972, "learning_rate": 6.962102378009275e-06, "loss": 0.4799, "step": 22319 }, { "epoch": 0.6103697221614527, "grad_norm": 1.231559157371521, "learning_rate": 6.9612585449480706e-06, "loss": 0.4811, "step": 22320 }, { "epoch": 0.6103970684751695, "grad_norm": 1.3064252138137817, "learning_rate": 6.960414735724934e-06, "loss": 0.4496, "step": 22321 }, { "epoch": 0.6104244147888864, "grad_norm": 1.3530608415603638, "learning_rate": 6.9595709503464925e-06, "loss": 0.4165, "step": 22322 }, { "epoch": 0.6104517611026034, "grad_norm": 1.4308159351348877, "learning_rate": 6.958727188819358e-06, "loss": 0.3341, "step": 22323 }, { "epoch": 0.6104791074163203, "grad_norm": 1.1697808504104614, "learning_rate": 6.957883451150153e-06, "loss": 0.4707, "step": 22324 }, { "epoch": 0.6105064537300372, "grad_norm": 1.1604242324829102, "learning_rate": 6.957039737345496e-06, "loss": 0.497, "step": 22325 }, { "epoch": 0.6105338000437541, "grad_norm": 1.2512619495391846, "learning_rate": 6.956196047412007e-06, "loss": 0.7509, "step": 22326 }, { "epoch": 0.610561146357471, "grad_norm": 1.431775689125061, "learning_rate": 6.955352381356305e-06, "loss": 0.3737, "step": 22327 }, { "epoch": 0.610588492671188, "grad_norm": 1.832433819770813, "learning_rate": 6.954508739185004e-06, "loss": 0.3604, "step": 22328 }, { "epoch": 0.6106158389849048, "grad_norm": 1.140086054801941, "learning_rate": 6.953665120904725e-06, "loss": 0.4937, "step": 22329 }, { "epoch": 0.6106431852986217, "grad_norm": 1.1487936973571777, "learning_rate": 6.952821526522089e-06, "loss": 0.478, "step": 22330 }, { "epoch": 0.6106705316123386, "grad_norm": 1.3652249574661255, "learning_rate": 6.9519779560437075e-06, "loss": 0.467, "step": 22331 }, { "epoch": 0.6106978779260556, "grad_norm": 1.3195288181304932, "learning_rate": 6.9511344094762e-06, "loss": 0.4901, "step": 22332 }, { "epoch": 0.6107252242397725, "grad_norm": 1.2748748064041138, "learning_rate": 6.950290886826188e-06, "loss": 0.4705, "step": 22333 }, { "epoch": 0.6107525705534894, "grad_norm": 1.499047875404358, "learning_rate": 6.949447388100283e-06, "loss": 0.3828, "step": 22334 }, { "epoch": 0.6107799168672063, "grad_norm": 1.1904358863830566, "learning_rate": 6.948603913305109e-06, "loss": 0.5028, "step": 22335 }, { "epoch": 0.6108072631809233, "grad_norm": 1.2492891550064087, "learning_rate": 6.94776046244728e-06, "loss": 0.4945, "step": 22336 }, { "epoch": 0.6108346094946401, "grad_norm": 1.7746597528457642, "learning_rate": 6.946917035533405e-06, "loss": 0.3735, "step": 22337 }, { "epoch": 0.610861955808357, "grad_norm": 1.3088983297348022, "learning_rate": 6.946073632570109e-06, "loss": 0.4611, "step": 22338 }, { "epoch": 0.6108893021220739, "grad_norm": 1.511695146560669, "learning_rate": 6.945230253564006e-06, "loss": 0.3307, "step": 22339 }, { "epoch": 0.6109166484357909, "grad_norm": 1.4500824213027954, "learning_rate": 6.944386898521711e-06, "loss": 0.3867, "step": 22340 }, { "epoch": 0.6109439947495078, "grad_norm": 1.2692681550979614, "learning_rate": 6.94354356744984e-06, "loss": 0.3339, "step": 22341 }, { "epoch": 0.6109713410632247, "grad_norm": 1.263939619064331, "learning_rate": 6.942700260355013e-06, "loss": 0.5069, "step": 22342 }, { "epoch": 0.6109986873769416, "grad_norm": 1.2521001100540161, "learning_rate": 6.941856977243839e-06, "loss": 0.4712, "step": 22343 }, { "epoch": 0.6110260336906586, "grad_norm": 1.395827293395996, "learning_rate": 6.94101371812294e-06, "loss": 0.3784, "step": 22344 }, { "epoch": 0.6110533800043754, "grad_norm": 1.615525722503662, "learning_rate": 6.940170482998922e-06, "loss": 0.4782, "step": 22345 }, { "epoch": 0.6110807263180923, "grad_norm": 1.2306814193725586, "learning_rate": 6.939327271878409e-06, "loss": 0.4864, "step": 22346 }, { "epoch": 0.6111080726318092, "grad_norm": 2.0497994422912598, "learning_rate": 6.938484084768013e-06, "loss": 0.3222, "step": 22347 }, { "epoch": 0.6111354189455261, "grad_norm": 1.7517725229263306, "learning_rate": 6.937640921674346e-06, "loss": 0.4994, "step": 22348 }, { "epoch": 0.6111627652592431, "grad_norm": 1.157944679260254, "learning_rate": 6.936797782604026e-06, "loss": 0.5002, "step": 22349 }, { "epoch": 0.61119011157296, "grad_norm": 1.319275975227356, "learning_rate": 6.9359546675636625e-06, "loss": 0.3766, "step": 22350 }, { "epoch": 0.6112174578866769, "grad_norm": 1.3352670669555664, "learning_rate": 6.935111576559872e-06, "loss": 0.493, "step": 22351 }, { "epoch": 0.6112448042003937, "grad_norm": 1.1785085201263428, "learning_rate": 6.934268509599272e-06, "loss": 0.4962, "step": 22352 }, { "epoch": 0.6112721505141107, "grad_norm": 1.4334954023361206, "learning_rate": 6.93342546668847e-06, "loss": 0.4934, "step": 22353 }, { "epoch": 0.6112994968278276, "grad_norm": 1.1941107511520386, "learning_rate": 6.932582447834084e-06, "loss": 0.7302, "step": 22354 }, { "epoch": 0.6113268431415445, "grad_norm": 1.6577154397964478, "learning_rate": 6.931739453042725e-06, "loss": 0.3855, "step": 22355 }, { "epoch": 0.6113541894552614, "grad_norm": 2.33366060256958, "learning_rate": 6.930896482321006e-06, "loss": 0.5193, "step": 22356 }, { "epoch": 0.6113815357689784, "grad_norm": 1.4578431844711304, "learning_rate": 6.930053535675543e-06, "loss": 0.3636, "step": 22357 }, { "epoch": 0.6114088820826953, "grad_norm": 1.3110332489013672, "learning_rate": 6.9292106131129425e-06, "loss": 0.4849, "step": 22358 }, { "epoch": 0.6114362283964122, "grad_norm": 1.1978625059127808, "learning_rate": 6.928367714639821e-06, "loss": 0.4875, "step": 22359 }, { "epoch": 0.611463574710129, "grad_norm": 1.3526259660720825, "learning_rate": 6.927524840262792e-06, "loss": 0.5134, "step": 22360 }, { "epoch": 0.611490921023846, "grad_norm": 1.1171053647994995, "learning_rate": 6.9266819899884645e-06, "loss": 0.4872, "step": 22361 }, { "epoch": 0.6115182673375629, "grad_norm": 1.3529902696609497, "learning_rate": 6.925839163823451e-06, "loss": 0.3387, "step": 22362 }, { "epoch": 0.6115456136512798, "grad_norm": 1.2034939527511597, "learning_rate": 6.924996361774367e-06, "loss": 0.4951, "step": 22363 }, { "epoch": 0.6115729599649967, "grad_norm": 1.247699499130249, "learning_rate": 6.924153583847819e-06, "loss": 0.515, "step": 22364 }, { "epoch": 0.6116003062787136, "grad_norm": 1.2994670867919922, "learning_rate": 6.923310830050423e-06, "loss": 0.5208, "step": 22365 }, { "epoch": 0.6116276525924306, "grad_norm": 1.4730175733566284, "learning_rate": 6.922468100388785e-06, "loss": 0.4469, "step": 22366 }, { "epoch": 0.6116549989061475, "grad_norm": 1.346319556236267, "learning_rate": 6.921625394869519e-06, "loss": 0.4734, "step": 22367 }, { "epoch": 0.6116823452198643, "grad_norm": 1.019984245300293, "learning_rate": 6.920782713499236e-06, "loss": 0.4754, "step": 22368 }, { "epoch": 0.6117096915335812, "grad_norm": 1.157167911529541, "learning_rate": 6.919940056284546e-06, "loss": 0.457, "step": 22369 }, { "epoch": 0.6117370378472982, "grad_norm": 1.3003003597259521, "learning_rate": 6.919097423232062e-06, "loss": 0.7249, "step": 22370 }, { "epoch": 0.6117643841610151, "grad_norm": 1.2178232669830322, "learning_rate": 6.91825481434839e-06, "loss": 0.4661, "step": 22371 }, { "epoch": 0.611791730474732, "grad_norm": 1.1341724395751953, "learning_rate": 6.917412229640141e-06, "loss": 0.4807, "step": 22372 }, { "epoch": 0.6118190767884489, "grad_norm": 1.1009491682052612, "learning_rate": 6.916569669113926e-06, "loss": 0.4773, "step": 22373 }, { "epoch": 0.6118464231021659, "grad_norm": 1.182433009147644, "learning_rate": 6.915727132776355e-06, "loss": 0.7486, "step": 22374 }, { "epoch": 0.6118737694158828, "grad_norm": 1.1552765369415283, "learning_rate": 6.914884620634035e-06, "loss": 0.4872, "step": 22375 }, { "epoch": 0.6119011157295996, "grad_norm": 1.2039037942886353, "learning_rate": 6.9140421326935744e-06, "loss": 0.4712, "step": 22376 }, { "epoch": 0.6119284620433165, "grad_norm": 1.2540104389190674, "learning_rate": 6.91319966896159e-06, "loss": 0.468, "step": 22377 }, { "epoch": 0.6119558083570334, "grad_norm": 1.395526647567749, "learning_rate": 6.912357229444682e-06, "loss": 0.7778, "step": 22378 }, { "epoch": 0.6119831546707504, "grad_norm": 1.2468369007110596, "learning_rate": 6.9115148141494644e-06, "loss": 0.4676, "step": 22379 }, { "epoch": 0.6120105009844673, "grad_norm": 1.2534611225128174, "learning_rate": 6.910672423082543e-06, "loss": 0.454, "step": 22380 }, { "epoch": 0.6120378472981842, "grad_norm": 1.2165950536727905, "learning_rate": 6.909830056250527e-06, "loss": 0.4734, "step": 22381 }, { "epoch": 0.6120651936119011, "grad_norm": 1.7309176921844482, "learning_rate": 6.908987713660027e-06, "loss": 0.3962, "step": 22382 }, { "epoch": 0.6120925399256181, "grad_norm": 1.4655146598815918, "learning_rate": 6.9081453953176446e-06, "loss": 0.4823, "step": 22383 }, { "epoch": 0.6121198862393349, "grad_norm": 1.4004839658737183, "learning_rate": 6.9073031012299915e-06, "loss": 0.5121, "step": 22384 }, { "epoch": 0.6121472325530518, "grad_norm": 1.3632597923278809, "learning_rate": 6.906460831403679e-06, "loss": 0.4795, "step": 22385 }, { "epoch": 0.6121745788667687, "grad_norm": 1.4735770225524902, "learning_rate": 6.905618585845309e-06, "loss": 0.387, "step": 22386 }, { "epoch": 0.6122019251804857, "grad_norm": 1.462928295135498, "learning_rate": 6.9047763645614894e-06, "loss": 0.4957, "step": 22387 }, { "epoch": 0.6122292714942026, "grad_norm": 1.4521280527114868, "learning_rate": 6.903934167558829e-06, "loss": 0.7398, "step": 22388 }, { "epoch": 0.6122566178079195, "grad_norm": 1.4048810005187988, "learning_rate": 6.903091994843932e-06, "loss": 0.4584, "step": 22389 }, { "epoch": 0.6122839641216364, "grad_norm": 1.208604097366333, "learning_rate": 6.90224984642341e-06, "loss": 0.4738, "step": 22390 }, { "epoch": 0.6123113104353534, "grad_norm": 1.6770176887512207, "learning_rate": 6.901407722303863e-06, "loss": 0.7631, "step": 22391 }, { "epoch": 0.6123386567490702, "grad_norm": 1.2427030801773071, "learning_rate": 6.900565622491904e-06, "loss": 0.505, "step": 22392 }, { "epoch": 0.6123660030627871, "grad_norm": 1.4764704704284668, "learning_rate": 6.899723546994133e-06, "loss": 0.4758, "step": 22393 }, { "epoch": 0.612393349376504, "grad_norm": 1.1567184925079346, "learning_rate": 6.898881495817157e-06, "loss": 0.4731, "step": 22394 }, { "epoch": 0.612420695690221, "grad_norm": 1.4119725227355957, "learning_rate": 6.8980394689675855e-06, "loss": 0.4549, "step": 22395 }, { "epoch": 0.6124480420039379, "grad_norm": 1.2963694334030151, "learning_rate": 6.897197466452019e-06, "loss": 0.3351, "step": 22396 }, { "epoch": 0.6124753883176548, "grad_norm": 1.3926522731781006, "learning_rate": 6.896355488277067e-06, "loss": 0.363, "step": 22397 }, { "epoch": 0.6125027346313717, "grad_norm": 1.326309084892273, "learning_rate": 6.895513534449332e-06, "loss": 0.5149, "step": 22398 }, { "epoch": 0.6125300809450887, "grad_norm": 1.2793104648590088, "learning_rate": 6.89467160497542e-06, "loss": 0.5113, "step": 22399 }, { "epoch": 0.6125574272588055, "grad_norm": 1.3197627067565918, "learning_rate": 6.893829699861936e-06, "loss": 0.456, "step": 22400 }, { "epoch": 0.6125847735725224, "grad_norm": 1.4661409854888916, "learning_rate": 6.892987819115482e-06, "loss": 0.454, "step": 22401 }, { "epoch": 0.6126121198862393, "grad_norm": 1.4959090948104858, "learning_rate": 6.892145962742664e-06, "loss": 0.5203, "step": 22402 }, { "epoch": 0.6126394661999562, "grad_norm": 1.172285795211792, "learning_rate": 6.891304130750089e-06, "loss": 0.4809, "step": 22403 }, { "epoch": 0.6126668125136732, "grad_norm": 1.3508045673370361, "learning_rate": 6.890462323144357e-06, "loss": 0.516, "step": 22404 }, { "epoch": 0.6126941588273901, "grad_norm": 1.237280011177063, "learning_rate": 6.889620539932072e-06, "loss": 0.5021, "step": 22405 }, { "epoch": 0.612721505141107, "grad_norm": 1.439129114151001, "learning_rate": 6.8887787811198425e-06, "loss": 0.4532, "step": 22406 }, { "epoch": 0.6127488514548239, "grad_norm": 1.3082486391067505, "learning_rate": 6.887937046714266e-06, "loss": 0.4556, "step": 22407 }, { "epoch": 0.6127761977685408, "grad_norm": 1.2790412902832031, "learning_rate": 6.887095336721945e-06, "loss": 0.5205, "step": 22408 }, { "epoch": 0.6128035440822577, "grad_norm": 1.245414137840271, "learning_rate": 6.886253651149488e-06, "loss": 0.3323, "step": 22409 }, { "epoch": 0.6128308903959746, "grad_norm": 1.1642462015151978, "learning_rate": 6.885411990003492e-06, "loss": 0.4875, "step": 22410 }, { "epoch": 0.6128582367096915, "grad_norm": 1.3179004192352295, "learning_rate": 6.884570353290562e-06, "loss": 0.4985, "step": 22411 }, { "epoch": 0.6128855830234085, "grad_norm": 1.2380750179290771, "learning_rate": 6.883728741017304e-06, "loss": 0.387, "step": 22412 }, { "epoch": 0.6129129293371254, "grad_norm": 1.5106568336486816, "learning_rate": 6.882887153190315e-06, "loss": 0.3586, "step": 22413 }, { "epoch": 0.6129402756508423, "grad_norm": 1.8705098628997803, "learning_rate": 6.8820455898162005e-06, "loss": 0.7481, "step": 22414 }, { "epoch": 0.6129676219645592, "grad_norm": 1.5902814865112305, "learning_rate": 6.88120405090156e-06, "loss": 0.4808, "step": 22415 }, { "epoch": 0.612994968278276, "grad_norm": 1.164537787437439, "learning_rate": 6.880362536452994e-06, "loss": 0.4756, "step": 22416 }, { "epoch": 0.613022314591993, "grad_norm": 1.4616717100143433, "learning_rate": 6.879521046477109e-06, "loss": 0.4775, "step": 22417 }, { "epoch": 0.6130496609057099, "grad_norm": 2.4808175563812256, "learning_rate": 6.878679580980501e-06, "loss": 0.4629, "step": 22418 }, { "epoch": 0.6130770072194268, "grad_norm": 1.5876374244689941, "learning_rate": 6.877838139969775e-06, "loss": 0.4826, "step": 22419 }, { "epoch": 0.6131043535331437, "grad_norm": 1.3039945363998413, "learning_rate": 6.87699672345153e-06, "loss": 0.4827, "step": 22420 }, { "epoch": 0.6131316998468607, "grad_norm": 1.1889421939849854, "learning_rate": 6.876155331432365e-06, "loss": 0.4493, "step": 22421 }, { "epoch": 0.6131590461605776, "grad_norm": 1.2043681144714355, "learning_rate": 6.875313963918885e-06, "loss": 0.4711, "step": 22422 }, { "epoch": 0.6131863924742945, "grad_norm": 1.18799889087677, "learning_rate": 6.874472620917685e-06, "loss": 0.7351, "step": 22423 }, { "epoch": 0.6132137387880113, "grad_norm": 1.3588701486587524, "learning_rate": 6.873631302435368e-06, "loss": 0.7285, "step": 22424 }, { "epoch": 0.6132410851017283, "grad_norm": 1.3551948070526123, "learning_rate": 6.872790008478536e-06, "loss": 0.489, "step": 22425 }, { "epoch": 0.6132684314154452, "grad_norm": 1.4230811595916748, "learning_rate": 6.871948739053784e-06, "loss": 0.4883, "step": 22426 }, { "epoch": 0.6132957777291621, "grad_norm": 1.7518495321273804, "learning_rate": 6.871107494167714e-06, "loss": 0.3526, "step": 22427 }, { "epoch": 0.613323124042879, "grad_norm": 1.327786922454834, "learning_rate": 6.870266273826927e-06, "loss": 0.773, "step": 22428 }, { "epoch": 0.613350470356596, "grad_norm": 1.560921311378479, "learning_rate": 6.869425078038017e-06, "loss": 0.383, "step": 22429 }, { "epoch": 0.6133778166703129, "grad_norm": 1.478595495223999, "learning_rate": 6.86858390680759e-06, "loss": 0.7168, "step": 22430 }, { "epoch": 0.6134051629840298, "grad_norm": 1.2159383296966553, "learning_rate": 6.8677427601422384e-06, "loss": 0.4885, "step": 22431 }, { "epoch": 0.6134325092977466, "grad_norm": 1.3527954816818237, "learning_rate": 6.866901638048564e-06, "loss": 0.4952, "step": 22432 }, { "epoch": 0.6134598556114635, "grad_norm": 1.3681764602661133, "learning_rate": 6.8660605405331655e-06, "loss": 0.473, "step": 22433 }, { "epoch": 0.6134872019251805, "grad_norm": 1.7284562587738037, "learning_rate": 6.865219467602639e-06, "loss": 0.358, "step": 22434 }, { "epoch": 0.6135145482388974, "grad_norm": 1.2768499851226807, "learning_rate": 6.864378419263584e-06, "loss": 0.4963, "step": 22435 }, { "epoch": 0.6135418945526143, "grad_norm": 1.3586386442184448, "learning_rate": 6.863537395522597e-06, "loss": 0.4932, "step": 22436 }, { "epoch": 0.6135692408663312, "grad_norm": 1.3005282878875732, "learning_rate": 6.862696396386277e-06, "loss": 0.4733, "step": 22437 }, { "epoch": 0.6135965871800482, "grad_norm": 1.2585681676864624, "learning_rate": 6.861855421861222e-06, "loss": 0.7162, "step": 22438 }, { "epoch": 0.6136239334937651, "grad_norm": 1.0809855461120605, "learning_rate": 6.8610144719540265e-06, "loss": 0.3678, "step": 22439 }, { "epoch": 0.6136512798074819, "grad_norm": 1.4128236770629883, "learning_rate": 6.860173546671289e-06, "loss": 0.4962, "step": 22440 }, { "epoch": 0.6136786261211988, "grad_norm": 1.388309121131897, "learning_rate": 6.859332646019609e-06, "loss": 0.4839, "step": 22441 }, { "epoch": 0.6137059724349158, "grad_norm": 1.5012255907058716, "learning_rate": 6.858491770005582e-06, "loss": 0.4951, "step": 22442 }, { "epoch": 0.6137333187486327, "grad_norm": 1.477957844734192, "learning_rate": 6.857650918635799e-06, "loss": 0.4971, "step": 22443 }, { "epoch": 0.6137606650623496, "grad_norm": 1.2460944652557373, "learning_rate": 6.856810091916861e-06, "loss": 0.4928, "step": 22444 }, { "epoch": 0.6137880113760665, "grad_norm": 1.2435269355773926, "learning_rate": 6.855969289855362e-06, "loss": 0.4677, "step": 22445 }, { "epoch": 0.6138153576897835, "grad_norm": 1.2650556564331055, "learning_rate": 6.8551285124579e-06, "loss": 0.4486, "step": 22446 }, { "epoch": 0.6138427040035004, "grad_norm": 1.2835049629211426, "learning_rate": 6.854287759731071e-06, "loss": 0.47, "step": 22447 }, { "epoch": 0.6138700503172172, "grad_norm": 1.5555157661437988, "learning_rate": 6.853447031681467e-06, "loss": 0.7493, "step": 22448 }, { "epoch": 0.6138973966309341, "grad_norm": 1.4047138690948486, "learning_rate": 6.852606328315686e-06, "loss": 0.7286, "step": 22449 }, { "epoch": 0.613924742944651, "grad_norm": 1.1157475709915161, "learning_rate": 6.851765649640325e-06, "loss": 0.4644, "step": 22450 }, { "epoch": 0.613952089258368, "grad_norm": 1.220402479171753, "learning_rate": 6.850924995661974e-06, "loss": 0.4414, "step": 22451 }, { "epoch": 0.6139794355720849, "grad_norm": 1.3143692016601562, "learning_rate": 6.850084366387232e-06, "loss": 0.4673, "step": 22452 }, { "epoch": 0.6140067818858018, "grad_norm": 1.1704411506652832, "learning_rate": 6.84924376182269e-06, "loss": 0.4799, "step": 22453 }, { "epoch": 0.6140341281995187, "grad_norm": 1.4655191898345947, "learning_rate": 6.848403181974943e-06, "loss": 0.3587, "step": 22454 }, { "epoch": 0.6140614745132356, "grad_norm": 1.2982661724090576, "learning_rate": 6.8475626268505905e-06, "loss": 0.4844, "step": 22455 }, { "epoch": 0.6140888208269525, "grad_norm": 1.257993459701538, "learning_rate": 6.846722096456218e-06, "loss": 0.7061, "step": 22456 }, { "epoch": 0.6141161671406694, "grad_norm": 1.495186686515808, "learning_rate": 6.8458815907984254e-06, "loss": 0.4659, "step": 22457 }, { "epoch": 0.6141435134543863, "grad_norm": 1.4121674299240112, "learning_rate": 6.845041109883803e-06, "loss": 0.4906, "step": 22458 }, { "epoch": 0.6141708597681033, "grad_norm": 1.2893025875091553, "learning_rate": 6.844200653718944e-06, "loss": 0.4855, "step": 22459 }, { "epoch": 0.6141982060818202, "grad_norm": 1.3159905672073364, "learning_rate": 6.8433602223104455e-06, "loss": 0.4739, "step": 22460 }, { "epoch": 0.6142255523955371, "grad_norm": 1.5039150714874268, "learning_rate": 6.842519815664897e-06, "loss": 0.5081, "step": 22461 }, { "epoch": 0.614252898709254, "grad_norm": 1.4102157354354858, "learning_rate": 6.8416794337888895e-06, "loss": 0.4987, "step": 22462 }, { "epoch": 0.6142802450229708, "grad_norm": 1.5836960077285767, "learning_rate": 6.840839076689021e-06, "loss": 0.3502, "step": 22463 }, { "epoch": 0.6143075913366878, "grad_norm": 1.3862634897232056, "learning_rate": 6.839998744371878e-06, "loss": 0.4283, "step": 22464 }, { "epoch": 0.6143349376504047, "grad_norm": 1.1693196296691895, "learning_rate": 6.83915843684406e-06, "loss": 0.4982, "step": 22465 }, { "epoch": 0.6143622839641216, "grad_norm": 1.2998645305633545, "learning_rate": 6.83831815411215e-06, "loss": 0.5086, "step": 22466 }, { "epoch": 0.6143896302778385, "grad_norm": 1.4015631675720215, "learning_rate": 6.8374778961827444e-06, "loss": 0.4827, "step": 22467 }, { "epoch": 0.6144169765915555, "grad_norm": 1.269243597984314, "learning_rate": 6.836637663062439e-06, "loss": 0.5022, "step": 22468 }, { "epoch": 0.6144443229052724, "grad_norm": 1.8148595094680786, "learning_rate": 6.835797454757818e-06, "loss": 0.4136, "step": 22469 }, { "epoch": 0.6144716692189893, "grad_norm": 1.4876019954681396, "learning_rate": 6.834957271275474e-06, "loss": 0.4421, "step": 22470 }, { "epoch": 0.6144990155327061, "grad_norm": 2.105567455291748, "learning_rate": 6.834117112622003e-06, "loss": 0.4511, "step": 22471 }, { "epoch": 0.6145263618464231, "grad_norm": 1.174536943435669, "learning_rate": 6.833276978803989e-06, "loss": 0.5048, "step": 22472 }, { "epoch": 0.61455370816014, "grad_norm": 1.192568302154541, "learning_rate": 6.83243686982803e-06, "loss": 0.5168, "step": 22473 }, { "epoch": 0.6145810544738569, "grad_norm": 5.937448978424072, "learning_rate": 6.8315967857007085e-06, "loss": 0.3558, "step": 22474 }, { "epoch": 0.6146084007875738, "grad_norm": 2.9191107749938965, "learning_rate": 6.830756726428618e-06, "loss": 0.3388, "step": 22475 }, { "epoch": 0.6146357471012908, "grad_norm": 1.304513931274414, "learning_rate": 6.829916692018352e-06, "loss": 0.5121, "step": 22476 }, { "epoch": 0.6146630934150077, "grad_norm": 1.25473153591156, "learning_rate": 6.829076682476501e-06, "loss": 0.4712, "step": 22477 }, { "epoch": 0.6146904397287246, "grad_norm": 1.1284356117248535, "learning_rate": 6.828236697809645e-06, "loss": 0.8024, "step": 22478 }, { "epoch": 0.6147177860424414, "grad_norm": 1.4028280973434448, "learning_rate": 6.8273967380243826e-06, "loss": 0.4702, "step": 22479 }, { "epoch": 0.6147451323561584, "grad_norm": 1.4151936769485474, "learning_rate": 6.826556803127298e-06, "loss": 0.3461, "step": 22480 }, { "epoch": 0.6147724786698753, "grad_norm": 1.4900845289230347, "learning_rate": 6.8257168931249805e-06, "loss": 0.472, "step": 22481 }, { "epoch": 0.6147998249835922, "grad_norm": 1.2145285606384277, "learning_rate": 6.8248770080240245e-06, "loss": 0.4694, "step": 22482 }, { "epoch": 0.6148271712973091, "grad_norm": 1.6607754230499268, "learning_rate": 6.824037147831011e-06, "loss": 0.3826, "step": 22483 }, { "epoch": 0.614854517611026, "grad_norm": 1.401054859161377, "learning_rate": 6.8231973125525346e-06, "loss": 0.4573, "step": 22484 }, { "epoch": 0.614881863924743, "grad_norm": 1.393580675125122, "learning_rate": 6.82235750219518e-06, "loss": 0.5132, "step": 22485 }, { "epoch": 0.6149092102384599, "grad_norm": 1.5255088806152344, "learning_rate": 6.8215177167655375e-06, "loss": 0.4426, "step": 22486 }, { "epoch": 0.6149365565521767, "grad_norm": 1.2434717416763306, "learning_rate": 6.820677956270195e-06, "loss": 0.476, "step": 22487 }, { "epoch": 0.6149639028658936, "grad_norm": 1.7986277341842651, "learning_rate": 6.819838220715737e-06, "loss": 0.4787, "step": 22488 }, { "epoch": 0.6149912491796106, "grad_norm": 1.179355502128601, "learning_rate": 6.818998510108754e-06, "loss": 0.4759, "step": 22489 }, { "epoch": 0.6150185954933275, "grad_norm": 1.2682956457138062, "learning_rate": 6.818158824455834e-06, "loss": 0.7349, "step": 22490 }, { "epoch": 0.6150459418070444, "grad_norm": 1.303188443183899, "learning_rate": 6.8173191637635605e-06, "loss": 0.4885, "step": 22491 }, { "epoch": 0.6150732881207613, "grad_norm": 1.1737204790115356, "learning_rate": 6.816479528038522e-06, "loss": 0.4586, "step": 22492 }, { "epoch": 0.6151006344344783, "grad_norm": 1.218379259109497, "learning_rate": 6.815639917287309e-06, "loss": 0.4996, "step": 22493 }, { "epoch": 0.6151279807481952, "grad_norm": 1.1215922832489014, "learning_rate": 6.814800331516501e-06, "loss": 0.4481, "step": 22494 }, { "epoch": 0.615155327061912, "grad_norm": 1.2715566158294678, "learning_rate": 6.81396077073269e-06, "loss": 0.4717, "step": 22495 }, { "epoch": 0.6151826733756289, "grad_norm": 1.2675821781158447, "learning_rate": 6.81312123494246e-06, "loss": 0.4748, "step": 22496 }, { "epoch": 0.6152100196893459, "grad_norm": 1.2941356897354126, "learning_rate": 6.812281724152397e-06, "loss": 0.5059, "step": 22497 }, { "epoch": 0.6152373660030628, "grad_norm": 1.364048719406128, "learning_rate": 6.811442238369087e-06, "loss": 0.4876, "step": 22498 }, { "epoch": 0.6152647123167797, "grad_norm": 1.3628662824630737, "learning_rate": 6.810602777599114e-06, "loss": 0.4762, "step": 22499 }, { "epoch": 0.6152920586304966, "grad_norm": 1.180432677268982, "learning_rate": 6.809763341849067e-06, "loss": 0.5162, "step": 22500 }, { "epoch": 0.6153194049442136, "grad_norm": 1.3712584972381592, "learning_rate": 6.808923931125526e-06, "loss": 0.7364, "step": 22501 }, { "epoch": 0.6153467512579305, "grad_norm": 1.3886150121688843, "learning_rate": 6.8080845454350796e-06, "loss": 0.4958, "step": 22502 }, { "epoch": 0.6153740975716473, "grad_norm": 1.2659130096435547, "learning_rate": 6.807245184784314e-06, "loss": 0.4975, "step": 22503 }, { "epoch": 0.6154014438853642, "grad_norm": 1.391245722770691, "learning_rate": 6.806405849179808e-06, "loss": 0.4458, "step": 22504 }, { "epoch": 0.6154287901990811, "grad_norm": 1.2987934350967407, "learning_rate": 6.80556653862815e-06, "loss": 0.5, "step": 22505 }, { "epoch": 0.6154561365127981, "grad_norm": 1.3586087226867676, "learning_rate": 6.804727253135926e-06, "loss": 0.7299, "step": 22506 }, { "epoch": 0.615483482826515, "grad_norm": 1.1070600748062134, "learning_rate": 6.803887992709714e-06, "loss": 0.7549, "step": 22507 }, { "epoch": 0.6155108291402319, "grad_norm": 1.3786430358886719, "learning_rate": 6.803048757356106e-06, "loss": 0.483, "step": 22508 }, { "epoch": 0.6155381754539488, "grad_norm": 1.292285680770874, "learning_rate": 6.802209547081675e-06, "loss": 0.4879, "step": 22509 }, { "epoch": 0.6155655217676658, "grad_norm": 1.1738085746765137, "learning_rate": 6.8013703618930135e-06, "loss": 0.4566, "step": 22510 }, { "epoch": 0.6155928680813826, "grad_norm": 1.2164467573165894, "learning_rate": 6.800531201796702e-06, "loss": 0.465, "step": 22511 }, { "epoch": 0.6156202143950995, "grad_norm": 1.3296499252319336, "learning_rate": 6.799692066799325e-06, "loss": 0.4945, "step": 22512 }, { "epoch": 0.6156475607088164, "grad_norm": 1.4899874925613403, "learning_rate": 6.798852956907458e-06, "loss": 0.4742, "step": 22513 }, { "epoch": 0.6156749070225334, "grad_norm": 1.1456654071807861, "learning_rate": 6.79801387212769e-06, "loss": 0.4606, "step": 22514 }, { "epoch": 0.6157022533362503, "grad_norm": 1.1362059116363525, "learning_rate": 6.797174812466604e-06, "loss": 0.4622, "step": 22515 }, { "epoch": 0.6157295996499672, "grad_norm": 1.5339902639389038, "learning_rate": 6.796335777930778e-06, "loss": 0.4912, "step": 22516 }, { "epoch": 0.6157569459636841, "grad_norm": 1.2888286113739014, "learning_rate": 6.795496768526798e-06, "loss": 0.4983, "step": 22517 }, { "epoch": 0.6157842922774011, "grad_norm": 1.6696255207061768, "learning_rate": 6.7946577842612424e-06, "loss": 0.375, "step": 22518 }, { "epoch": 0.6158116385911179, "grad_norm": 3.103034257888794, "learning_rate": 6.793818825140695e-06, "loss": 0.7376, "step": 22519 }, { "epoch": 0.6158389849048348, "grad_norm": 1.2122364044189453, "learning_rate": 6.792979891171737e-06, "loss": 0.5008, "step": 22520 }, { "epoch": 0.6158663312185517, "grad_norm": 1.3366498947143555, "learning_rate": 6.792140982360948e-06, "loss": 0.4856, "step": 22521 }, { "epoch": 0.6158936775322686, "grad_norm": 1.1514530181884766, "learning_rate": 6.791302098714912e-06, "loss": 0.4604, "step": 22522 }, { "epoch": 0.6159210238459856, "grad_norm": 1.4219701290130615, "learning_rate": 6.790463240240206e-06, "loss": 0.4624, "step": 22523 }, { "epoch": 0.6159483701597025, "grad_norm": 1.5693731307983398, "learning_rate": 6.7896244069434134e-06, "loss": 0.488, "step": 22524 }, { "epoch": 0.6159757164734194, "grad_norm": 1.318783164024353, "learning_rate": 6.788785598831116e-06, "loss": 0.4661, "step": 22525 }, { "epoch": 0.6160030627871363, "grad_norm": 1.181384563446045, "learning_rate": 6.7879468159098895e-06, "loss": 0.4643, "step": 22526 }, { "epoch": 0.6160304091008532, "grad_norm": 1.6221165657043457, "learning_rate": 6.787108058186316e-06, "loss": 0.4534, "step": 22527 }, { "epoch": 0.6160577554145701, "grad_norm": 1.2404383420944214, "learning_rate": 6.786269325666977e-06, "loss": 0.4841, "step": 22528 }, { "epoch": 0.616085101728287, "grad_norm": 1.914926290512085, "learning_rate": 6.785430618358449e-06, "loss": 0.4035, "step": 22529 }, { "epoch": 0.6161124480420039, "grad_norm": 1.5632132291793823, "learning_rate": 6.784591936267315e-06, "loss": 0.3559, "step": 22530 }, { "epoch": 0.6161397943557209, "grad_norm": 1.3855042457580566, "learning_rate": 6.783753279400152e-06, "loss": 0.488, "step": 22531 }, { "epoch": 0.6161671406694378, "grad_norm": 1.4001625776290894, "learning_rate": 6.782914647763538e-06, "loss": 0.3604, "step": 22532 }, { "epoch": 0.6161944869831547, "grad_norm": 2.1164958477020264, "learning_rate": 6.782076041364055e-06, "loss": 0.4927, "step": 22533 }, { "epoch": 0.6162218332968716, "grad_norm": 1.303126573562622, "learning_rate": 6.781237460208279e-06, "loss": 0.4862, "step": 22534 }, { "epoch": 0.6162491796105884, "grad_norm": 1.2387830018997192, "learning_rate": 6.780398904302789e-06, "loss": 0.4715, "step": 22535 }, { "epoch": 0.6162765259243054, "grad_norm": 1.338819146156311, "learning_rate": 6.779560373654164e-06, "loss": 0.468, "step": 22536 }, { "epoch": 0.6163038722380223, "grad_norm": 1.2750136852264404, "learning_rate": 6.7787218682689805e-06, "loss": 0.4942, "step": 22537 }, { "epoch": 0.6163312185517392, "grad_norm": 1.35056471824646, "learning_rate": 6.777883388153821e-06, "loss": 0.4661, "step": 22538 }, { "epoch": 0.6163585648654561, "grad_norm": 1.2652636766433716, "learning_rate": 6.777044933315255e-06, "loss": 0.4774, "step": 22539 }, { "epoch": 0.6163859111791731, "grad_norm": 1.5575413703918457, "learning_rate": 6.776206503759866e-06, "loss": 0.5116, "step": 22540 }, { "epoch": 0.61641325749289, "grad_norm": 1.2633938789367676, "learning_rate": 6.775368099494231e-06, "loss": 0.4671, "step": 22541 }, { "epoch": 0.6164406038066069, "grad_norm": 1.2835315465927124, "learning_rate": 6.774529720524925e-06, "loss": 0.4792, "step": 22542 }, { "epoch": 0.6164679501203237, "grad_norm": 1.1963061094284058, "learning_rate": 6.773691366858526e-06, "loss": 0.3596, "step": 22543 }, { "epoch": 0.6164952964340407, "grad_norm": 1.2897454500198364, "learning_rate": 6.77285303850161e-06, "loss": 0.467, "step": 22544 }, { "epoch": 0.6165226427477576, "grad_norm": 1.715234398841858, "learning_rate": 6.772014735460752e-06, "loss": 0.5025, "step": 22545 }, { "epoch": 0.6165499890614745, "grad_norm": 1.4040380716323853, "learning_rate": 6.771176457742532e-06, "loss": 0.508, "step": 22546 }, { "epoch": 0.6165773353751914, "grad_norm": 1.3394973278045654, "learning_rate": 6.7703382053535285e-06, "loss": 0.4633, "step": 22547 }, { "epoch": 0.6166046816889084, "grad_norm": 1.1616432666778564, "learning_rate": 6.769499978300307e-06, "loss": 0.4663, "step": 22548 }, { "epoch": 0.6166320280026253, "grad_norm": 1.347121238708496, "learning_rate": 6.768661776589448e-06, "loss": 0.48, "step": 22549 }, { "epoch": 0.6166593743163422, "grad_norm": 1.1580899953842163, "learning_rate": 6.767823600227531e-06, "loss": 0.4661, "step": 22550 }, { "epoch": 0.616686720630059, "grad_norm": 1.4050116539001465, "learning_rate": 6.766985449221126e-06, "loss": 0.3577, "step": 22551 }, { "epoch": 0.616714066943776, "grad_norm": 1.3420060873031616, "learning_rate": 6.766147323576813e-06, "loss": 0.4594, "step": 22552 }, { "epoch": 0.6167414132574929, "grad_norm": 1.4413983821868896, "learning_rate": 6.765309223301162e-06, "loss": 0.3485, "step": 22553 }, { "epoch": 0.6167687595712098, "grad_norm": 1.9398688077926636, "learning_rate": 6.76447114840075e-06, "loss": 0.7418, "step": 22554 }, { "epoch": 0.6167961058849267, "grad_norm": 0.9215204119682312, "learning_rate": 6.763633098882152e-06, "loss": 0.3249, "step": 22555 }, { "epoch": 0.6168234521986437, "grad_norm": 1.3797255754470825, "learning_rate": 6.762795074751941e-06, "loss": 0.3403, "step": 22556 }, { "epoch": 0.6168507985123606, "grad_norm": 1.1945379972457886, "learning_rate": 6.761957076016691e-06, "loss": 0.4324, "step": 22557 }, { "epoch": 0.6168781448260774, "grad_norm": 1.5431668758392334, "learning_rate": 6.761119102682978e-06, "loss": 0.4753, "step": 22558 }, { "epoch": 0.6169054911397943, "grad_norm": 1.4625827074050903, "learning_rate": 6.760281154757373e-06, "loss": 0.447, "step": 22559 }, { "epoch": 0.6169328374535112, "grad_norm": 1.3569761514663696, "learning_rate": 6.759443232246452e-06, "loss": 0.4521, "step": 22560 }, { "epoch": 0.6169601837672282, "grad_norm": 1.4508856534957886, "learning_rate": 6.7586053351567845e-06, "loss": 0.5062, "step": 22561 }, { "epoch": 0.6169875300809451, "grad_norm": 3.905107021331787, "learning_rate": 6.757767463494948e-06, "loss": 0.7797, "step": 22562 }, { "epoch": 0.617014876394662, "grad_norm": 1.3679194450378418, "learning_rate": 6.756929617267514e-06, "loss": 0.5281, "step": 22563 }, { "epoch": 0.6170422227083789, "grad_norm": 1.2088900804519653, "learning_rate": 6.756091796481053e-06, "loss": 0.4779, "step": 22564 }, { "epoch": 0.6170695690220959, "grad_norm": 1.6236183643341064, "learning_rate": 6.75525400114214e-06, "loss": 0.3797, "step": 22565 }, { "epoch": 0.6170969153358127, "grad_norm": 1.441127896308899, "learning_rate": 6.754416231257343e-06, "loss": 0.4415, "step": 22566 }, { "epoch": 0.6171242616495296, "grad_norm": 1.1247016191482544, "learning_rate": 6.753578486833239e-06, "loss": 0.4726, "step": 22567 }, { "epoch": 0.6171516079632465, "grad_norm": 1.3915525674819946, "learning_rate": 6.7527407678764e-06, "loss": 0.4645, "step": 22568 }, { "epoch": 0.6171789542769635, "grad_norm": 1.234541654586792, "learning_rate": 6.751903074393393e-06, "loss": 0.4893, "step": 22569 }, { "epoch": 0.6172063005906804, "grad_norm": 4.164954662322998, "learning_rate": 6.7510654063907944e-06, "loss": 0.5039, "step": 22570 }, { "epoch": 0.6172336469043973, "grad_norm": 1.1359230279922485, "learning_rate": 6.750227763875174e-06, "loss": 0.4878, "step": 22571 }, { "epoch": 0.6172609932181142, "grad_norm": 1.538230299949646, "learning_rate": 6.7493901468531e-06, "loss": 0.3969, "step": 22572 }, { "epoch": 0.6172883395318312, "grad_norm": 1.2495131492614746, "learning_rate": 6.748552555331147e-06, "loss": 0.4746, "step": 22573 }, { "epoch": 0.617315685845548, "grad_norm": 1.2768449783325195, "learning_rate": 6.747714989315883e-06, "loss": 0.458, "step": 22574 }, { "epoch": 0.6173430321592649, "grad_norm": 1.5459455251693726, "learning_rate": 6.74687744881388e-06, "loss": 0.3649, "step": 22575 }, { "epoch": 0.6173703784729818, "grad_norm": 1.1944941282272339, "learning_rate": 6.7460399338317095e-06, "loss": 0.7573, "step": 22576 }, { "epoch": 0.6173977247866987, "grad_norm": 1.616557002067566, "learning_rate": 6.745202444375938e-06, "loss": 0.3208, "step": 22577 }, { "epoch": 0.6174250711004157, "grad_norm": 1.3682888746261597, "learning_rate": 6.7443649804531395e-06, "loss": 0.496, "step": 22578 }, { "epoch": 0.6174524174141326, "grad_norm": 1.2636550664901733, "learning_rate": 6.743527542069879e-06, "loss": 0.4897, "step": 22579 }, { "epoch": 0.6174797637278495, "grad_norm": 1.3036344051361084, "learning_rate": 6.742690129232728e-06, "loss": 0.459, "step": 22580 }, { "epoch": 0.6175071100415664, "grad_norm": 1.379759669303894, "learning_rate": 6.74185274194826e-06, "loss": 0.4836, "step": 22581 }, { "epoch": 0.6175344563552833, "grad_norm": 1.4422850608825684, "learning_rate": 6.741015380223041e-06, "loss": 0.4772, "step": 22582 }, { "epoch": 0.6175618026690002, "grad_norm": 1.2120745182037354, "learning_rate": 6.740178044063636e-06, "loss": 0.4605, "step": 22583 }, { "epoch": 0.6175891489827171, "grad_norm": 1.1585034132003784, "learning_rate": 6.7393407334766155e-06, "loss": 0.4905, "step": 22584 }, { "epoch": 0.617616495296434, "grad_norm": 1.6817524433135986, "learning_rate": 6.738503448468553e-06, "loss": 0.5096, "step": 22585 }, { "epoch": 0.617643841610151, "grad_norm": 1.1404675245285034, "learning_rate": 6.737666189046011e-06, "loss": 0.7493, "step": 22586 }, { "epoch": 0.6176711879238679, "grad_norm": 1.2143142223358154, "learning_rate": 6.7368289552155594e-06, "loss": 0.4904, "step": 22587 }, { "epoch": 0.6176985342375848, "grad_norm": 1.1514390707015991, "learning_rate": 6.735991746983765e-06, "loss": 0.7447, "step": 22588 }, { "epoch": 0.6177258805513017, "grad_norm": 1.3286525011062622, "learning_rate": 6.735154564357198e-06, "loss": 0.4715, "step": 22589 }, { "epoch": 0.6177532268650185, "grad_norm": 1.6315423250198364, "learning_rate": 6.734317407342428e-06, "loss": 0.3694, "step": 22590 }, { "epoch": 0.6177805731787355, "grad_norm": 1.4551721811294556, "learning_rate": 6.7334802759460135e-06, "loss": 0.502, "step": 22591 }, { "epoch": 0.6178079194924524, "grad_norm": 1.0600098371505737, "learning_rate": 6.732643170174529e-06, "loss": 0.4646, "step": 22592 }, { "epoch": 0.6178352658061693, "grad_norm": 1.191308617591858, "learning_rate": 6.731806090034541e-06, "loss": 0.5035, "step": 22593 }, { "epoch": 0.6178626121198862, "grad_norm": 1.2814444303512573, "learning_rate": 6.730969035532613e-06, "loss": 0.4766, "step": 22594 }, { "epoch": 0.6178899584336032, "grad_norm": 1.4761981964111328, "learning_rate": 6.730132006675314e-06, "loss": 0.4554, "step": 22595 }, { "epoch": 0.6179173047473201, "grad_norm": 1.241145133972168, "learning_rate": 6.729295003469207e-06, "loss": 0.4708, "step": 22596 }, { "epoch": 0.617944651061037, "grad_norm": 1.286611557006836, "learning_rate": 6.728458025920861e-06, "loss": 0.4733, "step": 22597 }, { "epoch": 0.6179719973747538, "grad_norm": 1.0674039125442505, "learning_rate": 6.727621074036843e-06, "loss": 0.4791, "step": 22598 }, { "epoch": 0.6179993436884708, "grad_norm": 1.567286491394043, "learning_rate": 6.726784147823715e-06, "loss": 0.3848, "step": 22599 }, { "epoch": 0.6180266900021877, "grad_norm": 1.2829104661941528, "learning_rate": 6.725947247288045e-06, "loss": 0.7374, "step": 22600 }, { "epoch": 0.6180540363159046, "grad_norm": 1.262878179550171, "learning_rate": 6.725110372436397e-06, "loss": 0.7485, "step": 22601 }, { "epoch": 0.6180813826296215, "grad_norm": 2.0486440658569336, "learning_rate": 6.724273523275336e-06, "loss": 0.4874, "step": 22602 }, { "epoch": 0.6181087289433385, "grad_norm": 1.0977668762207031, "learning_rate": 6.723436699811427e-06, "loss": 0.4658, "step": 22603 }, { "epoch": 0.6181360752570554, "grad_norm": 1.5135529041290283, "learning_rate": 6.722599902051237e-06, "loss": 0.3349, "step": 22604 }, { "epoch": 0.6181634215707723, "grad_norm": 1.5245198011398315, "learning_rate": 6.721763130001325e-06, "loss": 0.3423, "step": 22605 }, { "epoch": 0.6181907678844891, "grad_norm": 1.323286771774292, "learning_rate": 6.720926383668262e-06, "loss": 0.4902, "step": 22606 }, { "epoch": 0.618218114198206, "grad_norm": 1.1796940565109253, "learning_rate": 6.720089663058608e-06, "loss": 0.4971, "step": 22607 }, { "epoch": 0.618245460511923, "grad_norm": 1.1445286273956299, "learning_rate": 6.719252968178929e-06, "loss": 0.4874, "step": 22608 }, { "epoch": 0.6182728068256399, "grad_norm": 1.2323265075683594, "learning_rate": 6.718416299035784e-06, "loss": 0.4804, "step": 22609 }, { "epoch": 0.6183001531393568, "grad_norm": 1.2887669801712036, "learning_rate": 6.71757965563574e-06, "loss": 0.5391, "step": 22610 }, { "epoch": 0.6183274994530737, "grad_norm": 1.047134280204773, "learning_rate": 6.716743037985362e-06, "loss": 0.368, "step": 22611 }, { "epoch": 0.6183548457667907, "grad_norm": 1.352021336555481, "learning_rate": 6.71590644609121e-06, "loss": 0.4531, "step": 22612 }, { "epoch": 0.6183821920805076, "grad_norm": 1.1429438591003418, "learning_rate": 6.715069879959846e-06, "loss": 0.5272, "step": 22613 }, { "epoch": 0.6184095383942244, "grad_norm": 1.1981970071792603, "learning_rate": 6.714233339597836e-06, "loss": 0.4805, "step": 22614 }, { "epoch": 0.6184368847079413, "grad_norm": 1.3808315992355347, "learning_rate": 6.71339682501174e-06, "loss": 0.3246, "step": 22615 }, { "epoch": 0.6184642310216583, "grad_norm": 1.2271857261657715, "learning_rate": 6.712560336208123e-06, "loss": 0.4855, "step": 22616 }, { "epoch": 0.6184915773353752, "grad_norm": 1.227830410003662, "learning_rate": 6.711723873193546e-06, "loss": 0.4681, "step": 22617 }, { "epoch": 0.6185189236490921, "grad_norm": 2.155904531478882, "learning_rate": 6.710887435974567e-06, "loss": 0.7539, "step": 22618 }, { "epoch": 0.618546269962809, "grad_norm": 1.2356332540512085, "learning_rate": 6.71005102455775e-06, "loss": 0.4616, "step": 22619 }, { "epoch": 0.618573616276526, "grad_norm": 1.1799442768096924, "learning_rate": 6.7092146389496595e-06, "loss": 0.4671, "step": 22620 }, { "epoch": 0.6186009625902429, "grad_norm": 1.3244763612747192, "learning_rate": 6.708378279156851e-06, "loss": 0.4548, "step": 22621 }, { "epoch": 0.6186283089039597, "grad_norm": 1.384470820426941, "learning_rate": 6.707541945185888e-06, "loss": 0.4483, "step": 22622 }, { "epoch": 0.6186556552176766, "grad_norm": 1.4113619327545166, "learning_rate": 6.706705637043335e-06, "loss": 0.4523, "step": 22623 }, { "epoch": 0.6186830015313936, "grad_norm": 1.2227423191070557, "learning_rate": 6.705869354735748e-06, "loss": 0.5124, "step": 22624 }, { "epoch": 0.6187103478451105, "grad_norm": 1.167778491973877, "learning_rate": 6.705033098269689e-06, "loss": 0.4603, "step": 22625 }, { "epoch": 0.6187376941588274, "grad_norm": 1.2135168313980103, "learning_rate": 6.704196867651718e-06, "loss": 0.5039, "step": 22626 }, { "epoch": 0.6187650404725443, "grad_norm": 1.4562687873840332, "learning_rate": 6.703360662888394e-06, "loss": 0.3389, "step": 22627 }, { "epoch": 0.6187923867862613, "grad_norm": 1.6219046115875244, "learning_rate": 6.702524483986279e-06, "loss": 0.3563, "step": 22628 }, { "epoch": 0.6188197330999782, "grad_norm": 1.326735496520996, "learning_rate": 6.701688330951931e-06, "loss": 0.7602, "step": 22629 }, { "epoch": 0.618847079413695, "grad_norm": 1.3151394128799438, "learning_rate": 6.700852203791912e-06, "loss": 0.4736, "step": 22630 }, { "epoch": 0.6188744257274119, "grad_norm": 1.1645772457122803, "learning_rate": 6.700016102512776e-06, "loss": 0.4995, "step": 22631 }, { "epoch": 0.6189017720411288, "grad_norm": 1.2132729291915894, "learning_rate": 6.699180027121085e-06, "loss": 0.484, "step": 22632 }, { "epoch": 0.6189291183548458, "grad_norm": 1.5961887836456299, "learning_rate": 6.698343977623401e-06, "loss": 0.3824, "step": 22633 }, { "epoch": 0.6189564646685627, "grad_norm": 1.3601611852645874, "learning_rate": 6.697507954026275e-06, "loss": 0.4965, "step": 22634 }, { "epoch": 0.6189838109822796, "grad_norm": 1.190810203552246, "learning_rate": 6.696671956336271e-06, "loss": 0.3781, "step": 22635 }, { "epoch": 0.6190111572959965, "grad_norm": 1.2402807474136353, "learning_rate": 6.6958359845599485e-06, "loss": 0.4673, "step": 22636 }, { "epoch": 0.6190385036097135, "grad_norm": 1.1433932781219482, "learning_rate": 6.6950000387038604e-06, "loss": 0.445, "step": 22637 }, { "epoch": 0.6190658499234303, "grad_norm": 1.3063814640045166, "learning_rate": 6.694164118774569e-06, "loss": 0.5225, "step": 22638 }, { "epoch": 0.6190931962371472, "grad_norm": 1.2352004051208496, "learning_rate": 6.6933282247786255e-06, "loss": 0.3669, "step": 22639 }, { "epoch": 0.6191205425508641, "grad_norm": 1.9194340705871582, "learning_rate": 6.692492356722595e-06, "loss": 0.4895, "step": 22640 }, { "epoch": 0.619147888864581, "grad_norm": 1.2498911619186401, "learning_rate": 6.691656514613032e-06, "loss": 0.6986, "step": 22641 }, { "epoch": 0.619175235178298, "grad_norm": 1.4348222017288208, "learning_rate": 6.69082069845649e-06, "loss": 0.5028, "step": 22642 }, { "epoch": 0.6192025814920149, "grad_norm": 1.5542951822280884, "learning_rate": 6.68998490825953e-06, "loss": 0.3784, "step": 22643 }, { "epoch": 0.6192299278057318, "grad_norm": 1.7113966941833496, "learning_rate": 6.689149144028706e-06, "loss": 0.5108, "step": 22644 }, { "epoch": 0.6192572741194488, "grad_norm": 1.1863815784454346, "learning_rate": 6.688313405770575e-06, "loss": 0.4918, "step": 22645 }, { "epoch": 0.6192846204331656, "grad_norm": 1.5248979330062866, "learning_rate": 6.687477693491694e-06, "loss": 0.3747, "step": 22646 }, { "epoch": 0.6193119667468825, "grad_norm": 1.3405225276947021, "learning_rate": 6.6866420071986175e-06, "loss": 0.7216, "step": 22647 }, { "epoch": 0.6193393130605994, "grad_norm": 1.233608365058899, "learning_rate": 6.685806346897901e-06, "loss": 0.5013, "step": 22648 }, { "epoch": 0.6193666593743163, "grad_norm": 1.4541138410568237, "learning_rate": 6.684970712596104e-06, "loss": 0.4953, "step": 22649 }, { "epoch": 0.6193940056880333, "grad_norm": 1.2690166234970093, "learning_rate": 6.684135104299776e-06, "loss": 0.48, "step": 22650 }, { "epoch": 0.6194213520017502, "grad_norm": 1.2264134883880615, "learning_rate": 6.6832995220154785e-06, "loss": 0.4624, "step": 22651 }, { "epoch": 0.6194486983154671, "grad_norm": 1.1360929012298584, "learning_rate": 6.6824639657497635e-06, "loss": 0.47, "step": 22652 }, { "epoch": 0.6194760446291839, "grad_norm": 1.4542165994644165, "learning_rate": 6.6816284355091825e-06, "loss": 0.333, "step": 22653 }, { "epoch": 0.6195033909429009, "grad_norm": 1.2689577341079712, "learning_rate": 6.6807929313002905e-06, "loss": 0.4699, "step": 22654 }, { "epoch": 0.6195307372566178, "grad_norm": 1.5960056781768799, "learning_rate": 6.679957453129646e-06, "loss": 0.3597, "step": 22655 }, { "epoch": 0.6195580835703347, "grad_norm": 1.4191343784332275, "learning_rate": 6.6791220010038004e-06, "loss": 0.5148, "step": 22656 }, { "epoch": 0.6195854298840516, "grad_norm": 1.1180598735809326, "learning_rate": 6.678286574929308e-06, "loss": 0.4703, "step": 22657 }, { "epoch": 0.6196127761977686, "grad_norm": 1.2510191202163696, "learning_rate": 6.677451174912724e-06, "loss": 0.4744, "step": 22658 }, { "epoch": 0.6196401225114855, "grad_norm": 1.4448739290237427, "learning_rate": 6.676615800960598e-06, "loss": 0.4534, "step": 22659 }, { "epoch": 0.6196674688252024, "grad_norm": 1.2778661251068115, "learning_rate": 6.675780453079489e-06, "loss": 0.5019, "step": 22660 }, { "epoch": 0.6196948151389192, "grad_norm": 1.5399113893508911, "learning_rate": 6.674945131275944e-06, "loss": 0.482, "step": 22661 }, { "epoch": 0.6197221614526361, "grad_norm": 1.3475004434585571, "learning_rate": 6.67410983555652e-06, "loss": 0.4353, "step": 22662 }, { "epoch": 0.6197495077663531, "grad_norm": 1.408342957496643, "learning_rate": 6.673274565927768e-06, "loss": 0.3446, "step": 22663 }, { "epoch": 0.61977685408007, "grad_norm": 1.267301321029663, "learning_rate": 6.67243932239624e-06, "loss": 0.4942, "step": 22664 }, { "epoch": 0.6198042003937869, "grad_norm": 1.376261830329895, "learning_rate": 6.671604104968492e-06, "loss": 0.4448, "step": 22665 }, { "epoch": 0.6198315467075038, "grad_norm": 1.2133032083511353, "learning_rate": 6.670768913651069e-06, "loss": 0.4703, "step": 22666 }, { "epoch": 0.6198588930212208, "grad_norm": 1.211171269416809, "learning_rate": 6.669933748450528e-06, "loss": 0.487, "step": 22667 }, { "epoch": 0.6198862393349377, "grad_norm": 1.6270641088485718, "learning_rate": 6.669098609373422e-06, "loss": 0.3806, "step": 22668 }, { "epoch": 0.6199135856486545, "grad_norm": 1.2204185724258423, "learning_rate": 6.668263496426298e-06, "loss": 0.4962, "step": 22669 }, { "epoch": 0.6199409319623714, "grad_norm": 1.179329514503479, "learning_rate": 6.667428409615708e-06, "loss": 0.4897, "step": 22670 }, { "epoch": 0.6199682782760884, "grad_norm": 1.9716863632202148, "learning_rate": 6.666593348948208e-06, "loss": 0.4869, "step": 22671 }, { "epoch": 0.6199956245898053, "grad_norm": 1.2812707424163818, "learning_rate": 6.665758314430343e-06, "loss": 0.4855, "step": 22672 }, { "epoch": 0.6200229709035222, "grad_norm": 1.1102409362792969, "learning_rate": 6.664923306068667e-06, "loss": 0.4517, "step": 22673 }, { "epoch": 0.6200503172172391, "grad_norm": 1.1901146173477173, "learning_rate": 6.664088323869726e-06, "loss": 0.4842, "step": 22674 }, { "epoch": 0.6200776635309561, "grad_norm": 1.3403353691101074, "learning_rate": 6.663253367840075e-06, "loss": 0.489, "step": 22675 }, { "epoch": 0.620105009844673, "grad_norm": 1.3795267343521118, "learning_rate": 6.662418437986265e-06, "loss": 0.4899, "step": 22676 }, { "epoch": 0.6201323561583898, "grad_norm": 1.221746563911438, "learning_rate": 6.661583534314838e-06, "loss": 0.5078, "step": 22677 }, { "epoch": 0.6201597024721067, "grad_norm": 1.4382504224777222, "learning_rate": 6.660748656832351e-06, "loss": 0.4622, "step": 22678 }, { "epoch": 0.6201870487858236, "grad_norm": 1.3789598941802979, "learning_rate": 6.659913805545352e-06, "loss": 0.4666, "step": 22679 }, { "epoch": 0.6202143950995406, "grad_norm": 1.2498303651809692, "learning_rate": 6.659078980460387e-06, "loss": 0.4519, "step": 22680 }, { "epoch": 0.6202417414132575, "grad_norm": 1.3443386554718018, "learning_rate": 6.658244181584011e-06, "loss": 0.4471, "step": 22681 }, { "epoch": 0.6202690877269744, "grad_norm": 1.695147156715393, "learning_rate": 6.6574094089227656e-06, "loss": 0.362, "step": 22682 }, { "epoch": 0.6202964340406913, "grad_norm": 1.3463715314865112, "learning_rate": 6.656574662483203e-06, "loss": 0.4935, "step": 22683 }, { "epoch": 0.6203237803544083, "grad_norm": 1.2019153833389282, "learning_rate": 6.655739942271873e-06, "loss": 0.4665, "step": 22684 }, { "epoch": 0.6203511266681251, "grad_norm": 1.1764750480651855, "learning_rate": 6.6549052482953206e-06, "loss": 0.4523, "step": 22685 }, { "epoch": 0.620378472981842, "grad_norm": 1.1862080097198486, "learning_rate": 6.654070580560097e-06, "loss": 0.5009, "step": 22686 }, { "epoch": 0.6204058192955589, "grad_norm": 1.1605333089828491, "learning_rate": 6.653235939072745e-06, "loss": 0.368, "step": 22687 }, { "epoch": 0.6204331656092759, "grad_norm": 1.2086818218231201, "learning_rate": 6.65240132383982e-06, "loss": 0.7337, "step": 22688 }, { "epoch": 0.6204605119229928, "grad_norm": 1.9127694368362427, "learning_rate": 6.6515667348678604e-06, "loss": 0.526, "step": 22689 }, { "epoch": 0.6204878582367097, "grad_norm": 1.2273223400115967, "learning_rate": 6.650732172163419e-06, "loss": 0.4649, "step": 22690 }, { "epoch": 0.6205152045504266, "grad_norm": 1.7632195949554443, "learning_rate": 6.649897635733041e-06, "loss": 0.4893, "step": 22691 }, { "epoch": 0.6205425508641436, "grad_norm": 1.1761552095413208, "learning_rate": 6.649063125583271e-06, "loss": 0.4691, "step": 22692 }, { "epoch": 0.6205698971778604, "grad_norm": 1.3006831407546997, "learning_rate": 6.648228641720661e-06, "loss": 0.4423, "step": 22693 }, { "epoch": 0.6205972434915773, "grad_norm": 1.1394801139831543, "learning_rate": 6.647394184151752e-06, "loss": 0.4449, "step": 22694 }, { "epoch": 0.6206245898052942, "grad_norm": 1.5898973941802979, "learning_rate": 6.6465597528830926e-06, "loss": 0.3594, "step": 22695 }, { "epoch": 0.6206519361190111, "grad_norm": 1.9531525373458862, "learning_rate": 6.645725347921228e-06, "loss": 0.474, "step": 22696 }, { "epoch": 0.6206792824327281, "grad_norm": 1.3404505252838135, "learning_rate": 6.644890969272703e-06, "loss": 0.5128, "step": 22697 }, { "epoch": 0.620706628746445, "grad_norm": 1.2512315511703491, "learning_rate": 6.644056616944067e-06, "loss": 0.4802, "step": 22698 }, { "epoch": 0.6207339750601619, "grad_norm": 1.784268856048584, "learning_rate": 6.6432222909418595e-06, "loss": 0.4901, "step": 22699 }, { "epoch": 0.6207613213738788, "grad_norm": 1.44741690158844, "learning_rate": 6.64238799127263e-06, "loss": 0.5034, "step": 22700 }, { "epoch": 0.6207886676875957, "grad_norm": 1.1529734134674072, "learning_rate": 6.641553717942922e-06, "loss": 0.3392, "step": 22701 }, { "epoch": 0.6208160140013126, "grad_norm": 2.019282102584839, "learning_rate": 6.640719470959278e-06, "loss": 0.7404, "step": 22702 }, { "epoch": 0.6208433603150295, "grad_norm": 1.1110541820526123, "learning_rate": 6.639885250328248e-06, "loss": 0.4664, "step": 22703 }, { "epoch": 0.6208707066287464, "grad_norm": 1.2872216701507568, "learning_rate": 6.639051056056369e-06, "loss": 0.499, "step": 22704 }, { "epoch": 0.6208980529424634, "grad_norm": 1.4441431760787964, "learning_rate": 6.63821688815019e-06, "loss": 0.4736, "step": 22705 }, { "epoch": 0.6209253992561803, "grad_norm": 1.308100938796997, "learning_rate": 6.637382746616255e-06, "loss": 0.5099, "step": 22706 }, { "epoch": 0.6209527455698972, "grad_norm": 1.5365387201309204, "learning_rate": 6.636548631461104e-06, "loss": 0.4621, "step": 22707 }, { "epoch": 0.6209800918836141, "grad_norm": 1.1764962673187256, "learning_rate": 6.6357145426912825e-06, "loss": 0.4704, "step": 22708 }, { "epoch": 0.621007438197331, "grad_norm": 1.58352530002594, "learning_rate": 6.634880480313334e-06, "loss": 0.5071, "step": 22709 }, { "epoch": 0.6210347845110479, "grad_norm": 1.5032272338867188, "learning_rate": 6.6340464443338e-06, "loss": 0.4802, "step": 22710 }, { "epoch": 0.6210621308247648, "grad_norm": 1.5004693269729614, "learning_rate": 6.633212434759226e-06, "loss": 0.4731, "step": 22711 }, { "epoch": 0.6210894771384817, "grad_norm": 1.7325867414474487, "learning_rate": 6.632378451596153e-06, "loss": 0.3353, "step": 22712 }, { "epoch": 0.6211168234521987, "grad_norm": 1.1710306406021118, "learning_rate": 6.631544494851121e-06, "loss": 0.5066, "step": 22713 }, { "epoch": 0.6211441697659156, "grad_norm": 1.2624107599258423, "learning_rate": 6.630710564530678e-06, "loss": 0.7185, "step": 22714 }, { "epoch": 0.6211715160796325, "grad_norm": 1.298904538154602, "learning_rate": 6.62987666064136e-06, "loss": 0.4745, "step": 22715 }, { "epoch": 0.6211988623933494, "grad_norm": 1.2258821725845337, "learning_rate": 6.629042783189713e-06, "loss": 0.4708, "step": 22716 }, { "epoch": 0.6212262087070662, "grad_norm": 1.2787280082702637, "learning_rate": 6.628208932182274e-06, "loss": 0.4884, "step": 22717 }, { "epoch": 0.6212535550207832, "grad_norm": 1.2852634191513062, "learning_rate": 6.627375107625589e-06, "loss": 0.4645, "step": 22718 }, { "epoch": 0.6212809013345001, "grad_norm": 1.3629447221755981, "learning_rate": 6.626541309526198e-06, "loss": 0.493, "step": 22719 }, { "epoch": 0.621308247648217, "grad_norm": 1.783017635345459, "learning_rate": 6.625707537890638e-06, "loss": 0.5027, "step": 22720 }, { "epoch": 0.6213355939619339, "grad_norm": 1.3690265417099, "learning_rate": 6.624873792725455e-06, "loss": 0.5095, "step": 22721 }, { "epoch": 0.6213629402756509, "grad_norm": 1.4284058809280396, "learning_rate": 6.624040074037188e-06, "loss": 0.4895, "step": 22722 }, { "epoch": 0.6213902865893678, "grad_norm": 1.218476414680481, "learning_rate": 6.623206381832378e-06, "loss": 0.4742, "step": 22723 }, { "epoch": 0.6214176329030847, "grad_norm": 1.217760443687439, "learning_rate": 6.62237271611756e-06, "loss": 0.5178, "step": 22724 }, { "epoch": 0.6214449792168015, "grad_norm": 1.449169397354126, "learning_rate": 6.6215390768992795e-06, "loss": 0.7419, "step": 22725 }, { "epoch": 0.6214723255305185, "grad_norm": 1.1695479154586792, "learning_rate": 6.620705464184072e-06, "loss": 0.3591, "step": 22726 }, { "epoch": 0.6214996718442354, "grad_norm": 1.4947271347045898, "learning_rate": 6.61987187797848e-06, "loss": 0.3332, "step": 22727 }, { "epoch": 0.6215270181579523, "grad_norm": 1.4793028831481934, "learning_rate": 6.619038318289044e-06, "loss": 0.4212, "step": 22728 }, { "epoch": 0.6215543644716692, "grad_norm": 1.135603666305542, "learning_rate": 6.6182047851222975e-06, "loss": 0.4704, "step": 22729 }, { "epoch": 0.6215817107853862, "grad_norm": 1.2356438636779785, "learning_rate": 6.6173712784847855e-06, "loss": 0.4886, "step": 22730 }, { "epoch": 0.6216090570991031, "grad_norm": 1.2479336261749268, "learning_rate": 6.616537798383041e-06, "loss": 0.5147, "step": 22731 }, { "epoch": 0.62163640341282, "grad_norm": 1.7796725034713745, "learning_rate": 6.615704344823605e-06, "loss": 0.498, "step": 22732 }, { "epoch": 0.6216637497265368, "grad_norm": 1.5932172536849976, "learning_rate": 6.6148709178130214e-06, "loss": 0.3876, "step": 22733 }, { "epoch": 0.6216910960402537, "grad_norm": 1.1394766569137573, "learning_rate": 6.614037517357817e-06, "loss": 0.376, "step": 22734 }, { "epoch": 0.6217184423539707, "grad_norm": 2.624471664428711, "learning_rate": 6.613204143464537e-06, "loss": 0.3722, "step": 22735 }, { "epoch": 0.6217457886676876, "grad_norm": 1.5289654731750488, "learning_rate": 6.61237079613972e-06, "loss": 0.5056, "step": 22736 }, { "epoch": 0.6217731349814045, "grad_norm": 1.3889082670211792, "learning_rate": 6.6115374753898976e-06, "loss": 0.3462, "step": 22737 }, { "epoch": 0.6218004812951214, "grad_norm": 1.165877103805542, "learning_rate": 6.6107041812216125e-06, "loss": 0.5087, "step": 22738 }, { "epoch": 0.6218278276088384, "grad_norm": 1.575990080833435, "learning_rate": 6.609870913641398e-06, "loss": 0.3547, "step": 22739 }, { "epoch": 0.6218551739225553, "grad_norm": 1.1507165431976318, "learning_rate": 6.6090376726557916e-06, "loss": 0.4818, "step": 22740 }, { "epoch": 0.6218825202362721, "grad_norm": 1.5685089826583862, "learning_rate": 6.608204458271331e-06, "loss": 0.3784, "step": 22741 }, { "epoch": 0.621909866549989, "grad_norm": 1.5169248580932617, "learning_rate": 6.607371270494551e-06, "loss": 0.7291, "step": 22742 }, { "epoch": 0.621937212863706, "grad_norm": 1.129927158355713, "learning_rate": 6.606538109331988e-06, "loss": 0.4708, "step": 22743 }, { "epoch": 0.6219645591774229, "grad_norm": 1.2801175117492676, "learning_rate": 6.6057049747901815e-06, "loss": 0.4765, "step": 22744 }, { "epoch": 0.6219919054911398, "grad_norm": 1.2422919273376465, "learning_rate": 6.604871866875662e-06, "loss": 0.4813, "step": 22745 }, { "epoch": 0.6220192518048567, "grad_norm": 1.4377801418304443, "learning_rate": 6.60403878559497e-06, "loss": 0.4539, "step": 22746 }, { "epoch": 0.6220465981185737, "grad_norm": 1.509787917137146, "learning_rate": 6.603205730954635e-06, "loss": 0.7469, "step": 22747 }, { "epoch": 0.6220739444322906, "grad_norm": 1.113783597946167, "learning_rate": 6.602372702961195e-06, "loss": 0.4724, "step": 22748 }, { "epoch": 0.6221012907460074, "grad_norm": 1.2510589361190796, "learning_rate": 6.601539701621187e-06, "loss": 0.4742, "step": 22749 }, { "epoch": 0.6221286370597243, "grad_norm": 1.1603524684906006, "learning_rate": 6.600706726941142e-06, "loss": 0.4599, "step": 22750 }, { "epoch": 0.6221559833734412, "grad_norm": 1.2911477088928223, "learning_rate": 6.599873778927598e-06, "loss": 0.4529, "step": 22751 }, { "epoch": 0.6221833296871582, "grad_norm": 1.3832467794418335, "learning_rate": 6.599040857587085e-06, "loss": 0.4978, "step": 22752 }, { "epoch": 0.6222106760008751, "grad_norm": 1.1528050899505615, "learning_rate": 6.5982079629261395e-06, "loss": 0.7143, "step": 22753 }, { "epoch": 0.622238022314592, "grad_norm": 1.3548963069915771, "learning_rate": 6.597375094951297e-06, "loss": 0.4837, "step": 22754 }, { "epoch": 0.622265368628309, "grad_norm": 1.2158914804458618, "learning_rate": 6.596542253669088e-06, "loss": 0.7275, "step": 22755 }, { "epoch": 0.6222927149420258, "grad_norm": 4.23213005065918, "learning_rate": 6.595709439086047e-06, "loss": 0.7207, "step": 22756 }, { "epoch": 0.6223200612557427, "grad_norm": 1.1668305397033691, "learning_rate": 6.594876651208709e-06, "loss": 0.722, "step": 22757 }, { "epoch": 0.6223474075694596, "grad_norm": 1.2596622705459595, "learning_rate": 6.5940438900436074e-06, "loss": 0.5222, "step": 22758 }, { "epoch": 0.6223747538831765, "grad_norm": 1.3603347539901733, "learning_rate": 6.59321115559727e-06, "loss": 0.4764, "step": 22759 }, { "epoch": 0.6224021001968935, "grad_norm": 1.8088505268096924, "learning_rate": 6.592378447876233e-06, "loss": 0.396, "step": 22760 }, { "epoch": 0.6224294465106104, "grad_norm": 1.5215853452682495, "learning_rate": 6.591545766887026e-06, "loss": 0.4737, "step": 22761 }, { "epoch": 0.6224567928243273, "grad_norm": 1.1552654504776, "learning_rate": 6.590713112636185e-06, "loss": 0.4845, "step": 22762 }, { "epoch": 0.6224841391380442, "grad_norm": 1.5010110139846802, "learning_rate": 6.58988048513024e-06, "loss": 0.5198, "step": 22763 }, { "epoch": 0.622511485451761, "grad_norm": 1.3086926937103271, "learning_rate": 6.589047884375722e-06, "loss": 0.4809, "step": 22764 }, { "epoch": 0.622538831765478, "grad_norm": 1.5854973793029785, "learning_rate": 6.588215310379164e-06, "loss": 0.4689, "step": 22765 }, { "epoch": 0.6225661780791949, "grad_norm": 1.4240809679031372, "learning_rate": 6.5873827631470975e-06, "loss": 0.4842, "step": 22766 }, { "epoch": 0.6225935243929118, "grad_norm": 1.5388065576553345, "learning_rate": 6.586550242686052e-06, "loss": 0.3818, "step": 22767 }, { "epoch": 0.6226208707066287, "grad_norm": 1.4702404737472534, "learning_rate": 6.58571774900256e-06, "loss": 0.3724, "step": 22768 }, { "epoch": 0.6226482170203457, "grad_norm": 1.2752947807312012, "learning_rate": 6.584885282103151e-06, "loss": 0.4886, "step": 22769 }, { "epoch": 0.6226755633340626, "grad_norm": 1.2733216285705566, "learning_rate": 6.584052841994354e-06, "loss": 0.7338, "step": 22770 }, { "epoch": 0.6227029096477795, "grad_norm": 1.3498681783676147, "learning_rate": 6.583220428682704e-06, "loss": 0.4917, "step": 22771 }, { "epoch": 0.6227302559614963, "grad_norm": 1.4643239974975586, "learning_rate": 6.5823880421747254e-06, "loss": 0.4741, "step": 22772 }, { "epoch": 0.6227576022752133, "grad_norm": 1.4791980981826782, "learning_rate": 6.581555682476955e-06, "loss": 0.3647, "step": 22773 }, { "epoch": 0.6227849485889302, "grad_norm": 1.3599711656570435, "learning_rate": 6.580723349595914e-06, "loss": 0.431, "step": 22774 }, { "epoch": 0.6228122949026471, "grad_norm": 1.2571258544921875, "learning_rate": 6.579891043538137e-06, "loss": 0.4575, "step": 22775 }, { "epoch": 0.622839641216364, "grad_norm": 1.167803168296814, "learning_rate": 6.579058764310153e-06, "loss": 0.4684, "step": 22776 }, { "epoch": 0.622866987530081, "grad_norm": 1.1626694202423096, "learning_rate": 6.5782265119184904e-06, "loss": 0.4774, "step": 22777 }, { "epoch": 0.6228943338437979, "grad_norm": 1.4040528535842896, "learning_rate": 6.577394286369676e-06, "loss": 0.5137, "step": 22778 }, { "epoch": 0.6229216801575148, "grad_norm": 1.1865872144699097, "learning_rate": 6.576562087670244e-06, "loss": 0.489, "step": 22779 }, { "epoch": 0.6229490264712316, "grad_norm": 1.1914070844650269, "learning_rate": 6.575729915826717e-06, "loss": 0.7494, "step": 22780 }, { "epoch": 0.6229763727849486, "grad_norm": 1.0863279104232788, "learning_rate": 6.574897770845625e-06, "loss": 0.4619, "step": 22781 }, { "epoch": 0.6230037190986655, "grad_norm": 1.313686728477478, "learning_rate": 6.574065652733495e-06, "loss": 0.4757, "step": 22782 }, { "epoch": 0.6230310654123824, "grad_norm": 1.4918813705444336, "learning_rate": 6.573233561496857e-06, "loss": 0.4276, "step": 22783 }, { "epoch": 0.6230584117260993, "grad_norm": 1.14017653465271, "learning_rate": 6.572401497142238e-06, "loss": 0.5112, "step": 22784 }, { "epoch": 0.6230857580398163, "grad_norm": 1.3741129636764526, "learning_rate": 6.571569459676163e-06, "loss": 0.479, "step": 22785 }, { "epoch": 0.6231131043535332, "grad_norm": 0.9222729206085205, "learning_rate": 6.57073744910516e-06, "loss": 0.3156, "step": 22786 }, { "epoch": 0.6231404506672501, "grad_norm": 1.5937706232070923, "learning_rate": 6.56990546543576e-06, "loss": 0.4014, "step": 22787 }, { "epoch": 0.6231677969809669, "grad_norm": 1.345669150352478, "learning_rate": 6.569073508674484e-06, "loss": 0.4803, "step": 22788 }, { "epoch": 0.6231951432946838, "grad_norm": 1.3438745737075806, "learning_rate": 6.568241578827862e-06, "loss": 0.529, "step": 22789 }, { "epoch": 0.6232224896084008, "grad_norm": 1.2382409572601318, "learning_rate": 6.5674096759024175e-06, "loss": 0.4763, "step": 22790 }, { "epoch": 0.6232498359221177, "grad_norm": 1.3882166147232056, "learning_rate": 6.566577799904679e-06, "loss": 0.4844, "step": 22791 }, { "epoch": 0.6232771822358346, "grad_norm": 1.2644752264022827, "learning_rate": 6.565745950841171e-06, "loss": 0.5005, "step": 22792 }, { "epoch": 0.6233045285495515, "grad_norm": 1.448651909828186, "learning_rate": 6.564914128718424e-06, "loss": 0.4892, "step": 22793 }, { "epoch": 0.6233318748632685, "grad_norm": 1.572750449180603, "learning_rate": 6.564082333542954e-06, "loss": 0.3691, "step": 22794 }, { "epoch": 0.6233592211769854, "grad_norm": 1.2360423803329468, "learning_rate": 6.563250565321293e-06, "loss": 0.7332, "step": 22795 }, { "epoch": 0.6233865674907022, "grad_norm": 1.3872828483581543, "learning_rate": 6.562418824059962e-06, "loss": 0.4745, "step": 22796 }, { "epoch": 0.6234139138044191, "grad_norm": 2.7238051891326904, "learning_rate": 6.561587109765488e-06, "loss": 0.3557, "step": 22797 }, { "epoch": 0.623441260118136, "grad_norm": 1.2866557836532593, "learning_rate": 6.560755422444398e-06, "loss": 0.5139, "step": 22798 }, { "epoch": 0.623468606431853, "grad_norm": 1.5403339862823486, "learning_rate": 6.559923762103211e-06, "loss": 0.5239, "step": 22799 }, { "epoch": 0.6234959527455699, "grad_norm": 1.3926583528518677, "learning_rate": 6.559092128748454e-06, "loss": 0.4959, "step": 22800 }, { "epoch": 0.6235232990592868, "grad_norm": 1.2485287189483643, "learning_rate": 6.5582605223866546e-06, "loss": 0.5239, "step": 22801 }, { "epoch": 0.6235506453730038, "grad_norm": 1.1044340133666992, "learning_rate": 6.557428943024329e-06, "loss": 0.4673, "step": 22802 }, { "epoch": 0.6235779916867207, "grad_norm": 1.1002775430679321, "learning_rate": 6.556597390668007e-06, "loss": 0.6838, "step": 22803 }, { "epoch": 0.6236053380004375, "grad_norm": 1.3283560276031494, "learning_rate": 6.555765865324207e-06, "loss": 0.51, "step": 22804 }, { "epoch": 0.6236326843141544, "grad_norm": 1.4407098293304443, "learning_rate": 6.554934366999455e-06, "loss": 0.5078, "step": 22805 }, { "epoch": 0.6236600306278713, "grad_norm": 1.299048900604248, "learning_rate": 6.5541028957002765e-06, "loss": 0.468, "step": 22806 }, { "epoch": 0.6236873769415883, "grad_norm": 1.3329483270645142, "learning_rate": 6.553271451433187e-06, "loss": 0.4715, "step": 22807 }, { "epoch": 0.6237147232553052, "grad_norm": 1.4561973810195923, "learning_rate": 6.552440034204715e-06, "loss": 0.4697, "step": 22808 }, { "epoch": 0.6237420695690221, "grad_norm": 1.192639708518982, "learning_rate": 6.551608644021381e-06, "loss": 0.4926, "step": 22809 }, { "epoch": 0.623769415882739, "grad_norm": 1.2559884786605835, "learning_rate": 6.550777280889704e-06, "loss": 0.7518, "step": 22810 }, { "epoch": 0.623796762196456, "grad_norm": 1.243055820465088, "learning_rate": 6.5499459448162115e-06, "loss": 0.4725, "step": 22811 }, { "epoch": 0.6238241085101728, "grad_norm": 1.7755763530731201, "learning_rate": 6.549114635807421e-06, "loss": 0.3603, "step": 22812 }, { "epoch": 0.6238514548238897, "grad_norm": 1.3301732540130615, "learning_rate": 6.548283353869855e-06, "loss": 0.4622, "step": 22813 }, { "epoch": 0.6238788011376066, "grad_norm": 1.3068861961364746, "learning_rate": 6.547452099010037e-06, "loss": 0.4969, "step": 22814 }, { "epoch": 0.6239061474513236, "grad_norm": 1.3354789018630981, "learning_rate": 6.546620871234482e-06, "loss": 0.5, "step": 22815 }, { "epoch": 0.6239334937650405, "grad_norm": 1.1875596046447754, "learning_rate": 6.545789670549717e-06, "loss": 0.4743, "step": 22816 }, { "epoch": 0.6239608400787574, "grad_norm": 1.2639633417129517, "learning_rate": 6.5449584969622585e-06, "loss": 0.4848, "step": 22817 }, { "epoch": 0.6239881863924743, "grad_norm": 1.2275781631469727, "learning_rate": 6.544127350478627e-06, "loss": 0.7196, "step": 22818 }, { "epoch": 0.6240155327061913, "grad_norm": 1.438891887664795, "learning_rate": 6.543296231105347e-06, "loss": 0.353, "step": 22819 }, { "epoch": 0.6240428790199081, "grad_norm": 1.333945870399475, "learning_rate": 6.542465138848933e-06, "loss": 0.4879, "step": 22820 }, { "epoch": 0.624070225333625, "grad_norm": 1.8908013105392456, "learning_rate": 6.541634073715907e-06, "loss": 0.5114, "step": 22821 }, { "epoch": 0.6240975716473419, "grad_norm": 1.5001705884933472, "learning_rate": 6.54080303571279e-06, "loss": 0.4747, "step": 22822 }, { "epoch": 0.6241249179610588, "grad_norm": 1.4554651975631714, "learning_rate": 6.539972024846099e-06, "loss": 0.4801, "step": 22823 }, { "epoch": 0.6241522642747758, "grad_norm": 1.1941213607788086, "learning_rate": 6.539141041122355e-06, "loss": 0.5014, "step": 22824 }, { "epoch": 0.6241796105884927, "grad_norm": 1.3664544820785522, "learning_rate": 6.538310084548073e-06, "loss": 0.778, "step": 22825 }, { "epoch": 0.6242069569022096, "grad_norm": 1.7069131135940552, "learning_rate": 6.5374791551297735e-06, "loss": 0.3834, "step": 22826 }, { "epoch": 0.6242343032159265, "grad_norm": 1.2653160095214844, "learning_rate": 6.536648252873979e-06, "loss": 0.5146, "step": 22827 }, { "epoch": 0.6242616495296434, "grad_norm": 1.0685515403747559, "learning_rate": 6.535817377787206e-06, "loss": 0.4738, "step": 22828 }, { "epoch": 0.6242889958433603, "grad_norm": 1.3977323770523071, "learning_rate": 6.534986529875967e-06, "loss": 0.4871, "step": 22829 }, { "epoch": 0.6243163421570772, "grad_norm": 1.342189073562622, "learning_rate": 6.534155709146783e-06, "loss": 0.4886, "step": 22830 }, { "epoch": 0.6243436884707941, "grad_norm": 1.2642788887023926, "learning_rate": 6.533324915606174e-06, "loss": 0.4966, "step": 22831 }, { "epoch": 0.6243710347845111, "grad_norm": 1.395434856414795, "learning_rate": 6.5324941492606545e-06, "loss": 0.4884, "step": 22832 }, { "epoch": 0.624398381098228, "grad_norm": 1.2753280401229858, "learning_rate": 6.5316634101167446e-06, "loss": 0.4761, "step": 22833 }, { "epoch": 0.6244257274119449, "grad_norm": 1.3533754348754883, "learning_rate": 6.530832698180956e-06, "loss": 0.4708, "step": 22834 }, { "epoch": 0.6244530737256618, "grad_norm": 1.4468164443969727, "learning_rate": 6.530002013459808e-06, "loss": 0.3762, "step": 22835 }, { "epoch": 0.6244804200393786, "grad_norm": 1.4478392601013184, "learning_rate": 6.52917135595982e-06, "loss": 0.5057, "step": 22836 }, { "epoch": 0.6245077663530956, "grad_norm": 1.172447919845581, "learning_rate": 6.528340725687505e-06, "loss": 0.4691, "step": 22837 }, { "epoch": 0.6245351126668125, "grad_norm": 1.4838664531707764, "learning_rate": 6.527510122649382e-06, "loss": 0.4638, "step": 22838 }, { "epoch": 0.6245624589805294, "grad_norm": 1.1122617721557617, "learning_rate": 6.526679546851962e-06, "loss": 0.4769, "step": 22839 }, { "epoch": 0.6245898052942463, "grad_norm": 1.1203781366348267, "learning_rate": 6.525848998301765e-06, "loss": 0.4833, "step": 22840 }, { "epoch": 0.6246171516079633, "grad_norm": 1.1448817253112793, "learning_rate": 6.525018477005305e-06, "loss": 0.4804, "step": 22841 }, { "epoch": 0.6246444979216802, "grad_norm": 1.8898162841796875, "learning_rate": 6.5241879829690954e-06, "loss": 0.4739, "step": 22842 }, { "epoch": 0.6246718442353971, "grad_norm": 1.5043836832046509, "learning_rate": 6.523357516199653e-06, "loss": 0.3634, "step": 22843 }, { "epoch": 0.6246991905491139, "grad_norm": 1.105177402496338, "learning_rate": 6.522527076703496e-06, "loss": 0.4878, "step": 22844 }, { "epoch": 0.6247265368628309, "grad_norm": 1.1975362300872803, "learning_rate": 6.5216966644871315e-06, "loss": 0.4612, "step": 22845 }, { "epoch": 0.6247538831765478, "grad_norm": 1.2336679697036743, "learning_rate": 6.520866279557081e-06, "loss": 0.4869, "step": 22846 }, { "epoch": 0.6247812294902647, "grad_norm": 1.3221808671951294, "learning_rate": 6.520035921919853e-06, "loss": 0.4866, "step": 22847 }, { "epoch": 0.6248085758039816, "grad_norm": 1.2014304399490356, "learning_rate": 6.519205591581963e-06, "loss": 0.484, "step": 22848 }, { "epoch": 0.6248359221176986, "grad_norm": 1.4918339252471924, "learning_rate": 6.51837528854993e-06, "loss": 0.448, "step": 22849 }, { "epoch": 0.6248632684314155, "grad_norm": 1.36891770362854, "learning_rate": 6.517545012830258e-06, "loss": 0.4805, "step": 22850 }, { "epoch": 0.6248906147451324, "grad_norm": 1.458783745765686, "learning_rate": 6.516714764429469e-06, "loss": 0.4485, "step": 22851 }, { "epoch": 0.6249179610588492, "grad_norm": 1.5047720670700073, "learning_rate": 6.515884543354071e-06, "loss": 0.504, "step": 22852 }, { "epoch": 0.6249453073725662, "grad_norm": 1.5773817300796509, "learning_rate": 6.515054349610577e-06, "loss": 0.5053, "step": 22853 }, { "epoch": 0.6249726536862831, "grad_norm": 1.522089958190918, "learning_rate": 6.514224183205503e-06, "loss": 0.3249, "step": 22854 }, { "epoch": 0.625, "grad_norm": 2.0756499767303467, "learning_rate": 6.513394044145358e-06, "loss": 0.7389, "step": 22855 }, { "epoch": 0.6250273463137169, "grad_norm": 1.2054810523986816, "learning_rate": 6.512563932436655e-06, "loss": 0.4988, "step": 22856 }, { "epoch": 0.6250546926274338, "grad_norm": 1.2169044017791748, "learning_rate": 6.5117338480859084e-06, "loss": 0.4875, "step": 22857 }, { "epoch": 0.6250820389411508, "grad_norm": 1.385344386100769, "learning_rate": 6.510903791099625e-06, "loss": 0.476, "step": 22858 }, { "epoch": 0.6251093852548676, "grad_norm": 1.5568761825561523, "learning_rate": 6.510073761484324e-06, "loss": 0.3664, "step": 22859 }, { "epoch": 0.6251367315685845, "grad_norm": 1.3077644109725952, "learning_rate": 6.509243759246509e-06, "loss": 0.492, "step": 22860 }, { "epoch": 0.6251640778823014, "grad_norm": 1.2221534252166748, "learning_rate": 6.508413784392694e-06, "loss": 0.478, "step": 22861 }, { "epoch": 0.6251914241960184, "grad_norm": 1.224273920059204, "learning_rate": 6.507583836929392e-06, "loss": 0.4894, "step": 22862 }, { "epoch": 0.6252187705097353, "grad_norm": 1.4170608520507812, "learning_rate": 6.506753916863111e-06, "loss": 0.5053, "step": 22863 }, { "epoch": 0.6252461168234522, "grad_norm": 1.399303674697876, "learning_rate": 6.505924024200365e-06, "loss": 0.4639, "step": 22864 }, { "epoch": 0.6252734631371691, "grad_norm": 1.2751117944717407, "learning_rate": 6.505094158947659e-06, "loss": 0.7401, "step": 22865 }, { "epoch": 0.6253008094508861, "grad_norm": 1.2433137893676758, "learning_rate": 6.504264321111508e-06, "loss": 0.4566, "step": 22866 }, { "epoch": 0.6253281557646029, "grad_norm": 1.2974120378494263, "learning_rate": 6.503434510698416e-06, "loss": 0.7282, "step": 22867 }, { "epoch": 0.6253555020783198, "grad_norm": 1.4563125371932983, "learning_rate": 6.502604727714899e-06, "loss": 0.4924, "step": 22868 }, { "epoch": 0.6253828483920367, "grad_norm": 1.529919981956482, "learning_rate": 6.501774972167463e-06, "loss": 0.502, "step": 22869 }, { "epoch": 0.6254101947057537, "grad_norm": 1.2160676717758179, "learning_rate": 6.500945244062616e-06, "loss": 0.4959, "step": 22870 }, { "epoch": 0.6254375410194706, "grad_norm": 1.2921181917190552, "learning_rate": 6.500115543406872e-06, "loss": 0.4971, "step": 22871 }, { "epoch": 0.6254648873331875, "grad_norm": 1.4806305170059204, "learning_rate": 6.499285870206734e-06, "loss": 0.4724, "step": 22872 }, { "epoch": 0.6254922336469044, "grad_norm": 1.597538709640503, "learning_rate": 6.4984562244687134e-06, "loss": 0.4905, "step": 22873 }, { "epoch": 0.6255195799606214, "grad_norm": 1.3012042045593262, "learning_rate": 6.497626606199321e-06, "loss": 0.4772, "step": 22874 }, { "epoch": 0.6255469262743382, "grad_norm": 1.5301928520202637, "learning_rate": 6.496797015405059e-06, "loss": 0.35, "step": 22875 }, { "epoch": 0.6255742725880551, "grad_norm": 1.2437502145767212, "learning_rate": 6.4959674520924395e-06, "loss": 0.4707, "step": 22876 }, { "epoch": 0.625601618901772, "grad_norm": 1.296901822090149, "learning_rate": 6.49513791626797e-06, "loss": 0.4931, "step": 22877 }, { "epoch": 0.6256289652154889, "grad_norm": 1.2920868396759033, "learning_rate": 6.494308407938155e-06, "loss": 0.4725, "step": 22878 }, { "epoch": 0.6256563115292059, "grad_norm": 1.3998897075653076, "learning_rate": 6.493478927109507e-06, "loss": 0.3471, "step": 22879 }, { "epoch": 0.6256836578429228, "grad_norm": 1.2056670188903809, "learning_rate": 6.492649473788528e-06, "loss": 0.4735, "step": 22880 }, { "epoch": 0.6257110041566397, "grad_norm": 1.2308186292648315, "learning_rate": 6.49182004798173e-06, "loss": 0.3272, "step": 22881 }, { "epoch": 0.6257383504703566, "grad_norm": 1.532294511795044, "learning_rate": 6.490990649695612e-06, "loss": 0.3914, "step": 22882 }, { "epoch": 0.6257656967840735, "grad_norm": 1.400532603263855, "learning_rate": 6.490161278936686e-06, "loss": 0.4872, "step": 22883 }, { "epoch": 0.6257930430977904, "grad_norm": 0.9134793281555176, "learning_rate": 6.489331935711458e-06, "loss": 0.3479, "step": 22884 }, { "epoch": 0.6258203894115073, "grad_norm": 1.405619740486145, "learning_rate": 6.488502620026434e-06, "loss": 0.4592, "step": 22885 }, { "epoch": 0.6258477357252242, "grad_norm": 1.3506146669387817, "learning_rate": 6.487673331888115e-06, "loss": 0.4982, "step": 22886 }, { "epoch": 0.6258750820389412, "grad_norm": 1.0739811658859253, "learning_rate": 6.486844071303015e-06, "loss": 0.4591, "step": 22887 }, { "epoch": 0.6259024283526581, "grad_norm": 1.5847021341323853, "learning_rate": 6.48601483827763e-06, "loss": 0.4673, "step": 22888 }, { "epoch": 0.625929774666375, "grad_norm": 1.5883382558822632, "learning_rate": 6.485185632818473e-06, "loss": 0.3669, "step": 22889 }, { "epoch": 0.6259571209800919, "grad_norm": 1.377213478088379, "learning_rate": 6.4843564549320445e-06, "loss": 0.4846, "step": 22890 }, { "epoch": 0.6259844672938087, "grad_norm": 1.1979979276657104, "learning_rate": 6.483527304624849e-06, "loss": 0.4857, "step": 22891 }, { "epoch": 0.6260118136075257, "grad_norm": 1.2062355279922485, "learning_rate": 6.482698181903395e-06, "loss": 0.509, "step": 22892 }, { "epoch": 0.6260391599212426, "grad_norm": 1.1070958375930786, "learning_rate": 6.481869086774182e-06, "loss": 0.4852, "step": 22893 }, { "epoch": 0.6260665062349595, "grad_norm": 1.2754595279693604, "learning_rate": 6.4810400192437175e-06, "loss": 0.459, "step": 22894 }, { "epoch": 0.6260938525486764, "grad_norm": 1.1357442140579224, "learning_rate": 6.480210979318502e-06, "loss": 0.4704, "step": 22895 }, { "epoch": 0.6261211988623934, "grad_norm": 1.7845333814620972, "learning_rate": 6.4793819670050395e-06, "loss": 0.4743, "step": 22896 }, { "epoch": 0.6261485451761103, "grad_norm": 1.180840015411377, "learning_rate": 6.478552982309838e-06, "loss": 0.4814, "step": 22897 }, { "epoch": 0.6261758914898272, "grad_norm": 2.550959587097168, "learning_rate": 6.4777240252393944e-06, "loss": 0.3595, "step": 22898 }, { "epoch": 0.626203237803544, "grad_norm": 2.43542742729187, "learning_rate": 6.4768950958002194e-06, "loss": 0.7464, "step": 22899 }, { "epoch": 0.626230584117261, "grad_norm": 1.613152027130127, "learning_rate": 6.476066193998807e-06, "loss": 0.391, "step": 22900 }, { "epoch": 0.6262579304309779, "grad_norm": 1.318479061126709, "learning_rate": 6.475237319841665e-06, "loss": 0.4582, "step": 22901 }, { "epoch": 0.6262852767446948, "grad_norm": 1.2978527545928955, "learning_rate": 6.474408473335292e-06, "loss": 0.462, "step": 22902 }, { "epoch": 0.6263126230584117, "grad_norm": 1.5139191150665283, "learning_rate": 6.4735796544861955e-06, "loss": 0.3563, "step": 22903 }, { "epoch": 0.6263399693721287, "grad_norm": 1.2872449159622192, "learning_rate": 6.4727508633008714e-06, "loss": 0.7376, "step": 22904 }, { "epoch": 0.6263673156858456, "grad_norm": 1.1661101579666138, "learning_rate": 6.4719220997858234e-06, "loss": 0.5024, "step": 22905 }, { "epoch": 0.6263946619995625, "grad_norm": 1.3185564279556274, "learning_rate": 6.471093363947558e-06, "loss": 0.4934, "step": 22906 }, { "epoch": 0.6264220083132793, "grad_norm": 1.20946204662323, "learning_rate": 6.470264655792569e-06, "loss": 0.4773, "step": 22907 }, { "epoch": 0.6264493546269962, "grad_norm": 1.2484652996063232, "learning_rate": 6.469435975327359e-06, "loss": 0.4766, "step": 22908 }, { "epoch": 0.6264767009407132, "grad_norm": 1.16648530960083, "learning_rate": 6.468607322558433e-06, "loss": 0.4675, "step": 22909 }, { "epoch": 0.6265040472544301, "grad_norm": 1.1506434679031372, "learning_rate": 6.4677786974922865e-06, "loss": 0.5116, "step": 22910 }, { "epoch": 0.626531393568147, "grad_norm": 1.359532356262207, "learning_rate": 6.466950100135425e-06, "loss": 0.4795, "step": 22911 }, { "epoch": 0.626558739881864, "grad_norm": 1.5388997793197632, "learning_rate": 6.466121530494343e-06, "loss": 0.7583, "step": 22912 }, { "epoch": 0.6265860861955809, "grad_norm": 1.306298851966858, "learning_rate": 6.465292988575542e-06, "loss": 0.4792, "step": 22913 }, { "epoch": 0.6266134325092978, "grad_norm": 1.3096740245819092, "learning_rate": 6.464464474385526e-06, "loss": 0.4992, "step": 22914 }, { "epoch": 0.6266407788230146, "grad_norm": 1.583726167678833, "learning_rate": 6.46363598793079e-06, "loss": 0.3552, "step": 22915 }, { "epoch": 0.6266681251367315, "grad_norm": 1.3687807321548462, "learning_rate": 6.462807529217836e-06, "loss": 0.4787, "step": 22916 }, { "epoch": 0.6266954714504485, "grad_norm": 1.4639372825622559, "learning_rate": 6.461979098253159e-06, "loss": 0.484, "step": 22917 }, { "epoch": 0.6267228177641654, "grad_norm": 1.5533157587051392, "learning_rate": 6.46115069504326e-06, "loss": 0.4994, "step": 22918 }, { "epoch": 0.6267501640778823, "grad_norm": 1.4614900350570679, "learning_rate": 6.460322319594641e-06, "loss": 0.5135, "step": 22919 }, { "epoch": 0.6267775103915992, "grad_norm": 1.2904947996139526, "learning_rate": 6.4594939719137935e-06, "loss": 0.5039, "step": 22920 }, { "epoch": 0.6268048567053162, "grad_norm": 1.2715457677841187, "learning_rate": 6.458665652007221e-06, "loss": 0.5004, "step": 22921 }, { "epoch": 0.6268322030190331, "grad_norm": 1.21469247341156, "learning_rate": 6.457837359881421e-06, "loss": 0.4591, "step": 22922 }, { "epoch": 0.6268595493327499, "grad_norm": 1.7391653060913086, "learning_rate": 6.457009095542888e-06, "loss": 0.3876, "step": 22923 }, { "epoch": 0.6268868956464668, "grad_norm": 1.442635178565979, "learning_rate": 6.456180858998125e-06, "loss": 0.3739, "step": 22924 }, { "epoch": 0.6269142419601837, "grad_norm": 1.2769393920898438, "learning_rate": 6.455352650253621e-06, "loss": 0.7463, "step": 22925 }, { "epoch": 0.6269415882739007, "grad_norm": 1.5592201948165894, "learning_rate": 6.45452446931588e-06, "loss": 0.4334, "step": 22926 }, { "epoch": 0.6269689345876176, "grad_norm": 1.356981873512268, "learning_rate": 6.453696316191399e-06, "loss": 0.4732, "step": 22927 }, { "epoch": 0.6269962809013345, "grad_norm": 2.0975565910339355, "learning_rate": 6.4528681908866695e-06, "loss": 0.3508, "step": 22928 }, { "epoch": 0.6270236272150514, "grad_norm": 1.8610610961914062, "learning_rate": 6.452040093408192e-06, "loss": 0.3606, "step": 22929 }, { "epoch": 0.6270509735287684, "grad_norm": 1.2228502035140991, "learning_rate": 6.451212023762462e-06, "loss": 0.7554, "step": 22930 }, { "epoch": 0.6270783198424852, "grad_norm": 1.362913727760315, "learning_rate": 6.450383981955974e-06, "loss": 0.4778, "step": 22931 }, { "epoch": 0.6271056661562021, "grad_norm": 1.1489673852920532, "learning_rate": 6.449555967995226e-06, "loss": 0.4897, "step": 22932 }, { "epoch": 0.627133012469919, "grad_norm": 1.484370470046997, "learning_rate": 6.44872798188671e-06, "loss": 0.3585, "step": 22933 }, { "epoch": 0.627160358783636, "grad_norm": 1.209336280822754, "learning_rate": 6.4479000236369284e-06, "loss": 0.4705, "step": 22934 }, { "epoch": 0.6271877050973529, "grad_norm": 1.1332588195800781, "learning_rate": 6.447072093252367e-06, "loss": 0.5075, "step": 22935 }, { "epoch": 0.6272150514110698, "grad_norm": 1.3967291116714478, "learning_rate": 6.446244190739528e-06, "loss": 0.3418, "step": 22936 }, { "epoch": 0.6272423977247867, "grad_norm": 1.1874117851257324, "learning_rate": 6.4454163161048996e-06, "loss": 0.4776, "step": 22937 }, { "epoch": 0.6272697440385037, "grad_norm": 1.4785704612731934, "learning_rate": 6.44458846935498e-06, "loss": 0.3491, "step": 22938 }, { "epoch": 0.6272970903522205, "grad_norm": 1.2895811796188354, "learning_rate": 6.443760650496267e-06, "loss": 0.5246, "step": 22939 }, { "epoch": 0.6273244366659374, "grad_norm": 1.1817902326583862, "learning_rate": 6.4429328595352465e-06, "loss": 0.7789, "step": 22940 }, { "epoch": 0.6273517829796543, "grad_norm": 1.6388756036758423, "learning_rate": 6.4421050964784215e-06, "loss": 0.3724, "step": 22941 }, { "epoch": 0.6273791292933713, "grad_norm": 1.761743426322937, "learning_rate": 6.441277361332277e-06, "loss": 0.3545, "step": 22942 }, { "epoch": 0.6274064756070882, "grad_norm": 1.1117171049118042, "learning_rate": 6.44044965410331e-06, "loss": 0.3594, "step": 22943 }, { "epoch": 0.6274338219208051, "grad_norm": 1.3199728727340698, "learning_rate": 6.439621974798016e-06, "loss": 0.4869, "step": 22944 }, { "epoch": 0.627461168234522, "grad_norm": 1.32781183719635, "learning_rate": 6.438794323422884e-06, "loss": 0.461, "step": 22945 }, { "epoch": 0.627488514548239, "grad_norm": 1.2056411504745483, "learning_rate": 6.43796669998441e-06, "loss": 0.4543, "step": 22946 }, { "epoch": 0.6275158608619558, "grad_norm": 1.716007947921753, "learning_rate": 6.437139104489083e-06, "loss": 0.3166, "step": 22947 }, { "epoch": 0.6275432071756727, "grad_norm": 1.3171511888504028, "learning_rate": 6.436311536943397e-06, "loss": 0.491, "step": 22948 }, { "epoch": 0.6275705534893896, "grad_norm": 1.2338565587997437, "learning_rate": 6.435483997353846e-06, "loss": 0.5068, "step": 22949 }, { "epoch": 0.6275978998031065, "grad_norm": 1.368988275527954, "learning_rate": 6.434656485726918e-06, "loss": 0.4449, "step": 22950 }, { "epoch": 0.6276252461168235, "grad_norm": 1.1501033306121826, "learning_rate": 6.433829002069106e-06, "loss": 0.4729, "step": 22951 }, { "epoch": 0.6276525924305404, "grad_norm": 1.5575731992721558, "learning_rate": 6.433001546386905e-06, "loss": 0.3367, "step": 22952 }, { "epoch": 0.6276799387442573, "grad_norm": 1.4495283365249634, "learning_rate": 6.4321741186868e-06, "loss": 0.4675, "step": 22953 }, { "epoch": 0.6277072850579742, "grad_norm": 1.9886728525161743, "learning_rate": 6.4313467189752864e-06, "loss": 0.3804, "step": 22954 }, { "epoch": 0.627734631371691, "grad_norm": 1.1534467935562134, "learning_rate": 6.430519347258852e-06, "loss": 0.4651, "step": 22955 }, { "epoch": 0.627761977685408, "grad_norm": 1.2511017322540283, "learning_rate": 6.42969200354399e-06, "loss": 0.4961, "step": 22956 }, { "epoch": 0.6277893239991249, "grad_norm": 1.6266899108886719, "learning_rate": 6.428864687837191e-06, "loss": 0.7506, "step": 22957 }, { "epoch": 0.6278166703128418, "grad_norm": 1.1574409008026123, "learning_rate": 6.4280374001449395e-06, "loss": 0.4746, "step": 22958 }, { "epoch": 0.6278440166265588, "grad_norm": 1.0875695943832397, "learning_rate": 6.4272101404737345e-06, "loss": 0.3487, "step": 22959 }, { "epoch": 0.6278713629402757, "grad_norm": 1.1630091667175293, "learning_rate": 6.426382908830055e-06, "loss": 0.4683, "step": 22960 }, { "epoch": 0.6278987092539926, "grad_norm": 1.5908600091934204, "learning_rate": 6.4255557052203985e-06, "loss": 0.365, "step": 22961 }, { "epoch": 0.6279260555677094, "grad_norm": 1.1861459016799927, "learning_rate": 6.424728529651254e-06, "loss": 0.4491, "step": 22962 }, { "epoch": 0.6279534018814263, "grad_norm": 1.4780242443084717, "learning_rate": 6.423901382129105e-06, "loss": 0.4484, "step": 22963 }, { "epoch": 0.6279807481951433, "grad_norm": 1.4027354717254639, "learning_rate": 6.423074262660442e-06, "loss": 0.5226, "step": 22964 }, { "epoch": 0.6280080945088602, "grad_norm": 1.602805495262146, "learning_rate": 6.422247171251759e-06, "loss": 0.4608, "step": 22965 }, { "epoch": 0.6280354408225771, "grad_norm": 1.4730713367462158, "learning_rate": 6.421420107909537e-06, "loss": 0.5321, "step": 22966 }, { "epoch": 0.628062787136294, "grad_norm": 1.2898608446121216, "learning_rate": 6.420593072640269e-06, "loss": 0.4878, "step": 22967 }, { "epoch": 0.628090133450011, "grad_norm": 1.5594584941864014, "learning_rate": 6.419766065450439e-06, "loss": 0.3658, "step": 22968 }, { "epoch": 0.6281174797637279, "grad_norm": 1.4426653385162354, "learning_rate": 6.418939086346541e-06, "loss": 0.4752, "step": 22969 }, { "epoch": 0.6281448260774447, "grad_norm": 4.110320568084717, "learning_rate": 6.4181121353350524e-06, "loss": 0.3447, "step": 22970 }, { "epoch": 0.6281721723911616, "grad_norm": 1.3034576177597046, "learning_rate": 6.417285212422471e-06, "loss": 0.4579, "step": 22971 }, { "epoch": 0.6281995187048786, "grad_norm": 1.5885754823684692, "learning_rate": 6.416458317615275e-06, "loss": 0.4764, "step": 22972 }, { "epoch": 0.6282268650185955, "grad_norm": 1.3124650716781616, "learning_rate": 6.415631450919954e-06, "loss": 0.5001, "step": 22973 }, { "epoch": 0.6282542113323124, "grad_norm": 1.2519463300704956, "learning_rate": 6.414804612342999e-06, "loss": 0.4657, "step": 22974 }, { "epoch": 0.6282815576460293, "grad_norm": 1.275374412536621, "learning_rate": 6.413977801890892e-06, "loss": 0.4605, "step": 22975 }, { "epoch": 0.6283089039597463, "grad_norm": 1.241660475730896, "learning_rate": 6.413151019570121e-06, "loss": 0.5121, "step": 22976 }, { "epoch": 0.6283362502734632, "grad_norm": 1.6042042970657349, "learning_rate": 6.412324265387168e-06, "loss": 0.4708, "step": 22977 }, { "epoch": 0.62836359658718, "grad_norm": 1.1082043647766113, "learning_rate": 6.411497539348523e-06, "loss": 0.4585, "step": 22978 }, { "epoch": 0.6283909429008969, "grad_norm": 1.1808991432189941, "learning_rate": 6.4106708414606715e-06, "loss": 0.4813, "step": 22979 }, { "epoch": 0.6284182892146138, "grad_norm": 1.6399327516555786, "learning_rate": 6.409844171730093e-06, "loss": 0.483, "step": 22980 }, { "epoch": 0.6284456355283308, "grad_norm": 1.383276343345642, "learning_rate": 6.409017530163281e-06, "loss": 0.4625, "step": 22981 }, { "epoch": 0.6284729818420477, "grad_norm": 1.1562163829803467, "learning_rate": 6.408190916766713e-06, "loss": 0.4895, "step": 22982 }, { "epoch": 0.6285003281557646, "grad_norm": 1.6498621702194214, "learning_rate": 6.407364331546876e-06, "loss": 0.7277, "step": 22983 }, { "epoch": 0.6285276744694815, "grad_norm": 1.4760819673538208, "learning_rate": 6.406537774510257e-06, "loss": 0.4818, "step": 22984 }, { "epoch": 0.6285550207831985, "grad_norm": 1.1728402376174927, "learning_rate": 6.4057112456633364e-06, "loss": 0.7504, "step": 22985 }, { "epoch": 0.6285823670969153, "grad_norm": 1.1862313747406006, "learning_rate": 6.404884745012599e-06, "loss": 0.4931, "step": 22986 }, { "epoch": 0.6286097134106322, "grad_norm": 1.9313586950302124, "learning_rate": 6.404058272564532e-06, "loss": 0.4791, "step": 22987 }, { "epoch": 0.6286370597243491, "grad_norm": 1.372870922088623, "learning_rate": 6.403231828325614e-06, "loss": 0.5123, "step": 22988 }, { "epoch": 0.6286644060380661, "grad_norm": 1.1391993761062622, "learning_rate": 6.40240541230233e-06, "loss": 0.5243, "step": 22989 }, { "epoch": 0.628691752351783, "grad_norm": 1.4455565214157104, "learning_rate": 6.401579024501162e-06, "loss": 0.4908, "step": 22990 }, { "epoch": 0.6287190986654999, "grad_norm": 1.3678256273269653, "learning_rate": 6.400752664928595e-06, "loss": 0.5009, "step": 22991 }, { "epoch": 0.6287464449792168, "grad_norm": 1.1835806369781494, "learning_rate": 6.399926333591111e-06, "loss": 0.7088, "step": 22992 }, { "epoch": 0.6287737912929338, "grad_norm": 1.7416397333145142, "learning_rate": 6.399100030495191e-06, "loss": 0.4979, "step": 22993 }, { "epoch": 0.6288011376066506, "grad_norm": 1.193793535232544, "learning_rate": 6.398273755647317e-06, "loss": 0.4994, "step": 22994 }, { "epoch": 0.6288284839203675, "grad_norm": 1.293410301208496, "learning_rate": 6.397447509053973e-06, "loss": 0.4888, "step": 22995 }, { "epoch": 0.6288558302340844, "grad_norm": 1.4428532123565674, "learning_rate": 6.39662129072164e-06, "loss": 0.372, "step": 22996 }, { "epoch": 0.6288831765478013, "grad_norm": 1.523459553718567, "learning_rate": 6.395795100656799e-06, "loss": 0.4927, "step": 22997 }, { "epoch": 0.6289105228615183, "grad_norm": 1.1886317729949951, "learning_rate": 6.39496893886593e-06, "loss": 0.4726, "step": 22998 }, { "epoch": 0.6289378691752352, "grad_norm": 1.2420756816864014, "learning_rate": 6.394142805355515e-06, "loss": 0.469, "step": 22999 }, { "epoch": 0.6289652154889521, "grad_norm": 1.6285890340805054, "learning_rate": 6.393316700132035e-06, "loss": 0.5148, "step": 23000 }, { "epoch": 0.628992561802669, "grad_norm": 1.2450239658355713, "learning_rate": 6.3924906232019705e-06, "loss": 0.4581, "step": 23001 }, { "epoch": 0.6290199081163859, "grad_norm": 1.3914763927459717, "learning_rate": 6.391664574571803e-06, "loss": 0.475, "step": 23002 }, { "epoch": 0.6290472544301028, "grad_norm": 1.14603590965271, "learning_rate": 6.390838554248011e-06, "loss": 0.5039, "step": 23003 }, { "epoch": 0.6290746007438197, "grad_norm": 1.1582369804382324, "learning_rate": 6.390012562237078e-06, "loss": 0.479, "step": 23004 }, { "epoch": 0.6291019470575366, "grad_norm": 4.3068742752075195, "learning_rate": 6.389186598545476e-06, "loss": 0.4389, "step": 23005 }, { "epoch": 0.6291292933712536, "grad_norm": 1.745676040649414, "learning_rate": 6.388360663179691e-06, "loss": 0.489, "step": 23006 }, { "epoch": 0.6291566396849705, "grad_norm": 1.5975077152252197, "learning_rate": 6.3875347561461984e-06, "loss": 0.5162, "step": 23007 }, { "epoch": 0.6291839859986874, "grad_norm": 2.714529514312744, "learning_rate": 6.386708877451479e-06, "loss": 0.4653, "step": 23008 }, { "epoch": 0.6292113323124043, "grad_norm": 1.6477956771850586, "learning_rate": 6.385883027102014e-06, "loss": 0.4841, "step": 23009 }, { "epoch": 0.6292386786261212, "grad_norm": 1.413969874382019, "learning_rate": 6.385057205104277e-06, "loss": 0.4705, "step": 23010 }, { "epoch": 0.6292660249398381, "grad_norm": 1.2316278219223022, "learning_rate": 6.3842314114647495e-06, "loss": 0.7018, "step": 23011 }, { "epoch": 0.629293371253555, "grad_norm": 1.6171290874481201, "learning_rate": 6.383405646189908e-06, "loss": 0.4828, "step": 23012 }, { "epoch": 0.6293207175672719, "grad_norm": 1.7072477340698242, "learning_rate": 6.382579909286233e-06, "loss": 0.3694, "step": 23013 }, { "epoch": 0.6293480638809889, "grad_norm": 1.3821008205413818, "learning_rate": 6.381754200760201e-06, "loss": 0.4709, "step": 23014 }, { "epoch": 0.6293754101947058, "grad_norm": 1.6365655660629272, "learning_rate": 6.380928520618288e-06, "loss": 0.3464, "step": 23015 }, { "epoch": 0.6294027565084227, "grad_norm": 1.266321063041687, "learning_rate": 6.380102868866972e-06, "loss": 0.4664, "step": 23016 }, { "epoch": 0.6294301028221396, "grad_norm": 1.3899775743484497, "learning_rate": 6.3792772455127315e-06, "loss": 0.4616, "step": 23017 }, { "epoch": 0.6294574491358564, "grad_norm": 1.3731154203414917, "learning_rate": 6.378451650562041e-06, "loss": 0.5084, "step": 23018 }, { "epoch": 0.6294847954495734, "grad_norm": 1.3408273458480835, "learning_rate": 6.37762608402138e-06, "loss": 0.4611, "step": 23019 }, { "epoch": 0.6295121417632903, "grad_norm": 1.3934268951416016, "learning_rate": 6.376800545897219e-06, "loss": 0.4892, "step": 23020 }, { "epoch": 0.6295394880770072, "grad_norm": 1.2768049240112305, "learning_rate": 6.375975036196041e-06, "loss": 0.4968, "step": 23021 }, { "epoch": 0.6295668343907241, "grad_norm": 1.2978954315185547, "learning_rate": 6.375149554924321e-06, "loss": 0.4693, "step": 23022 }, { "epoch": 0.6295941807044411, "grad_norm": 1.3582186698913574, "learning_rate": 6.374324102088527e-06, "loss": 0.4577, "step": 23023 }, { "epoch": 0.629621527018158, "grad_norm": 1.1923171281814575, "learning_rate": 6.373498677695145e-06, "loss": 0.4847, "step": 23024 }, { "epoch": 0.6296488733318749, "grad_norm": 1.5289274454116821, "learning_rate": 6.372673281750643e-06, "loss": 0.4664, "step": 23025 }, { "epoch": 0.6296762196455917, "grad_norm": 1.4785873889923096, "learning_rate": 6.371847914261497e-06, "loss": 0.4586, "step": 23026 }, { "epoch": 0.6297035659593087, "grad_norm": 1.3610775470733643, "learning_rate": 6.371022575234187e-06, "loss": 0.3544, "step": 23027 }, { "epoch": 0.6297309122730256, "grad_norm": 1.4623949527740479, "learning_rate": 6.37019726467518e-06, "loss": 0.4622, "step": 23028 }, { "epoch": 0.6297582585867425, "grad_norm": 2.5666279792785645, "learning_rate": 6.369371982590955e-06, "loss": 0.3765, "step": 23029 }, { "epoch": 0.6297856049004594, "grad_norm": 1.5599266290664673, "learning_rate": 6.368546728987985e-06, "loss": 0.4683, "step": 23030 }, { "epoch": 0.6298129512141764, "grad_norm": 1.2720595598220825, "learning_rate": 6.3677215038727444e-06, "loss": 0.4484, "step": 23031 }, { "epoch": 0.6298402975278933, "grad_norm": 1.296567678451538, "learning_rate": 6.366896307251708e-06, "loss": 0.4672, "step": 23032 }, { "epoch": 0.6298676438416102, "grad_norm": 1.6605619192123413, "learning_rate": 6.366071139131345e-06, "loss": 0.7778, "step": 23033 }, { "epoch": 0.629894990155327, "grad_norm": 1.4366710186004639, "learning_rate": 6.365245999518132e-06, "loss": 0.4464, "step": 23034 }, { "epoch": 0.6299223364690439, "grad_norm": 1.7227230072021484, "learning_rate": 6.364420888418542e-06, "loss": 0.5199, "step": 23035 }, { "epoch": 0.6299496827827609, "grad_norm": 1.1378053426742554, "learning_rate": 6.363595805839046e-06, "loss": 0.4861, "step": 23036 }, { "epoch": 0.6299770290964778, "grad_norm": 1.3038287162780762, "learning_rate": 6.362770751786116e-06, "loss": 0.4575, "step": 23037 }, { "epoch": 0.6300043754101947, "grad_norm": 1.1802436113357544, "learning_rate": 6.36194572626623e-06, "loss": 0.4643, "step": 23038 }, { "epoch": 0.6300317217239116, "grad_norm": 1.4714434146881104, "learning_rate": 6.3611207292858525e-06, "loss": 0.4437, "step": 23039 }, { "epoch": 0.6300590680376286, "grad_norm": 1.3575282096862793, "learning_rate": 6.360295760851463e-06, "loss": 0.4567, "step": 23040 }, { "epoch": 0.6300864143513455, "grad_norm": 1.5007812976837158, "learning_rate": 6.359470820969527e-06, "loss": 0.3192, "step": 23041 }, { "epoch": 0.6301137606650623, "grad_norm": 1.6931012868881226, "learning_rate": 6.358645909646517e-06, "loss": 0.3669, "step": 23042 }, { "epoch": 0.6301411069787792, "grad_norm": 1.4226425886154175, "learning_rate": 6.357821026888904e-06, "loss": 0.4596, "step": 23043 }, { "epoch": 0.6301684532924962, "grad_norm": 1.2303522825241089, "learning_rate": 6.356996172703161e-06, "loss": 0.5021, "step": 23044 }, { "epoch": 0.6301957996062131, "grad_norm": 2.0302305221557617, "learning_rate": 6.356171347095758e-06, "loss": 0.5257, "step": 23045 }, { "epoch": 0.63022314591993, "grad_norm": 1.6385165452957153, "learning_rate": 6.355346550073166e-06, "loss": 0.4806, "step": 23046 }, { "epoch": 0.6302504922336469, "grad_norm": 1.1479772329330444, "learning_rate": 6.354521781641854e-06, "loss": 0.4952, "step": 23047 }, { "epoch": 0.6302778385473639, "grad_norm": 1.4235117435455322, "learning_rate": 6.353697041808291e-06, "loss": 0.488, "step": 23048 }, { "epoch": 0.6303051848610808, "grad_norm": 1.5543090105056763, "learning_rate": 6.35287233057895e-06, "loss": 0.4901, "step": 23049 }, { "epoch": 0.6303325311747976, "grad_norm": 1.3069077730178833, "learning_rate": 6.352047647960299e-06, "loss": 0.4811, "step": 23050 }, { "epoch": 0.6303598774885145, "grad_norm": 1.3364686965942383, "learning_rate": 6.351222993958807e-06, "loss": 0.4556, "step": 23051 }, { "epoch": 0.6303872238022314, "grad_norm": 1.4501889944076538, "learning_rate": 6.350398368580945e-06, "loss": 0.4348, "step": 23052 }, { "epoch": 0.6304145701159484, "grad_norm": 1.2481797933578491, "learning_rate": 6.34957377183318e-06, "loss": 0.3081, "step": 23053 }, { "epoch": 0.6304419164296653, "grad_norm": 1.6670557260513306, "learning_rate": 6.348749203721982e-06, "loss": 0.4751, "step": 23054 }, { "epoch": 0.6304692627433822, "grad_norm": 1.326003909111023, "learning_rate": 6.347924664253818e-06, "loss": 0.4976, "step": 23055 }, { "epoch": 0.6304966090570991, "grad_norm": 1.275484323501587, "learning_rate": 6.347100153435157e-06, "loss": 0.4738, "step": 23056 }, { "epoch": 0.6305239553708161, "grad_norm": 1.5740028619766235, "learning_rate": 6.346275671272469e-06, "loss": 0.4553, "step": 23057 }, { "epoch": 0.6305513016845329, "grad_norm": 1.3341947793960571, "learning_rate": 6.345451217772218e-06, "loss": 0.4963, "step": 23058 }, { "epoch": 0.6305786479982498, "grad_norm": 1.4224721193313599, "learning_rate": 6.344626792940876e-06, "loss": 0.4606, "step": 23059 }, { "epoch": 0.6306059943119667, "grad_norm": 1.3231933116912842, "learning_rate": 6.343802396784907e-06, "loss": 0.461, "step": 23060 }, { "epoch": 0.6306333406256837, "grad_norm": 1.8329498767852783, "learning_rate": 6.342978029310778e-06, "loss": 0.3684, "step": 23061 }, { "epoch": 0.6306606869394006, "grad_norm": 1.2383145093917847, "learning_rate": 6.34215369052496e-06, "loss": 0.476, "step": 23062 }, { "epoch": 0.6306880332531175, "grad_norm": 1.211742639541626, "learning_rate": 6.341329380433915e-06, "loss": 0.4841, "step": 23063 }, { "epoch": 0.6307153795668344, "grad_norm": 1.5876853466033936, "learning_rate": 6.34050509904411e-06, "loss": 0.4836, "step": 23064 }, { "epoch": 0.6307427258805512, "grad_norm": 1.2540947198867798, "learning_rate": 6.339680846362017e-06, "loss": 0.7506, "step": 23065 }, { "epoch": 0.6307700721942682, "grad_norm": 1.3795369863510132, "learning_rate": 6.338856622394094e-06, "loss": 0.4824, "step": 23066 }, { "epoch": 0.6307974185079851, "grad_norm": 1.2624098062515259, "learning_rate": 6.338032427146812e-06, "loss": 0.4995, "step": 23067 }, { "epoch": 0.630824764821702, "grad_norm": 1.7812639474868774, "learning_rate": 6.337208260626632e-06, "loss": 0.4591, "step": 23068 }, { "epoch": 0.630852111135419, "grad_norm": 1.8349133729934692, "learning_rate": 6.336384122840026e-06, "loss": 0.3476, "step": 23069 }, { "epoch": 0.6308794574491359, "grad_norm": 1.3195823431015015, "learning_rate": 6.335560013793456e-06, "loss": 0.4462, "step": 23070 }, { "epoch": 0.6309068037628528, "grad_norm": 1.452787160873413, "learning_rate": 6.3347359334933835e-06, "loss": 0.717, "step": 23071 }, { "epoch": 0.6309341500765697, "grad_norm": 1.4100258350372314, "learning_rate": 6.333911881946276e-06, "loss": 0.4957, "step": 23072 }, { "epoch": 0.6309614963902865, "grad_norm": 1.3709135055541992, "learning_rate": 6.333087859158601e-06, "loss": 0.4715, "step": 23073 }, { "epoch": 0.6309888427040035, "grad_norm": 1.2038519382476807, "learning_rate": 6.332263865136817e-06, "loss": 0.4821, "step": 23074 }, { "epoch": 0.6310161890177204, "grad_norm": 1.450522780418396, "learning_rate": 6.331439899887396e-06, "loss": 0.3589, "step": 23075 }, { "epoch": 0.6310435353314373, "grad_norm": 1.201818585395813, "learning_rate": 6.330615963416793e-06, "loss": 0.4676, "step": 23076 }, { "epoch": 0.6310708816451542, "grad_norm": 1.4258897304534912, "learning_rate": 6.329792055731474e-06, "loss": 0.4844, "step": 23077 }, { "epoch": 0.6310982279588712, "grad_norm": 1.4287947416305542, "learning_rate": 6.328968176837904e-06, "loss": 0.4791, "step": 23078 }, { "epoch": 0.6311255742725881, "grad_norm": 1.4106488227844238, "learning_rate": 6.328144326742547e-06, "loss": 0.4925, "step": 23079 }, { "epoch": 0.631152920586305, "grad_norm": 1.5569432973861694, "learning_rate": 6.327320505451861e-06, "loss": 0.365, "step": 23080 }, { "epoch": 0.6311802669000218, "grad_norm": 1.3553781509399414, "learning_rate": 6.3264967129723145e-06, "loss": 0.4922, "step": 23081 }, { "epoch": 0.6312076132137387, "grad_norm": 1.4385329484939575, "learning_rate": 6.325672949310368e-06, "loss": 0.3396, "step": 23082 }, { "epoch": 0.6312349595274557, "grad_norm": 1.1750527620315552, "learning_rate": 6.324849214472482e-06, "loss": 0.4826, "step": 23083 }, { "epoch": 0.6312623058411726, "grad_norm": 1.0844136476516724, "learning_rate": 6.324025508465122e-06, "loss": 0.4896, "step": 23084 }, { "epoch": 0.6312896521548895, "grad_norm": 1.1060490608215332, "learning_rate": 6.323201831294743e-06, "loss": 0.4697, "step": 23085 }, { "epoch": 0.6313169984686064, "grad_norm": 1.507439374923706, "learning_rate": 6.3223781829678136e-06, "loss": 0.3543, "step": 23086 }, { "epoch": 0.6313443447823234, "grad_norm": 1.2590274810791016, "learning_rate": 6.321554563490792e-06, "loss": 0.4456, "step": 23087 }, { "epoch": 0.6313716910960403, "grad_norm": 1.2087595462799072, "learning_rate": 6.32073097287014e-06, "loss": 0.5012, "step": 23088 }, { "epoch": 0.6313990374097571, "grad_norm": 1.481514811515808, "learning_rate": 6.319907411112318e-06, "loss": 0.4706, "step": 23089 }, { "epoch": 0.631426383723474, "grad_norm": 1.4013614654541016, "learning_rate": 6.3190838782237865e-06, "loss": 0.4871, "step": 23090 }, { "epoch": 0.631453730037191, "grad_norm": 2.1814498901367188, "learning_rate": 6.318260374211005e-06, "loss": 0.7249, "step": 23091 }, { "epoch": 0.6314810763509079, "grad_norm": 1.582316517829895, "learning_rate": 6.317436899080437e-06, "loss": 0.7372, "step": 23092 }, { "epoch": 0.6315084226646248, "grad_norm": 1.4358798265457153, "learning_rate": 6.316613452838538e-06, "loss": 0.4452, "step": 23093 }, { "epoch": 0.6315357689783417, "grad_norm": 1.215418815612793, "learning_rate": 6.315790035491769e-06, "loss": 0.4589, "step": 23094 }, { "epoch": 0.6315631152920587, "grad_norm": 1.332480549812317, "learning_rate": 6.314966647046594e-06, "loss": 0.4703, "step": 23095 }, { "epoch": 0.6315904616057756, "grad_norm": 1.178025722503662, "learning_rate": 6.314143287509467e-06, "loss": 0.4485, "step": 23096 }, { "epoch": 0.6316178079194924, "grad_norm": 1.2830992937088013, "learning_rate": 6.313319956886849e-06, "loss": 0.3537, "step": 23097 }, { "epoch": 0.6316451542332093, "grad_norm": 1.5135722160339355, "learning_rate": 6.3124966551851965e-06, "loss": 0.5067, "step": 23098 }, { "epoch": 0.6316725005469263, "grad_norm": 1.6549572944641113, "learning_rate": 6.31167338241097e-06, "loss": 0.4791, "step": 23099 }, { "epoch": 0.6316998468606432, "grad_norm": 1.5654360055923462, "learning_rate": 6.31085013857063e-06, "loss": 0.49, "step": 23100 }, { "epoch": 0.6317271931743601, "grad_norm": 1.5016405582427979, "learning_rate": 6.310026923670632e-06, "loss": 0.3332, "step": 23101 }, { "epoch": 0.631754539488077, "grad_norm": 1.4111706018447876, "learning_rate": 6.3092037377174335e-06, "loss": 0.4439, "step": 23102 }, { "epoch": 0.631781885801794, "grad_norm": 2.618246078491211, "learning_rate": 6.3083805807174935e-06, "loss": 0.3473, "step": 23103 }, { "epoch": 0.6318092321155109, "grad_norm": 1.284274697303772, "learning_rate": 6.307557452677268e-06, "loss": 0.4863, "step": 23104 }, { "epoch": 0.6318365784292277, "grad_norm": 1.188398838043213, "learning_rate": 6.306734353603217e-06, "loss": 0.4662, "step": 23105 }, { "epoch": 0.6318639247429446, "grad_norm": 1.2089273929595947, "learning_rate": 6.305911283501793e-06, "loss": 0.4598, "step": 23106 }, { "epoch": 0.6318912710566615, "grad_norm": 1.4064372777938843, "learning_rate": 6.305088242379457e-06, "loss": 0.4591, "step": 23107 }, { "epoch": 0.6319186173703785, "grad_norm": 1.5035520792007446, "learning_rate": 6.304265230242666e-06, "loss": 0.3761, "step": 23108 }, { "epoch": 0.6319459636840954, "grad_norm": 1.2159827947616577, "learning_rate": 6.3034422470978705e-06, "loss": 0.714, "step": 23109 }, { "epoch": 0.6319733099978123, "grad_norm": 1.6764631271362305, "learning_rate": 6.302619292951535e-06, "loss": 0.5063, "step": 23110 }, { "epoch": 0.6320006563115292, "grad_norm": 1.3411670923233032, "learning_rate": 6.301796367810108e-06, "loss": 0.4384, "step": 23111 }, { "epoch": 0.6320280026252462, "grad_norm": 1.4209376573562622, "learning_rate": 6.300973471680047e-06, "loss": 0.52, "step": 23112 }, { "epoch": 0.632055348938963, "grad_norm": 1.3821208477020264, "learning_rate": 6.300150604567807e-06, "loss": 0.7604, "step": 23113 }, { "epoch": 0.6320826952526799, "grad_norm": 1.3021116256713867, "learning_rate": 6.299327766479846e-06, "loss": 0.5153, "step": 23114 }, { "epoch": 0.6321100415663968, "grad_norm": 1.890160083770752, "learning_rate": 6.298504957422616e-06, "loss": 0.3868, "step": 23115 }, { "epoch": 0.6321373878801138, "grad_norm": 1.22926664352417, "learning_rate": 6.2976821774025735e-06, "loss": 0.4529, "step": 23116 }, { "epoch": 0.6321647341938307, "grad_norm": 1.5329726934432983, "learning_rate": 6.296859426426174e-06, "loss": 0.5022, "step": 23117 }, { "epoch": 0.6321920805075476, "grad_norm": 1.1187703609466553, "learning_rate": 6.2960367044998686e-06, "loss": 0.4919, "step": 23118 }, { "epoch": 0.6322194268212645, "grad_norm": 1.3745815753936768, "learning_rate": 6.295214011630113e-06, "loss": 0.4883, "step": 23119 }, { "epoch": 0.6322467731349815, "grad_norm": 1.3153340816497803, "learning_rate": 6.2943913478233595e-06, "loss": 0.461, "step": 23120 }, { "epoch": 0.6322741194486983, "grad_norm": 1.1586652994155884, "learning_rate": 6.293568713086064e-06, "loss": 0.7382, "step": 23121 }, { "epoch": 0.6323014657624152, "grad_norm": 1.2447080612182617, "learning_rate": 6.292746107424681e-06, "loss": 0.5051, "step": 23122 }, { "epoch": 0.6323288120761321, "grad_norm": 4.171238422393799, "learning_rate": 6.2919235308456585e-06, "loss": 0.6987, "step": 23123 }, { "epoch": 0.632356158389849, "grad_norm": 1.121334195137024, "learning_rate": 6.291100983355453e-06, "loss": 0.3547, "step": 23124 }, { "epoch": 0.632383504703566, "grad_norm": 1.3072993755340576, "learning_rate": 6.290278464960517e-06, "loss": 0.4693, "step": 23125 }, { "epoch": 0.6324108510172829, "grad_norm": 1.5277621746063232, "learning_rate": 6.2894559756673025e-06, "loss": 0.4516, "step": 23126 }, { "epoch": 0.6324381973309998, "grad_norm": 1.46054208278656, "learning_rate": 6.288633515482263e-06, "loss": 0.4995, "step": 23127 }, { "epoch": 0.6324655436447167, "grad_norm": 1.1956508159637451, "learning_rate": 6.287811084411847e-06, "loss": 0.7561, "step": 23128 }, { "epoch": 0.6324928899584336, "grad_norm": 1.3306361436843872, "learning_rate": 6.286988682462508e-06, "loss": 0.4775, "step": 23129 }, { "epoch": 0.6325202362721505, "grad_norm": 1.2164932489395142, "learning_rate": 6.2861663096407e-06, "loss": 0.7249, "step": 23130 }, { "epoch": 0.6325475825858674, "grad_norm": 1.3887207508087158, "learning_rate": 6.2853439659528705e-06, "loss": 0.5046, "step": 23131 }, { "epoch": 0.6325749288995843, "grad_norm": 1.380511999130249, "learning_rate": 6.284521651405474e-06, "loss": 0.4594, "step": 23132 }, { "epoch": 0.6326022752133013, "grad_norm": 1.396150827407837, "learning_rate": 6.283699366004957e-06, "loss": 0.5179, "step": 23133 }, { "epoch": 0.6326296215270182, "grad_norm": 1.896230697631836, "learning_rate": 6.282877109757774e-06, "loss": 0.3588, "step": 23134 }, { "epoch": 0.6326569678407351, "grad_norm": 1.2251615524291992, "learning_rate": 6.282054882670374e-06, "loss": 0.4672, "step": 23135 }, { "epoch": 0.632684314154452, "grad_norm": 1.241211175918579, "learning_rate": 6.281232684749207e-06, "loss": 0.3726, "step": 23136 }, { "epoch": 0.6327116604681688, "grad_norm": 1.6004775762557983, "learning_rate": 6.280410516000721e-06, "loss": 0.4782, "step": 23137 }, { "epoch": 0.6327390067818858, "grad_norm": 1.1035369634628296, "learning_rate": 6.279588376431371e-06, "loss": 0.4528, "step": 23138 }, { "epoch": 0.6327663530956027, "grad_norm": 1.3213610649108887, "learning_rate": 6.2787662660476026e-06, "loss": 0.4465, "step": 23139 }, { "epoch": 0.6327936994093196, "grad_norm": 1.2283445596694946, "learning_rate": 6.2779441848558665e-06, "loss": 0.4693, "step": 23140 }, { "epoch": 0.6328210457230365, "grad_norm": 1.2756808996200562, "learning_rate": 6.277122132862607e-06, "loss": 0.4528, "step": 23141 }, { "epoch": 0.6328483920367535, "grad_norm": 1.3793247938156128, "learning_rate": 6.276300110074278e-06, "loss": 0.4793, "step": 23142 }, { "epoch": 0.6328757383504704, "grad_norm": 1.478737235069275, "learning_rate": 6.275478116497329e-06, "loss": 0.454, "step": 23143 }, { "epoch": 0.6329030846641873, "grad_norm": 1.1158404350280762, "learning_rate": 6.274656152138204e-06, "loss": 0.4398, "step": 23144 }, { "epoch": 0.6329304309779041, "grad_norm": 1.2878475189208984, "learning_rate": 6.2738342170033575e-06, "loss": 0.491, "step": 23145 }, { "epoch": 0.6329577772916211, "grad_norm": 1.5087754726409912, "learning_rate": 6.273012311099228e-06, "loss": 0.4447, "step": 23146 }, { "epoch": 0.632985123605338, "grad_norm": 1.3079367876052856, "learning_rate": 6.27219043443227e-06, "loss": 0.4993, "step": 23147 }, { "epoch": 0.6330124699190549, "grad_norm": 2.2108685970306396, "learning_rate": 6.271368587008928e-06, "loss": 0.3459, "step": 23148 }, { "epoch": 0.6330398162327718, "grad_norm": 1.32411789894104, "learning_rate": 6.27054676883565e-06, "loss": 0.4993, "step": 23149 }, { "epoch": 0.6330671625464888, "grad_norm": 1.387328028678894, "learning_rate": 6.269724979918883e-06, "loss": 0.5056, "step": 23150 }, { "epoch": 0.6330945088602057, "grad_norm": 1.3665481805801392, "learning_rate": 6.2689032202650735e-06, "loss": 0.3285, "step": 23151 }, { "epoch": 0.6331218551739226, "grad_norm": 1.2039523124694824, "learning_rate": 6.268081489880671e-06, "loss": 0.4662, "step": 23152 }, { "epoch": 0.6331492014876394, "grad_norm": 1.7110846042633057, "learning_rate": 6.267259788772116e-06, "loss": 0.4854, "step": 23153 }, { "epoch": 0.6331765478013563, "grad_norm": 1.6749423742294312, "learning_rate": 6.2664381169458584e-06, "loss": 0.4858, "step": 23154 }, { "epoch": 0.6332038941150733, "grad_norm": 1.7675461769104004, "learning_rate": 6.265616474408343e-06, "loss": 0.344, "step": 23155 }, { "epoch": 0.6332312404287902, "grad_norm": 1.326663851737976, "learning_rate": 6.264794861166012e-06, "loss": 0.4762, "step": 23156 }, { "epoch": 0.6332585867425071, "grad_norm": 1.2349869012832642, "learning_rate": 6.263973277225319e-06, "loss": 0.4666, "step": 23157 }, { "epoch": 0.633285933056224, "grad_norm": 1.2336442470550537, "learning_rate": 6.263151722592702e-06, "loss": 0.5135, "step": 23158 }, { "epoch": 0.633313279369941, "grad_norm": 1.513418197631836, "learning_rate": 6.262330197274608e-06, "loss": 0.7147, "step": 23159 }, { "epoch": 0.6333406256836578, "grad_norm": 1.281834363937378, "learning_rate": 6.261508701277483e-06, "loss": 0.5138, "step": 23160 }, { "epoch": 0.6333679719973747, "grad_norm": 1.4671308994293213, "learning_rate": 6.26068723460777e-06, "loss": 0.5219, "step": 23161 }, { "epoch": 0.6333953183110916, "grad_norm": 1.03673255443573, "learning_rate": 6.259865797271913e-06, "loss": 0.3325, "step": 23162 }, { "epoch": 0.6334226646248086, "grad_norm": 1.4444897174835205, "learning_rate": 6.259044389276356e-06, "loss": 0.479, "step": 23163 }, { "epoch": 0.6334500109385255, "grad_norm": 1.7953100204467773, "learning_rate": 6.258223010627542e-06, "loss": 0.472, "step": 23164 }, { "epoch": 0.6334773572522424, "grad_norm": 1.3072806596755981, "learning_rate": 6.257401661331918e-06, "loss": 0.5122, "step": 23165 }, { "epoch": 0.6335047035659593, "grad_norm": 1.4001743793487549, "learning_rate": 6.256580341395923e-06, "loss": 0.4588, "step": 23166 }, { "epoch": 0.6335320498796763, "grad_norm": 1.3215888738632202, "learning_rate": 6.255759050826003e-06, "loss": 0.4667, "step": 23167 }, { "epoch": 0.6335593961933931, "grad_norm": 1.196872591972351, "learning_rate": 6.2549377896285965e-06, "loss": 0.723, "step": 23168 }, { "epoch": 0.63358674250711, "grad_norm": 1.4961735010147095, "learning_rate": 6.25411655781015e-06, "loss": 0.4941, "step": 23169 }, { "epoch": 0.6336140888208269, "grad_norm": 1.1753703355789185, "learning_rate": 6.2532953553771085e-06, "loss": 0.3332, "step": 23170 }, { "epoch": 0.6336414351345439, "grad_norm": 1.2407749891281128, "learning_rate": 6.252474182335907e-06, "loss": 0.5034, "step": 23171 }, { "epoch": 0.6336687814482608, "grad_norm": 1.2918928861618042, "learning_rate": 6.251653038692991e-06, "loss": 0.4868, "step": 23172 }, { "epoch": 0.6336961277619777, "grad_norm": 1.2084325551986694, "learning_rate": 6.250831924454805e-06, "loss": 0.4553, "step": 23173 }, { "epoch": 0.6337234740756946, "grad_norm": 1.2720730304718018, "learning_rate": 6.250010839627784e-06, "loss": 0.4959, "step": 23174 }, { "epoch": 0.6337508203894116, "grad_norm": 1.3432440757751465, "learning_rate": 6.249189784218375e-06, "loss": 0.4674, "step": 23175 }, { "epoch": 0.6337781667031284, "grad_norm": 1.8528892993927002, "learning_rate": 6.248368758233014e-06, "loss": 0.4883, "step": 23176 }, { "epoch": 0.6338055130168453, "grad_norm": 1.1942110061645508, "learning_rate": 6.247547761678145e-06, "loss": 0.4762, "step": 23177 }, { "epoch": 0.6338328593305622, "grad_norm": 1.0806156396865845, "learning_rate": 6.246726794560211e-06, "loss": 0.4936, "step": 23178 }, { "epoch": 0.6338602056442791, "grad_norm": 1.1711548566818237, "learning_rate": 6.245905856885646e-06, "loss": 0.4601, "step": 23179 }, { "epoch": 0.6338875519579961, "grad_norm": 1.379962682723999, "learning_rate": 6.245084948660896e-06, "loss": 0.49, "step": 23180 }, { "epoch": 0.633914898271713, "grad_norm": 1.2643369436264038, "learning_rate": 6.244264069892396e-06, "loss": 0.5028, "step": 23181 }, { "epoch": 0.6339422445854299, "grad_norm": 1.1495798826217651, "learning_rate": 6.243443220586587e-06, "loss": 0.4735, "step": 23182 }, { "epoch": 0.6339695908991468, "grad_norm": 1.6406691074371338, "learning_rate": 6.242622400749909e-06, "loss": 0.4744, "step": 23183 }, { "epoch": 0.6339969372128637, "grad_norm": 3.0152714252471924, "learning_rate": 6.2418016103888015e-06, "loss": 0.3364, "step": 23184 }, { "epoch": 0.6340242835265806, "grad_norm": 1.1445599794387817, "learning_rate": 6.240980849509702e-06, "loss": 0.7353, "step": 23185 }, { "epoch": 0.6340516298402975, "grad_norm": 1.5483825206756592, "learning_rate": 6.2401601181190475e-06, "loss": 0.4786, "step": 23186 }, { "epoch": 0.6340789761540144, "grad_norm": 1.5109599828720093, "learning_rate": 6.2393394162232825e-06, "loss": 0.4927, "step": 23187 }, { "epoch": 0.6341063224677314, "grad_norm": 1.5952857732772827, "learning_rate": 6.238518743828838e-06, "loss": 0.468, "step": 23188 }, { "epoch": 0.6341336687814483, "grad_norm": 1.2014483213424683, "learning_rate": 6.237698100942158e-06, "loss": 0.4683, "step": 23189 }, { "epoch": 0.6341610150951652, "grad_norm": 1.383059024810791, "learning_rate": 6.236877487569677e-06, "loss": 0.4795, "step": 23190 }, { "epoch": 0.6341883614088821, "grad_norm": 1.3439418077468872, "learning_rate": 6.236056903717833e-06, "loss": 0.4724, "step": 23191 }, { "epoch": 0.6342157077225989, "grad_norm": 1.2775821685791016, "learning_rate": 6.2352363493930644e-06, "loss": 0.4606, "step": 23192 }, { "epoch": 0.6342430540363159, "grad_norm": 1.900619626045227, "learning_rate": 6.234415824601805e-06, "loss": 0.3608, "step": 23193 }, { "epoch": 0.6342704003500328, "grad_norm": 1.2835071086883545, "learning_rate": 6.233595329350495e-06, "loss": 0.4874, "step": 23194 }, { "epoch": 0.6342977466637497, "grad_norm": 1.228888750076294, "learning_rate": 6.23277486364557e-06, "loss": 0.4499, "step": 23195 }, { "epoch": 0.6343250929774666, "grad_norm": 1.73147714138031, "learning_rate": 6.231954427493465e-06, "loss": 0.3945, "step": 23196 }, { "epoch": 0.6343524392911836, "grad_norm": 1.2403819561004639, "learning_rate": 6.231134020900618e-06, "loss": 0.4372, "step": 23197 }, { "epoch": 0.6343797856049005, "grad_norm": 1.646923542022705, "learning_rate": 6.230313643873462e-06, "loss": 0.4839, "step": 23198 }, { "epoch": 0.6344071319186174, "grad_norm": 1.2408546209335327, "learning_rate": 6.2294932964184365e-06, "loss": 0.4915, "step": 23199 }, { "epoch": 0.6344344782323342, "grad_norm": 1.281200647354126, "learning_rate": 6.228672978541975e-06, "loss": 0.4729, "step": 23200 }, { "epoch": 0.6344618245460512, "grad_norm": 1.6228971481323242, "learning_rate": 6.227852690250511e-06, "loss": 0.3965, "step": 23201 }, { "epoch": 0.6344891708597681, "grad_norm": 1.1676753759384155, "learning_rate": 6.227032431550482e-06, "loss": 0.72, "step": 23202 }, { "epoch": 0.634516517173485, "grad_norm": 1.4795875549316406, "learning_rate": 6.226212202448323e-06, "loss": 0.467, "step": 23203 }, { "epoch": 0.6345438634872019, "grad_norm": 1.2957162857055664, "learning_rate": 6.225392002950465e-06, "loss": 0.4652, "step": 23204 }, { "epoch": 0.6345712098009189, "grad_norm": 1.2368991374969482, "learning_rate": 6.224571833063349e-06, "loss": 0.7271, "step": 23205 }, { "epoch": 0.6345985561146358, "grad_norm": 1.2752964496612549, "learning_rate": 6.2237516927934e-06, "loss": 0.4762, "step": 23206 }, { "epoch": 0.6346259024283527, "grad_norm": 1.436074137687683, "learning_rate": 6.222931582147056e-06, "loss": 0.4254, "step": 23207 }, { "epoch": 0.6346532487420695, "grad_norm": 1.2121992111206055, "learning_rate": 6.222111501130753e-06, "loss": 0.4922, "step": 23208 }, { "epoch": 0.6346805950557864, "grad_norm": 1.3890697956085205, "learning_rate": 6.221291449750922e-06, "loss": 0.482, "step": 23209 }, { "epoch": 0.6347079413695034, "grad_norm": 1.3203057050704956, "learning_rate": 6.220471428013996e-06, "loss": 0.5158, "step": 23210 }, { "epoch": 0.6347352876832203, "grad_norm": 1.2293416261672974, "learning_rate": 6.219651435926406e-06, "loss": 0.4817, "step": 23211 }, { "epoch": 0.6347626339969372, "grad_norm": 1.2443549633026123, "learning_rate": 6.218831473494587e-06, "loss": 0.4729, "step": 23212 }, { "epoch": 0.6347899803106541, "grad_norm": 1.6426386833190918, "learning_rate": 6.218011540724973e-06, "loss": 0.3964, "step": 23213 }, { "epoch": 0.6348173266243711, "grad_norm": 1.4656575918197632, "learning_rate": 6.2171916376239935e-06, "loss": 0.4835, "step": 23214 }, { "epoch": 0.634844672938088, "grad_norm": 1.3778221607208252, "learning_rate": 6.216371764198084e-06, "loss": 0.494, "step": 23215 }, { "epoch": 0.6348720192518048, "grad_norm": 1.3941954374313354, "learning_rate": 6.215551920453669e-06, "loss": 0.457, "step": 23216 }, { "epoch": 0.6348993655655217, "grad_norm": 1.4681421518325806, "learning_rate": 6.214732106397186e-06, "loss": 0.476, "step": 23217 }, { "epoch": 0.6349267118792387, "grad_norm": 1.4650557041168213, "learning_rate": 6.2139123220350624e-06, "loss": 0.4838, "step": 23218 }, { "epoch": 0.6349540581929556, "grad_norm": 1.4828068017959595, "learning_rate": 6.213092567373733e-06, "loss": 0.4895, "step": 23219 }, { "epoch": 0.6349814045066725, "grad_norm": 1.306810736656189, "learning_rate": 6.212272842419625e-06, "loss": 0.5015, "step": 23220 }, { "epoch": 0.6350087508203894, "grad_norm": 1.4384617805480957, "learning_rate": 6.211453147179171e-06, "loss": 0.5031, "step": 23221 }, { "epoch": 0.6350360971341064, "grad_norm": 1.521895408630371, "learning_rate": 6.210633481658803e-06, "loss": 0.4677, "step": 23222 }, { "epoch": 0.6350634434478233, "grad_norm": 1.6958160400390625, "learning_rate": 6.2098138458649455e-06, "loss": 0.7096, "step": 23223 }, { "epoch": 0.6350907897615401, "grad_norm": 1.2757072448730469, "learning_rate": 6.2089942398040335e-06, "loss": 0.454, "step": 23224 }, { "epoch": 0.635118136075257, "grad_norm": 1.2125486135482788, "learning_rate": 6.208174663482496e-06, "loss": 0.452, "step": 23225 }, { "epoch": 0.635145482388974, "grad_norm": 1.119426965713501, "learning_rate": 6.207355116906759e-06, "loss": 0.743, "step": 23226 }, { "epoch": 0.6351728287026909, "grad_norm": 1.4108999967575073, "learning_rate": 6.206535600083256e-06, "loss": 0.7256, "step": 23227 }, { "epoch": 0.6352001750164078, "grad_norm": 1.3174432516098022, "learning_rate": 6.20571611301841e-06, "loss": 0.4728, "step": 23228 }, { "epoch": 0.6352275213301247, "grad_norm": 1.3824799060821533, "learning_rate": 6.204896655718655e-06, "loss": 0.4744, "step": 23229 }, { "epoch": 0.6352548676438416, "grad_norm": 1.3623172044754028, "learning_rate": 6.20407722819042e-06, "loss": 0.7219, "step": 23230 }, { "epoch": 0.6352822139575586, "grad_norm": 1.248430848121643, "learning_rate": 6.203257830440128e-06, "loss": 0.4822, "step": 23231 }, { "epoch": 0.6353095602712754, "grad_norm": 1.325966715812683, "learning_rate": 6.202438462474213e-06, "loss": 0.5064, "step": 23232 }, { "epoch": 0.6353369065849923, "grad_norm": 1.3952314853668213, "learning_rate": 6.201619124299096e-06, "loss": 0.4847, "step": 23233 }, { "epoch": 0.6353642528987092, "grad_norm": 1.5456187725067139, "learning_rate": 6.200799815921209e-06, "loss": 0.4734, "step": 23234 }, { "epoch": 0.6353915992124262, "grad_norm": 1.68545401096344, "learning_rate": 6.19998053734698e-06, "loss": 0.3948, "step": 23235 }, { "epoch": 0.6354189455261431, "grad_norm": 1.2274268865585327, "learning_rate": 6.199161288582832e-06, "loss": 0.4245, "step": 23236 }, { "epoch": 0.63544629183986, "grad_norm": 1.6811940670013428, "learning_rate": 6.198342069635195e-06, "loss": 0.4599, "step": 23237 }, { "epoch": 0.6354736381535769, "grad_norm": 1.3572039604187012, "learning_rate": 6.197522880510496e-06, "loss": 0.4831, "step": 23238 }, { "epoch": 0.6355009844672939, "grad_norm": 1.1858086585998535, "learning_rate": 6.196703721215158e-06, "loss": 0.4891, "step": 23239 }, { "epoch": 0.6355283307810107, "grad_norm": 1.6754120588302612, "learning_rate": 6.19588459175561e-06, "loss": 0.736, "step": 23240 }, { "epoch": 0.6355556770947276, "grad_norm": 1.208710789680481, "learning_rate": 6.1950654921382755e-06, "loss": 0.4758, "step": 23241 }, { "epoch": 0.6355830234084445, "grad_norm": 1.2161716222763062, "learning_rate": 6.194246422369582e-06, "loss": 0.4685, "step": 23242 }, { "epoch": 0.6356103697221615, "grad_norm": 1.4214093685150146, "learning_rate": 6.193427382455956e-06, "loss": 0.4972, "step": 23243 }, { "epoch": 0.6356377160358784, "grad_norm": 1.2397823333740234, "learning_rate": 6.192608372403819e-06, "loss": 0.4883, "step": 23244 }, { "epoch": 0.6356650623495953, "grad_norm": 1.1758769750595093, "learning_rate": 6.191789392219596e-06, "loss": 0.4871, "step": 23245 }, { "epoch": 0.6356924086633122, "grad_norm": 1.2803363800048828, "learning_rate": 6.190970441909718e-06, "loss": 0.4718, "step": 23246 }, { "epoch": 0.6357197549770292, "grad_norm": 1.770401954650879, "learning_rate": 6.190151521480602e-06, "loss": 0.5303, "step": 23247 }, { "epoch": 0.635747101290746, "grad_norm": 1.4096819162368774, "learning_rate": 6.189332630938676e-06, "loss": 0.4912, "step": 23248 }, { "epoch": 0.6357744476044629, "grad_norm": 1.2094727754592896, "learning_rate": 6.188513770290363e-06, "loss": 0.4643, "step": 23249 }, { "epoch": 0.6358017939181798, "grad_norm": 1.4011317491531372, "learning_rate": 6.187694939542086e-06, "loss": 0.4845, "step": 23250 }, { "epoch": 0.6358291402318967, "grad_norm": 1.2851959466934204, "learning_rate": 6.186876138700273e-06, "loss": 0.4478, "step": 23251 }, { "epoch": 0.6358564865456137, "grad_norm": 1.1510446071624756, "learning_rate": 6.186057367771342e-06, "loss": 0.4719, "step": 23252 }, { "epoch": 0.6358838328593306, "grad_norm": 1.341792106628418, "learning_rate": 6.185238626761716e-06, "loss": 0.5054, "step": 23253 }, { "epoch": 0.6359111791730475, "grad_norm": 1.7392727136611938, "learning_rate": 6.184419915677819e-06, "loss": 0.4995, "step": 23254 }, { "epoch": 0.6359385254867644, "grad_norm": 1.7119953632354736, "learning_rate": 6.183601234526076e-06, "loss": 0.3685, "step": 23255 }, { "epoch": 0.6359658718004813, "grad_norm": 1.181806206703186, "learning_rate": 6.182782583312905e-06, "loss": 0.4879, "step": 23256 }, { "epoch": 0.6359932181141982, "grad_norm": 1.5008084774017334, "learning_rate": 6.1819639620447324e-06, "loss": 0.3815, "step": 23257 }, { "epoch": 0.6360205644279151, "grad_norm": 1.2890018224716187, "learning_rate": 6.181145370727976e-06, "loss": 0.4601, "step": 23258 }, { "epoch": 0.636047910741632, "grad_norm": 1.4776660203933716, "learning_rate": 6.180326809369059e-06, "loss": 0.3321, "step": 23259 }, { "epoch": 0.636075257055349, "grad_norm": 1.372882604598999, "learning_rate": 6.1795082779744065e-06, "loss": 0.479, "step": 23260 }, { "epoch": 0.6361026033690659, "grad_norm": 1.8384993076324463, "learning_rate": 6.1786897765504325e-06, "loss": 0.4566, "step": 23261 }, { "epoch": 0.6361299496827828, "grad_norm": 1.449824333190918, "learning_rate": 6.177871305103567e-06, "loss": 0.4735, "step": 23262 }, { "epoch": 0.6361572959964996, "grad_norm": 1.22553551197052, "learning_rate": 6.177052863640221e-06, "loss": 0.4901, "step": 23263 }, { "epoch": 0.6361846423102165, "grad_norm": 1.7030370235443115, "learning_rate": 6.176234452166819e-06, "loss": 0.4145, "step": 23264 }, { "epoch": 0.6362119886239335, "grad_norm": 1.4536893367767334, "learning_rate": 6.175416070689785e-06, "loss": 0.4638, "step": 23265 }, { "epoch": 0.6362393349376504, "grad_norm": 1.4354071617126465, "learning_rate": 6.1745977192155335e-06, "loss": 0.5073, "step": 23266 }, { "epoch": 0.6362666812513673, "grad_norm": 1.1582354307174683, "learning_rate": 6.173779397750485e-06, "loss": 0.4686, "step": 23267 }, { "epoch": 0.6362940275650842, "grad_norm": 1.442989468574524, "learning_rate": 6.172961106301063e-06, "loss": 0.4004, "step": 23268 }, { "epoch": 0.6363213738788012, "grad_norm": 1.7540557384490967, "learning_rate": 6.172142844873683e-06, "loss": 0.4687, "step": 23269 }, { "epoch": 0.6363487201925181, "grad_norm": 1.0848995447158813, "learning_rate": 6.171324613474767e-06, "loss": 0.5203, "step": 23270 }, { "epoch": 0.6363760665062349, "grad_norm": 0.943254292011261, "learning_rate": 6.17050641211073e-06, "loss": 0.3376, "step": 23271 }, { "epoch": 0.6364034128199518, "grad_norm": 1.2891435623168945, "learning_rate": 6.169688240787991e-06, "loss": 0.4928, "step": 23272 }, { "epoch": 0.6364307591336688, "grad_norm": 1.6840630769729614, "learning_rate": 6.168870099512974e-06, "loss": 0.3833, "step": 23273 }, { "epoch": 0.6364581054473857, "grad_norm": 1.2616267204284668, "learning_rate": 6.1680519882920885e-06, "loss": 0.4572, "step": 23274 }, { "epoch": 0.6364854517611026, "grad_norm": 1.8159916400909424, "learning_rate": 6.16723390713176e-06, "loss": 0.4934, "step": 23275 }, { "epoch": 0.6365127980748195, "grad_norm": 1.3136630058288574, "learning_rate": 6.166415856038401e-06, "loss": 0.4782, "step": 23276 }, { "epoch": 0.6365401443885365, "grad_norm": 1.485241174697876, "learning_rate": 6.165597835018431e-06, "loss": 0.4741, "step": 23277 }, { "epoch": 0.6365674907022534, "grad_norm": 1.2401094436645508, "learning_rate": 6.164779844078268e-06, "loss": 0.5112, "step": 23278 }, { "epoch": 0.6365948370159702, "grad_norm": 1.3424928188323975, "learning_rate": 6.163961883224325e-06, "loss": 0.4724, "step": 23279 }, { "epoch": 0.6366221833296871, "grad_norm": 1.2110193967819214, "learning_rate": 6.1631439524630235e-06, "loss": 0.3717, "step": 23280 }, { "epoch": 0.636649529643404, "grad_norm": 1.2958180904388428, "learning_rate": 6.162326051800779e-06, "loss": 0.4719, "step": 23281 }, { "epoch": 0.636676875957121, "grad_norm": 1.6945897340774536, "learning_rate": 6.161508181244005e-06, "loss": 0.4867, "step": 23282 }, { "epoch": 0.6367042222708379, "grad_norm": 1.163373351097107, "learning_rate": 6.16069034079912e-06, "loss": 0.4496, "step": 23283 }, { "epoch": 0.6367315685845548, "grad_norm": 1.206776738166809, "learning_rate": 6.159872530472537e-06, "loss": 0.4528, "step": 23284 }, { "epoch": 0.6367589148982717, "grad_norm": 1.2570579051971436, "learning_rate": 6.159054750270674e-06, "loss": 0.4751, "step": 23285 }, { "epoch": 0.6367862612119887, "grad_norm": 1.22446608543396, "learning_rate": 6.15823700019995e-06, "loss": 0.3793, "step": 23286 }, { "epoch": 0.6368136075257055, "grad_norm": 1.3423503637313843, "learning_rate": 6.157419280266773e-06, "loss": 0.3574, "step": 23287 }, { "epoch": 0.6368409538394224, "grad_norm": 1.1813730001449585, "learning_rate": 6.156601590477558e-06, "loss": 0.7079, "step": 23288 }, { "epoch": 0.6368683001531393, "grad_norm": 1.4462177753448486, "learning_rate": 6.155783930838722e-06, "loss": 0.5127, "step": 23289 }, { "epoch": 0.6368956464668563, "grad_norm": 1.4629069566726685, "learning_rate": 6.154966301356682e-06, "loss": 0.49, "step": 23290 }, { "epoch": 0.6369229927805732, "grad_norm": 1.2028170824050903, "learning_rate": 6.154148702037846e-06, "loss": 0.4797, "step": 23291 }, { "epoch": 0.6369503390942901, "grad_norm": 1.5406132936477661, "learning_rate": 6.153331132888635e-06, "loss": 0.3626, "step": 23292 }, { "epoch": 0.636977685408007, "grad_norm": 1.5086320638656616, "learning_rate": 6.152513593915454e-06, "loss": 0.5001, "step": 23293 }, { "epoch": 0.637005031721724, "grad_norm": 1.4568140506744385, "learning_rate": 6.151696085124723e-06, "loss": 0.6989, "step": 23294 }, { "epoch": 0.6370323780354408, "grad_norm": 1.570927381515503, "learning_rate": 6.150878606522855e-06, "loss": 0.3723, "step": 23295 }, { "epoch": 0.6370597243491577, "grad_norm": 1.6685233116149902, "learning_rate": 6.150061158116259e-06, "loss": 0.3601, "step": 23296 }, { "epoch": 0.6370870706628746, "grad_norm": 1.2870421409606934, "learning_rate": 6.149243739911352e-06, "loss": 0.4919, "step": 23297 }, { "epoch": 0.6371144169765915, "grad_norm": 1.3568813800811768, "learning_rate": 6.148426351914541e-06, "loss": 0.5005, "step": 23298 }, { "epoch": 0.6371417632903085, "grad_norm": 1.4668378829956055, "learning_rate": 6.147608994132243e-06, "loss": 0.4993, "step": 23299 }, { "epoch": 0.6371691096040254, "grad_norm": 1.3882651329040527, "learning_rate": 6.146791666570869e-06, "loss": 0.481, "step": 23300 }, { "epoch": 0.6371964559177423, "grad_norm": 1.472745656967163, "learning_rate": 6.145974369236829e-06, "loss": 0.4673, "step": 23301 }, { "epoch": 0.6372238022314592, "grad_norm": 1.4956779479980469, "learning_rate": 6.145157102136534e-06, "loss": 0.701, "step": 23302 }, { "epoch": 0.6372511485451761, "grad_norm": 1.1871604919433594, "learning_rate": 6.144339865276401e-06, "loss": 0.4936, "step": 23303 }, { "epoch": 0.637278494858893, "grad_norm": 1.5055971145629883, "learning_rate": 6.1435226586628325e-06, "loss": 0.7196, "step": 23304 }, { "epoch": 0.6373058411726099, "grad_norm": 1.315575122833252, "learning_rate": 6.142705482302246e-06, "loss": 0.4706, "step": 23305 }, { "epoch": 0.6373331874863268, "grad_norm": 1.2757543325424194, "learning_rate": 6.141888336201047e-06, "loss": 0.4897, "step": 23306 }, { "epoch": 0.6373605338000438, "grad_norm": 1.320070505142212, "learning_rate": 6.1410712203656495e-06, "loss": 0.4582, "step": 23307 }, { "epoch": 0.6373878801137607, "grad_norm": 1.2493882179260254, "learning_rate": 6.140254134802464e-06, "loss": 0.4535, "step": 23308 }, { "epoch": 0.6374152264274776, "grad_norm": 1.2916030883789062, "learning_rate": 6.139437079517896e-06, "loss": 0.5062, "step": 23309 }, { "epoch": 0.6374425727411945, "grad_norm": 1.5413970947265625, "learning_rate": 6.138620054518358e-06, "loss": 0.4491, "step": 23310 }, { "epoch": 0.6374699190549113, "grad_norm": 1.6625889539718628, "learning_rate": 6.137803059810261e-06, "loss": 0.721, "step": 23311 }, { "epoch": 0.6374972653686283, "grad_norm": 1.3284327983856201, "learning_rate": 6.13698609540001e-06, "loss": 0.5104, "step": 23312 }, { "epoch": 0.6375246116823452, "grad_norm": 1.4757633209228516, "learning_rate": 6.136169161294018e-06, "loss": 0.4659, "step": 23313 }, { "epoch": 0.6375519579960621, "grad_norm": 1.7410857677459717, "learning_rate": 6.135352257498689e-06, "loss": 0.7612, "step": 23314 }, { "epoch": 0.637579304309779, "grad_norm": 1.2790982723236084, "learning_rate": 6.134535384020435e-06, "loss": 0.4777, "step": 23315 }, { "epoch": 0.637606650623496, "grad_norm": 1.1527385711669922, "learning_rate": 6.1337185408656644e-06, "loss": 0.4476, "step": 23316 }, { "epoch": 0.6376339969372129, "grad_norm": 1.8477609157562256, "learning_rate": 6.132901728040781e-06, "loss": 0.7488, "step": 23317 }, { "epoch": 0.6376613432509298, "grad_norm": 1.1479178667068481, "learning_rate": 6.1320849455521995e-06, "loss": 0.4812, "step": 23318 }, { "epoch": 0.6376886895646466, "grad_norm": 1.6314784288406372, "learning_rate": 6.131268193406319e-06, "loss": 0.5182, "step": 23319 }, { "epoch": 0.6377160358783636, "grad_norm": 1.34814453125, "learning_rate": 6.130451471609551e-06, "loss": 0.4874, "step": 23320 }, { "epoch": 0.6377433821920805, "grad_norm": 1.409232497215271, "learning_rate": 6.129634780168306e-06, "loss": 0.3363, "step": 23321 }, { "epoch": 0.6377707285057974, "grad_norm": 1.4000798463821411, "learning_rate": 6.128818119088986e-06, "loss": 0.4957, "step": 23322 }, { "epoch": 0.6377980748195143, "grad_norm": 1.3461549282073975, "learning_rate": 6.128001488377997e-06, "loss": 0.4692, "step": 23323 }, { "epoch": 0.6378254211332313, "grad_norm": 1.5392498970031738, "learning_rate": 6.127184888041745e-06, "loss": 0.4924, "step": 23324 }, { "epoch": 0.6378527674469482, "grad_norm": 1.799644947052002, "learning_rate": 6.12636831808664e-06, "loss": 0.3808, "step": 23325 }, { "epoch": 0.6378801137606651, "grad_norm": 1.4104995727539062, "learning_rate": 6.1255517785190834e-06, "loss": 0.4883, "step": 23326 }, { "epoch": 0.6379074600743819, "grad_norm": 1.1093531847000122, "learning_rate": 6.124735269345485e-06, "loss": 0.4647, "step": 23327 }, { "epoch": 0.6379348063880989, "grad_norm": 1.1523820161819458, "learning_rate": 6.123918790572247e-06, "loss": 0.4608, "step": 23328 }, { "epoch": 0.6379621527018158, "grad_norm": 1.1199402809143066, "learning_rate": 6.123102342205773e-06, "loss": 0.4853, "step": 23329 }, { "epoch": 0.6379894990155327, "grad_norm": 1.5620653629302979, "learning_rate": 6.122285924252473e-06, "loss": 0.4865, "step": 23330 }, { "epoch": 0.6380168453292496, "grad_norm": 1.2075858116149902, "learning_rate": 6.1214695367187475e-06, "loss": 0.4806, "step": 23331 }, { "epoch": 0.6380441916429666, "grad_norm": 1.59772527217865, "learning_rate": 6.1206531796110015e-06, "loss": 0.4504, "step": 23332 }, { "epoch": 0.6380715379566835, "grad_norm": 1.2993052005767822, "learning_rate": 6.119836852935641e-06, "loss": 0.4963, "step": 23333 }, { "epoch": 0.6380988842704004, "grad_norm": 1.614307165145874, "learning_rate": 6.119020556699067e-06, "loss": 0.4626, "step": 23334 }, { "epoch": 0.6381262305841172, "grad_norm": 2.032094955444336, "learning_rate": 6.118204290907687e-06, "loss": 0.359, "step": 23335 }, { "epoch": 0.6381535768978341, "grad_norm": 1.3355433940887451, "learning_rate": 6.117388055567899e-06, "loss": 0.4776, "step": 23336 }, { "epoch": 0.6381809232115511, "grad_norm": 1.7971402406692505, "learning_rate": 6.116571850686108e-06, "loss": 0.3443, "step": 23337 }, { "epoch": 0.638208269525268, "grad_norm": 1.5092482566833496, "learning_rate": 6.115755676268722e-06, "loss": 0.3548, "step": 23338 }, { "epoch": 0.6382356158389849, "grad_norm": 1.7143324613571167, "learning_rate": 6.114939532322137e-06, "loss": 0.7354, "step": 23339 }, { "epoch": 0.6382629621527018, "grad_norm": 1.5953443050384521, "learning_rate": 6.11412341885276e-06, "loss": 0.3487, "step": 23340 }, { "epoch": 0.6382903084664188, "grad_norm": 1.388919711112976, "learning_rate": 6.1133073358669895e-06, "loss": 0.4884, "step": 23341 }, { "epoch": 0.6383176547801357, "grad_norm": 1.4356259107589722, "learning_rate": 6.112491283371228e-06, "loss": 0.3589, "step": 23342 }, { "epoch": 0.6383450010938525, "grad_norm": 1.2689118385314941, "learning_rate": 6.111675261371881e-06, "loss": 0.4692, "step": 23343 }, { "epoch": 0.6383723474075694, "grad_norm": 1.7318141460418701, "learning_rate": 6.110859269875346e-06, "loss": 0.4825, "step": 23344 }, { "epoch": 0.6383996937212864, "grad_norm": 1.3704936504364014, "learning_rate": 6.110043308888024e-06, "loss": 0.471, "step": 23345 }, { "epoch": 0.6384270400350033, "grad_norm": 1.4240645170211792, "learning_rate": 6.10922737841632e-06, "loss": 0.462, "step": 23346 }, { "epoch": 0.6384543863487202, "grad_norm": 1.1508026123046875, "learning_rate": 6.1084114784666315e-06, "loss": 0.4567, "step": 23347 }, { "epoch": 0.6384817326624371, "grad_norm": 1.3282275199890137, "learning_rate": 6.1075956090453606e-06, "loss": 0.4634, "step": 23348 }, { "epoch": 0.638509078976154, "grad_norm": 1.3412268161773682, "learning_rate": 6.106779770158905e-06, "loss": 0.4815, "step": 23349 }, { "epoch": 0.638536425289871, "grad_norm": 1.2658860683441162, "learning_rate": 6.1059639618136655e-06, "loss": 0.3559, "step": 23350 }, { "epoch": 0.6385637716035878, "grad_norm": 1.2242058515548706, "learning_rate": 6.105148184016047e-06, "loss": 0.7301, "step": 23351 }, { "epoch": 0.6385911179173047, "grad_norm": 1.4100723266601562, "learning_rate": 6.104332436772441e-06, "loss": 0.4608, "step": 23352 }, { "epoch": 0.6386184642310216, "grad_norm": 1.3165470361709595, "learning_rate": 6.103516720089251e-06, "loss": 0.5028, "step": 23353 }, { "epoch": 0.6386458105447386, "grad_norm": 1.2770421504974365, "learning_rate": 6.102701033972879e-06, "loss": 0.4881, "step": 23354 }, { "epoch": 0.6386731568584555, "grad_norm": 2.1501336097717285, "learning_rate": 6.101885378429717e-06, "loss": 0.3508, "step": 23355 }, { "epoch": 0.6387005031721724, "grad_norm": 1.458745002746582, "learning_rate": 6.101069753466174e-06, "loss": 0.4781, "step": 23356 }, { "epoch": 0.6387278494858893, "grad_norm": 1.5574681758880615, "learning_rate": 6.100254159088637e-06, "loss": 0.3394, "step": 23357 }, { "epoch": 0.6387551957996063, "grad_norm": 1.4064122438430786, "learning_rate": 6.099438595303506e-06, "loss": 0.491, "step": 23358 }, { "epoch": 0.6387825421133231, "grad_norm": 1.3171242475509644, "learning_rate": 6.098623062117182e-06, "loss": 0.4897, "step": 23359 }, { "epoch": 0.63880988842704, "grad_norm": 1.35568368434906, "learning_rate": 6.0978075595360654e-06, "loss": 0.7157, "step": 23360 }, { "epoch": 0.6388372347407569, "grad_norm": 1.1985727548599243, "learning_rate": 6.096992087566548e-06, "loss": 0.7459, "step": 23361 }, { "epoch": 0.6388645810544739, "grad_norm": 1.4185307025909424, "learning_rate": 6.096176646215031e-06, "loss": 0.4873, "step": 23362 }, { "epoch": 0.6388919273681908, "grad_norm": 1.3509632349014282, "learning_rate": 6.0953612354879064e-06, "loss": 0.4857, "step": 23363 }, { "epoch": 0.6389192736819077, "grad_norm": 1.2808268070220947, "learning_rate": 6.0945458553915756e-06, "loss": 0.4695, "step": 23364 }, { "epoch": 0.6389466199956246, "grad_norm": 1.4526050090789795, "learning_rate": 6.093730505932436e-06, "loss": 0.5012, "step": 23365 }, { "epoch": 0.6389739663093414, "grad_norm": 1.6783132553100586, "learning_rate": 6.092915187116878e-06, "loss": 0.4634, "step": 23366 }, { "epoch": 0.6390013126230584, "grad_norm": 1.282165765762329, "learning_rate": 6.0920998989513e-06, "loss": 0.487, "step": 23367 }, { "epoch": 0.6390286589367753, "grad_norm": 1.5884958505630493, "learning_rate": 6.091284641442102e-06, "loss": 0.4836, "step": 23368 }, { "epoch": 0.6390560052504922, "grad_norm": 1.285813570022583, "learning_rate": 6.090469414595675e-06, "loss": 0.4963, "step": 23369 }, { "epoch": 0.6390833515642091, "grad_norm": 1.1558878421783447, "learning_rate": 6.089654218418415e-06, "loss": 0.4885, "step": 23370 }, { "epoch": 0.6391106978779261, "grad_norm": 1.2173051834106445, "learning_rate": 6.088839052916715e-06, "loss": 0.4675, "step": 23371 }, { "epoch": 0.639138044191643, "grad_norm": 1.3819960355758667, "learning_rate": 6.088023918096974e-06, "loss": 0.4772, "step": 23372 }, { "epoch": 0.6391653905053599, "grad_norm": 1.2998017072677612, "learning_rate": 6.087208813965585e-06, "loss": 0.4775, "step": 23373 }, { "epoch": 0.6391927368190767, "grad_norm": 1.1196976900100708, "learning_rate": 6.0863937405289395e-06, "loss": 0.4531, "step": 23374 }, { "epoch": 0.6392200831327937, "grad_norm": 1.6276416778564453, "learning_rate": 6.085578697793435e-06, "loss": 0.3832, "step": 23375 }, { "epoch": 0.6392474294465106, "grad_norm": 1.5466339588165283, "learning_rate": 6.0847636857654646e-06, "loss": 0.4931, "step": 23376 }, { "epoch": 0.6392747757602275, "grad_norm": 1.379584550857544, "learning_rate": 6.083948704451421e-06, "loss": 0.7565, "step": 23377 }, { "epoch": 0.6393021220739444, "grad_norm": 1.249255895614624, "learning_rate": 6.083133753857698e-06, "loss": 0.4892, "step": 23378 }, { "epoch": 0.6393294683876614, "grad_norm": 1.1687456369400024, "learning_rate": 6.082318833990688e-06, "loss": 0.4689, "step": 23379 }, { "epoch": 0.6393568147013783, "grad_norm": 1.2194145917892456, "learning_rate": 6.081503944856782e-06, "loss": 0.5022, "step": 23380 }, { "epoch": 0.6393841610150952, "grad_norm": 1.4344314336776733, "learning_rate": 6.080689086462379e-06, "loss": 0.4938, "step": 23381 }, { "epoch": 0.639411507328812, "grad_norm": 1.3666435480117798, "learning_rate": 6.079874258813864e-06, "loss": 0.4524, "step": 23382 }, { "epoch": 0.639438853642529, "grad_norm": 1.1347594261169434, "learning_rate": 6.079059461917635e-06, "loss": 0.4781, "step": 23383 }, { "epoch": 0.6394661999562459, "grad_norm": 1.5731689929962158, "learning_rate": 6.078244695780078e-06, "loss": 0.3632, "step": 23384 }, { "epoch": 0.6394935462699628, "grad_norm": 1.4896758794784546, "learning_rate": 6.077429960407588e-06, "loss": 0.4687, "step": 23385 }, { "epoch": 0.6395208925836797, "grad_norm": 1.2967848777770996, "learning_rate": 6.076615255806559e-06, "loss": 0.4709, "step": 23386 }, { "epoch": 0.6395482388973966, "grad_norm": 1.3826748132705688, "learning_rate": 6.075800581983375e-06, "loss": 0.4966, "step": 23387 }, { "epoch": 0.6395755852111136, "grad_norm": 1.3295460939407349, "learning_rate": 6.074985938944433e-06, "loss": 0.469, "step": 23388 }, { "epoch": 0.6396029315248305, "grad_norm": 1.465839147567749, "learning_rate": 6.074171326696124e-06, "loss": 0.4606, "step": 23389 }, { "epoch": 0.6396302778385473, "grad_norm": 1.3595038652420044, "learning_rate": 6.073356745244833e-06, "loss": 0.4499, "step": 23390 }, { "epoch": 0.6396576241522642, "grad_norm": 1.5079169273376465, "learning_rate": 6.072542194596958e-06, "loss": 0.4434, "step": 23391 }, { "epoch": 0.6396849704659812, "grad_norm": 1.7650909423828125, "learning_rate": 6.071727674758881e-06, "loss": 0.3527, "step": 23392 }, { "epoch": 0.6397123167796981, "grad_norm": 1.5384550094604492, "learning_rate": 6.070913185736994e-06, "loss": 0.5135, "step": 23393 }, { "epoch": 0.639739663093415, "grad_norm": 1.7272318601608276, "learning_rate": 6.070098727537688e-06, "loss": 0.3516, "step": 23394 }, { "epoch": 0.6397670094071319, "grad_norm": 2.6639249324798584, "learning_rate": 6.069284300167351e-06, "loss": 0.7433, "step": 23395 }, { "epoch": 0.6397943557208489, "grad_norm": 1.2792656421661377, "learning_rate": 6.068469903632372e-06, "loss": 0.4822, "step": 23396 }, { "epoch": 0.6398217020345658, "grad_norm": 1.5852322578430176, "learning_rate": 6.067655537939141e-06, "loss": 0.4956, "step": 23397 }, { "epoch": 0.6398490483482826, "grad_norm": 1.39879310131073, "learning_rate": 6.0668412030940475e-06, "loss": 0.4749, "step": 23398 }, { "epoch": 0.6398763946619995, "grad_norm": 1.362253189086914, "learning_rate": 6.066026899103475e-06, "loss": 0.4881, "step": 23399 }, { "epoch": 0.6399037409757165, "grad_norm": 1.2735754251480103, "learning_rate": 6.0652126259738155e-06, "loss": 0.4536, "step": 23400 }, { "epoch": 0.6399310872894334, "grad_norm": 1.3328347206115723, "learning_rate": 6.064398383711455e-06, "loss": 0.462, "step": 23401 }, { "epoch": 0.6399584336031503, "grad_norm": 1.3911747932434082, "learning_rate": 6.063584172322781e-06, "loss": 0.4593, "step": 23402 }, { "epoch": 0.6399857799168672, "grad_norm": 1.3244274854660034, "learning_rate": 6.062769991814183e-06, "loss": 0.4819, "step": 23403 }, { "epoch": 0.6400131262305842, "grad_norm": 1.6518199443817139, "learning_rate": 6.0619558421920455e-06, "loss": 0.483, "step": 23404 }, { "epoch": 0.6400404725443011, "grad_norm": 1.3234955072402954, "learning_rate": 6.061141723462758e-06, "loss": 0.5026, "step": 23405 }, { "epoch": 0.6400678188580179, "grad_norm": 1.401418685913086, "learning_rate": 6.060327635632704e-06, "loss": 0.4256, "step": 23406 }, { "epoch": 0.6400951651717348, "grad_norm": 1.4116477966308594, "learning_rate": 6.0595135787082696e-06, "loss": 0.4815, "step": 23407 }, { "epoch": 0.6401225114854517, "grad_norm": 2.1813488006591797, "learning_rate": 6.058699552695844e-06, "loss": 0.3446, "step": 23408 }, { "epoch": 0.6401498577991687, "grad_norm": 1.2346681356430054, "learning_rate": 6.057885557601809e-06, "loss": 0.4739, "step": 23409 }, { "epoch": 0.6401772041128856, "grad_norm": 1.3061579465866089, "learning_rate": 6.057071593432553e-06, "loss": 0.4348, "step": 23410 }, { "epoch": 0.6402045504266025, "grad_norm": 1.2428112030029297, "learning_rate": 6.056257660194462e-06, "loss": 0.3599, "step": 23411 }, { "epoch": 0.6402318967403194, "grad_norm": 1.2965089082717896, "learning_rate": 6.0554437578939195e-06, "loss": 0.4734, "step": 23412 }, { "epoch": 0.6402592430540364, "grad_norm": 1.4862158298492432, "learning_rate": 6.054629886537311e-06, "loss": 0.7392, "step": 23413 }, { "epoch": 0.6402865893677532, "grad_norm": 1.2177355289459229, "learning_rate": 6.05381604613102e-06, "loss": 0.4893, "step": 23414 }, { "epoch": 0.6403139356814701, "grad_norm": 1.6362782716751099, "learning_rate": 6.053002236681431e-06, "loss": 0.4398, "step": 23415 }, { "epoch": 0.640341281995187, "grad_norm": 1.3532392978668213, "learning_rate": 6.052188458194931e-06, "loss": 0.4769, "step": 23416 }, { "epoch": 0.640368628308904, "grad_norm": 1.3332659006118774, "learning_rate": 6.0513747106779e-06, "loss": 0.4465, "step": 23417 }, { "epoch": 0.6403959746226209, "grad_norm": 1.2129831314086914, "learning_rate": 6.050560994136723e-06, "loss": 0.4951, "step": 23418 }, { "epoch": 0.6404233209363378, "grad_norm": 1.4011026620864868, "learning_rate": 6.049747308577782e-06, "loss": 0.4814, "step": 23419 }, { "epoch": 0.6404506672500547, "grad_norm": 1.3421075344085693, "learning_rate": 6.048933654007464e-06, "loss": 0.4669, "step": 23420 }, { "epoch": 0.6404780135637717, "grad_norm": 1.316442608833313, "learning_rate": 6.04812003043215e-06, "loss": 0.4847, "step": 23421 }, { "epoch": 0.6405053598774885, "grad_norm": 1.5009759664535522, "learning_rate": 6.047306437858221e-06, "loss": 0.4815, "step": 23422 }, { "epoch": 0.6405327061912054, "grad_norm": 1.6073980331420898, "learning_rate": 6.046492876292058e-06, "loss": 0.3458, "step": 23423 }, { "epoch": 0.6405600525049223, "grad_norm": 1.363540768623352, "learning_rate": 6.0456793457400505e-06, "loss": 0.499, "step": 23424 }, { "epoch": 0.6405873988186392, "grad_norm": 1.3578006029129028, "learning_rate": 6.044865846208573e-06, "loss": 0.4935, "step": 23425 }, { "epoch": 0.6406147451323562, "grad_norm": 1.3537780046463013, "learning_rate": 6.044052377704013e-06, "loss": 0.4517, "step": 23426 }, { "epoch": 0.6406420914460731, "grad_norm": 1.2830623388290405, "learning_rate": 6.043238940232749e-06, "loss": 0.5029, "step": 23427 }, { "epoch": 0.64066943775979, "grad_norm": 1.255049228668213, "learning_rate": 6.042425533801157e-06, "loss": 0.4689, "step": 23428 }, { "epoch": 0.6406967840735069, "grad_norm": 1.1576184034347534, "learning_rate": 6.0416121584156265e-06, "loss": 0.4811, "step": 23429 }, { "epoch": 0.6407241303872238, "grad_norm": 1.5083831548690796, "learning_rate": 6.040798814082533e-06, "loss": 0.4542, "step": 23430 }, { "epoch": 0.6407514767009407, "grad_norm": 1.5206022262573242, "learning_rate": 6.0399855008082584e-06, "loss": 0.4566, "step": 23431 }, { "epoch": 0.6407788230146576, "grad_norm": 1.3681408166885376, "learning_rate": 6.039172218599183e-06, "loss": 0.7121, "step": 23432 }, { "epoch": 0.6408061693283745, "grad_norm": 1.2859801054000854, "learning_rate": 6.038358967461688e-06, "loss": 0.5186, "step": 23433 }, { "epoch": 0.6408335156420915, "grad_norm": 1.4483962059020996, "learning_rate": 6.037545747402151e-06, "loss": 0.4702, "step": 23434 }, { "epoch": 0.6408608619558084, "grad_norm": 1.476326823234558, "learning_rate": 6.0367325584269545e-06, "loss": 0.5076, "step": 23435 }, { "epoch": 0.6408882082695253, "grad_norm": 1.5512698888778687, "learning_rate": 6.035919400542474e-06, "loss": 0.3735, "step": 23436 }, { "epoch": 0.6409155545832422, "grad_norm": 1.880189299583435, "learning_rate": 6.0351062737550895e-06, "loss": 0.4893, "step": 23437 }, { "epoch": 0.640942900896959, "grad_norm": 1.272339105606079, "learning_rate": 6.0342931780711825e-06, "loss": 0.4562, "step": 23438 }, { "epoch": 0.640970247210676, "grad_norm": 1.3838928937911987, "learning_rate": 6.033480113497128e-06, "loss": 0.4733, "step": 23439 }, { "epoch": 0.6409975935243929, "grad_norm": 1.253922462463379, "learning_rate": 6.032667080039305e-06, "loss": 0.5014, "step": 23440 }, { "epoch": 0.6410249398381098, "grad_norm": 1.416395902633667, "learning_rate": 6.031854077704094e-06, "loss": 0.4593, "step": 23441 }, { "epoch": 0.6410522861518267, "grad_norm": 1.2003384828567505, "learning_rate": 6.03104110649787e-06, "loss": 0.4565, "step": 23442 }, { "epoch": 0.6410796324655437, "grad_norm": 1.209098219871521, "learning_rate": 6.030228166427013e-06, "loss": 0.471, "step": 23443 }, { "epoch": 0.6411069787792606, "grad_norm": 1.2008790969848633, "learning_rate": 6.029415257497897e-06, "loss": 0.4752, "step": 23444 }, { "epoch": 0.6411343250929775, "grad_norm": 1.243504524230957, "learning_rate": 6.028602379716901e-06, "loss": 0.3399, "step": 23445 }, { "epoch": 0.6411616714066943, "grad_norm": 1.3762524127960205, "learning_rate": 6.027789533090405e-06, "loss": 0.509, "step": 23446 }, { "epoch": 0.6411890177204113, "grad_norm": 1.3809421062469482, "learning_rate": 6.026976717624778e-06, "loss": 0.4869, "step": 23447 }, { "epoch": 0.6412163640341282, "grad_norm": 1.1829713582992554, "learning_rate": 6.026163933326404e-06, "loss": 0.4885, "step": 23448 }, { "epoch": 0.6412437103478451, "grad_norm": 1.2860678434371948, "learning_rate": 6.025351180201653e-06, "loss": 0.485, "step": 23449 }, { "epoch": 0.641271056661562, "grad_norm": 1.4941489696502686, "learning_rate": 6.0245384582569034e-06, "loss": 0.3672, "step": 23450 }, { "epoch": 0.641298402975279, "grad_norm": 1.1991276741027832, "learning_rate": 6.023725767498532e-06, "loss": 0.6847, "step": 23451 }, { "epoch": 0.6413257492889959, "grad_norm": 1.6794127225875854, "learning_rate": 6.022913107932912e-06, "loss": 0.3918, "step": 23452 }, { "epoch": 0.6413530956027128, "grad_norm": 1.7860482931137085, "learning_rate": 6.0221004795664195e-06, "loss": 0.3637, "step": 23453 }, { "epoch": 0.6413804419164296, "grad_norm": 1.2446630001068115, "learning_rate": 6.021287882405431e-06, "loss": 0.741, "step": 23454 }, { "epoch": 0.6414077882301465, "grad_norm": 1.2183977365493774, "learning_rate": 6.020475316456317e-06, "loss": 0.3228, "step": 23455 }, { "epoch": 0.6414351345438635, "grad_norm": 1.5175729990005493, "learning_rate": 6.019662781725456e-06, "loss": 0.4794, "step": 23456 }, { "epoch": 0.6414624808575804, "grad_norm": 1.443663477897644, "learning_rate": 6.018850278219219e-06, "loss": 0.4836, "step": 23457 }, { "epoch": 0.6414898271712973, "grad_norm": 1.152474045753479, "learning_rate": 6.018037805943981e-06, "loss": 0.471, "step": 23458 }, { "epoch": 0.6415171734850142, "grad_norm": 1.1473677158355713, "learning_rate": 6.0172253649061184e-06, "loss": 0.4891, "step": 23459 }, { "epoch": 0.6415445197987312, "grad_norm": 1.312560796737671, "learning_rate": 6.016412955111999e-06, "loss": 0.4747, "step": 23460 }, { "epoch": 0.6415718661124481, "grad_norm": 1.2389805316925049, "learning_rate": 6.015600576568002e-06, "loss": 0.7288, "step": 23461 }, { "epoch": 0.6415992124261649, "grad_norm": 1.6979955434799194, "learning_rate": 6.014788229280495e-06, "loss": 0.3458, "step": 23462 }, { "epoch": 0.6416265587398818, "grad_norm": 1.3622989654541016, "learning_rate": 6.013975913255854e-06, "loss": 0.48, "step": 23463 }, { "epoch": 0.6416539050535988, "grad_norm": 1.2409088611602783, "learning_rate": 6.013163628500449e-06, "loss": 0.4758, "step": 23464 }, { "epoch": 0.6416812513673157, "grad_norm": 1.5461515188217163, "learning_rate": 6.0123513750206555e-06, "loss": 0.52, "step": 23465 }, { "epoch": 0.6417085976810326, "grad_norm": 1.3019887208938599, "learning_rate": 6.01153915282284e-06, "loss": 0.5142, "step": 23466 }, { "epoch": 0.6417359439947495, "grad_norm": 1.457883596420288, "learning_rate": 6.010726961913378e-06, "loss": 0.355, "step": 23467 }, { "epoch": 0.6417632903084665, "grad_norm": 1.1970455646514893, "learning_rate": 6.009914802298642e-06, "loss": 0.4851, "step": 23468 }, { "epoch": 0.6417906366221833, "grad_norm": 1.7805536985397339, "learning_rate": 6.009102673984999e-06, "loss": 0.4772, "step": 23469 }, { "epoch": 0.6418179829359002, "grad_norm": 1.3437572717666626, "learning_rate": 6.008290576978826e-06, "loss": 0.5167, "step": 23470 }, { "epoch": 0.6418453292496171, "grad_norm": 1.315716028213501, "learning_rate": 6.007478511286486e-06, "loss": 0.7215, "step": 23471 }, { "epoch": 0.641872675563334, "grad_norm": 1.2512311935424805, "learning_rate": 6.0066664769143555e-06, "loss": 0.4871, "step": 23472 }, { "epoch": 0.641900021877051, "grad_norm": 1.5175563097000122, "learning_rate": 6.005854473868804e-06, "loss": 0.4379, "step": 23473 }, { "epoch": 0.6419273681907679, "grad_norm": 1.2978287935256958, "learning_rate": 6.0050425021561974e-06, "loss": 0.4661, "step": 23474 }, { "epoch": 0.6419547145044848, "grad_norm": 1.1809560060501099, "learning_rate": 6.004230561782909e-06, "loss": 0.4817, "step": 23475 }, { "epoch": 0.6419820608182017, "grad_norm": 1.2061219215393066, "learning_rate": 6.003418652755308e-06, "loss": 0.4809, "step": 23476 }, { "epoch": 0.6420094071319186, "grad_norm": 1.3609291315078735, "learning_rate": 6.002606775079762e-06, "loss": 0.4873, "step": 23477 }, { "epoch": 0.6420367534456355, "grad_norm": 1.2791075706481934, "learning_rate": 6.001794928762643e-06, "loss": 0.4673, "step": 23478 }, { "epoch": 0.6420640997593524, "grad_norm": 1.1671518087387085, "learning_rate": 6.000983113810315e-06, "loss": 0.4508, "step": 23479 }, { "epoch": 0.6420914460730693, "grad_norm": 1.4468263387680054, "learning_rate": 6.00017133022915e-06, "loss": 0.3589, "step": 23480 }, { "epoch": 0.6421187923867863, "grad_norm": 1.4618067741394043, "learning_rate": 5.999359578025516e-06, "loss": 0.5024, "step": 23481 }, { "epoch": 0.6421461387005032, "grad_norm": 1.424289345741272, "learning_rate": 5.998547857205779e-06, "loss": 0.4759, "step": 23482 }, { "epoch": 0.6421734850142201, "grad_norm": 1.1361031532287598, "learning_rate": 5.997736167776309e-06, "loss": 0.4677, "step": 23483 }, { "epoch": 0.642200831327937, "grad_norm": 1.1290782690048218, "learning_rate": 5.996924509743471e-06, "loss": 0.4778, "step": 23484 }, { "epoch": 0.6422281776416539, "grad_norm": 1.3981850147247314, "learning_rate": 5.996112883113634e-06, "loss": 0.4854, "step": 23485 }, { "epoch": 0.6422555239553708, "grad_norm": 1.850638508796692, "learning_rate": 5.995301287893167e-06, "loss": 0.356, "step": 23486 }, { "epoch": 0.6422828702690877, "grad_norm": 1.422876238822937, "learning_rate": 5.994489724088432e-06, "loss": 0.5168, "step": 23487 }, { "epoch": 0.6423102165828046, "grad_norm": 1.286473274230957, "learning_rate": 5.993678191705799e-06, "loss": 0.4514, "step": 23488 }, { "epoch": 0.6423375628965216, "grad_norm": 1.3688280582427979, "learning_rate": 5.992866690751633e-06, "loss": 0.453, "step": 23489 }, { "epoch": 0.6423649092102385, "grad_norm": 1.1980825662612915, "learning_rate": 5.992055221232298e-06, "loss": 0.4909, "step": 23490 }, { "epoch": 0.6423922555239554, "grad_norm": 1.234984040260315, "learning_rate": 5.9912437831541655e-06, "loss": 0.6934, "step": 23491 }, { "epoch": 0.6424196018376723, "grad_norm": 1.281692624092102, "learning_rate": 5.990432376523594e-06, "loss": 0.4914, "step": 23492 }, { "epoch": 0.6424469481513891, "grad_norm": 2.4476370811462402, "learning_rate": 5.989621001346954e-06, "loss": 0.36, "step": 23493 }, { "epoch": 0.6424742944651061, "grad_norm": 7.991970539093018, "learning_rate": 5.988809657630608e-06, "loss": 0.3516, "step": 23494 }, { "epoch": 0.642501640778823, "grad_norm": 1.3691601753234863, "learning_rate": 5.987998345380921e-06, "loss": 0.4682, "step": 23495 }, { "epoch": 0.6425289870925399, "grad_norm": 1.8171522617340088, "learning_rate": 5.987187064604259e-06, "loss": 0.4699, "step": 23496 }, { "epoch": 0.6425563334062568, "grad_norm": 1.2553496360778809, "learning_rate": 5.986375815306988e-06, "loss": 0.4897, "step": 23497 }, { "epoch": 0.6425836797199738, "grad_norm": 1.2258387804031372, "learning_rate": 5.985564597495465e-06, "loss": 0.4886, "step": 23498 }, { "epoch": 0.6426110260336907, "grad_norm": 1.3062031269073486, "learning_rate": 5.9847534111760596e-06, "loss": 0.4922, "step": 23499 }, { "epoch": 0.6426383723474076, "grad_norm": 1.4097791910171509, "learning_rate": 5.983942256355134e-06, "loss": 0.4812, "step": 23500 }, { "epoch": 0.6426657186611244, "grad_norm": 1.3040684461593628, "learning_rate": 5.98313113303905e-06, "loss": 0.4858, "step": 23501 }, { "epoch": 0.6426930649748414, "grad_norm": 1.5887762308120728, "learning_rate": 5.982320041234171e-06, "loss": 0.449, "step": 23502 }, { "epoch": 0.6427204112885583, "grad_norm": 1.403639554977417, "learning_rate": 5.981508980946862e-06, "loss": 0.4841, "step": 23503 }, { "epoch": 0.6427477576022752, "grad_norm": 1.2144887447357178, "learning_rate": 5.980697952183483e-06, "loss": 0.4595, "step": 23504 }, { "epoch": 0.6427751039159921, "grad_norm": 1.771618127822876, "learning_rate": 5.979886954950398e-06, "loss": 0.4591, "step": 23505 }, { "epoch": 0.642802450229709, "grad_norm": 1.4045108556747437, "learning_rate": 5.979075989253969e-06, "loss": 0.4531, "step": 23506 }, { "epoch": 0.642829796543426, "grad_norm": 1.291749358177185, "learning_rate": 5.978265055100556e-06, "loss": 0.5075, "step": 23507 }, { "epoch": 0.6428571428571429, "grad_norm": 1.232567310333252, "learning_rate": 5.9774541524965244e-06, "loss": 0.4637, "step": 23508 }, { "epoch": 0.6428844891708597, "grad_norm": 1.3004733324050903, "learning_rate": 5.9766432814482315e-06, "loss": 0.7051, "step": 23509 }, { "epoch": 0.6429118354845766, "grad_norm": 1.4446431398391724, "learning_rate": 5.975832441962037e-06, "loss": 0.4898, "step": 23510 }, { "epoch": 0.6429391817982936, "grad_norm": 1.5418436527252197, "learning_rate": 5.975021634044309e-06, "loss": 0.4559, "step": 23511 }, { "epoch": 0.6429665281120105, "grad_norm": 1.5222989320755005, "learning_rate": 5.974210857701401e-06, "loss": 0.4351, "step": 23512 }, { "epoch": 0.6429938744257274, "grad_norm": 1.2820123434066772, "learning_rate": 5.973400112939678e-06, "loss": 0.5123, "step": 23513 }, { "epoch": 0.6430212207394443, "grad_norm": 1.4557141065597534, "learning_rate": 5.972589399765496e-06, "loss": 0.4797, "step": 23514 }, { "epoch": 0.6430485670531613, "grad_norm": 1.3307617902755737, "learning_rate": 5.971778718185215e-06, "loss": 0.4649, "step": 23515 }, { "epoch": 0.6430759133668782, "grad_norm": 0.9279597401618958, "learning_rate": 5.9709680682052e-06, "loss": 0.3817, "step": 23516 }, { "epoch": 0.643103259680595, "grad_norm": 1.2021970748901367, "learning_rate": 5.9701574498318034e-06, "loss": 0.48, "step": 23517 }, { "epoch": 0.6431306059943119, "grad_norm": 1.6883851289749146, "learning_rate": 5.9693468630713905e-06, "loss": 0.4482, "step": 23518 }, { "epoch": 0.6431579523080289, "grad_norm": 1.85177481174469, "learning_rate": 5.968536307930316e-06, "loss": 0.5158, "step": 23519 }, { "epoch": 0.6431852986217458, "grad_norm": 1.6277313232421875, "learning_rate": 5.967725784414939e-06, "loss": 0.4959, "step": 23520 }, { "epoch": 0.6432126449354627, "grad_norm": 1.3054096698760986, "learning_rate": 5.9669152925316195e-06, "loss": 0.4891, "step": 23521 }, { "epoch": 0.6432399912491796, "grad_norm": 1.3177955150604248, "learning_rate": 5.966104832286714e-06, "loss": 0.734, "step": 23522 }, { "epoch": 0.6432673375628966, "grad_norm": 1.2327096462249756, "learning_rate": 5.965294403686579e-06, "loss": 0.4703, "step": 23523 }, { "epoch": 0.6432946838766135, "grad_norm": 1.4604988098144531, "learning_rate": 5.964484006737578e-06, "loss": 0.4527, "step": 23524 }, { "epoch": 0.6433220301903303, "grad_norm": 1.2728136777877808, "learning_rate": 5.963673641446061e-06, "loss": 0.5189, "step": 23525 }, { "epoch": 0.6433493765040472, "grad_norm": 1.1958171129226685, "learning_rate": 5.96286330781839e-06, "loss": 0.4461, "step": 23526 }, { "epoch": 0.6433767228177641, "grad_norm": 1.5524121522903442, "learning_rate": 5.96205300586092e-06, "loss": 0.5111, "step": 23527 }, { "epoch": 0.6434040691314811, "grad_norm": 0.9803332090377808, "learning_rate": 5.961242735580006e-06, "loss": 0.344, "step": 23528 }, { "epoch": 0.643431415445198, "grad_norm": 1.4538427591323853, "learning_rate": 5.960432496982008e-06, "loss": 0.4782, "step": 23529 }, { "epoch": 0.6434587617589149, "grad_norm": 1.2049847841262817, "learning_rate": 5.959622290073279e-06, "loss": 0.7369, "step": 23530 }, { "epoch": 0.6434861080726318, "grad_norm": 1.3310543298721313, "learning_rate": 5.9588121148601765e-06, "loss": 0.4677, "step": 23531 }, { "epoch": 0.6435134543863488, "grad_norm": 1.276464581489563, "learning_rate": 5.958001971349059e-06, "loss": 0.4678, "step": 23532 }, { "epoch": 0.6435408007000656, "grad_norm": 1.2664777040481567, "learning_rate": 5.957191859546277e-06, "loss": 0.4848, "step": 23533 }, { "epoch": 0.6435681470137825, "grad_norm": 1.187057614326477, "learning_rate": 5.956381779458185e-06, "loss": 0.4452, "step": 23534 }, { "epoch": 0.6435954933274994, "grad_norm": 1.2403613328933716, "learning_rate": 5.955571731091141e-06, "loss": 0.4945, "step": 23535 }, { "epoch": 0.6436228396412164, "grad_norm": 1.1778842210769653, "learning_rate": 5.954761714451498e-06, "loss": 0.4868, "step": 23536 }, { "epoch": 0.6436501859549333, "grad_norm": 1.8272905349731445, "learning_rate": 5.953951729545609e-06, "loss": 0.4568, "step": 23537 }, { "epoch": 0.6436775322686502, "grad_norm": 1.5040132999420166, "learning_rate": 5.953141776379834e-06, "loss": 0.3812, "step": 23538 }, { "epoch": 0.6437048785823671, "grad_norm": 1.469459056854248, "learning_rate": 5.9523318549605204e-06, "loss": 0.4537, "step": 23539 }, { "epoch": 0.6437322248960841, "grad_norm": 1.9442647695541382, "learning_rate": 5.951521965294023e-06, "loss": 0.4468, "step": 23540 }, { "epoch": 0.6437595712098009, "grad_norm": 1.3340882062911987, "learning_rate": 5.950712107386701e-06, "loss": 0.4598, "step": 23541 }, { "epoch": 0.6437869175235178, "grad_norm": 1.1269536018371582, "learning_rate": 5.949902281244899e-06, "loss": 0.7447, "step": 23542 }, { "epoch": 0.6438142638372347, "grad_norm": 1.5894458293914795, "learning_rate": 5.949092486874977e-06, "loss": 0.357, "step": 23543 }, { "epoch": 0.6438416101509516, "grad_norm": 1.4313859939575195, "learning_rate": 5.948282724283281e-06, "loss": 0.4451, "step": 23544 }, { "epoch": 0.6438689564646686, "grad_norm": 1.3557511568069458, "learning_rate": 5.947472993476166e-06, "loss": 0.4613, "step": 23545 }, { "epoch": 0.6438963027783855, "grad_norm": 1.438266396522522, "learning_rate": 5.946663294459989e-06, "loss": 0.4899, "step": 23546 }, { "epoch": 0.6439236490921024, "grad_norm": 1.4508165121078491, "learning_rate": 5.945853627241096e-06, "loss": 0.4477, "step": 23547 }, { "epoch": 0.6439509954058193, "grad_norm": 1.400866985321045, "learning_rate": 5.945043991825841e-06, "loss": 0.4245, "step": 23548 }, { "epoch": 0.6439783417195362, "grad_norm": 1.593314290046692, "learning_rate": 5.944234388220573e-06, "loss": 0.4862, "step": 23549 }, { "epoch": 0.6440056880332531, "grad_norm": 2.107914686203003, "learning_rate": 5.9434248164316445e-06, "loss": 0.4943, "step": 23550 }, { "epoch": 0.64403303434697, "grad_norm": 1.3464887142181396, "learning_rate": 5.942615276465409e-06, "loss": 0.4769, "step": 23551 }, { "epoch": 0.6440603806606869, "grad_norm": 1.3380067348480225, "learning_rate": 5.941805768328214e-06, "loss": 0.4865, "step": 23552 }, { "epoch": 0.6440877269744039, "grad_norm": 1.3373382091522217, "learning_rate": 5.940996292026409e-06, "loss": 0.4948, "step": 23553 }, { "epoch": 0.6441150732881208, "grad_norm": 1.2215899229049683, "learning_rate": 5.94018684756635e-06, "loss": 0.4788, "step": 23554 }, { "epoch": 0.6441424196018377, "grad_norm": 1.336194634437561, "learning_rate": 5.939377434954378e-06, "loss": 0.4921, "step": 23555 }, { "epoch": 0.6441697659155546, "grad_norm": 1.2608951330184937, "learning_rate": 5.938568054196849e-06, "loss": 0.4818, "step": 23556 }, { "epoch": 0.6441971122292715, "grad_norm": 1.2247785329818726, "learning_rate": 5.937758705300112e-06, "loss": 0.4723, "step": 23557 }, { "epoch": 0.6442244585429884, "grad_norm": 1.4232101440429688, "learning_rate": 5.936949388270512e-06, "loss": 0.455, "step": 23558 }, { "epoch": 0.6442518048567053, "grad_norm": 1.5577073097229004, "learning_rate": 5.936140103114403e-06, "loss": 0.3398, "step": 23559 }, { "epoch": 0.6442791511704222, "grad_norm": 1.3409521579742432, "learning_rate": 5.93533084983813e-06, "loss": 0.7225, "step": 23560 }, { "epoch": 0.6443064974841392, "grad_norm": 1.3249602317810059, "learning_rate": 5.934521628448042e-06, "loss": 0.4934, "step": 23561 }, { "epoch": 0.6443338437978561, "grad_norm": 1.358473300933838, "learning_rate": 5.93371243895049e-06, "loss": 0.4541, "step": 23562 }, { "epoch": 0.644361190111573, "grad_norm": 1.401507019996643, "learning_rate": 5.932903281351818e-06, "loss": 0.4541, "step": 23563 }, { "epoch": 0.6443885364252899, "grad_norm": 2.1190288066864014, "learning_rate": 5.932094155658375e-06, "loss": 0.342, "step": 23564 }, { "epoch": 0.6444158827390067, "grad_norm": 1.1802161931991577, "learning_rate": 5.931285061876508e-06, "loss": 0.3259, "step": 23565 }, { "epoch": 0.6444432290527237, "grad_norm": 1.1980587244033813, "learning_rate": 5.930476000012566e-06, "loss": 0.4627, "step": 23566 }, { "epoch": 0.6444705753664406, "grad_norm": 1.1148287057876587, "learning_rate": 5.929666970072897e-06, "loss": 0.7227, "step": 23567 }, { "epoch": 0.6444979216801575, "grad_norm": 1.5496126413345337, "learning_rate": 5.928857972063844e-06, "loss": 0.4847, "step": 23568 }, { "epoch": 0.6445252679938744, "grad_norm": 1.4707037210464478, "learning_rate": 5.9280490059917515e-06, "loss": 0.4758, "step": 23569 }, { "epoch": 0.6445526143075914, "grad_norm": 1.1533377170562744, "learning_rate": 5.92724007186297e-06, "loss": 0.4757, "step": 23570 }, { "epoch": 0.6445799606213083, "grad_norm": 1.4204623699188232, "learning_rate": 5.926431169683844e-06, "loss": 0.4225, "step": 23571 }, { "epoch": 0.6446073069350251, "grad_norm": 1.599501609802246, "learning_rate": 5.925622299460718e-06, "loss": 0.4797, "step": 23572 }, { "epoch": 0.644634653248742, "grad_norm": 1.4100955724716187, "learning_rate": 5.9248134611999405e-06, "loss": 0.4418, "step": 23573 }, { "epoch": 0.644661999562459, "grad_norm": 1.3829303979873657, "learning_rate": 5.924004654907852e-06, "loss": 0.4909, "step": 23574 }, { "epoch": 0.6446893458761759, "grad_norm": 1.7097634077072144, "learning_rate": 5.923195880590801e-06, "loss": 0.495, "step": 23575 }, { "epoch": 0.6447166921898928, "grad_norm": 1.201848030090332, "learning_rate": 5.922387138255132e-06, "loss": 0.4954, "step": 23576 }, { "epoch": 0.6447440385036097, "grad_norm": 1.2237187623977661, "learning_rate": 5.921578427907187e-06, "loss": 0.4738, "step": 23577 }, { "epoch": 0.6447713848173267, "grad_norm": 1.3203502893447876, "learning_rate": 5.920769749553314e-06, "loss": 0.4848, "step": 23578 }, { "epoch": 0.6447987311310436, "grad_norm": 1.1057490110397339, "learning_rate": 5.919961103199852e-06, "loss": 0.3378, "step": 23579 }, { "epoch": 0.6448260774447604, "grad_norm": 1.20932137966156, "learning_rate": 5.919152488853146e-06, "loss": 0.4902, "step": 23580 }, { "epoch": 0.6448534237584773, "grad_norm": 1.6196566820144653, "learning_rate": 5.9183439065195425e-06, "loss": 0.3304, "step": 23581 }, { "epoch": 0.6448807700721942, "grad_norm": 1.3126205205917358, "learning_rate": 5.9175353562053805e-06, "loss": 0.4713, "step": 23582 }, { "epoch": 0.6449081163859112, "grad_norm": 1.256019949913025, "learning_rate": 5.916726837917004e-06, "loss": 0.4913, "step": 23583 }, { "epoch": 0.6449354626996281, "grad_norm": 1.3254048824310303, "learning_rate": 5.91591835166076e-06, "loss": 0.4565, "step": 23584 }, { "epoch": 0.644962809013345, "grad_norm": 1.63267982006073, "learning_rate": 5.915109897442984e-06, "loss": 0.3394, "step": 23585 }, { "epoch": 0.6449901553270619, "grad_norm": 1.20120370388031, "learning_rate": 5.914301475270024e-06, "loss": 0.461, "step": 23586 }, { "epoch": 0.6450175016407789, "grad_norm": 1.393717646598816, "learning_rate": 5.913493085148217e-06, "loss": 0.4843, "step": 23587 }, { "epoch": 0.6450448479544957, "grad_norm": 1.2573597431182861, "learning_rate": 5.912684727083906e-06, "loss": 0.4595, "step": 23588 }, { "epoch": 0.6450721942682126, "grad_norm": 1.520141839981079, "learning_rate": 5.9118764010834365e-06, "loss": 0.4546, "step": 23589 }, { "epoch": 0.6450995405819295, "grad_norm": 1.2904695272445679, "learning_rate": 5.9110681071531415e-06, "loss": 0.7551, "step": 23590 }, { "epoch": 0.6451268868956465, "grad_norm": 1.2677688598632812, "learning_rate": 5.910259845299371e-06, "loss": 0.4786, "step": 23591 }, { "epoch": 0.6451542332093634, "grad_norm": 1.186723232269287, "learning_rate": 5.909451615528459e-06, "loss": 0.4758, "step": 23592 }, { "epoch": 0.6451815795230803, "grad_norm": 1.5270339250564575, "learning_rate": 5.9086434178467466e-06, "loss": 0.4666, "step": 23593 }, { "epoch": 0.6452089258367972, "grad_norm": 1.2160767316818237, "learning_rate": 5.907835252260576e-06, "loss": 0.5002, "step": 23594 }, { "epoch": 0.6452362721505142, "grad_norm": 5.327711582183838, "learning_rate": 5.907027118776287e-06, "loss": 0.7161, "step": 23595 }, { "epoch": 0.645263618464231, "grad_norm": 1.908434271812439, "learning_rate": 5.9062190174002164e-06, "loss": 0.4946, "step": 23596 }, { "epoch": 0.6452909647779479, "grad_norm": 1.3593825101852417, "learning_rate": 5.90541094813871e-06, "loss": 0.4823, "step": 23597 }, { "epoch": 0.6453183110916648, "grad_norm": 1.6915779113769531, "learning_rate": 5.904602910998099e-06, "loss": 0.4752, "step": 23598 }, { "epoch": 0.6453456574053817, "grad_norm": 1.8657903671264648, "learning_rate": 5.903794905984726e-06, "loss": 0.4713, "step": 23599 }, { "epoch": 0.6453730037190987, "grad_norm": 1.334923267364502, "learning_rate": 5.902986933104929e-06, "loss": 0.4502, "step": 23600 }, { "epoch": 0.6454003500328156, "grad_norm": 1.4857240915298462, "learning_rate": 5.902178992365047e-06, "loss": 0.4733, "step": 23601 }, { "epoch": 0.6454276963465325, "grad_norm": 1.4981780052185059, "learning_rate": 5.901371083771418e-06, "loss": 0.463, "step": 23602 }, { "epoch": 0.6454550426602494, "grad_norm": 1.422826886177063, "learning_rate": 5.900563207330382e-06, "loss": 0.4699, "step": 23603 }, { "epoch": 0.6454823889739663, "grad_norm": 1.2476098537445068, "learning_rate": 5.89975536304827e-06, "loss": 0.471, "step": 23604 }, { "epoch": 0.6455097352876832, "grad_norm": 1.7757540941238403, "learning_rate": 5.898947550931423e-06, "loss": 0.4576, "step": 23605 }, { "epoch": 0.6455370816014001, "grad_norm": 1.263639211654663, "learning_rate": 5.898139770986181e-06, "loss": 0.7096, "step": 23606 }, { "epoch": 0.645564427915117, "grad_norm": 1.3793895244598389, "learning_rate": 5.897332023218876e-06, "loss": 0.4805, "step": 23607 }, { "epoch": 0.645591774228834, "grad_norm": 1.3272933959960938, "learning_rate": 5.896524307635848e-06, "loss": 0.7211, "step": 23608 }, { "epoch": 0.6456191205425509, "grad_norm": 1.584106683731079, "learning_rate": 5.895716624243431e-06, "loss": 0.4858, "step": 23609 }, { "epoch": 0.6456464668562678, "grad_norm": 1.7529423236846924, "learning_rate": 5.894908973047961e-06, "loss": 0.3731, "step": 23610 }, { "epoch": 0.6456738131699847, "grad_norm": 2.0651111602783203, "learning_rate": 5.894101354055777e-06, "loss": 0.3248, "step": 23611 }, { "epoch": 0.6457011594837015, "grad_norm": 1.6141942739486694, "learning_rate": 5.893293767273211e-06, "loss": 0.3726, "step": 23612 }, { "epoch": 0.6457285057974185, "grad_norm": 1.6983532905578613, "learning_rate": 5.892486212706601e-06, "loss": 0.481, "step": 23613 }, { "epoch": 0.6457558521111354, "grad_norm": 1.3483333587646484, "learning_rate": 5.891678690362279e-06, "loss": 0.4916, "step": 23614 }, { "epoch": 0.6457831984248523, "grad_norm": 1.2950563430786133, "learning_rate": 5.890871200246582e-06, "loss": 0.4642, "step": 23615 }, { "epoch": 0.6458105447385692, "grad_norm": 1.7820969820022583, "learning_rate": 5.890063742365846e-06, "loss": 0.3196, "step": 23616 }, { "epoch": 0.6458378910522862, "grad_norm": 1.5415146350860596, "learning_rate": 5.889256316726399e-06, "loss": 0.5126, "step": 23617 }, { "epoch": 0.6458652373660031, "grad_norm": 1.2182369232177734, "learning_rate": 5.888448923334582e-06, "loss": 0.4749, "step": 23618 }, { "epoch": 0.64589258367972, "grad_norm": 1.6075334548950195, "learning_rate": 5.887641562196728e-06, "loss": 0.4918, "step": 23619 }, { "epoch": 0.6459199299934368, "grad_norm": 1.2471041679382324, "learning_rate": 5.8868342333191655e-06, "loss": 0.4774, "step": 23620 }, { "epoch": 0.6459472763071538, "grad_norm": 1.2212532758712769, "learning_rate": 5.886026936708233e-06, "loss": 0.477, "step": 23621 }, { "epoch": 0.6459746226208707, "grad_norm": 1.4772841930389404, "learning_rate": 5.88521967237026e-06, "loss": 0.3444, "step": 23622 }, { "epoch": 0.6460019689345876, "grad_norm": 1.2628543376922607, "learning_rate": 5.8844124403115795e-06, "loss": 0.503, "step": 23623 }, { "epoch": 0.6460293152483045, "grad_norm": 1.0177861452102661, "learning_rate": 5.883605240538529e-06, "loss": 0.3379, "step": 23624 }, { "epoch": 0.6460566615620215, "grad_norm": 1.4246000051498413, "learning_rate": 5.882798073057434e-06, "loss": 0.4375, "step": 23625 }, { "epoch": 0.6460840078757384, "grad_norm": 1.4180620908737183, "learning_rate": 5.8819909378746295e-06, "loss": 0.4714, "step": 23626 }, { "epoch": 0.6461113541894553, "grad_norm": 1.6205720901489258, "learning_rate": 5.881183834996449e-06, "loss": 0.343, "step": 23627 }, { "epoch": 0.6461387005031721, "grad_norm": 1.2734678983688354, "learning_rate": 5.880376764429221e-06, "loss": 0.4896, "step": 23628 }, { "epoch": 0.646166046816889, "grad_norm": 1.6706520318984985, "learning_rate": 5.87956972617928e-06, "loss": 0.4522, "step": 23629 }, { "epoch": 0.646193393130606, "grad_norm": 1.2868083715438843, "learning_rate": 5.878762720252951e-06, "loss": 0.4863, "step": 23630 }, { "epoch": 0.6462207394443229, "grad_norm": 1.2273203134536743, "learning_rate": 5.87795574665657e-06, "loss": 0.462, "step": 23631 }, { "epoch": 0.6462480857580398, "grad_norm": 1.4597492218017578, "learning_rate": 5.877148805396467e-06, "loss": 0.4602, "step": 23632 }, { "epoch": 0.6462754320717568, "grad_norm": 1.3324843645095825, "learning_rate": 5.876341896478971e-06, "loss": 0.4684, "step": 23633 }, { "epoch": 0.6463027783854737, "grad_norm": 1.4320443868637085, "learning_rate": 5.875535019910413e-06, "loss": 0.4819, "step": 23634 }, { "epoch": 0.6463301246991906, "grad_norm": 1.5882190465927124, "learning_rate": 5.874728175697122e-06, "loss": 0.3297, "step": 23635 }, { "epoch": 0.6463574710129074, "grad_norm": 1.6015468835830688, "learning_rate": 5.873921363845425e-06, "loss": 0.3517, "step": 23636 }, { "epoch": 0.6463848173266243, "grad_norm": 1.491422414779663, "learning_rate": 5.873114584361657e-06, "loss": 0.5075, "step": 23637 }, { "epoch": 0.6464121636403413, "grad_norm": 1.44255793094635, "learning_rate": 5.872307837252146e-06, "loss": 0.4964, "step": 23638 }, { "epoch": 0.6464395099540582, "grad_norm": 1.496779203414917, "learning_rate": 5.871501122523213e-06, "loss": 0.4928, "step": 23639 }, { "epoch": 0.6464668562677751, "grad_norm": 1.269580364227295, "learning_rate": 5.870694440181193e-06, "loss": 0.4932, "step": 23640 }, { "epoch": 0.646494202581492, "grad_norm": 1.299053430557251, "learning_rate": 5.869887790232416e-06, "loss": 0.4965, "step": 23641 }, { "epoch": 0.646521548895209, "grad_norm": 1.4550248384475708, "learning_rate": 5.869081172683203e-06, "loss": 0.4951, "step": 23642 }, { "epoch": 0.6465488952089259, "grad_norm": 1.6225485801696777, "learning_rate": 5.868274587539888e-06, "loss": 0.4148, "step": 23643 }, { "epoch": 0.6465762415226427, "grad_norm": 1.234378457069397, "learning_rate": 5.8674680348087945e-06, "loss": 0.7154, "step": 23644 }, { "epoch": 0.6466035878363596, "grad_norm": 1.2918951511383057, "learning_rate": 5.866661514496252e-06, "loss": 0.5062, "step": 23645 }, { "epoch": 0.6466309341500766, "grad_norm": 1.3399982452392578, "learning_rate": 5.865855026608588e-06, "loss": 0.7661, "step": 23646 }, { "epoch": 0.6466582804637935, "grad_norm": 1.110508918762207, "learning_rate": 5.865048571152125e-06, "loss": 0.481, "step": 23647 }, { "epoch": 0.6466856267775104, "grad_norm": 1.3461298942565918, "learning_rate": 5.864242148133193e-06, "loss": 0.4625, "step": 23648 }, { "epoch": 0.6467129730912273, "grad_norm": 1.247983455657959, "learning_rate": 5.86343575755812e-06, "loss": 0.7378, "step": 23649 }, { "epoch": 0.6467403194049443, "grad_norm": 1.3989734649658203, "learning_rate": 5.862629399433227e-06, "loss": 0.4975, "step": 23650 }, { "epoch": 0.6467676657186612, "grad_norm": 1.50973641872406, "learning_rate": 5.861823073764844e-06, "loss": 0.4368, "step": 23651 }, { "epoch": 0.646795012032378, "grad_norm": 1.366028904914856, "learning_rate": 5.861016780559292e-06, "loss": 0.7327, "step": 23652 }, { "epoch": 0.6468223583460949, "grad_norm": 1.4138991832733154, "learning_rate": 5.8602105198229e-06, "loss": 0.4911, "step": 23653 }, { "epoch": 0.6468497046598118, "grad_norm": 1.7801567316055298, "learning_rate": 5.859404291561992e-06, "loss": 0.3422, "step": 23654 }, { "epoch": 0.6468770509735288, "grad_norm": 1.0662857294082642, "learning_rate": 5.85859809578289e-06, "loss": 0.3569, "step": 23655 }, { "epoch": 0.6469043972872457, "grad_norm": 1.40244460105896, "learning_rate": 5.857791932491923e-06, "loss": 0.4596, "step": 23656 }, { "epoch": 0.6469317436009626, "grad_norm": 1.2786211967468262, "learning_rate": 5.8569858016954094e-06, "loss": 0.4591, "step": 23657 }, { "epoch": 0.6469590899146795, "grad_norm": 1.239254117012024, "learning_rate": 5.856179703399678e-06, "loss": 0.5027, "step": 23658 }, { "epoch": 0.6469864362283965, "grad_norm": 1.0788242816925049, "learning_rate": 5.855373637611053e-06, "loss": 0.4339, "step": 23659 }, { "epoch": 0.6470137825421133, "grad_norm": 1.0651240348815918, "learning_rate": 5.8545676043358516e-06, "loss": 0.3506, "step": 23660 }, { "epoch": 0.6470411288558302, "grad_norm": 1.3697726726531982, "learning_rate": 5.853761603580401e-06, "loss": 0.4914, "step": 23661 }, { "epoch": 0.6470684751695471, "grad_norm": 1.5203940868377686, "learning_rate": 5.852955635351026e-06, "loss": 0.3244, "step": 23662 }, { "epoch": 0.647095821483264, "grad_norm": 1.2182488441467285, "learning_rate": 5.852149699654046e-06, "loss": 0.4959, "step": 23663 }, { "epoch": 0.647123167796981, "grad_norm": 1.3995862007141113, "learning_rate": 5.851343796495786e-06, "loss": 0.4561, "step": 23664 }, { "epoch": 0.6471505141106979, "grad_norm": 1.519883632659912, "learning_rate": 5.850537925882562e-06, "loss": 0.3286, "step": 23665 }, { "epoch": 0.6471778604244148, "grad_norm": 1.1024519205093384, "learning_rate": 5.849732087820703e-06, "loss": 0.3625, "step": 23666 }, { "epoch": 0.6472052067381318, "grad_norm": 1.219863772392273, "learning_rate": 5.84892628231653e-06, "loss": 0.49, "step": 23667 }, { "epoch": 0.6472325530518486, "grad_norm": 1.5776574611663818, "learning_rate": 5.8481205093763585e-06, "loss": 0.3472, "step": 23668 }, { "epoch": 0.6472598993655655, "grad_norm": 1.1974064111709595, "learning_rate": 5.847314769006513e-06, "loss": 0.7397, "step": 23669 }, { "epoch": 0.6472872456792824, "grad_norm": 1.3954718112945557, "learning_rate": 5.846509061213317e-06, "loss": 0.4866, "step": 23670 }, { "epoch": 0.6473145919929993, "grad_norm": 1.2297356128692627, "learning_rate": 5.845703386003087e-06, "loss": 0.4634, "step": 23671 }, { "epoch": 0.6473419383067163, "grad_norm": 1.6772229671478271, "learning_rate": 5.844897743382145e-06, "loss": 0.3598, "step": 23672 }, { "epoch": 0.6473692846204332, "grad_norm": 1.6016510725021362, "learning_rate": 5.844092133356816e-06, "loss": 0.4307, "step": 23673 }, { "epoch": 0.6473966309341501, "grad_norm": 1.224812626838684, "learning_rate": 5.8432865559334075e-06, "loss": 0.4755, "step": 23674 }, { "epoch": 0.6474239772478669, "grad_norm": 1.5902734994888306, "learning_rate": 5.8424810111182486e-06, "loss": 0.4997, "step": 23675 }, { "epoch": 0.6474513235615839, "grad_norm": 1.4626266956329346, "learning_rate": 5.841675498917656e-06, "loss": 0.4783, "step": 23676 }, { "epoch": 0.6474786698753008, "grad_norm": 1.3718303442001343, "learning_rate": 5.840870019337948e-06, "loss": 0.4818, "step": 23677 }, { "epoch": 0.6475060161890177, "grad_norm": 1.374245285987854, "learning_rate": 5.840064572385447e-06, "loss": 0.4941, "step": 23678 }, { "epoch": 0.6475333625027346, "grad_norm": 1.1963534355163574, "learning_rate": 5.839259158066466e-06, "loss": 0.4784, "step": 23679 }, { "epoch": 0.6475607088164516, "grad_norm": 1.4641211032867432, "learning_rate": 5.838453776387325e-06, "loss": 0.4831, "step": 23680 }, { "epoch": 0.6475880551301685, "grad_norm": 1.437908411026001, "learning_rate": 5.837648427354346e-06, "loss": 0.4554, "step": 23681 }, { "epoch": 0.6476154014438854, "grad_norm": 1.6899688243865967, "learning_rate": 5.8368431109738444e-06, "loss": 0.4565, "step": 23682 }, { "epoch": 0.6476427477576022, "grad_norm": 2.087474822998047, "learning_rate": 5.836037827252133e-06, "loss": 0.3694, "step": 23683 }, { "epoch": 0.6476700940713191, "grad_norm": 1.8111754655838013, "learning_rate": 5.835232576195535e-06, "loss": 0.4809, "step": 23684 }, { "epoch": 0.6476974403850361, "grad_norm": 1.2990068197250366, "learning_rate": 5.834427357810366e-06, "loss": 0.491, "step": 23685 }, { "epoch": 0.647724786698753, "grad_norm": 1.2727526426315308, "learning_rate": 5.83362217210294e-06, "loss": 0.469, "step": 23686 }, { "epoch": 0.6477521330124699, "grad_norm": 1.28660249710083, "learning_rate": 5.832817019079577e-06, "loss": 0.4811, "step": 23687 }, { "epoch": 0.6477794793261868, "grad_norm": 1.524556279182434, "learning_rate": 5.832011898746593e-06, "loss": 0.4798, "step": 23688 }, { "epoch": 0.6478068256399038, "grad_norm": 1.2936655282974243, "learning_rate": 5.831206811110299e-06, "loss": 0.4714, "step": 23689 }, { "epoch": 0.6478341719536207, "grad_norm": 1.5192937850952148, "learning_rate": 5.830401756177017e-06, "loss": 0.4928, "step": 23690 }, { "epoch": 0.6478615182673375, "grad_norm": 1.246293544769287, "learning_rate": 5.829596733953061e-06, "loss": 0.4868, "step": 23691 }, { "epoch": 0.6478888645810544, "grad_norm": 1.318092703819275, "learning_rate": 5.828791744444742e-06, "loss": 0.4555, "step": 23692 }, { "epoch": 0.6479162108947714, "grad_norm": 1.5864911079406738, "learning_rate": 5.82798678765838e-06, "loss": 0.3411, "step": 23693 }, { "epoch": 0.6479435572084883, "grad_norm": 1.4608527421951294, "learning_rate": 5.827181863600288e-06, "loss": 0.4469, "step": 23694 }, { "epoch": 0.6479709035222052, "grad_norm": 1.1989256143569946, "learning_rate": 5.82637697227678e-06, "loss": 0.4953, "step": 23695 }, { "epoch": 0.6479982498359221, "grad_norm": 1.3944292068481445, "learning_rate": 5.825572113694165e-06, "loss": 0.4676, "step": 23696 }, { "epoch": 0.6480255961496391, "grad_norm": 1.243310809135437, "learning_rate": 5.824767287858768e-06, "loss": 0.4919, "step": 23697 }, { "epoch": 0.648052942463356, "grad_norm": 1.152273416519165, "learning_rate": 5.823962494776895e-06, "loss": 0.4724, "step": 23698 }, { "epoch": 0.6480802887770728, "grad_norm": 1.4598000049591064, "learning_rate": 5.823157734454858e-06, "loss": 0.3422, "step": 23699 }, { "epoch": 0.6481076350907897, "grad_norm": 1.2776782512664795, "learning_rate": 5.822353006898978e-06, "loss": 0.5031, "step": 23700 }, { "epoch": 0.6481349814045066, "grad_norm": 1.1238561868667603, "learning_rate": 5.8215483121155615e-06, "loss": 0.4436, "step": 23701 }, { "epoch": 0.6481623277182236, "grad_norm": 1.2811784744262695, "learning_rate": 5.820743650110918e-06, "loss": 0.4844, "step": 23702 }, { "epoch": 0.6481896740319405, "grad_norm": 1.2461916208267212, "learning_rate": 5.81993902089137e-06, "loss": 0.4872, "step": 23703 }, { "epoch": 0.6482170203456574, "grad_norm": 1.3095645904541016, "learning_rate": 5.819134424463223e-06, "loss": 0.4826, "step": 23704 }, { "epoch": 0.6482443666593743, "grad_norm": 1.5640404224395752, "learning_rate": 5.818329860832787e-06, "loss": 0.4741, "step": 23705 }, { "epoch": 0.6482717129730913, "grad_norm": 1.732866644859314, "learning_rate": 5.81752533000638e-06, "loss": 0.4857, "step": 23706 }, { "epoch": 0.6482990592868081, "grad_norm": 1.27609384059906, "learning_rate": 5.816720831990308e-06, "loss": 0.7198, "step": 23707 }, { "epoch": 0.648326405600525, "grad_norm": 1.1906720399856567, "learning_rate": 5.815916366790886e-06, "loss": 0.5104, "step": 23708 }, { "epoch": 0.6483537519142419, "grad_norm": 1.689414620399475, "learning_rate": 5.815111934414421e-06, "loss": 0.4703, "step": 23709 }, { "epoch": 0.6483810982279589, "grad_norm": 1.270605444908142, "learning_rate": 5.814307534867226e-06, "loss": 0.505, "step": 23710 }, { "epoch": 0.6484084445416758, "grad_norm": 1.4407179355621338, "learning_rate": 5.813503168155607e-06, "loss": 0.4345, "step": 23711 }, { "epoch": 0.6484357908553927, "grad_norm": 1.2775695323944092, "learning_rate": 5.8126988342858805e-06, "loss": 0.478, "step": 23712 }, { "epoch": 0.6484631371691096, "grad_norm": 1.3052476644515991, "learning_rate": 5.811894533264353e-06, "loss": 0.4956, "step": 23713 }, { "epoch": 0.6484904834828266, "grad_norm": 1.494879961013794, "learning_rate": 5.811090265097331e-06, "loss": 0.339, "step": 23714 }, { "epoch": 0.6485178297965434, "grad_norm": 1.1395195722579956, "learning_rate": 5.81028602979113e-06, "loss": 0.4647, "step": 23715 }, { "epoch": 0.6485451761102603, "grad_norm": 1.200262188911438, "learning_rate": 5.809481827352057e-06, "loss": 0.5053, "step": 23716 }, { "epoch": 0.6485725224239772, "grad_norm": 1.271690011024475, "learning_rate": 5.80867765778642e-06, "loss": 0.4621, "step": 23717 }, { "epoch": 0.6485998687376942, "grad_norm": 1.3267818689346313, "learning_rate": 5.807873521100522e-06, "loss": 0.7139, "step": 23718 }, { "epoch": 0.6486272150514111, "grad_norm": 1.829103946685791, "learning_rate": 5.807069417300682e-06, "loss": 0.3686, "step": 23719 }, { "epoch": 0.648654561365128, "grad_norm": 1.2906246185302734, "learning_rate": 5.806265346393202e-06, "loss": 0.7463, "step": 23720 }, { "epoch": 0.6486819076788449, "grad_norm": 1.2956849336624146, "learning_rate": 5.805461308384386e-06, "loss": 0.4728, "step": 23721 }, { "epoch": 0.6487092539925619, "grad_norm": 1.246695876121521, "learning_rate": 5.804657303280549e-06, "loss": 0.7424, "step": 23722 }, { "epoch": 0.6487366003062787, "grad_norm": 1.5933740139007568, "learning_rate": 5.803853331087994e-06, "loss": 0.4721, "step": 23723 }, { "epoch": 0.6487639466199956, "grad_norm": 1.403021216392517, "learning_rate": 5.803049391813027e-06, "loss": 0.483, "step": 23724 }, { "epoch": 0.6487912929337125, "grad_norm": 1.2152211666107178, "learning_rate": 5.802245485461959e-06, "loss": 0.4892, "step": 23725 }, { "epoch": 0.6488186392474294, "grad_norm": 1.2950164079666138, "learning_rate": 5.8014416120410945e-06, "loss": 0.4369, "step": 23726 }, { "epoch": 0.6488459855611464, "grad_norm": 1.2817661762237549, "learning_rate": 5.800637771556734e-06, "loss": 0.4486, "step": 23727 }, { "epoch": 0.6488733318748633, "grad_norm": 1.2909265756607056, "learning_rate": 5.799833964015193e-06, "loss": 0.4644, "step": 23728 }, { "epoch": 0.6489006781885802, "grad_norm": 1.5749999284744263, "learning_rate": 5.799030189422772e-06, "loss": 0.3741, "step": 23729 }, { "epoch": 0.6489280245022971, "grad_norm": 1.2313913106918335, "learning_rate": 5.798226447785773e-06, "loss": 0.6916, "step": 23730 }, { "epoch": 0.648955370816014, "grad_norm": 1.2858978509902954, "learning_rate": 5.79742273911051e-06, "loss": 0.4594, "step": 23731 }, { "epoch": 0.6489827171297309, "grad_norm": 1.3791319131851196, "learning_rate": 5.7966190634032825e-06, "loss": 0.493, "step": 23732 }, { "epoch": 0.6490100634434478, "grad_norm": 1.394768238067627, "learning_rate": 5.795815420670395e-06, "loss": 0.5084, "step": 23733 }, { "epoch": 0.6490374097571647, "grad_norm": 1.3453099727630615, "learning_rate": 5.79501181091815e-06, "loss": 0.485, "step": 23734 }, { "epoch": 0.6490647560708817, "grad_norm": 1.203566074371338, "learning_rate": 5.794208234152856e-06, "loss": 0.4535, "step": 23735 }, { "epoch": 0.6490921023845986, "grad_norm": 1.2081298828125, "learning_rate": 5.793404690380815e-06, "loss": 0.4676, "step": 23736 }, { "epoch": 0.6491194486983155, "grad_norm": 1.196904182434082, "learning_rate": 5.792601179608327e-06, "loss": 0.4706, "step": 23737 }, { "epoch": 0.6491467950120324, "grad_norm": 1.2875769138336182, "learning_rate": 5.791797701841703e-06, "loss": 0.4857, "step": 23738 }, { "epoch": 0.6491741413257492, "grad_norm": 1.2809144258499146, "learning_rate": 5.790994257087241e-06, "loss": 0.4888, "step": 23739 }, { "epoch": 0.6492014876394662, "grad_norm": 1.412269949913025, "learning_rate": 5.79019084535124e-06, "loss": 0.5031, "step": 23740 }, { "epoch": 0.6492288339531831, "grad_norm": 1.3612946271896362, "learning_rate": 5.789387466640013e-06, "loss": 0.4806, "step": 23741 }, { "epoch": 0.6492561802669, "grad_norm": 1.3785572052001953, "learning_rate": 5.788584120959855e-06, "loss": 0.4946, "step": 23742 }, { "epoch": 0.6492835265806169, "grad_norm": 1.1968472003936768, "learning_rate": 5.7877808083170696e-06, "loss": 0.4837, "step": 23743 }, { "epoch": 0.6493108728943339, "grad_norm": 1.305212140083313, "learning_rate": 5.786977528717958e-06, "loss": 0.4664, "step": 23744 }, { "epoch": 0.6493382192080508, "grad_norm": 1.5669260025024414, "learning_rate": 5.7861742821688216e-06, "loss": 0.4555, "step": 23745 }, { "epoch": 0.6493655655217677, "grad_norm": 1.5900424718856812, "learning_rate": 5.7853710686759604e-06, "loss": 0.4808, "step": 23746 }, { "epoch": 0.6493929118354845, "grad_norm": 1.778212547302246, "learning_rate": 5.784567888245678e-06, "loss": 0.3476, "step": 23747 }, { "epoch": 0.6494202581492015, "grad_norm": 1.3291776180267334, "learning_rate": 5.783764740884277e-06, "loss": 0.4506, "step": 23748 }, { "epoch": 0.6494476044629184, "grad_norm": 1.4537522792816162, "learning_rate": 5.782961626598049e-06, "loss": 0.4627, "step": 23749 }, { "epoch": 0.6494749507766353, "grad_norm": 1.4348989725112915, "learning_rate": 5.782158545393305e-06, "loss": 0.4366, "step": 23750 }, { "epoch": 0.6495022970903522, "grad_norm": 1.363147497177124, "learning_rate": 5.7813554972763395e-06, "loss": 0.4951, "step": 23751 }, { "epoch": 0.6495296434040692, "grad_norm": 1.4095820188522339, "learning_rate": 5.78055248225345e-06, "loss": 0.4832, "step": 23752 }, { "epoch": 0.6495569897177861, "grad_norm": 1.2835016250610352, "learning_rate": 5.779749500330942e-06, "loss": 0.7362, "step": 23753 }, { "epoch": 0.649584336031503, "grad_norm": 2.34379243850708, "learning_rate": 5.778946551515112e-06, "loss": 0.7731, "step": 23754 }, { "epoch": 0.6496116823452198, "grad_norm": 1.541805624961853, "learning_rate": 5.778143635812259e-06, "loss": 0.5017, "step": 23755 }, { "epoch": 0.6496390286589367, "grad_norm": 1.682212233543396, "learning_rate": 5.7773407532286765e-06, "loss": 0.3464, "step": 23756 }, { "epoch": 0.6496663749726537, "grad_norm": 1.1449679136276245, "learning_rate": 5.776537903770672e-06, "loss": 0.3655, "step": 23757 }, { "epoch": 0.6496937212863706, "grad_norm": 1.288803219795227, "learning_rate": 5.775735087444538e-06, "loss": 0.48, "step": 23758 }, { "epoch": 0.6497210676000875, "grad_norm": 1.281218409538269, "learning_rate": 5.774932304256569e-06, "loss": 0.4555, "step": 23759 }, { "epoch": 0.6497484139138044, "grad_norm": 3.0903565883636475, "learning_rate": 5.774129554213073e-06, "loss": 0.3372, "step": 23760 }, { "epoch": 0.6497757602275214, "grad_norm": 1.3097953796386719, "learning_rate": 5.773326837320339e-06, "loss": 0.4991, "step": 23761 }, { "epoch": 0.6498031065412383, "grad_norm": 1.4745527505874634, "learning_rate": 5.7725241535846645e-06, "loss": 0.4534, "step": 23762 }, { "epoch": 0.6498304528549551, "grad_norm": 1.264854073524475, "learning_rate": 5.7717215030123505e-06, "loss": 0.5011, "step": 23763 }, { "epoch": 0.649857799168672, "grad_norm": 1.32829749584198, "learning_rate": 5.770918885609693e-06, "loss": 0.4735, "step": 23764 }, { "epoch": 0.649885145482389, "grad_norm": 1.5018969774246216, "learning_rate": 5.770116301382982e-06, "loss": 0.4816, "step": 23765 }, { "epoch": 0.6499124917961059, "grad_norm": 1.5578536987304688, "learning_rate": 5.769313750338522e-06, "loss": 0.4893, "step": 23766 }, { "epoch": 0.6499398381098228, "grad_norm": 1.3876979351043701, "learning_rate": 5.7685112324826055e-06, "loss": 0.4387, "step": 23767 }, { "epoch": 0.6499671844235397, "grad_norm": 1.1771025657653809, "learning_rate": 5.7677087478215274e-06, "loss": 0.7467, "step": 23768 }, { "epoch": 0.6499945307372567, "grad_norm": 1.5326374769210815, "learning_rate": 5.766906296361579e-06, "loss": 0.5, "step": 23769 }, { "epoch": 0.6500218770509735, "grad_norm": 1.2142173051834106, "learning_rate": 5.7661038781090636e-06, "loss": 0.4806, "step": 23770 }, { "epoch": 0.6500492233646904, "grad_norm": 1.5943008661270142, "learning_rate": 5.7653014930702714e-06, "loss": 0.35, "step": 23771 }, { "epoch": 0.6500765696784073, "grad_norm": 1.3647289276123047, "learning_rate": 5.764499141251493e-06, "loss": 0.481, "step": 23772 }, { "epoch": 0.6501039159921242, "grad_norm": 1.5133321285247803, "learning_rate": 5.763696822659031e-06, "loss": 0.4616, "step": 23773 }, { "epoch": 0.6501312623058412, "grad_norm": 1.2132878303527832, "learning_rate": 5.762894537299175e-06, "loss": 0.4867, "step": 23774 }, { "epoch": 0.6501586086195581, "grad_norm": 1.050539255142212, "learning_rate": 5.762092285178216e-06, "loss": 0.3191, "step": 23775 }, { "epoch": 0.650185954933275, "grad_norm": 1.3138477802276611, "learning_rate": 5.761290066302454e-06, "loss": 0.479, "step": 23776 }, { "epoch": 0.650213301246992, "grad_norm": 1.1896462440490723, "learning_rate": 5.760487880678178e-06, "loss": 0.4878, "step": 23777 }, { "epoch": 0.6502406475607088, "grad_norm": 1.3263700008392334, "learning_rate": 5.759685728311681e-06, "loss": 0.5014, "step": 23778 }, { "epoch": 0.6502679938744257, "grad_norm": 1.2347625494003296, "learning_rate": 5.758883609209258e-06, "loss": 0.4749, "step": 23779 }, { "epoch": 0.6502953401881426, "grad_norm": 1.9731690883636475, "learning_rate": 5.758081523377199e-06, "loss": 0.3774, "step": 23780 }, { "epoch": 0.6503226865018595, "grad_norm": 1.510467290878296, "learning_rate": 5.7572794708217916e-06, "loss": 0.4855, "step": 23781 }, { "epoch": 0.6503500328155765, "grad_norm": 3.0327792167663574, "learning_rate": 5.7564774515493385e-06, "loss": 0.4617, "step": 23782 }, { "epoch": 0.6503773791292934, "grad_norm": 1.3656107187271118, "learning_rate": 5.755675465566124e-06, "loss": 0.4945, "step": 23783 }, { "epoch": 0.6504047254430103, "grad_norm": 1.133531928062439, "learning_rate": 5.754873512878436e-06, "loss": 0.4583, "step": 23784 }, { "epoch": 0.6504320717567272, "grad_norm": 1.2423754930496216, "learning_rate": 5.754071593492576e-06, "loss": 0.4705, "step": 23785 }, { "epoch": 0.650459418070444, "grad_norm": 1.3930305242538452, "learning_rate": 5.75326970741483e-06, "loss": 0.4762, "step": 23786 }, { "epoch": 0.650486764384161, "grad_norm": 2.262298345565796, "learning_rate": 5.752467854651484e-06, "loss": 0.6961, "step": 23787 }, { "epoch": 0.6505141106978779, "grad_norm": 1.348633885383606, "learning_rate": 5.751666035208836e-06, "loss": 0.4794, "step": 23788 }, { "epoch": 0.6505414570115948, "grad_norm": 1.3628159761428833, "learning_rate": 5.750864249093171e-06, "loss": 0.4791, "step": 23789 }, { "epoch": 0.6505688033253118, "grad_norm": 1.4559603929519653, "learning_rate": 5.750062496310782e-06, "loss": 0.5193, "step": 23790 }, { "epoch": 0.6505961496390287, "grad_norm": 1.690268635749817, "learning_rate": 5.749260776867951e-06, "loss": 0.3488, "step": 23791 }, { "epoch": 0.6506234959527456, "grad_norm": 1.350099802017212, "learning_rate": 5.748459090770979e-06, "loss": 0.5073, "step": 23792 }, { "epoch": 0.6506508422664625, "grad_norm": 1.8790394067764282, "learning_rate": 5.747657438026147e-06, "loss": 0.4616, "step": 23793 }, { "epoch": 0.6506781885801793, "grad_norm": 1.3682645559310913, "learning_rate": 5.746855818639742e-06, "loss": 0.497, "step": 23794 }, { "epoch": 0.6507055348938963, "grad_norm": 1.4713021516799927, "learning_rate": 5.746054232618061e-06, "loss": 0.4612, "step": 23795 }, { "epoch": 0.6507328812076132, "grad_norm": 1.223120093345642, "learning_rate": 5.745252679967388e-06, "loss": 0.4481, "step": 23796 }, { "epoch": 0.6507602275213301, "grad_norm": 1.2710497379302979, "learning_rate": 5.744451160694004e-06, "loss": 0.4739, "step": 23797 }, { "epoch": 0.650787573835047, "grad_norm": 1.2612457275390625, "learning_rate": 5.74364967480421e-06, "loss": 0.4787, "step": 23798 }, { "epoch": 0.650814920148764, "grad_norm": 1.600621223449707, "learning_rate": 5.742848222304285e-06, "loss": 0.4858, "step": 23799 }, { "epoch": 0.6508422664624809, "grad_norm": 7.014380931854248, "learning_rate": 5.742046803200514e-06, "loss": 0.3526, "step": 23800 }, { "epoch": 0.6508696127761978, "grad_norm": 1.2637356519699097, "learning_rate": 5.7412454174991905e-06, "loss": 0.4762, "step": 23801 }, { "epoch": 0.6508969590899146, "grad_norm": 1.3405101299285889, "learning_rate": 5.740444065206598e-06, "loss": 0.5057, "step": 23802 }, { "epoch": 0.6509243054036316, "grad_norm": 1.393247127532959, "learning_rate": 5.739642746329024e-06, "loss": 0.4652, "step": 23803 }, { "epoch": 0.6509516517173485, "grad_norm": 2.15230131149292, "learning_rate": 5.738841460872751e-06, "loss": 0.7519, "step": 23804 }, { "epoch": 0.6509789980310654, "grad_norm": 1.2928494215011597, "learning_rate": 5.738040208844069e-06, "loss": 0.4553, "step": 23805 }, { "epoch": 0.6510063443447823, "grad_norm": 1.3468925952911377, "learning_rate": 5.737238990249263e-06, "loss": 0.4684, "step": 23806 }, { "epoch": 0.6510336906584993, "grad_norm": 1.3057851791381836, "learning_rate": 5.736437805094615e-06, "loss": 0.4832, "step": 23807 }, { "epoch": 0.6510610369722162, "grad_norm": 1.4556028842926025, "learning_rate": 5.7356366533864135e-06, "loss": 0.4792, "step": 23808 }, { "epoch": 0.6510883832859331, "grad_norm": 1.5617077350616455, "learning_rate": 5.734835535130945e-06, "loss": 0.7287, "step": 23809 }, { "epoch": 0.6511157295996499, "grad_norm": 1.735270380973816, "learning_rate": 5.734034450334487e-06, "loss": 0.3801, "step": 23810 }, { "epoch": 0.6511430759133668, "grad_norm": 1.5023294687271118, "learning_rate": 5.733233399003331e-06, "loss": 0.4676, "step": 23811 }, { "epoch": 0.6511704222270838, "grad_norm": 1.2682851552963257, "learning_rate": 5.7324323811437584e-06, "loss": 0.5038, "step": 23812 }, { "epoch": 0.6511977685408007, "grad_norm": 1.3627938032150269, "learning_rate": 5.7316313967620496e-06, "loss": 0.4739, "step": 23813 }, { "epoch": 0.6512251148545176, "grad_norm": 1.334704875946045, "learning_rate": 5.730830445864499e-06, "loss": 0.4834, "step": 23814 }, { "epoch": 0.6512524611682345, "grad_norm": 1.1466128826141357, "learning_rate": 5.730029528457378e-06, "loss": 0.4602, "step": 23815 }, { "epoch": 0.6512798074819515, "grad_norm": 1.0774282217025757, "learning_rate": 5.729228644546969e-06, "loss": 0.4671, "step": 23816 }, { "epoch": 0.6513071537956684, "grad_norm": 1.3133164644241333, "learning_rate": 5.728427794139565e-06, "loss": 0.4778, "step": 23817 }, { "epoch": 0.6513345001093852, "grad_norm": 1.3303135633468628, "learning_rate": 5.727626977241442e-06, "loss": 0.5039, "step": 23818 }, { "epoch": 0.6513618464231021, "grad_norm": 1.219448447227478, "learning_rate": 5.726826193858879e-06, "loss": 0.5022, "step": 23819 }, { "epoch": 0.6513891927368191, "grad_norm": 1.6204863786697388, "learning_rate": 5.726025443998166e-06, "loss": 0.3544, "step": 23820 }, { "epoch": 0.651416539050536, "grad_norm": 1.6460124254226685, "learning_rate": 5.72522472766558e-06, "loss": 0.3287, "step": 23821 }, { "epoch": 0.6514438853642529, "grad_norm": 1.330912709236145, "learning_rate": 5.724424044867399e-06, "loss": 0.4782, "step": 23822 }, { "epoch": 0.6514712316779698, "grad_norm": 1.3140738010406494, "learning_rate": 5.723623395609913e-06, "loss": 0.4913, "step": 23823 }, { "epoch": 0.6514985779916868, "grad_norm": 1.3558576107025146, "learning_rate": 5.722822779899398e-06, "loss": 0.3961, "step": 23824 }, { "epoch": 0.6515259243054037, "grad_norm": 1.380379319190979, "learning_rate": 5.722022197742134e-06, "loss": 0.4777, "step": 23825 }, { "epoch": 0.6515532706191205, "grad_norm": 1.3623449802398682, "learning_rate": 5.7212216491443976e-06, "loss": 0.4825, "step": 23826 }, { "epoch": 0.6515806169328374, "grad_norm": 1.4682660102844238, "learning_rate": 5.7204211341124785e-06, "loss": 0.3313, "step": 23827 }, { "epoch": 0.6516079632465543, "grad_norm": 1.3782678842544556, "learning_rate": 5.71962065265265e-06, "loss": 0.4735, "step": 23828 }, { "epoch": 0.6516353095602713, "grad_norm": 1.3381705284118652, "learning_rate": 5.718820204771189e-06, "loss": 0.4829, "step": 23829 }, { "epoch": 0.6516626558739882, "grad_norm": 1.2253026962280273, "learning_rate": 5.718019790474383e-06, "loss": 0.4436, "step": 23830 }, { "epoch": 0.6516900021877051, "grad_norm": 1.2355704307556152, "learning_rate": 5.717219409768507e-06, "loss": 0.4762, "step": 23831 }, { "epoch": 0.651717348501422, "grad_norm": 0.931961715221405, "learning_rate": 5.716419062659835e-06, "loss": 0.3335, "step": 23832 }, { "epoch": 0.651744694815139, "grad_norm": 1.323274850845337, "learning_rate": 5.715618749154655e-06, "loss": 0.4636, "step": 23833 }, { "epoch": 0.6517720411288558, "grad_norm": 1.247544288635254, "learning_rate": 5.714818469259238e-06, "loss": 0.4756, "step": 23834 }, { "epoch": 0.6517993874425727, "grad_norm": 1.2812175750732422, "learning_rate": 5.714018222979862e-06, "loss": 0.4745, "step": 23835 }, { "epoch": 0.6518267337562896, "grad_norm": 1.223057746887207, "learning_rate": 5.71321801032281e-06, "loss": 0.4513, "step": 23836 }, { "epoch": 0.6518540800700066, "grad_norm": 1.615894079208374, "learning_rate": 5.712417831294356e-06, "loss": 0.4567, "step": 23837 }, { "epoch": 0.6518814263837235, "grad_norm": 1.4924463033676147, "learning_rate": 5.711617685900774e-06, "loss": 0.4811, "step": 23838 }, { "epoch": 0.6519087726974404, "grad_norm": 1.480653166770935, "learning_rate": 5.7108175741483485e-06, "loss": 0.4524, "step": 23839 }, { "epoch": 0.6519361190111573, "grad_norm": 1.6102590560913086, "learning_rate": 5.7100174960433515e-06, "loss": 0.4812, "step": 23840 }, { "epoch": 0.6519634653248743, "grad_norm": 1.3002394437789917, "learning_rate": 5.709217451592062e-06, "loss": 0.6908, "step": 23841 }, { "epoch": 0.6519908116385911, "grad_norm": 1.4139697551727295, "learning_rate": 5.708417440800748e-06, "loss": 0.4661, "step": 23842 }, { "epoch": 0.652018157952308, "grad_norm": 1.3836146593093872, "learning_rate": 5.7076174636756965e-06, "loss": 0.4854, "step": 23843 }, { "epoch": 0.6520455042660249, "grad_norm": 1.3428447246551514, "learning_rate": 5.706817520223176e-06, "loss": 0.7376, "step": 23844 }, { "epoch": 0.6520728505797418, "grad_norm": 1.63759183883667, "learning_rate": 5.706017610449461e-06, "loss": 0.3407, "step": 23845 }, { "epoch": 0.6521001968934588, "grad_norm": 1.5635160207748413, "learning_rate": 5.705217734360835e-06, "loss": 0.4917, "step": 23846 }, { "epoch": 0.6521275432071757, "grad_norm": 1.9305086135864258, "learning_rate": 5.704417891963565e-06, "loss": 0.7439, "step": 23847 }, { "epoch": 0.6521548895208926, "grad_norm": 1.3735228776931763, "learning_rate": 5.703618083263926e-06, "loss": 0.4623, "step": 23848 }, { "epoch": 0.6521822358346095, "grad_norm": 1.5995303392410278, "learning_rate": 5.7028183082682e-06, "loss": 0.4731, "step": 23849 }, { "epoch": 0.6522095821483264, "grad_norm": 1.3865991830825806, "learning_rate": 5.702018566982652e-06, "loss": 0.4864, "step": 23850 }, { "epoch": 0.6522369284620433, "grad_norm": 1.6484206914901733, "learning_rate": 5.701218859413554e-06, "loss": 0.3585, "step": 23851 }, { "epoch": 0.6522642747757602, "grad_norm": 1.4091464281082153, "learning_rate": 5.700419185567189e-06, "loss": 0.4737, "step": 23852 }, { "epoch": 0.6522916210894771, "grad_norm": 1.2251783609390259, "learning_rate": 5.699619545449825e-06, "loss": 0.4762, "step": 23853 }, { "epoch": 0.6523189674031941, "grad_norm": 1.118605613708496, "learning_rate": 5.698819939067732e-06, "loss": 0.5054, "step": 23854 }, { "epoch": 0.652346313716911, "grad_norm": 1.2709574699401855, "learning_rate": 5.698020366427191e-06, "loss": 0.4699, "step": 23855 }, { "epoch": 0.6523736600306279, "grad_norm": 1.1169716119766235, "learning_rate": 5.6972208275344665e-06, "loss": 0.3387, "step": 23856 }, { "epoch": 0.6524010063443448, "grad_norm": 1.291109561920166, "learning_rate": 5.696421322395832e-06, "loss": 0.4865, "step": 23857 }, { "epoch": 0.6524283526580616, "grad_norm": 1.504578709602356, "learning_rate": 5.695621851017566e-06, "loss": 0.5111, "step": 23858 }, { "epoch": 0.6524556989717786, "grad_norm": 1.3792065382003784, "learning_rate": 5.694822413405933e-06, "loss": 0.4804, "step": 23859 }, { "epoch": 0.6524830452854955, "grad_norm": 1.877778172492981, "learning_rate": 5.694023009567204e-06, "loss": 0.3604, "step": 23860 }, { "epoch": 0.6525103915992124, "grad_norm": 1.7157223224639893, "learning_rate": 5.693223639507655e-06, "loss": 0.4553, "step": 23861 }, { "epoch": 0.6525377379129293, "grad_norm": 1.5404082536697388, "learning_rate": 5.6924243032335554e-06, "loss": 0.5095, "step": 23862 }, { "epoch": 0.6525650842266463, "grad_norm": 1.5869481563568115, "learning_rate": 5.691625000751175e-06, "loss": 0.3577, "step": 23863 }, { "epoch": 0.6525924305403632, "grad_norm": 1.3335720300674438, "learning_rate": 5.6908257320667805e-06, "loss": 0.4732, "step": 23864 }, { "epoch": 0.6526197768540801, "grad_norm": 1.157469391822815, "learning_rate": 5.690026497186649e-06, "loss": 0.4461, "step": 23865 }, { "epoch": 0.6526471231677969, "grad_norm": 1.2743704319000244, "learning_rate": 5.689227296117047e-06, "loss": 0.4686, "step": 23866 }, { "epoch": 0.6526744694815139, "grad_norm": 1.3610105514526367, "learning_rate": 5.6884281288642396e-06, "loss": 0.4738, "step": 23867 }, { "epoch": 0.6527018157952308, "grad_norm": 1.5139720439910889, "learning_rate": 5.687628995434504e-06, "loss": 0.5084, "step": 23868 }, { "epoch": 0.6527291621089477, "grad_norm": 1.077078104019165, "learning_rate": 5.686829895834105e-06, "loss": 0.4556, "step": 23869 }, { "epoch": 0.6527565084226646, "grad_norm": 1.2752195596694946, "learning_rate": 5.686030830069309e-06, "loss": 0.4966, "step": 23870 }, { "epoch": 0.6527838547363816, "grad_norm": 1.4400701522827148, "learning_rate": 5.685231798146389e-06, "loss": 0.4519, "step": 23871 }, { "epoch": 0.6528112010500985, "grad_norm": 1.3390454053878784, "learning_rate": 5.684432800071613e-06, "loss": 0.755, "step": 23872 }, { "epoch": 0.6528385473638153, "grad_norm": 1.2002670764923096, "learning_rate": 5.683633835851242e-06, "loss": 0.4531, "step": 23873 }, { "epoch": 0.6528658936775322, "grad_norm": 1.3763933181762695, "learning_rate": 5.6828349054915535e-06, "loss": 0.4943, "step": 23874 }, { "epoch": 0.6528932399912492, "grad_norm": 1.5072317123413086, "learning_rate": 5.68203600899881e-06, "loss": 0.3496, "step": 23875 }, { "epoch": 0.6529205863049661, "grad_norm": 1.2823379039764404, "learning_rate": 5.681237146379279e-06, "loss": 0.4817, "step": 23876 }, { "epoch": 0.652947932618683, "grad_norm": 1.6233971118927002, "learning_rate": 5.680438317639223e-06, "loss": 0.3261, "step": 23877 }, { "epoch": 0.6529752789323999, "grad_norm": 1.4127919673919678, "learning_rate": 5.6796395227849164e-06, "loss": 0.4809, "step": 23878 }, { "epoch": 0.6530026252461169, "grad_norm": 1.7090455293655396, "learning_rate": 5.678840761822623e-06, "loss": 0.3503, "step": 23879 }, { "epoch": 0.6530299715598338, "grad_norm": 1.3055675029754639, "learning_rate": 5.678042034758603e-06, "loss": 0.4933, "step": 23880 }, { "epoch": 0.6530573178735506, "grad_norm": 1.3053022623062134, "learning_rate": 5.67724334159913e-06, "loss": 0.494, "step": 23881 }, { "epoch": 0.6530846641872675, "grad_norm": 1.5089561939239502, "learning_rate": 5.676444682350468e-06, "loss": 0.4847, "step": 23882 }, { "epoch": 0.6531120105009844, "grad_norm": 1.3510476350784302, "learning_rate": 5.675646057018876e-06, "loss": 0.482, "step": 23883 }, { "epoch": 0.6531393568147014, "grad_norm": 1.3580999374389648, "learning_rate": 5.674847465610632e-06, "loss": 0.4795, "step": 23884 }, { "epoch": 0.6531667031284183, "grad_norm": 1.2256029844284058, "learning_rate": 5.674048908131988e-06, "loss": 0.4985, "step": 23885 }, { "epoch": 0.6531940494421352, "grad_norm": 1.436603307723999, "learning_rate": 5.67325038458921e-06, "loss": 0.4462, "step": 23886 }, { "epoch": 0.6532213957558521, "grad_norm": 1.3762526512145996, "learning_rate": 5.672451894988568e-06, "loss": 0.4523, "step": 23887 }, { "epoch": 0.6532487420695691, "grad_norm": 1.2496721744537354, "learning_rate": 5.671653439336323e-06, "loss": 0.4646, "step": 23888 }, { "epoch": 0.6532760883832859, "grad_norm": 1.2364575862884521, "learning_rate": 5.670855017638735e-06, "loss": 0.4826, "step": 23889 }, { "epoch": 0.6533034346970028, "grad_norm": 1.3084063529968262, "learning_rate": 5.670056629902074e-06, "loss": 0.4737, "step": 23890 }, { "epoch": 0.6533307810107197, "grad_norm": 1.7309833765029907, "learning_rate": 5.669258276132602e-06, "loss": 0.4329, "step": 23891 }, { "epoch": 0.6533581273244367, "grad_norm": 1.8913161754608154, "learning_rate": 5.668459956336576e-06, "loss": 0.3348, "step": 23892 }, { "epoch": 0.6533854736381536, "grad_norm": 1.4098117351531982, "learning_rate": 5.6676616705202655e-06, "loss": 0.5036, "step": 23893 }, { "epoch": 0.6534128199518705, "grad_norm": 1.2615565061569214, "learning_rate": 5.6668634186899295e-06, "loss": 0.4673, "step": 23894 }, { "epoch": 0.6534401662655874, "grad_norm": 1.3150354623794556, "learning_rate": 5.666065200851828e-06, "loss": 0.4483, "step": 23895 }, { "epoch": 0.6534675125793044, "grad_norm": 1.1926997900009155, "learning_rate": 5.6652670170122285e-06, "loss": 0.7214, "step": 23896 }, { "epoch": 0.6534948588930212, "grad_norm": 1.1401809453964233, "learning_rate": 5.664468867177389e-06, "loss": 0.4822, "step": 23897 }, { "epoch": 0.6535222052067381, "grad_norm": 1.2713741064071655, "learning_rate": 5.663670751353572e-06, "loss": 0.7378, "step": 23898 }, { "epoch": 0.653549551520455, "grad_norm": 1.332251787185669, "learning_rate": 5.662872669547034e-06, "loss": 0.489, "step": 23899 }, { "epoch": 0.6535768978341719, "grad_norm": 1.8859672546386719, "learning_rate": 5.662074621764042e-06, "loss": 0.3552, "step": 23900 }, { "epoch": 0.6536042441478889, "grad_norm": 1.1801962852478027, "learning_rate": 5.661276608010855e-06, "loss": 0.4504, "step": 23901 }, { "epoch": 0.6536315904616058, "grad_norm": 1.4624429941177368, "learning_rate": 5.660478628293727e-06, "loss": 0.446, "step": 23902 }, { "epoch": 0.6536589367753227, "grad_norm": 1.47064208984375, "learning_rate": 5.659680682618926e-06, "loss": 0.4838, "step": 23903 }, { "epoch": 0.6536862830890396, "grad_norm": 1.5640085935592651, "learning_rate": 5.658882770992711e-06, "loss": 0.4993, "step": 23904 }, { "epoch": 0.6537136294027565, "grad_norm": 1.383347988128662, "learning_rate": 5.658084893421334e-06, "loss": 0.4688, "step": 23905 }, { "epoch": 0.6537409757164734, "grad_norm": 1.1255934238433838, "learning_rate": 5.657287049911062e-06, "loss": 0.4706, "step": 23906 }, { "epoch": 0.6537683220301903, "grad_norm": 1.308012843132019, "learning_rate": 5.6564892404681525e-06, "loss": 0.4559, "step": 23907 }, { "epoch": 0.6537956683439072, "grad_norm": 1.3929649591445923, "learning_rate": 5.655691465098858e-06, "loss": 0.4437, "step": 23908 }, { "epoch": 0.6538230146576242, "grad_norm": 1.3343135118484497, "learning_rate": 5.654893723809445e-06, "loss": 0.4894, "step": 23909 }, { "epoch": 0.6538503609713411, "grad_norm": 1.2398264408111572, "learning_rate": 5.654096016606169e-06, "loss": 0.4254, "step": 23910 }, { "epoch": 0.653877707285058, "grad_norm": 1.5054892301559448, "learning_rate": 5.653298343495286e-06, "loss": 0.494, "step": 23911 }, { "epoch": 0.6539050535987749, "grad_norm": 1.6712356805801392, "learning_rate": 5.652500704483049e-06, "loss": 0.3764, "step": 23912 }, { "epoch": 0.6539323999124917, "grad_norm": 2.068129301071167, "learning_rate": 5.651703099575726e-06, "loss": 0.4829, "step": 23913 }, { "epoch": 0.6539597462262087, "grad_norm": 1.4524937868118286, "learning_rate": 5.650905528779569e-06, "loss": 0.4986, "step": 23914 }, { "epoch": 0.6539870925399256, "grad_norm": 1.1667556762695312, "learning_rate": 5.650107992100829e-06, "loss": 0.7342, "step": 23915 }, { "epoch": 0.6540144388536425, "grad_norm": 1.3544814586639404, "learning_rate": 5.649310489545772e-06, "loss": 0.4686, "step": 23916 }, { "epoch": 0.6540417851673594, "grad_norm": 1.6751104593276978, "learning_rate": 5.64851302112065e-06, "loss": 0.4205, "step": 23917 }, { "epoch": 0.6540691314810764, "grad_norm": 1.3923953771591187, "learning_rate": 5.647715586831713e-06, "loss": 0.4937, "step": 23918 }, { "epoch": 0.6540964777947933, "grad_norm": 1.3564047813415527, "learning_rate": 5.646918186685233e-06, "loss": 0.4843, "step": 23919 }, { "epoch": 0.6541238241085102, "grad_norm": 1.4915260076522827, "learning_rate": 5.646120820687448e-06, "loss": 0.3382, "step": 23920 }, { "epoch": 0.654151170422227, "grad_norm": 1.4175925254821777, "learning_rate": 5.6453234888446185e-06, "loss": 0.4571, "step": 23921 }, { "epoch": 0.654178516735944, "grad_norm": 1.154181718826294, "learning_rate": 5.644526191163004e-06, "loss": 0.4786, "step": 23922 }, { "epoch": 0.6542058630496609, "grad_norm": 1.215827226638794, "learning_rate": 5.643728927648855e-06, "loss": 0.5062, "step": 23923 }, { "epoch": 0.6542332093633778, "grad_norm": 1.4757885932922363, "learning_rate": 5.642931698308423e-06, "loss": 0.7666, "step": 23924 }, { "epoch": 0.6542605556770947, "grad_norm": 1.2120847702026367, "learning_rate": 5.642134503147969e-06, "loss": 0.4263, "step": 23925 }, { "epoch": 0.6542879019908117, "grad_norm": 1.4440838098526, "learning_rate": 5.641337342173745e-06, "loss": 0.5088, "step": 23926 }, { "epoch": 0.6543152483045286, "grad_norm": 1.4061880111694336, "learning_rate": 5.640540215391999e-06, "loss": 0.4873, "step": 23927 }, { "epoch": 0.6543425946182455, "grad_norm": 1.3384487628936768, "learning_rate": 5.639743122808989e-06, "loss": 0.4853, "step": 23928 }, { "epoch": 0.6543699409319623, "grad_norm": 1.3119555711746216, "learning_rate": 5.63894606443097e-06, "loss": 0.4404, "step": 23929 }, { "epoch": 0.6543972872456792, "grad_norm": 1.3762513399124146, "learning_rate": 5.638149040264187e-06, "loss": 0.4713, "step": 23930 }, { "epoch": 0.6544246335593962, "grad_norm": 1.483626365661621, "learning_rate": 5.637352050314901e-06, "loss": 0.3395, "step": 23931 }, { "epoch": 0.6544519798731131, "grad_norm": 1.2602053880691528, "learning_rate": 5.6365550945893625e-06, "loss": 0.4487, "step": 23932 }, { "epoch": 0.65447932618683, "grad_norm": 1.344886064529419, "learning_rate": 5.635758173093819e-06, "loss": 0.4657, "step": 23933 }, { "epoch": 0.654506672500547, "grad_norm": 1.4432979822158813, "learning_rate": 5.634961285834522e-06, "loss": 0.4927, "step": 23934 }, { "epoch": 0.6545340188142639, "grad_norm": 1.5877550840377808, "learning_rate": 5.634164432817728e-06, "loss": 0.4632, "step": 23935 }, { "epoch": 0.6545613651279808, "grad_norm": 1.5312317609786987, "learning_rate": 5.633367614049685e-06, "loss": 0.4828, "step": 23936 }, { "epoch": 0.6545887114416976, "grad_norm": 1.33672297000885, "learning_rate": 5.632570829536641e-06, "loss": 0.465, "step": 23937 }, { "epoch": 0.6546160577554145, "grad_norm": 1.1642634868621826, "learning_rate": 5.631774079284853e-06, "loss": 0.4665, "step": 23938 }, { "epoch": 0.6546434040691315, "grad_norm": 1.3361785411834717, "learning_rate": 5.630977363300569e-06, "loss": 0.4842, "step": 23939 }, { "epoch": 0.6546707503828484, "grad_norm": 1.473671317100525, "learning_rate": 5.630180681590034e-06, "loss": 0.4534, "step": 23940 }, { "epoch": 0.6546980966965653, "grad_norm": 1.4072046279907227, "learning_rate": 5.629384034159505e-06, "loss": 0.4468, "step": 23941 }, { "epoch": 0.6547254430102822, "grad_norm": 1.3068324327468872, "learning_rate": 5.6285874210152294e-06, "loss": 0.4823, "step": 23942 }, { "epoch": 0.6547527893239992, "grad_norm": 1.3415560722351074, "learning_rate": 5.6277908421634496e-06, "loss": 0.4321, "step": 23943 }, { "epoch": 0.6547801356377161, "grad_norm": 1.4350639581680298, "learning_rate": 5.626994297610427e-06, "loss": 0.3663, "step": 23944 }, { "epoch": 0.6548074819514329, "grad_norm": 1.5003622770309448, "learning_rate": 5.6261977873624e-06, "loss": 0.3255, "step": 23945 }, { "epoch": 0.6548348282651498, "grad_norm": 1.3937726020812988, "learning_rate": 5.625401311425622e-06, "loss": 0.3778, "step": 23946 }, { "epoch": 0.6548621745788668, "grad_norm": 1.666906714439392, "learning_rate": 5.624604869806336e-06, "loss": 0.4699, "step": 23947 }, { "epoch": 0.6548895208925837, "grad_norm": 1.3068451881408691, "learning_rate": 5.623808462510797e-06, "loss": 0.4117, "step": 23948 }, { "epoch": 0.6549168672063006, "grad_norm": 1.5763516426086426, "learning_rate": 5.62301208954525e-06, "loss": 0.4549, "step": 23949 }, { "epoch": 0.6549442135200175, "grad_norm": 1.2546287775039673, "learning_rate": 5.622215750915936e-06, "loss": 0.4806, "step": 23950 }, { "epoch": 0.6549715598337345, "grad_norm": 1.238405704498291, "learning_rate": 5.621419446629111e-06, "loss": 0.4961, "step": 23951 }, { "epoch": 0.6549989061474514, "grad_norm": 1.613196611404419, "learning_rate": 5.62062317669102e-06, "loss": 0.3723, "step": 23952 }, { "epoch": 0.6550262524611682, "grad_norm": 1.438936471939087, "learning_rate": 5.619826941107903e-06, "loss": 0.4626, "step": 23953 }, { "epoch": 0.6550535987748851, "grad_norm": 1.5293891429901123, "learning_rate": 5.619030739886019e-06, "loss": 0.4959, "step": 23954 }, { "epoch": 0.655080945088602, "grad_norm": 1.1531575918197632, "learning_rate": 5.618234573031601e-06, "loss": 0.4479, "step": 23955 }, { "epoch": 0.655108291402319, "grad_norm": 1.5325381755828857, "learning_rate": 5.617438440550898e-06, "loss": 0.4914, "step": 23956 }, { "epoch": 0.6551356377160359, "grad_norm": 1.1393762826919556, "learning_rate": 5.616642342450158e-06, "loss": 0.5023, "step": 23957 }, { "epoch": 0.6551629840297528, "grad_norm": 1.2860640287399292, "learning_rate": 5.615846278735627e-06, "loss": 0.4397, "step": 23958 }, { "epoch": 0.6551903303434697, "grad_norm": 1.233176350593567, "learning_rate": 5.615050249413544e-06, "loss": 0.4742, "step": 23959 }, { "epoch": 0.6552176766571867, "grad_norm": 1.1364997625350952, "learning_rate": 5.614254254490161e-06, "loss": 0.4728, "step": 23960 }, { "epoch": 0.6552450229709035, "grad_norm": 1.7810732126235962, "learning_rate": 5.613458293971721e-06, "loss": 0.4723, "step": 23961 }, { "epoch": 0.6552723692846204, "grad_norm": 1.2946407794952393, "learning_rate": 5.612662367864461e-06, "loss": 0.4835, "step": 23962 }, { "epoch": 0.6552997155983373, "grad_norm": 1.2630711793899536, "learning_rate": 5.611866476174633e-06, "loss": 0.4645, "step": 23963 }, { "epoch": 0.6553270619120543, "grad_norm": 1.328726887702942, "learning_rate": 5.611070618908479e-06, "loss": 0.4887, "step": 23964 }, { "epoch": 0.6553544082257712, "grad_norm": 1.3929202556610107, "learning_rate": 5.610274796072237e-06, "loss": 0.4723, "step": 23965 }, { "epoch": 0.6553817545394881, "grad_norm": 1.1268898248672485, "learning_rate": 5.609479007672156e-06, "loss": 0.4617, "step": 23966 }, { "epoch": 0.655409100853205, "grad_norm": 1.209141492843628, "learning_rate": 5.608683253714478e-06, "loss": 0.4944, "step": 23967 }, { "epoch": 0.655436447166922, "grad_norm": 1.707593560218811, "learning_rate": 5.60788753420544e-06, "loss": 0.3956, "step": 23968 }, { "epoch": 0.6554637934806388, "grad_norm": 1.3169188499450684, "learning_rate": 5.6070918491512916e-06, "loss": 0.5062, "step": 23969 }, { "epoch": 0.6554911397943557, "grad_norm": 1.5927668809890747, "learning_rate": 5.606296198558272e-06, "loss": 0.4851, "step": 23970 }, { "epoch": 0.6555184861080726, "grad_norm": 1.4976869821548462, "learning_rate": 5.605500582432623e-06, "loss": 0.4404, "step": 23971 }, { "epoch": 0.6555458324217895, "grad_norm": 1.2025635242462158, "learning_rate": 5.604705000780579e-06, "loss": 0.4297, "step": 23972 }, { "epoch": 0.6555731787355065, "grad_norm": 1.3595727682113647, "learning_rate": 5.603909453608393e-06, "loss": 0.4722, "step": 23973 }, { "epoch": 0.6556005250492234, "grad_norm": 1.311118483543396, "learning_rate": 5.6031139409223e-06, "loss": 0.4712, "step": 23974 }, { "epoch": 0.6556278713629403, "grad_norm": 1.5170458555221558, "learning_rate": 5.602318462728537e-06, "loss": 0.4336, "step": 23975 }, { "epoch": 0.6556552176766571, "grad_norm": 2.0729000568389893, "learning_rate": 5.601523019033351e-06, "loss": 0.4631, "step": 23976 }, { "epoch": 0.6556825639903741, "grad_norm": 1.2344259023666382, "learning_rate": 5.60072760984298e-06, "loss": 0.5029, "step": 23977 }, { "epoch": 0.655709910304091, "grad_norm": 1.3821443319320679, "learning_rate": 5.599932235163658e-06, "loss": 0.477, "step": 23978 }, { "epoch": 0.6557372566178079, "grad_norm": 1.5135191679000854, "learning_rate": 5.599136895001635e-06, "loss": 0.333, "step": 23979 }, { "epoch": 0.6557646029315248, "grad_norm": 1.4277323484420776, "learning_rate": 5.598341589363144e-06, "loss": 0.4149, "step": 23980 }, { "epoch": 0.6557919492452418, "grad_norm": 1.6202821731567383, "learning_rate": 5.597546318254419e-06, "loss": 0.465, "step": 23981 }, { "epoch": 0.6558192955589587, "grad_norm": 1.1767908334732056, "learning_rate": 5.5967510816817085e-06, "loss": 0.4863, "step": 23982 }, { "epoch": 0.6558466418726756, "grad_norm": 1.234224796295166, "learning_rate": 5.595955879651247e-06, "loss": 0.4539, "step": 23983 }, { "epoch": 0.6558739881863924, "grad_norm": 1.2753931283950806, "learning_rate": 5.595160712169273e-06, "loss": 0.4676, "step": 23984 }, { "epoch": 0.6559013345001093, "grad_norm": 1.4510188102722168, "learning_rate": 5.594365579242019e-06, "loss": 0.4548, "step": 23985 }, { "epoch": 0.6559286808138263, "grad_norm": 1.2302316427230835, "learning_rate": 5.593570480875732e-06, "loss": 0.4751, "step": 23986 }, { "epoch": 0.6559560271275432, "grad_norm": 1.2209523916244507, "learning_rate": 5.592775417076644e-06, "loss": 0.4819, "step": 23987 }, { "epoch": 0.6559833734412601, "grad_norm": 1.3273675441741943, "learning_rate": 5.591980387850987e-06, "loss": 0.4229, "step": 23988 }, { "epoch": 0.656010719754977, "grad_norm": 1.3176323175430298, "learning_rate": 5.591185393205009e-06, "loss": 0.4701, "step": 23989 }, { "epoch": 0.656038066068694, "grad_norm": 1.3599398136138916, "learning_rate": 5.59039043314494e-06, "loss": 0.4737, "step": 23990 }, { "epoch": 0.6560654123824109, "grad_norm": 1.2194125652313232, "learning_rate": 5.589595507677017e-06, "loss": 0.4751, "step": 23991 }, { "epoch": 0.6560927586961277, "grad_norm": 1.8279269933700562, "learning_rate": 5.588800616807477e-06, "loss": 0.355, "step": 23992 }, { "epoch": 0.6561201050098446, "grad_norm": 1.2555242776870728, "learning_rate": 5.5880057605425545e-06, "loss": 0.4818, "step": 23993 }, { "epoch": 0.6561474513235616, "grad_norm": 1.1945165395736694, "learning_rate": 5.587210938888481e-06, "loss": 0.4366, "step": 23994 }, { "epoch": 0.6561747976372785, "grad_norm": 1.4074409008026123, "learning_rate": 5.5864161518515e-06, "loss": 0.4793, "step": 23995 }, { "epoch": 0.6562021439509954, "grad_norm": 1.2735915184020996, "learning_rate": 5.585621399437841e-06, "loss": 0.7121, "step": 23996 }, { "epoch": 0.6562294902647123, "grad_norm": 1.5395383834838867, "learning_rate": 5.584826681653737e-06, "loss": 0.4641, "step": 23997 }, { "epoch": 0.6562568365784293, "grad_norm": 6.195730209350586, "learning_rate": 5.5840319985054284e-06, "loss": 0.4801, "step": 23998 }, { "epoch": 0.6562841828921462, "grad_norm": 1.4228928089141846, "learning_rate": 5.5832373499991465e-06, "loss": 0.4457, "step": 23999 }, { "epoch": 0.656311529205863, "grad_norm": 1.3896946907043457, "learning_rate": 5.58244273614112e-06, "loss": 0.4563, "step": 24000 }, { "epoch": 0.6563388755195799, "grad_norm": 1.2309314012527466, "learning_rate": 5.5816481569375925e-06, "loss": 0.4704, "step": 24001 }, { "epoch": 0.6563662218332968, "grad_norm": 1.4084959030151367, "learning_rate": 5.580853612394791e-06, "loss": 0.4679, "step": 24002 }, { "epoch": 0.6563935681470138, "grad_norm": 1.2558354139328003, "learning_rate": 5.580059102518944e-06, "loss": 0.4875, "step": 24003 }, { "epoch": 0.6564209144607307, "grad_norm": 1.5740071535110474, "learning_rate": 5.579264627316296e-06, "loss": 0.4561, "step": 24004 }, { "epoch": 0.6564482607744476, "grad_norm": 2.4625892639160156, "learning_rate": 5.578470186793071e-06, "loss": 0.4685, "step": 24005 }, { "epoch": 0.6564756070881645, "grad_norm": 1.1960090398788452, "learning_rate": 5.577675780955504e-06, "loss": 0.4658, "step": 24006 }, { "epoch": 0.6565029534018815, "grad_norm": 1.274698257446289, "learning_rate": 5.576881409809821e-06, "loss": 0.4656, "step": 24007 }, { "epoch": 0.6565302997155983, "grad_norm": 1.635472059249878, "learning_rate": 5.576087073362264e-06, "loss": 0.3438, "step": 24008 }, { "epoch": 0.6565576460293152, "grad_norm": 1.49898099899292, "learning_rate": 5.575292771619057e-06, "loss": 0.4286, "step": 24009 }, { "epoch": 0.6565849923430321, "grad_norm": 1.2394806146621704, "learning_rate": 5.57449850458643e-06, "loss": 0.4752, "step": 24010 }, { "epoch": 0.6566123386567491, "grad_norm": 1.433457851409912, "learning_rate": 5.573704272270621e-06, "loss": 0.5117, "step": 24011 }, { "epoch": 0.656639684970466, "grad_norm": 1.71710205078125, "learning_rate": 5.572910074677856e-06, "loss": 0.4822, "step": 24012 }, { "epoch": 0.6566670312841829, "grad_norm": 1.3507193326950073, "learning_rate": 5.572115911814362e-06, "loss": 0.4905, "step": 24013 }, { "epoch": 0.6566943775978998, "grad_norm": 1.5649086236953735, "learning_rate": 5.5713217836863765e-06, "loss": 0.5031, "step": 24014 }, { "epoch": 0.6567217239116168, "grad_norm": 1.2407039403915405, "learning_rate": 5.570527690300125e-06, "loss": 0.4962, "step": 24015 }, { "epoch": 0.6567490702253336, "grad_norm": 1.8257017135620117, "learning_rate": 5.569733631661833e-06, "loss": 0.4891, "step": 24016 }, { "epoch": 0.6567764165390505, "grad_norm": 1.263732671737671, "learning_rate": 5.568939607777738e-06, "loss": 0.474, "step": 24017 }, { "epoch": 0.6568037628527674, "grad_norm": 1.2820481061935425, "learning_rate": 5.568145618654064e-06, "loss": 0.3493, "step": 24018 }, { "epoch": 0.6568311091664844, "grad_norm": 1.3773210048675537, "learning_rate": 5.567351664297043e-06, "loss": 0.4539, "step": 24019 }, { "epoch": 0.6568584554802013, "grad_norm": 1.3702565431594849, "learning_rate": 5.566557744712894e-06, "loss": 0.4282, "step": 24020 }, { "epoch": 0.6568858017939182, "grad_norm": 1.1899316310882568, "learning_rate": 5.565763859907857e-06, "loss": 0.4608, "step": 24021 }, { "epoch": 0.6569131481076351, "grad_norm": 1.339869737625122, "learning_rate": 5.564970009888153e-06, "loss": 0.5016, "step": 24022 }, { "epoch": 0.656940494421352, "grad_norm": 1.4330047369003296, "learning_rate": 5.56417619466001e-06, "loss": 0.4615, "step": 24023 }, { "epoch": 0.6569678407350689, "grad_norm": 1.4689441919326782, "learning_rate": 5.5633824142296585e-06, "loss": 0.4594, "step": 24024 }, { "epoch": 0.6569951870487858, "grad_norm": 1.2073098421096802, "learning_rate": 5.562588668603324e-06, "loss": 0.4324, "step": 24025 }, { "epoch": 0.6570225333625027, "grad_norm": 1.2484623193740845, "learning_rate": 5.5617949577872325e-06, "loss": 0.4826, "step": 24026 }, { "epoch": 0.6570498796762196, "grad_norm": 1.2649755477905273, "learning_rate": 5.56100128178761e-06, "loss": 0.7472, "step": 24027 }, { "epoch": 0.6570772259899366, "grad_norm": 1.1843533515930176, "learning_rate": 5.560207640610684e-06, "loss": 0.4983, "step": 24028 }, { "epoch": 0.6571045723036535, "grad_norm": 1.7174807786941528, "learning_rate": 5.5594140342626755e-06, "loss": 0.3618, "step": 24029 }, { "epoch": 0.6571319186173704, "grad_norm": 1.692301630973816, "learning_rate": 5.558620462749818e-06, "loss": 0.4594, "step": 24030 }, { "epoch": 0.6571592649310873, "grad_norm": 1.5446958541870117, "learning_rate": 5.557826926078333e-06, "loss": 0.4509, "step": 24031 }, { "epoch": 0.6571866112448042, "grad_norm": 1.2015032768249512, "learning_rate": 5.557033424254441e-06, "loss": 0.4632, "step": 24032 }, { "epoch": 0.6572139575585211, "grad_norm": 1.2952933311462402, "learning_rate": 5.556239957284376e-06, "loss": 0.4836, "step": 24033 }, { "epoch": 0.657241303872238, "grad_norm": 1.6291536092758179, "learning_rate": 5.555446525174359e-06, "loss": 0.4751, "step": 24034 }, { "epoch": 0.6572686501859549, "grad_norm": 1.450414776802063, "learning_rate": 5.554653127930607e-06, "loss": 0.5278, "step": 24035 }, { "epoch": 0.6572959964996719, "grad_norm": 1.5735095739364624, "learning_rate": 5.553859765559356e-06, "loss": 0.4936, "step": 24036 }, { "epoch": 0.6573233428133888, "grad_norm": 1.5831049680709839, "learning_rate": 5.553066438066823e-06, "loss": 0.4344, "step": 24037 }, { "epoch": 0.6573506891271057, "grad_norm": 1.6879154443740845, "learning_rate": 5.5522731454592296e-06, "loss": 0.481, "step": 24038 }, { "epoch": 0.6573780354408226, "grad_norm": 1.6640750169754028, "learning_rate": 5.551479887742804e-06, "loss": 0.4541, "step": 24039 }, { "epoch": 0.6574053817545394, "grad_norm": 2.9665963649749756, "learning_rate": 5.550686664923768e-06, "loss": 0.4667, "step": 24040 }, { "epoch": 0.6574327280682564, "grad_norm": 2.3620169162750244, "learning_rate": 5.549893477008342e-06, "loss": 0.4527, "step": 24041 }, { "epoch": 0.6574600743819733, "grad_norm": 1.2637532949447632, "learning_rate": 5.549100324002745e-06, "loss": 0.4489, "step": 24042 }, { "epoch": 0.6574874206956902, "grad_norm": 1.252164363861084, "learning_rate": 5.548307205913208e-06, "loss": 0.4864, "step": 24043 }, { "epoch": 0.6575147670094071, "grad_norm": 1.4308667182922363, "learning_rate": 5.54751412274595e-06, "loss": 0.4953, "step": 24044 }, { "epoch": 0.6575421133231241, "grad_norm": 1.678785800933838, "learning_rate": 5.5467210745071845e-06, "loss": 0.3912, "step": 24045 }, { "epoch": 0.657569459636841, "grad_norm": 1.1650125980377197, "learning_rate": 5.545928061203143e-06, "loss": 0.4618, "step": 24046 }, { "epoch": 0.6575968059505579, "grad_norm": 1.5864441394805908, "learning_rate": 5.545135082840043e-06, "loss": 0.3338, "step": 24047 }, { "epoch": 0.6576241522642747, "grad_norm": 2.5912656784057617, "learning_rate": 5.544342139424099e-06, "loss": 0.3301, "step": 24048 }, { "epoch": 0.6576514985779917, "grad_norm": 1.625164270401001, "learning_rate": 5.543549230961544e-06, "loss": 0.4727, "step": 24049 }, { "epoch": 0.6576788448917086, "grad_norm": 1.4705040454864502, "learning_rate": 5.542756357458588e-06, "loss": 0.4875, "step": 24050 }, { "epoch": 0.6577061912054255, "grad_norm": 1.3461328744888306, "learning_rate": 5.541963518921451e-06, "loss": 0.7094, "step": 24051 }, { "epoch": 0.6577335375191424, "grad_norm": 1.2107181549072266, "learning_rate": 5.541170715356361e-06, "loss": 0.4894, "step": 24052 }, { "epoch": 0.6577608838328594, "grad_norm": 1.20274817943573, "learning_rate": 5.540377946769531e-06, "loss": 0.444, "step": 24053 }, { "epoch": 0.6577882301465763, "grad_norm": 1.5471131801605225, "learning_rate": 5.5395852131671805e-06, "loss": 0.3201, "step": 24054 }, { "epoch": 0.6578155764602932, "grad_norm": 1.2972967624664307, "learning_rate": 5.538792514555526e-06, "loss": 0.47, "step": 24055 }, { "epoch": 0.65784292277401, "grad_norm": 1.425856113433838, "learning_rate": 5.537999850940792e-06, "loss": 0.4739, "step": 24056 }, { "epoch": 0.6578702690877269, "grad_norm": 2.043789863586426, "learning_rate": 5.537207222329193e-06, "loss": 0.7087, "step": 24057 }, { "epoch": 0.6578976154014439, "grad_norm": 1.352826714515686, "learning_rate": 5.5364146287269425e-06, "loss": 0.4741, "step": 24058 }, { "epoch": 0.6579249617151608, "grad_norm": 2.138885736465454, "learning_rate": 5.535622070140269e-06, "loss": 0.3308, "step": 24059 }, { "epoch": 0.6579523080288777, "grad_norm": 1.250903606414795, "learning_rate": 5.534829546575383e-06, "loss": 0.4726, "step": 24060 }, { "epoch": 0.6579796543425946, "grad_norm": 1.2852270603179932, "learning_rate": 5.534037058038504e-06, "loss": 0.4888, "step": 24061 }, { "epoch": 0.6580070006563116, "grad_norm": 1.2705920934677124, "learning_rate": 5.533244604535846e-06, "loss": 0.4685, "step": 24062 }, { "epoch": 0.6580343469700285, "grad_norm": 1.3562442064285278, "learning_rate": 5.532452186073627e-06, "loss": 0.4623, "step": 24063 }, { "epoch": 0.6580616932837453, "grad_norm": 1.3876808881759644, "learning_rate": 5.5316598026580595e-06, "loss": 0.4559, "step": 24064 }, { "epoch": 0.6580890395974622, "grad_norm": 1.2151814699172974, "learning_rate": 5.530867454295368e-06, "loss": 0.4782, "step": 24065 }, { "epoch": 0.6581163859111792, "grad_norm": 1.4032593965530396, "learning_rate": 5.530075140991762e-06, "loss": 0.5018, "step": 24066 }, { "epoch": 0.6581437322248961, "grad_norm": 1.3505579233169556, "learning_rate": 5.529282862753456e-06, "loss": 0.4994, "step": 24067 }, { "epoch": 0.658171078538613, "grad_norm": 1.2673391103744507, "learning_rate": 5.528490619586672e-06, "loss": 0.3502, "step": 24068 }, { "epoch": 0.6581984248523299, "grad_norm": 1.3079142570495605, "learning_rate": 5.52769841149762e-06, "loss": 0.4642, "step": 24069 }, { "epoch": 0.6582257711660469, "grad_norm": 1.5235323905944824, "learning_rate": 5.526906238492511e-06, "loss": 0.51, "step": 24070 }, { "epoch": 0.6582531174797638, "grad_norm": 1.3599095344543457, "learning_rate": 5.526114100577568e-06, "loss": 0.4785, "step": 24071 }, { "epoch": 0.6582804637934806, "grad_norm": 1.3877419233322144, "learning_rate": 5.525321997759001e-06, "loss": 0.4952, "step": 24072 }, { "epoch": 0.6583078101071975, "grad_norm": 1.3634541034698486, "learning_rate": 5.52452993004302e-06, "loss": 0.4657, "step": 24073 }, { "epoch": 0.6583351564209144, "grad_norm": 1.446054458618164, "learning_rate": 5.523737897435846e-06, "loss": 0.4734, "step": 24074 }, { "epoch": 0.6583625027346314, "grad_norm": 1.2669519186019897, "learning_rate": 5.522945899943688e-06, "loss": 0.48, "step": 24075 }, { "epoch": 0.6583898490483483, "grad_norm": 1.1488977670669556, "learning_rate": 5.522153937572761e-06, "loss": 0.513, "step": 24076 }, { "epoch": 0.6584171953620652, "grad_norm": 1.377258539199829, "learning_rate": 5.52136201032927e-06, "loss": 0.4842, "step": 24077 }, { "epoch": 0.6584445416757821, "grad_norm": 1.3088802099227905, "learning_rate": 5.520570118219438e-06, "loss": 0.4728, "step": 24078 }, { "epoch": 0.658471887989499, "grad_norm": 1.4623687267303467, "learning_rate": 5.519778261249473e-06, "loss": 0.4273, "step": 24079 }, { "epoch": 0.6584992343032159, "grad_norm": 1.7144142389297485, "learning_rate": 5.5189864394255846e-06, "loss": 0.3511, "step": 24080 }, { "epoch": 0.6585265806169328, "grad_norm": 1.3048967123031616, "learning_rate": 5.518194652753988e-06, "loss": 0.4974, "step": 24081 }, { "epoch": 0.6585539269306497, "grad_norm": 1.7034510374069214, "learning_rate": 5.517402901240895e-06, "loss": 0.3731, "step": 24082 }, { "epoch": 0.6585812732443667, "grad_norm": 1.4532339572906494, "learning_rate": 5.516611184892508e-06, "loss": 0.4646, "step": 24083 }, { "epoch": 0.6586086195580836, "grad_norm": 1.2088696956634521, "learning_rate": 5.5158195037150516e-06, "loss": 0.4738, "step": 24084 }, { "epoch": 0.6586359658718005, "grad_norm": 1.162437081336975, "learning_rate": 5.5150278577147275e-06, "loss": 0.4237, "step": 24085 }, { "epoch": 0.6586633121855174, "grad_norm": 1.2559455633163452, "learning_rate": 5.514236246897744e-06, "loss": 0.4811, "step": 24086 }, { "epoch": 0.6586906584992342, "grad_norm": 1.6355903148651123, "learning_rate": 5.513444671270319e-06, "loss": 0.3573, "step": 24087 }, { "epoch": 0.6587180048129512, "grad_norm": 1.3316762447357178, "learning_rate": 5.512653130838659e-06, "loss": 0.4682, "step": 24088 }, { "epoch": 0.6587453511266681, "grad_norm": 1.2911536693572998, "learning_rate": 5.5118616256089664e-06, "loss": 0.4562, "step": 24089 }, { "epoch": 0.658772697440385, "grad_norm": 1.531246304512024, "learning_rate": 5.511070155587461e-06, "loss": 0.4243, "step": 24090 }, { "epoch": 0.658800043754102, "grad_norm": 1.1960606575012207, "learning_rate": 5.510278720780346e-06, "loss": 0.5064, "step": 24091 }, { "epoch": 0.6588273900678189, "grad_norm": 1.5386896133422852, "learning_rate": 5.509487321193833e-06, "loss": 0.4566, "step": 24092 }, { "epoch": 0.6588547363815358, "grad_norm": 1.160061240196228, "learning_rate": 5.508695956834123e-06, "loss": 0.3368, "step": 24093 }, { "epoch": 0.6588820826952527, "grad_norm": 1.774316430091858, "learning_rate": 5.507904627707434e-06, "loss": 0.4501, "step": 24094 }, { "epoch": 0.6589094290089695, "grad_norm": 1.225374698638916, "learning_rate": 5.507113333819968e-06, "loss": 0.4879, "step": 24095 }, { "epoch": 0.6589367753226865, "grad_norm": 1.5934919118881226, "learning_rate": 5.5063220751779335e-06, "loss": 0.4673, "step": 24096 }, { "epoch": 0.6589641216364034, "grad_norm": 1.519182562828064, "learning_rate": 5.505530851787538e-06, "loss": 0.438, "step": 24097 }, { "epoch": 0.6589914679501203, "grad_norm": 1.2497788667678833, "learning_rate": 5.504739663654987e-06, "loss": 0.465, "step": 24098 }, { "epoch": 0.6590188142638372, "grad_norm": 1.2835757732391357, "learning_rate": 5.503948510786486e-06, "loss": 0.4601, "step": 24099 }, { "epoch": 0.6590461605775542, "grad_norm": 1.2900294065475464, "learning_rate": 5.503157393188248e-06, "loss": 0.4907, "step": 24100 }, { "epoch": 0.6590735068912711, "grad_norm": 1.1401554346084595, "learning_rate": 5.502366310866474e-06, "loss": 0.4637, "step": 24101 }, { "epoch": 0.659100853204988, "grad_norm": 1.3700830936431885, "learning_rate": 5.501575263827365e-06, "loss": 0.4676, "step": 24102 }, { "epoch": 0.6591281995187048, "grad_norm": 2.3162670135498047, "learning_rate": 5.500784252077138e-06, "loss": 0.3221, "step": 24103 }, { "epoch": 0.6591555458324218, "grad_norm": 1.2792469263076782, "learning_rate": 5.499993275621992e-06, "loss": 0.7499, "step": 24104 }, { "epoch": 0.6591828921461387, "grad_norm": 1.7664198875427246, "learning_rate": 5.499202334468128e-06, "loss": 0.4982, "step": 24105 }, { "epoch": 0.6592102384598556, "grad_norm": 1.3654077053070068, "learning_rate": 5.4984114286217596e-06, "loss": 0.4657, "step": 24106 }, { "epoch": 0.6592375847735725, "grad_norm": 1.2211471796035767, "learning_rate": 5.497620558089085e-06, "loss": 0.4771, "step": 24107 }, { "epoch": 0.6592649310872895, "grad_norm": 1.6066244840621948, "learning_rate": 5.496829722876308e-06, "loss": 0.4989, "step": 24108 }, { "epoch": 0.6592922774010064, "grad_norm": 1.1340428590774536, "learning_rate": 5.496038922989637e-06, "loss": 0.7069, "step": 24109 }, { "epoch": 0.6593196237147233, "grad_norm": 6.313777446746826, "learning_rate": 5.495248158435276e-06, "loss": 0.4734, "step": 24110 }, { "epoch": 0.6593469700284401, "grad_norm": 1.565825343132019, "learning_rate": 5.494457429219417e-06, "loss": 0.4429, "step": 24111 }, { "epoch": 0.659374316342157, "grad_norm": 1.1891990900039673, "learning_rate": 5.493666735348279e-06, "loss": 0.4341, "step": 24112 }, { "epoch": 0.659401662655874, "grad_norm": 1.4206939935684204, "learning_rate": 5.4928760768280564e-06, "loss": 0.5086, "step": 24113 }, { "epoch": 0.6594290089695909, "grad_norm": 1.2433955669403076, "learning_rate": 5.492085453664953e-06, "loss": 0.5083, "step": 24114 }, { "epoch": 0.6594563552833078, "grad_norm": 1.6221836805343628, "learning_rate": 5.491294865865164e-06, "loss": 0.3426, "step": 24115 }, { "epoch": 0.6594837015970247, "grad_norm": 1.1894723176956177, "learning_rate": 5.490504313434906e-06, "loss": 0.7081, "step": 24116 }, { "epoch": 0.6595110479107417, "grad_norm": 1.5877741575241089, "learning_rate": 5.489713796380369e-06, "loss": 0.4948, "step": 24117 }, { "epoch": 0.6595383942244586, "grad_norm": 1.604745626449585, "learning_rate": 5.488923314707756e-06, "loss": 0.3543, "step": 24118 }, { "epoch": 0.6595657405381754, "grad_norm": 1.2412960529327393, "learning_rate": 5.488132868423273e-06, "loss": 0.6956, "step": 24119 }, { "epoch": 0.6595930868518923, "grad_norm": 1.615776777267456, "learning_rate": 5.487342457533118e-06, "loss": 0.3375, "step": 24120 }, { "epoch": 0.6596204331656093, "grad_norm": 1.2674431800842285, "learning_rate": 5.486552082043488e-06, "loss": 0.4597, "step": 24121 }, { "epoch": 0.6596477794793262, "grad_norm": 1.3089184761047363, "learning_rate": 5.485761741960591e-06, "loss": 0.4594, "step": 24122 }, { "epoch": 0.6596751257930431, "grad_norm": 1.5377782583236694, "learning_rate": 5.4849714372906226e-06, "loss": 0.5133, "step": 24123 }, { "epoch": 0.65970247210676, "grad_norm": 1.2211722135543823, "learning_rate": 5.484181168039777e-06, "loss": 0.4835, "step": 24124 }, { "epoch": 0.659729818420477, "grad_norm": 1.511125922203064, "learning_rate": 5.483390934214266e-06, "loss": 0.3442, "step": 24125 }, { "epoch": 0.6597571647341939, "grad_norm": 1.8959332704544067, "learning_rate": 5.482600735820281e-06, "loss": 0.3206, "step": 24126 }, { "epoch": 0.6597845110479107, "grad_norm": 1.1939218044281006, "learning_rate": 5.481810572864021e-06, "loss": 0.4621, "step": 24127 }, { "epoch": 0.6598118573616276, "grad_norm": 1.5113497972488403, "learning_rate": 5.481020445351681e-06, "loss": 0.5001, "step": 24128 }, { "epoch": 0.6598392036753445, "grad_norm": 1.3674860000610352, "learning_rate": 5.480230353289468e-06, "loss": 0.4821, "step": 24129 }, { "epoch": 0.6598665499890615, "grad_norm": 1.248792290687561, "learning_rate": 5.479440296683578e-06, "loss": 0.4733, "step": 24130 }, { "epoch": 0.6598938963027784, "grad_norm": 1.2877411842346191, "learning_rate": 5.478650275540205e-06, "loss": 0.3341, "step": 24131 }, { "epoch": 0.6599212426164953, "grad_norm": 1.149085283279419, "learning_rate": 5.477860289865549e-06, "loss": 0.7093, "step": 24132 }, { "epoch": 0.6599485889302122, "grad_norm": 1.5312331914901733, "learning_rate": 5.477070339665802e-06, "loss": 0.4912, "step": 24133 }, { "epoch": 0.6599759352439292, "grad_norm": 1.413517713546753, "learning_rate": 5.476280424947169e-06, "loss": 0.4392, "step": 24134 }, { "epoch": 0.660003281557646, "grad_norm": 1.2607897520065308, "learning_rate": 5.475490545715844e-06, "loss": 0.471, "step": 24135 }, { "epoch": 0.6600306278713629, "grad_norm": 1.4642952680587769, "learning_rate": 5.474700701978022e-06, "loss": 0.7021, "step": 24136 }, { "epoch": 0.6600579741850798, "grad_norm": 1.6411278247833252, "learning_rate": 5.473910893739894e-06, "loss": 0.4623, "step": 24137 }, { "epoch": 0.6600853204987968, "grad_norm": 1.2783329486846924, "learning_rate": 5.473121121007667e-06, "loss": 0.4621, "step": 24138 }, { "epoch": 0.6601126668125137, "grad_norm": 1.4848687648773193, "learning_rate": 5.47233138378753e-06, "loss": 0.6924, "step": 24139 }, { "epoch": 0.6601400131262306, "grad_norm": 1.1219491958618164, "learning_rate": 5.471541682085675e-06, "loss": 0.4781, "step": 24140 }, { "epoch": 0.6601673594399475, "grad_norm": 1.7736493349075317, "learning_rate": 5.470752015908306e-06, "loss": 0.7727, "step": 24141 }, { "epoch": 0.6601947057536645, "grad_norm": 1.117809772491455, "learning_rate": 5.469962385261612e-06, "loss": 0.3586, "step": 24142 }, { "epoch": 0.6602220520673813, "grad_norm": 1.6881731748580933, "learning_rate": 5.469172790151785e-06, "loss": 0.3297, "step": 24143 }, { "epoch": 0.6602493983810982, "grad_norm": 1.4870109558105469, "learning_rate": 5.468383230585026e-06, "loss": 0.4777, "step": 24144 }, { "epoch": 0.6602767446948151, "grad_norm": 1.3573954105377197, "learning_rate": 5.4675937065675246e-06, "loss": 0.4682, "step": 24145 }, { "epoch": 0.660304091008532, "grad_norm": 1.4113019704818726, "learning_rate": 5.4668042181054714e-06, "loss": 0.4904, "step": 24146 }, { "epoch": 0.660331437322249, "grad_norm": 1.144994854927063, "learning_rate": 5.4660147652050675e-06, "loss": 0.4761, "step": 24147 }, { "epoch": 0.6603587836359659, "grad_norm": 1.3253909349441528, "learning_rate": 5.465225347872502e-06, "loss": 0.4745, "step": 24148 }, { "epoch": 0.6603861299496828, "grad_norm": 1.4939913749694824, "learning_rate": 5.4644359661139665e-06, "loss": 0.3367, "step": 24149 }, { "epoch": 0.6604134762633997, "grad_norm": 1.5169650316238403, "learning_rate": 5.4636466199356515e-06, "loss": 0.4819, "step": 24150 }, { "epoch": 0.6604408225771166, "grad_norm": 1.3172798156738281, "learning_rate": 5.462857309343756e-06, "loss": 0.4917, "step": 24151 }, { "epoch": 0.6604681688908335, "grad_norm": 1.1818538904190063, "learning_rate": 5.462068034344468e-06, "loss": 0.4472, "step": 24152 }, { "epoch": 0.6604955152045504, "grad_norm": 1.4434245824813843, "learning_rate": 5.461278794943975e-06, "loss": 0.4987, "step": 24153 }, { "epoch": 0.6605228615182673, "grad_norm": 1.3041152954101562, "learning_rate": 5.460489591148477e-06, "loss": 0.2971, "step": 24154 }, { "epoch": 0.6605502078319843, "grad_norm": 1.4483872652053833, "learning_rate": 5.4597004229641594e-06, "loss": 0.4805, "step": 24155 }, { "epoch": 0.6605775541457012, "grad_norm": 1.4283777475357056, "learning_rate": 5.458911290397211e-06, "loss": 0.7255, "step": 24156 }, { "epoch": 0.6606049004594181, "grad_norm": 1.5275726318359375, "learning_rate": 5.45812219345383e-06, "loss": 0.392, "step": 24157 }, { "epoch": 0.660632246773135, "grad_norm": 1.3507030010223389, "learning_rate": 5.4573331321402015e-06, "loss": 0.473, "step": 24158 }, { "epoch": 0.6606595930868518, "grad_norm": 1.252732276916504, "learning_rate": 5.456544106462512e-06, "loss": 0.3309, "step": 24159 }, { "epoch": 0.6606869394005688, "grad_norm": 1.3221246004104614, "learning_rate": 5.455755116426959e-06, "loss": 0.4769, "step": 24160 }, { "epoch": 0.6607142857142857, "grad_norm": 1.17861807346344, "learning_rate": 5.45496616203973e-06, "loss": 0.4628, "step": 24161 }, { "epoch": 0.6607416320280026, "grad_norm": 1.467164158821106, "learning_rate": 5.454177243307012e-06, "loss": 0.4763, "step": 24162 }, { "epoch": 0.6607689783417195, "grad_norm": 1.41011381149292, "learning_rate": 5.453388360234989e-06, "loss": 0.5016, "step": 24163 }, { "epoch": 0.6607963246554365, "grad_norm": 4.830862045288086, "learning_rate": 5.452599512829859e-06, "loss": 0.3334, "step": 24164 }, { "epoch": 0.6608236709691534, "grad_norm": 1.405593752861023, "learning_rate": 5.4518107010978075e-06, "loss": 0.7383, "step": 24165 }, { "epoch": 0.6608510172828703, "grad_norm": 1.2348307371139526, "learning_rate": 5.451021925045019e-06, "loss": 0.4535, "step": 24166 }, { "epoch": 0.6608783635965871, "grad_norm": 1.636096715927124, "learning_rate": 5.450233184677685e-06, "loss": 0.3506, "step": 24167 }, { "epoch": 0.6609057099103041, "grad_norm": 1.8115988969802856, "learning_rate": 5.4494444800019866e-06, "loss": 0.4476, "step": 24168 }, { "epoch": 0.660933056224021, "grad_norm": 1.4280359745025635, "learning_rate": 5.448655811024119e-06, "loss": 0.4742, "step": 24169 }, { "epoch": 0.6609604025377379, "grad_norm": 1.3254165649414062, "learning_rate": 5.447867177750267e-06, "loss": 0.4732, "step": 24170 }, { "epoch": 0.6609877488514548, "grad_norm": 1.287861943244934, "learning_rate": 5.4470785801866155e-06, "loss": 0.4751, "step": 24171 }, { "epoch": 0.6610150951651718, "grad_norm": 1.5726890563964844, "learning_rate": 5.446290018339347e-06, "loss": 0.3656, "step": 24172 }, { "epoch": 0.6610424414788887, "grad_norm": 1.3957440853118896, "learning_rate": 5.445501492214656e-06, "loss": 0.4892, "step": 24173 }, { "epoch": 0.6610697877926056, "grad_norm": 1.403644323348999, "learning_rate": 5.444713001818723e-06, "loss": 0.49, "step": 24174 }, { "epoch": 0.6610971341063224, "grad_norm": 1.2367699146270752, "learning_rate": 5.443924547157732e-06, "loss": 0.4559, "step": 24175 }, { "epoch": 0.6611244804200394, "grad_norm": 1.3490800857543945, "learning_rate": 5.443136128237873e-06, "loss": 0.4884, "step": 24176 }, { "epoch": 0.6611518267337563, "grad_norm": 2.346841812133789, "learning_rate": 5.44234774506533e-06, "loss": 0.4926, "step": 24177 }, { "epoch": 0.6611791730474732, "grad_norm": 1.4599862098693848, "learning_rate": 5.441559397646283e-06, "loss": 0.4552, "step": 24178 }, { "epoch": 0.6612065193611901, "grad_norm": 1.2454719543457031, "learning_rate": 5.440771085986923e-06, "loss": 0.4607, "step": 24179 }, { "epoch": 0.661233865674907, "grad_norm": 1.438922643661499, "learning_rate": 5.439982810093429e-06, "loss": 0.4667, "step": 24180 }, { "epoch": 0.661261211988624, "grad_norm": 1.437168836593628, "learning_rate": 5.439194569971984e-06, "loss": 0.4743, "step": 24181 }, { "epoch": 0.6612885583023408, "grad_norm": 1.5724830627441406, "learning_rate": 5.438406365628779e-06, "loss": 0.3501, "step": 24182 }, { "epoch": 0.6613159046160577, "grad_norm": 1.1893497705459595, "learning_rate": 5.437618197069991e-06, "loss": 0.456, "step": 24183 }, { "epoch": 0.6613432509297746, "grad_norm": 1.57206130027771, "learning_rate": 5.436830064301805e-06, "loss": 0.7045, "step": 24184 }, { "epoch": 0.6613705972434916, "grad_norm": 1.308566927909851, "learning_rate": 5.436041967330399e-06, "loss": 0.4856, "step": 24185 }, { "epoch": 0.6613979435572085, "grad_norm": 1.0917766094207764, "learning_rate": 5.435253906161963e-06, "loss": 0.3509, "step": 24186 }, { "epoch": 0.6614252898709254, "grad_norm": 1.5511460304260254, "learning_rate": 5.434465880802675e-06, "loss": 0.4736, "step": 24187 }, { "epoch": 0.6614526361846423, "grad_norm": 1.4828966856002808, "learning_rate": 5.433677891258714e-06, "loss": 0.3093, "step": 24188 }, { "epoch": 0.6614799824983593, "grad_norm": 1.4865343570709229, "learning_rate": 5.432889937536269e-06, "loss": 0.3463, "step": 24189 }, { "epoch": 0.6615073288120761, "grad_norm": 1.2186250686645508, "learning_rate": 5.432102019641516e-06, "loss": 0.4713, "step": 24190 }, { "epoch": 0.661534675125793, "grad_norm": 1.2246912717819214, "learning_rate": 5.431314137580634e-06, "loss": 0.4465, "step": 24191 }, { "epoch": 0.6615620214395099, "grad_norm": 1.414510726928711, "learning_rate": 5.430526291359811e-06, "loss": 0.4719, "step": 24192 }, { "epoch": 0.6615893677532269, "grad_norm": 1.3955738544464111, "learning_rate": 5.429738480985223e-06, "loss": 0.4497, "step": 24193 }, { "epoch": 0.6616167140669438, "grad_norm": 1.2202337980270386, "learning_rate": 5.428950706463046e-06, "loss": 0.4361, "step": 24194 }, { "epoch": 0.6616440603806607, "grad_norm": 1.5690515041351318, "learning_rate": 5.4281629677994695e-06, "loss": 0.4474, "step": 24195 }, { "epoch": 0.6616714066943776, "grad_norm": 1.315277338027954, "learning_rate": 5.427375265000667e-06, "loss": 0.4879, "step": 24196 }, { "epoch": 0.6616987530080946, "grad_norm": 1.2505884170532227, "learning_rate": 5.4265875980728185e-06, "loss": 0.5029, "step": 24197 }, { "epoch": 0.6617260993218114, "grad_norm": 1.3700337409973145, "learning_rate": 5.425799967022101e-06, "loss": 0.4476, "step": 24198 }, { "epoch": 0.6617534456355283, "grad_norm": 1.2542674541473389, "learning_rate": 5.4250123718546976e-06, "loss": 0.462, "step": 24199 }, { "epoch": 0.6617807919492452, "grad_norm": 1.2175648212432861, "learning_rate": 5.424224812576785e-06, "loss": 0.5202, "step": 24200 }, { "epoch": 0.6618081382629621, "grad_norm": 1.2118242979049683, "learning_rate": 5.423437289194541e-06, "loss": 0.4632, "step": 24201 }, { "epoch": 0.6618354845766791, "grad_norm": 1.3139420747756958, "learning_rate": 5.422649801714144e-06, "loss": 0.7121, "step": 24202 }, { "epoch": 0.661862830890396, "grad_norm": 1.3511430025100708, "learning_rate": 5.421862350141767e-06, "loss": 0.4814, "step": 24203 }, { "epoch": 0.6618901772041129, "grad_norm": 1.3545643091201782, "learning_rate": 5.421074934483594e-06, "loss": 0.4807, "step": 24204 }, { "epoch": 0.6619175235178298, "grad_norm": 1.7414069175720215, "learning_rate": 5.420287554745802e-06, "loss": 0.3373, "step": 24205 }, { "epoch": 0.6619448698315467, "grad_norm": 1.2925572395324707, "learning_rate": 5.419500210934563e-06, "loss": 0.7174, "step": 24206 }, { "epoch": 0.6619722161452636, "grad_norm": 1.775641679763794, "learning_rate": 5.4187129030560535e-06, "loss": 0.4862, "step": 24207 }, { "epoch": 0.6619995624589805, "grad_norm": 1.7124733924865723, "learning_rate": 5.417925631116455e-06, "loss": 0.4611, "step": 24208 }, { "epoch": 0.6620269087726974, "grad_norm": 1.1956008672714233, "learning_rate": 5.417138395121941e-06, "loss": 0.4409, "step": 24209 }, { "epoch": 0.6620542550864144, "grad_norm": 1.361686110496521, "learning_rate": 5.416351195078682e-06, "loss": 0.4795, "step": 24210 }, { "epoch": 0.6620816014001313, "grad_norm": 1.2031490802764893, "learning_rate": 5.415564030992863e-06, "loss": 0.4656, "step": 24211 }, { "epoch": 0.6621089477138482, "grad_norm": 2.0993618965148926, "learning_rate": 5.414776902870653e-06, "loss": 0.3654, "step": 24212 }, { "epoch": 0.6621362940275651, "grad_norm": 1.4931460618972778, "learning_rate": 5.413989810718223e-06, "loss": 0.7023, "step": 24213 }, { "epoch": 0.662163640341282, "grad_norm": 1.5135066509246826, "learning_rate": 5.413202754541758e-06, "loss": 0.4716, "step": 24214 }, { "epoch": 0.6621909866549989, "grad_norm": 1.6731719970703125, "learning_rate": 5.4124157343474256e-06, "loss": 0.3522, "step": 24215 }, { "epoch": 0.6622183329687158, "grad_norm": 1.4104422330856323, "learning_rate": 5.411628750141398e-06, "loss": 0.4978, "step": 24216 }, { "epoch": 0.6622456792824327, "grad_norm": 1.1278765201568604, "learning_rate": 5.4108418019298536e-06, "loss": 0.3473, "step": 24217 }, { "epoch": 0.6622730255961496, "grad_norm": 1.3029227256774902, "learning_rate": 5.410054889718965e-06, "loss": 0.4727, "step": 24218 }, { "epoch": 0.6623003719098666, "grad_norm": 1.5376441478729248, "learning_rate": 5.4092680135149e-06, "loss": 0.7493, "step": 24219 }, { "epoch": 0.6623277182235835, "grad_norm": 1.290950059890747, "learning_rate": 5.408481173323839e-06, "loss": 0.4967, "step": 24220 }, { "epoch": 0.6623550645373004, "grad_norm": 1.2886101007461548, "learning_rate": 5.407694369151953e-06, "loss": 0.4826, "step": 24221 }, { "epoch": 0.6623824108510172, "grad_norm": 1.2687782049179077, "learning_rate": 5.406907601005409e-06, "loss": 0.4704, "step": 24222 }, { "epoch": 0.6624097571647342, "grad_norm": 1.547594428062439, "learning_rate": 5.4061208688903795e-06, "loss": 0.3617, "step": 24223 }, { "epoch": 0.6624371034784511, "grad_norm": 1.4527215957641602, "learning_rate": 5.405334172813044e-06, "loss": 0.3228, "step": 24224 }, { "epoch": 0.662464449792168, "grad_norm": 1.1442372798919678, "learning_rate": 5.404547512779568e-06, "loss": 0.4676, "step": 24225 }, { "epoch": 0.6624917961058849, "grad_norm": 1.6807523965835571, "learning_rate": 5.40376088879612e-06, "loss": 0.7211, "step": 24226 }, { "epoch": 0.6625191424196019, "grad_norm": 1.582389235496521, "learning_rate": 5.402974300868877e-06, "loss": 0.4871, "step": 24227 }, { "epoch": 0.6625464887333188, "grad_norm": 1.509676218032837, "learning_rate": 5.402187749004009e-06, "loss": 0.4834, "step": 24228 }, { "epoch": 0.6625738350470357, "grad_norm": 1.357226848602295, "learning_rate": 5.401401233207679e-06, "loss": 0.4934, "step": 24229 }, { "epoch": 0.6626011813607525, "grad_norm": 1.3585591316223145, "learning_rate": 5.4006147534860665e-06, "loss": 0.4818, "step": 24230 }, { "epoch": 0.6626285276744694, "grad_norm": 1.5020500421524048, "learning_rate": 5.399828309845336e-06, "loss": 0.7548, "step": 24231 }, { "epoch": 0.6626558739881864, "grad_norm": 1.4611762762069702, "learning_rate": 5.3990419022916555e-06, "loss": 0.5191, "step": 24232 }, { "epoch": 0.6626832203019033, "grad_norm": 1.4579027891159058, "learning_rate": 5.3982555308311976e-06, "loss": 0.2992, "step": 24233 }, { "epoch": 0.6627105666156202, "grad_norm": 1.172252893447876, "learning_rate": 5.397469195470133e-06, "loss": 0.4468, "step": 24234 }, { "epoch": 0.6627379129293371, "grad_norm": 1.296180248260498, "learning_rate": 5.3966828962146264e-06, "loss": 0.4353, "step": 24235 }, { "epoch": 0.6627652592430541, "grad_norm": 1.1593562364578247, "learning_rate": 5.395896633070846e-06, "loss": 0.473, "step": 24236 }, { "epoch": 0.662792605556771, "grad_norm": 1.6877455711364746, "learning_rate": 5.3951104060449614e-06, "loss": 0.3542, "step": 24237 }, { "epoch": 0.6628199518704878, "grad_norm": 1.54701566696167, "learning_rate": 5.394324215143136e-06, "loss": 0.3204, "step": 24238 }, { "epoch": 0.6628472981842047, "grad_norm": 1.4444338083267212, "learning_rate": 5.393538060371546e-06, "loss": 0.4469, "step": 24239 }, { "epoch": 0.6628746444979217, "grad_norm": 1.4152448177337646, "learning_rate": 5.3927519417363515e-06, "loss": 0.7172, "step": 24240 }, { "epoch": 0.6629019908116386, "grad_norm": 1.1443747282028198, "learning_rate": 5.391965859243719e-06, "loss": 0.4485, "step": 24241 }, { "epoch": 0.6629293371253555, "grad_norm": 2.047011375427246, "learning_rate": 5.39117981289982e-06, "loss": 0.366, "step": 24242 }, { "epoch": 0.6629566834390724, "grad_norm": 1.2445927858352661, "learning_rate": 5.390393802710819e-06, "loss": 0.2969, "step": 24243 }, { "epoch": 0.6629840297527894, "grad_norm": 2.111835241317749, "learning_rate": 5.389607828682881e-06, "loss": 0.3504, "step": 24244 }, { "epoch": 0.6630113760665063, "grad_norm": 1.2681394815444946, "learning_rate": 5.388821890822169e-06, "loss": 0.4407, "step": 24245 }, { "epoch": 0.6630387223802231, "grad_norm": 1.1817009449005127, "learning_rate": 5.388035989134857e-06, "loss": 0.4775, "step": 24246 }, { "epoch": 0.66306606869394, "grad_norm": 1.813113808631897, "learning_rate": 5.387250123627101e-06, "loss": 0.4673, "step": 24247 }, { "epoch": 0.663093415007657, "grad_norm": 1.5472767353057861, "learning_rate": 5.3864642943050685e-06, "loss": 0.3293, "step": 24248 }, { "epoch": 0.6631207613213739, "grad_norm": 1.4582250118255615, "learning_rate": 5.385678501174928e-06, "loss": 0.4856, "step": 24249 }, { "epoch": 0.6631481076350908, "grad_norm": 1.4577183723449707, "learning_rate": 5.3848927442428425e-06, "loss": 0.4596, "step": 24250 }, { "epoch": 0.6631754539488077, "grad_norm": 3.0223731994628906, "learning_rate": 5.3841070235149685e-06, "loss": 0.477, "step": 24251 }, { "epoch": 0.6632028002625246, "grad_norm": 1.2677429914474487, "learning_rate": 5.38332133899748e-06, "loss": 0.482, "step": 24252 }, { "epoch": 0.6632301465762416, "grad_norm": 1.3297394514083862, "learning_rate": 5.3825356906965374e-06, "loss": 0.4864, "step": 24253 }, { "epoch": 0.6632574928899584, "grad_norm": 1.5133081674575806, "learning_rate": 5.3817500786183e-06, "loss": 0.4331, "step": 24254 }, { "epoch": 0.6632848392036753, "grad_norm": 4.276084899902344, "learning_rate": 5.3809645027689365e-06, "loss": 0.7192, "step": 24255 }, { "epoch": 0.6633121855173922, "grad_norm": 1.323800802230835, "learning_rate": 5.380178963154605e-06, "loss": 0.4525, "step": 24256 }, { "epoch": 0.6633395318311092, "grad_norm": 2.0703601837158203, "learning_rate": 5.3793934597814715e-06, "loss": 0.4559, "step": 24257 }, { "epoch": 0.6633668781448261, "grad_norm": 1.6269906759262085, "learning_rate": 5.378607992655691e-06, "loss": 0.7393, "step": 24258 }, { "epoch": 0.663394224458543, "grad_norm": 1.8069959878921509, "learning_rate": 5.377822561783435e-06, "loss": 0.4236, "step": 24259 }, { "epoch": 0.6634215707722599, "grad_norm": 1.2587918043136597, "learning_rate": 5.37703716717086e-06, "loss": 0.472, "step": 24260 }, { "epoch": 0.6634489170859769, "grad_norm": 1.6070870161056519, "learning_rate": 5.376251808824123e-06, "loss": 0.4313, "step": 24261 }, { "epoch": 0.6634762633996937, "grad_norm": 2.23218035697937, "learning_rate": 5.375466486749392e-06, "loss": 0.3755, "step": 24262 }, { "epoch": 0.6635036097134106, "grad_norm": 1.439356803894043, "learning_rate": 5.374681200952827e-06, "loss": 0.4898, "step": 24263 }, { "epoch": 0.6635309560271275, "grad_norm": 1.5272823572158813, "learning_rate": 5.373895951440582e-06, "loss": 0.4543, "step": 24264 }, { "epoch": 0.6635583023408445, "grad_norm": 1.540790319442749, "learning_rate": 5.373110738218824e-06, "loss": 0.4615, "step": 24265 }, { "epoch": 0.6635856486545614, "grad_norm": 1.2954115867614746, "learning_rate": 5.372325561293712e-06, "loss": 0.4599, "step": 24266 }, { "epoch": 0.6636129949682783, "grad_norm": 1.2196056842803955, "learning_rate": 5.371540420671398e-06, "loss": 0.381, "step": 24267 }, { "epoch": 0.6636403412819952, "grad_norm": 1.2275526523590088, "learning_rate": 5.370755316358051e-06, "loss": 0.487, "step": 24268 }, { "epoch": 0.6636676875957122, "grad_norm": 1.4142370223999023, "learning_rate": 5.369970248359827e-06, "loss": 0.4796, "step": 24269 }, { "epoch": 0.663695033909429, "grad_norm": 1.2601587772369385, "learning_rate": 5.369185216682883e-06, "loss": 0.6934, "step": 24270 }, { "epoch": 0.6637223802231459, "grad_norm": 1.2967759370803833, "learning_rate": 5.368400221333378e-06, "loss": 0.4827, "step": 24271 }, { "epoch": 0.6637497265368628, "grad_norm": 1.2655807733535767, "learning_rate": 5.367615262317469e-06, "loss": 0.4688, "step": 24272 }, { "epoch": 0.6637770728505797, "grad_norm": 1.8791569471359253, "learning_rate": 5.3668303396413116e-06, "loss": 0.3644, "step": 24273 }, { "epoch": 0.6638044191642967, "grad_norm": 1.537238597869873, "learning_rate": 5.366045453311068e-06, "loss": 0.4815, "step": 24274 }, { "epoch": 0.6638317654780136, "grad_norm": 1.427746295928955, "learning_rate": 5.365260603332895e-06, "loss": 0.4499, "step": 24275 }, { "epoch": 0.6638591117917305, "grad_norm": 1.3449304103851318, "learning_rate": 5.364475789712945e-06, "loss": 0.4863, "step": 24276 }, { "epoch": 0.6638864581054474, "grad_norm": 1.3391796350479126, "learning_rate": 5.363691012457382e-06, "loss": 0.4845, "step": 24277 }, { "epoch": 0.6639138044191643, "grad_norm": 1.4084354639053345, "learning_rate": 5.362906271572357e-06, "loss": 0.4733, "step": 24278 }, { "epoch": 0.6639411507328812, "grad_norm": 1.6640934944152832, "learning_rate": 5.362121567064029e-06, "loss": 0.3407, "step": 24279 }, { "epoch": 0.6639684970465981, "grad_norm": 1.6781991720199585, "learning_rate": 5.361336898938547e-06, "loss": 0.4893, "step": 24280 }, { "epoch": 0.663995843360315, "grad_norm": 1.621977686882019, "learning_rate": 5.360552267202075e-06, "loss": 0.452, "step": 24281 }, { "epoch": 0.664023189674032, "grad_norm": 1.357134461402893, "learning_rate": 5.359767671860765e-06, "loss": 0.5066, "step": 24282 }, { "epoch": 0.6640505359877489, "grad_norm": 1.194724202156067, "learning_rate": 5.3589831129207684e-06, "loss": 0.4806, "step": 24283 }, { "epoch": 0.6640778823014658, "grad_norm": 1.4374791383743286, "learning_rate": 5.3581985903882485e-06, "loss": 0.4909, "step": 24284 }, { "epoch": 0.6641052286151826, "grad_norm": 1.5465140342712402, "learning_rate": 5.3574141042693515e-06, "loss": 0.335, "step": 24285 }, { "epoch": 0.6641325749288995, "grad_norm": 1.245455265045166, "learning_rate": 5.356629654570231e-06, "loss": 0.4877, "step": 24286 }, { "epoch": 0.6641599212426165, "grad_norm": 1.7450248003005981, "learning_rate": 5.35584524129705e-06, "loss": 0.4629, "step": 24287 }, { "epoch": 0.6641872675563334, "grad_norm": 1.2377113103866577, "learning_rate": 5.3550608644559546e-06, "loss": 0.7345, "step": 24288 }, { "epoch": 0.6642146138700503, "grad_norm": 1.551599383354187, "learning_rate": 5.354276524053096e-06, "loss": 0.4725, "step": 24289 }, { "epoch": 0.6642419601837672, "grad_norm": 1.249769687652588, "learning_rate": 5.353492220094634e-06, "loss": 0.4747, "step": 24290 }, { "epoch": 0.6642693064974842, "grad_norm": 1.2410582304000854, "learning_rate": 5.352707952586719e-06, "loss": 0.4583, "step": 24291 }, { "epoch": 0.6642966528112011, "grad_norm": 1.6059373617172241, "learning_rate": 5.351923721535501e-06, "loss": 0.4726, "step": 24292 }, { "epoch": 0.6643239991249179, "grad_norm": 1.2301040887832642, "learning_rate": 5.35113952694713e-06, "loss": 0.4647, "step": 24293 }, { "epoch": 0.6643513454386348, "grad_norm": 1.277573823928833, "learning_rate": 5.350355368827764e-06, "loss": 0.4857, "step": 24294 }, { "epoch": 0.6643786917523518, "grad_norm": 1.7202359437942505, "learning_rate": 5.349571247183554e-06, "loss": 0.7506, "step": 24295 }, { "epoch": 0.6644060380660687, "grad_norm": 1.3536357879638672, "learning_rate": 5.348787162020643e-06, "loss": 0.4595, "step": 24296 }, { "epoch": 0.6644333843797856, "grad_norm": 1.2927465438842773, "learning_rate": 5.348003113345192e-06, "loss": 0.4558, "step": 24297 }, { "epoch": 0.6644607306935025, "grad_norm": 1.3380273580551147, "learning_rate": 5.347219101163348e-06, "loss": 0.703, "step": 24298 }, { "epoch": 0.6644880770072195, "grad_norm": 1.600297212600708, "learning_rate": 5.346435125481257e-06, "loss": 0.5255, "step": 24299 }, { "epoch": 0.6645154233209364, "grad_norm": 1.327458143234253, "learning_rate": 5.345651186305075e-06, "loss": 0.7294, "step": 24300 }, { "epoch": 0.6645427696346532, "grad_norm": 1.6563382148742676, "learning_rate": 5.344867283640951e-06, "loss": 0.4873, "step": 24301 }, { "epoch": 0.6645701159483701, "grad_norm": 1.3155908584594727, "learning_rate": 5.3440834174950294e-06, "loss": 0.4658, "step": 24302 }, { "epoch": 0.664597462262087, "grad_norm": 1.5008132457733154, "learning_rate": 5.343299587873466e-06, "loss": 0.3443, "step": 24303 }, { "epoch": 0.664624808575804, "grad_norm": 1.150707483291626, "learning_rate": 5.342515794782408e-06, "loss": 0.4562, "step": 24304 }, { "epoch": 0.6646521548895209, "grad_norm": 1.4254741668701172, "learning_rate": 5.341732038228001e-06, "loss": 0.4285, "step": 24305 }, { "epoch": 0.6646795012032378, "grad_norm": 1.1650283336639404, "learning_rate": 5.340948318216397e-06, "loss": 0.7224, "step": 24306 }, { "epoch": 0.6647068475169547, "grad_norm": 1.41988205909729, "learning_rate": 5.340164634753742e-06, "loss": 0.5251, "step": 24307 }, { "epoch": 0.6647341938306717, "grad_norm": 1.4292408227920532, "learning_rate": 5.339380987846179e-06, "loss": 0.4939, "step": 24308 }, { "epoch": 0.6647615401443885, "grad_norm": 1.3153250217437744, "learning_rate": 5.338597377499866e-06, "loss": 0.4745, "step": 24309 }, { "epoch": 0.6647888864581054, "grad_norm": 1.363550066947937, "learning_rate": 5.337813803720944e-06, "loss": 0.7095, "step": 24310 }, { "epoch": 0.6648162327718223, "grad_norm": 1.4555507898330688, "learning_rate": 5.337030266515557e-06, "loss": 0.3348, "step": 24311 }, { "epoch": 0.6648435790855393, "grad_norm": 1.5225257873535156, "learning_rate": 5.336246765889859e-06, "loss": 0.3358, "step": 24312 }, { "epoch": 0.6648709253992562, "grad_norm": 1.506881833076477, "learning_rate": 5.335463301849992e-06, "loss": 0.4896, "step": 24313 }, { "epoch": 0.6648982717129731, "grad_norm": 1.638612985610962, "learning_rate": 5.334679874402103e-06, "loss": 0.4658, "step": 24314 }, { "epoch": 0.66492561802669, "grad_norm": 1.3091473579406738, "learning_rate": 5.333896483552334e-06, "loss": 0.4828, "step": 24315 }, { "epoch": 0.664952964340407, "grad_norm": 1.3475946187973022, "learning_rate": 5.333113129306837e-06, "loss": 0.4577, "step": 24316 }, { "epoch": 0.6649803106541238, "grad_norm": 1.4310601949691772, "learning_rate": 5.332329811671755e-06, "loss": 0.4798, "step": 24317 }, { "epoch": 0.6650076569678407, "grad_norm": 1.3488160371780396, "learning_rate": 5.331546530653229e-06, "loss": 0.4762, "step": 24318 }, { "epoch": 0.6650350032815576, "grad_norm": 1.6880921125411987, "learning_rate": 5.330763286257409e-06, "loss": 0.3598, "step": 24319 }, { "epoch": 0.6650623495952745, "grad_norm": 1.3915884494781494, "learning_rate": 5.329980078490438e-06, "loss": 0.4834, "step": 24320 }, { "epoch": 0.6650896959089915, "grad_norm": 1.2704432010650635, "learning_rate": 5.329196907358454e-06, "loss": 0.7403, "step": 24321 }, { "epoch": 0.6651170422227084, "grad_norm": 1.4779009819030762, "learning_rate": 5.32841377286761e-06, "loss": 0.3627, "step": 24322 }, { "epoch": 0.6651443885364253, "grad_norm": 1.266405701637268, "learning_rate": 5.327630675024045e-06, "loss": 0.7402, "step": 24323 }, { "epoch": 0.6651717348501422, "grad_norm": 1.789129614830017, "learning_rate": 5.326847613833899e-06, "loss": 0.4676, "step": 24324 }, { "epoch": 0.6651990811638591, "grad_norm": 1.1709538698196411, "learning_rate": 5.326064589303322e-06, "loss": 0.4865, "step": 24325 }, { "epoch": 0.665226427477576, "grad_norm": 1.2454304695129395, "learning_rate": 5.325281601438455e-06, "loss": 0.4589, "step": 24326 }, { "epoch": 0.6652537737912929, "grad_norm": 3.3678348064422607, "learning_rate": 5.324498650245436e-06, "loss": 0.7244, "step": 24327 }, { "epoch": 0.6652811201050098, "grad_norm": 1.289172649383545, "learning_rate": 5.323715735730406e-06, "loss": 0.7183, "step": 24328 }, { "epoch": 0.6653084664187268, "grad_norm": 1.2949023246765137, "learning_rate": 5.322932857899513e-06, "loss": 0.4924, "step": 24329 }, { "epoch": 0.6653358127324437, "grad_norm": 1.2577627897262573, "learning_rate": 5.322150016758895e-06, "loss": 0.4273, "step": 24330 }, { "epoch": 0.6653631590461606, "grad_norm": 1.3404611349105835, "learning_rate": 5.321367212314692e-06, "loss": 0.4913, "step": 24331 }, { "epoch": 0.6653905053598775, "grad_norm": 1.2256344556808472, "learning_rate": 5.320584444573049e-06, "loss": 0.4797, "step": 24332 }, { "epoch": 0.6654178516735944, "grad_norm": 1.5231760740280151, "learning_rate": 5.319801713540105e-06, "loss": 0.4991, "step": 24333 }, { "epoch": 0.6654451979873113, "grad_norm": 1.1486356258392334, "learning_rate": 5.319019019221996e-06, "loss": 0.4574, "step": 24334 }, { "epoch": 0.6654725443010282, "grad_norm": 1.313572645187378, "learning_rate": 5.3182363616248675e-06, "loss": 0.4874, "step": 24335 }, { "epoch": 0.6654998906147451, "grad_norm": 1.7054325342178345, "learning_rate": 5.317453740754859e-06, "loss": 0.4464, "step": 24336 }, { "epoch": 0.665527236928462, "grad_norm": 1.2007044553756714, "learning_rate": 5.316671156618103e-06, "loss": 0.3585, "step": 24337 }, { "epoch": 0.665554583242179, "grad_norm": 1.1892521381378174, "learning_rate": 5.315888609220748e-06, "loss": 0.455, "step": 24338 }, { "epoch": 0.6655819295558959, "grad_norm": 1.3791038990020752, "learning_rate": 5.315106098568928e-06, "loss": 0.4606, "step": 24339 }, { "epoch": 0.6656092758696128, "grad_norm": 1.4628634452819824, "learning_rate": 5.31432362466878e-06, "loss": 0.4793, "step": 24340 }, { "epoch": 0.6656366221833296, "grad_norm": 1.225103497505188, "learning_rate": 5.313541187526451e-06, "loss": 0.4736, "step": 24341 }, { "epoch": 0.6656639684970466, "grad_norm": 1.3005157709121704, "learning_rate": 5.312758787148069e-06, "loss": 0.4867, "step": 24342 }, { "epoch": 0.6656913148107635, "grad_norm": 1.3208216428756714, "learning_rate": 5.311976423539771e-06, "loss": 0.4529, "step": 24343 }, { "epoch": 0.6657186611244804, "grad_norm": 1.5677118301391602, "learning_rate": 5.3111940967077035e-06, "loss": 0.4897, "step": 24344 }, { "epoch": 0.6657460074381973, "grad_norm": 1.5227632522583008, "learning_rate": 5.310411806657998e-06, "loss": 0.4606, "step": 24345 }, { "epoch": 0.6657733537519143, "grad_norm": 1.2996132373809814, "learning_rate": 5.3096295533967866e-06, "loss": 0.7307, "step": 24346 }, { "epoch": 0.6658007000656312, "grad_norm": 1.2264304161071777, "learning_rate": 5.308847336930219e-06, "loss": 0.4717, "step": 24347 }, { "epoch": 0.6658280463793481, "grad_norm": 1.4415709972381592, "learning_rate": 5.308065157264421e-06, "loss": 0.4566, "step": 24348 }, { "epoch": 0.6658553926930649, "grad_norm": 1.2614109516143799, "learning_rate": 5.307283014405531e-06, "loss": 0.4788, "step": 24349 }, { "epoch": 0.6658827390067819, "grad_norm": 1.3489792346954346, "learning_rate": 5.306500908359683e-06, "loss": 0.47, "step": 24350 }, { "epoch": 0.6659100853204988, "grad_norm": 1.4934203624725342, "learning_rate": 5.305718839133018e-06, "loss": 0.4814, "step": 24351 }, { "epoch": 0.6659374316342157, "grad_norm": 1.251243233680725, "learning_rate": 5.304936806731667e-06, "loss": 0.4983, "step": 24352 }, { "epoch": 0.6659647779479326, "grad_norm": 1.4304224252700806, "learning_rate": 5.304154811161762e-06, "loss": 0.5155, "step": 24353 }, { "epoch": 0.6659921242616496, "grad_norm": 6.8601603507995605, "learning_rate": 5.303372852429445e-06, "loss": 0.3102, "step": 24354 }, { "epoch": 0.6660194705753665, "grad_norm": 1.3988561630249023, "learning_rate": 5.3025909305408456e-06, "loss": 0.4635, "step": 24355 }, { "epoch": 0.6660468168890834, "grad_norm": 1.4111590385437012, "learning_rate": 5.301809045502095e-06, "loss": 0.4781, "step": 24356 }, { "epoch": 0.6660741632028002, "grad_norm": 1.4150300025939941, "learning_rate": 5.301027197319335e-06, "loss": 0.4726, "step": 24357 }, { "epoch": 0.6661015095165171, "grad_norm": 1.4886579513549805, "learning_rate": 5.300245385998693e-06, "loss": 0.5065, "step": 24358 }, { "epoch": 0.6661288558302341, "grad_norm": 1.283133864402771, "learning_rate": 5.299463611546299e-06, "loss": 0.4945, "step": 24359 }, { "epoch": 0.666156202143951, "grad_norm": 1.392531156539917, "learning_rate": 5.298681873968295e-06, "loss": 0.4874, "step": 24360 }, { "epoch": 0.6661835484576679, "grad_norm": 1.2249211072921753, "learning_rate": 5.297900173270809e-06, "loss": 0.7691, "step": 24361 }, { "epoch": 0.6662108947713848, "grad_norm": 1.2858835458755493, "learning_rate": 5.297118509459968e-06, "loss": 0.4694, "step": 24362 }, { "epoch": 0.6662382410851018, "grad_norm": 1.3251190185546875, "learning_rate": 5.296336882541912e-06, "loss": 0.7133, "step": 24363 }, { "epoch": 0.6662655873988187, "grad_norm": 1.5104753971099854, "learning_rate": 5.2955552925227716e-06, "loss": 0.4452, "step": 24364 }, { "epoch": 0.6662929337125355, "grad_norm": 1.2435076236724854, "learning_rate": 5.294773739408674e-06, "loss": 0.4636, "step": 24365 }, { "epoch": 0.6663202800262524, "grad_norm": 1.2943085432052612, "learning_rate": 5.293992223205751e-06, "loss": 0.4638, "step": 24366 }, { "epoch": 0.6663476263399694, "grad_norm": 1.274717092514038, "learning_rate": 5.293210743920135e-06, "loss": 0.4743, "step": 24367 }, { "epoch": 0.6663749726536863, "grad_norm": 1.5912079811096191, "learning_rate": 5.292429301557957e-06, "loss": 0.4595, "step": 24368 }, { "epoch": 0.6664023189674032, "grad_norm": 1.4367141723632812, "learning_rate": 5.291647896125345e-06, "loss": 0.4327, "step": 24369 }, { "epoch": 0.6664296652811201, "grad_norm": 1.3958244323730469, "learning_rate": 5.290866527628432e-06, "loss": 0.4872, "step": 24370 }, { "epoch": 0.6664570115948371, "grad_norm": 1.7186239957809448, "learning_rate": 5.290085196073347e-06, "loss": 0.3498, "step": 24371 }, { "epoch": 0.666484357908554, "grad_norm": 1.5715078115463257, "learning_rate": 5.289303901466212e-06, "loss": 0.3298, "step": 24372 }, { "epoch": 0.6665117042222708, "grad_norm": 1.7212985754013062, "learning_rate": 5.288522643813167e-06, "loss": 0.3534, "step": 24373 }, { "epoch": 0.6665390505359877, "grad_norm": 1.7842860221862793, "learning_rate": 5.287741423120337e-06, "loss": 0.4645, "step": 24374 }, { "epoch": 0.6665663968497046, "grad_norm": 1.5340875387191772, "learning_rate": 5.2869602393938456e-06, "loss": 0.4646, "step": 24375 }, { "epoch": 0.6665937431634216, "grad_norm": 1.0165621042251587, "learning_rate": 5.286179092639827e-06, "loss": 0.3175, "step": 24376 }, { "epoch": 0.6666210894771385, "grad_norm": 1.356772780418396, "learning_rate": 5.285397982864413e-06, "loss": 0.4687, "step": 24377 }, { "epoch": 0.6666484357908554, "grad_norm": 1.4602760076522827, "learning_rate": 5.2846169100737155e-06, "loss": 0.4828, "step": 24378 }, { "epoch": 0.6666757821045723, "grad_norm": 1.1941701173782349, "learning_rate": 5.283835874273875e-06, "loss": 0.483, "step": 24379 }, { "epoch": 0.6667031284182892, "grad_norm": 1.5797492265701294, "learning_rate": 5.2830548754710166e-06, "loss": 0.4617, "step": 24380 }, { "epoch": 0.6667304747320061, "grad_norm": 1.3809499740600586, "learning_rate": 5.28227391367126e-06, "loss": 0.4842, "step": 24381 }, { "epoch": 0.666757821045723, "grad_norm": 1.249601125717163, "learning_rate": 5.281492988880741e-06, "loss": 0.4767, "step": 24382 }, { "epoch": 0.6667851673594399, "grad_norm": 1.4770946502685547, "learning_rate": 5.280712101105581e-06, "loss": 0.441, "step": 24383 }, { "epoch": 0.6668125136731569, "grad_norm": 1.3737496137619019, "learning_rate": 5.279931250351904e-06, "loss": 0.5261, "step": 24384 }, { "epoch": 0.6668398599868738, "grad_norm": 1.3553801774978638, "learning_rate": 5.279150436625841e-06, "loss": 0.4825, "step": 24385 }, { "epoch": 0.6668672063005907, "grad_norm": 1.696401596069336, "learning_rate": 5.278369659933514e-06, "loss": 0.3507, "step": 24386 }, { "epoch": 0.6668945526143076, "grad_norm": 1.3600131273269653, "learning_rate": 5.277588920281048e-06, "loss": 0.4823, "step": 24387 }, { "epoch": 0.6669218989280244, "grad_norm": 1.2683510780334473, "learning_rate": 5.276808217674565e-06, "loss": 0.7149, "step": 24388 }, { "epoch": 0.6669492452417414, "grad_norm": 1.590578317642212, "learning_rate": 5.276027552120196e-06, "loss": 0.4977, "step": 24389 }, { "epoch": 0.6669765915554583, "grad_norm": 2.9151804447174072, "learning_rate": 5.27524692362406e-06, "loss": 0.4422, "step": 24390 }, { "epoch": 0.6670039378691752, "grad_norm": 1.5935806035995483, "learning_rate": 5.274466332192281e-06, "loss": 0.3319, "step": 24391 }, { "epoch": 0.6670312841828921, "grad_norm": 1.331210970878601, "learning_rate": 5.2736857778309855e-06, "loss": 0.4632, "step": 24392 }, { "epoch": 0.6670586304966091, "grad_norm": 1.4474709033966064, "learning_rate": 5.272905260546295e-06, "loss": 0.3425, "step": 24393 }, { "epoch": 0.667085976810326, "grad_norm": 1.4573554992675781, "learning_rate": 5.2721247803443295e-06, "loss": 0.4891, "step": 24394 }, { "epoch": 0.6671133231240429, "grad_norm": 1.4069552421569824, "learning_rate": 5.271344337231219e-06, "loss": 0.4552, "step": 24395 }, { "epoch": 0.6671406694377597, "grad_norm": 1.4958701133728027, "learning_rate": 5.270563931213079e-06, "loss": 0.3557, "step": 24396 }, { "epoch": 0.6671680157514767, "grad_norm": 1.3815473318099976, "learning_rate": 5.269783562296032e-06, "loss": 0.4486, "step": 24397 }, { "epoch": 0.6671953620651936, "grad_norm": 1.3150606155395508, "learning_rate": 5.2690032304862045e-06, "loss": 0.7283, "step": 24398 }, { "epoch": 0.6672227083789105, "grad_norm": 1.3580968379974365, "learning_rate": 5.268222935789717e-06, "loss": 0.4739, "step": 24399 }, { "epoch": 0.6672500546926274, "grad_norm": 3.852982997894287, "learning_rate": 5.267442678212689e-06, "loss": 0.3649, "step": 24400 }, { "epoch": 0.6672774010063444, "grad_norm": 1.1914576292037964, "learning_rate": 5.266662457761236e-06, "loss": 0.457, "step": 24401 }, { "epoch": 0.6673047473200613, "grad_norm": 1.897652506828308, "learning_rate": 5.265882274441487e-06, "loss": 0.7524, "step": 24402 }, { "epoch": 0.6673320936337782, "grad_norm": 1.3307616710662842, "learning_rate": 5.26510212825956e-06, "loss": 0.4591, "step": 24403 }, { "epoch": 0.667359439947495, "grad_norm": 1.6147351264953613, "learning_rate": 5.264322019221571e-06, "loss": 0.3279, "step": 24404 }, { "epoch": 0.667386786261212, "grad_norm": 1.158095121383667, "learning_rate": 5.263541947333648e-06, "loss": 0.4643, "step": 24405 }, { "epoch": 0.6674141325749289, "grad_norm": 1.9053452014923096, "learning_rate": 5.262761912601904e-06, "loss": 0.4825, "step": 24406 }, { "epoch": 0.6674414788886458, "grad_norm": 1.3193093538284302, "learning_rate": 5.261981915032456e-06, "loss": 0.4506, "step": 24407 }, { "epoch": 0.6674688252023627, "grad_norm": 1.1074055433273315, "learning_rate": 5.26120195463143e-06, "loss": 0.3517, "step": 24408 }, { "epoch": 0.6674961715160797, "grad_norm": 1.2692149877548218, "learning_rate": 5.2604220314049415e-06, "loss": 0.4795, "step": 24409 }, { "epoch": 0.6675235178297966, "grad_norm": 1.359366536140442, "learning_rate": 5.259642145359104e-06, "loss": 0.471, "step": 24410 }, { "epoch": 0.6675508641435135, "grad_norm": 1.7085471153259277, "learning_rate": 5.258862296500044e-06, "loss": 0.4917, "step": 24411 }, { "epoch": 0.6675782104572303, "grad_norm": 1.2334145307540894, "learning_rate": 5.2580824848338795e-06, "loss": 0.7261, "step": 24412 }, { "epoch": 0.6676055567709472, "grad_norm": 1.4164952039718628, "learning_rate": 5.257302710366714e-06, "loss": 0.4694, "step": 24413 }, { "epoch": 0.6676329030846642, "grad_norm": 1.3059905767440796, "learning_rate": 5.2565229731046795e-06, "loss": 0.4781, "step": 24414 }, { "epoch": 0.6676602493983811, "grad_norm": 1.5339661836624146, "learning_rate": 5.2557432730538875e-06, "loss": 0.4961, "step": 24415 }, { "epoch": 0.667687595712098, "grad_norm": 1.2823728322982788, "learning_rate": 5.254963610220449e-06, "loss": 0.4647, "step": 24416 }, { "epoch": 0.6677149420258149, "grad_norm": 1.2751082181930542, "learning_rate": 5.2541839846104905e-06, "loss": 0.4828, "step": 24417 }, { "epoch": 0.6677422883395319, "grad_norm": 1.1521252393722534, "learning_rate": 5.2534043962301226e-06, "loss": 0.4783, "step": 24418 }, { "epoch": 0.6677696346532488, "grad_norm": 1.4160844087600708, "learning_rate": 5.252624845085458e-06, "loss": 0.4885, "step": 24419 }, { "epoch": 0.6677969809669656, "grad_norm": 1.4984440803527832, "learning_rate": 5.251845331182619e-06, "loss": 0.7097, "step": 24420 }, { "epoch": 0.6678243272806825, "grad_norm": 1.3291569948196411, "learning_rate": 5.251065854527717e-06, "loss": 0.4966, "step": 24421 }, { "epoch": 0.6678516735943995, "grad_norm": 1.275501012802124, "learning_rate": 5.2502864151268685e-06, "loss": 0.4686, "step": 24422 }, { "epoch": 0.6678790199081164, "grad_norm": 1.5438170433044434, "learning_rate": 5.24950701298618e-06, "loss": 0.3522, "step": 24423 }, { "epoch": 0.6679063662218333, "grad_norm": 1.169241189956665, "learning_rate": 5.248727648111778e-06, "loss": 0.477, "step": 24424 }, { "epoch": 0.6679337125355502, "grad_norm": 1.8571938276290894, "learning_rate": 5.247948320509769e-06, "loss": 0.4756, "step": 24425 }, { "epoch": 0.6679610588492672, "grad_norm": 1.595157504081726, "learning_rate": 5.247169030186265e-06, "loss": 0.716, "step": 24426 }, { "epoch": 0.6679884051629841, "grad_norm": 1.4920045137405396, "learning_rate": 5.246389777147386e-06, "loss": 0.5299, "step": 24427 }, { "epoch": 0.6680157514767009, "grad_norm": 1.366182804107666, "learning_rate": 5.245610561399241e-06, "loss": 0.4366, "step": 24428 }, { "epoch": 0.6680430977904178, "grad_norm": 1.3347653150558472, "learning_rate": 5.24483138294794e-06, "loss": 0.4916, "step": 24429 }, { "epoch": 0.6680704441041347, "grad_norm": 3.142232894897461, "learning_rate": 5.244052241799602e-06, "loss": 0.7191, "step": 24430 }, { "epoch": 0.6680977904178517, "grad_norm": 1.2458218336105347, "learning_rate": 5.243273137960336e-06, "loss": 0.3691, "step": 24431 }, { "epoch": 0.6681251367315686, "grad_norm": 1.3081170320510864, "learning_rate": 5.24249407143625e-06, "loss": 0.4787, "step": 24432 }, { "epoch": 0.6681524830452855, "grad_norm": 1.3632217645645142, "learning_rate": 5.241715042233463e-06, "loss": 0.4797, "step": 24433 }, { "epoch": 0.6681798293590024, "grad_norm": 1.749053716659546, "learning_rate": 5.240936050358081e-06, "loss": 0.4756, "step": 24434 }, { "epoch": 0.6682071756727194, "grad_norm": 1.3663562536239624, "learning_rate": 5.240157095816218e-06, "loss": 0.7115, "step": 24435 }, { "epoch": 0.6682345219864362, "grad_norm": 1.1660295724868774, "learning_rate": 5.239378178613978e-06, "loss": 0.4746, "step": 24436 }, { "epoch": 0.6682618683001531, "grad_norm": 1.4025596380233765, "learning_rate": 5.238599298757481e-06, "loss": 0.4734, "step": 24437 }, { "epoch": 0.66828921461387, "grad_norm": 1.4359604120254517, "learning_rate": 5.237820456252833e-06, "loss": 0.5011, "step": 24438 }, { "epoch": 0.668316560927587, "grad_norm": 1.1912156343460083, "learning_rate": 5.237041651106139e-06, "loss": 0.4665, "step": 24439 }, { "epoch": 0.6683439072413039, "grad_norm": 1.2360633611679077, "learning_rate": 5.236262883323516e-06, "loss": 0.4461, "step": 24440 }, { "epoch": 0.6683712535550208, "grad_norm": 1.1899443864822388, "learning_rate": 5.235484152911071e-06, "loss": 0.3777, "step": 24441 }, { "epoch": 0.6683985998687377, "grad_norm": 1.367836594581604, "learning_rate": 5.234705459874907e-06, "loss": 0.4701, "step": 24442 }, { "epoch": 0.6684259461824547, "grad_norm": 1.6757367849349976, "learning_rate": 5.233926804221144e-06, "loss": 0.3391, "step": 24443 }, { "epoch": 0.6684532924961715, "grad_norm": 1.3255482912063599, "learning_rate": 5.233148185955881e-06, "loss": 0.4525, "step": 24444 }, { "epoch": 0.6684806388098884, "grad_norm": 1.648073673248291, "learning_rate": 5.232369605085226e-06, "loss": 0.4771, "step": 24445 }, { "epoch": 0.6685079851236053, "grad_norm": 1.3604861497879028, "learning_rate": 5.2315910616152946e-06, "loss": 0.4648, "step": 24446 }, { "epoch": 0.6685353314373222, "grad_norm": 1.1917754411697388, "learning_rate": 5.230812555552192e-06, "loss": 0.7062, "step": 24447 }, { "epoch": 0.6685626777510392, "grad_norm": 1.396363615989685, "learning_rate": 5.2300340869020165e-06, "loss": 0.4409, "step": 24448 }, { "epoch": 0.6685900240647561, "grad_norm": 1.4074763059616089, "learning_rate": 5.229255655670883e-06, "loss": 0.6909, "step": 24449 }, { "epoch": 0.668617370378473, "grad_norm": 1.4107061624526978, "learning_rate": 5.228477261864897e-06, "loss": 0.4684, "step": 24450 }, { "epoch": 0.6686447166921899, "grad_norm": 1.2308019399642944, "learning_rate": 5.2276989054901615e-06, "loss": 0.4767, "step": 24451 }, { "epoch": 0.6686720630059068, "grad_norm": 1.2948051691055298, "learning_rate": 5.226920586552787e-06, "loss": 0.4454, "step": 24452 }, { "epoch": 0.6686994093196237, "grad_norm": 1.425206184387207, "learning_rate": 5.226142305058879e-06, "loss": 0.4531, "step": 24453 }, { "epoch": 0.6687267556333406, "grad_norm": 1.1695168018341064, "learning_rate": 5.225364061014536e-06, "loss": 0.4671, "step": 24454 }, { "epoch": 0.6687541019470575, "grad_norm": 1.3807069063186646, "learning_rate": 5.224585854425872e-06, "loss": 0.495, "step": 24455 }, { "epoch": 0.6687814482607745, "grad_norm": 1.450664758682251, "learning_rate": 5.22380768529899e-06, "loss": 0.5014, "step": 24456 }, { "epoch": 0.6688087945744914, "grad_norm": 1.311023473739624, "learning_rate": 5.22302955363999e-06, "loss": 0.4557, "step": 24457 }, { "epoch": 0.6688361408882083, "grad_norm": 1.8007546663284302, "learning_rate": 5.222251459454976e-06, "loss": 0.3255, "step": 24458 }, { "epoch": 0.6688634872019252, "grad_norm": 1.371263861656189, "learning_rate": 5.221473402750059e-06, "loss": 0.4925, "step": 24459 }, { "epoch": 0.668890833515642, "grad_norm": 1.5894050598144531, "learning_rate": 5.2206953835313376e-06, "loss": 0.4543, "step": 24460 }, { "epoch": 0.668918179829359, "grad_norm": 1.378627061843872, "learning_rate": 5.219917401804912e-06, "loss": 0.4749, "step": 24461 }, { "epoch": 0.6689455261430759, "grad_norm": 1.307986855506897, "learning_rate": 5.219139457576894e-06, "loss": 0.4853, "step": 24462 }, { "epoch": 0.6689728724567928, "grad_norm": 1.3909556865692139, "learning_rate": 5.218361550853381e-06, "loss": 0.4642, "step": 24463 }, { "epoch": 0.6690002187705097, "grad_norm": 1.3359363079071045, "learning_rate": 5.217583681640472e-06, "loss": 0.5058, "step": 24464 }, { "epoch": 0.6690275650842267, "grad_norm": 1.50121009349823, "learning_rate": 5.2168058499442775e-06, "loss": 0.4614, "step": 24465 }, { "epoch": 0.6690549113979436, "grad_norm": 2.1569437980651855, "learning_rate": 5.216028055770894e-06, "loss": 0.4837, "step": 24466 }, { "epoch": 0.6690822577116605, "grad_norm": 1.259363055229187, "learning_rate": 5.215250299126422e-06, "loss": 0.4603, "step": 24467 }, { "epoch": 0.6691096040253773, "grad_norm": 1.4822583198547363, "learning_rate": 5.214472580016967e-06, "loss": 0.4747, "step": 24468 }, { "epoch": 0.6691369503390943, "grad_norm": 1.61564302444458, "learning_rate": 5.213694898448629e-06, "loss": 0.3435, "step": 24469 }, { "epoch": 0.6691642966528112, "grad_norm": 1.300119400024414, "learning_rate": 5.212917254427503e-06, "loss": 0.459, "step": 24470 }, { "epoch": 0.6691916429665281, "grad_norm": 1.3924342393875122, "learning_rate": 5.212139647959698e-06, "loss": 0.4809, "step": 24471 }, { "epoch": 0.669218989280245, "grad_norm": 1.841184139251709, "learning_rate": 5.21136207905131e-06, "loss": 0.3458, "step": 24472 }, { "epoch": 0.669246335593962, "grad_norm": 1.3900980949401855, "learning_rate": 5.2105845477084385e-06, "loss": 0.4852, "step": 24473 }, { "epoch": 0.6692736819076789, "grad_norm": 1.5519744157791138, "learning_rate": 5.209807053937179e-06, "loss": 0.4301, "step": 24474 }, { "epoch": 0.6693010282213958, "grad_norm": 1.3443487882614136, "learning_rate": 5.20902959774364e-06, "loss": 0.5012, "step": 24475 }, { "epoch": 0.6693283745351126, "grad_norm": 1.4983079433441162, "learning_rate": 5.208252179133915e-06, "loss": 0.4712, "step": 24476 }, { "epoch": 0.6693557208488295, "grad_norm": 1.4503686428070068, "learning_rate": 5.2074747981140995e-06, "loss": 0.4888, "step": 24477 }, { "epoch": 0.6693830671625465, "grad_norm": 1.6686294078826904, "learning_rate": 5.2066974546903e-06, "loss": 0.4666, "step": 24478 }, { "epoch": 0.6694104134762634, "grad_norm": 1.2669141292572021, "learning_rate": 5.205920148868609e-06, "loss": 0.4892, "step": 24479 }, { "epoch": 0.6694377597899803, "grad_norm": 1.2087188959121704, "learning_rate": 5.205142880655121e-06, "loss": 0.4907, "step": 24480 }, { "epoch": 0.6694651061036972, "grad_norm": 1.1869202852249146, "learning_rate": 5.2043656500559425e-06, "loss": 0.4407, "step": 24481 }, { "epoch": 0.6694924524174142, "grad_norm": 1.4415327310562134, "learning_rate": 5.20358845707717e-06, "loss": 0.4472, "step": 24482 }, { "epoch": 0.669519798731131, "grad_norm": 1.3844634294509888, "learning_rate": 5.202811301724888e-06, "loss": 0.4611, "step": 24483 }, { "epoch": 0.6695471450448479, "grad_norm": 1.6004163026809692, "learning_rate": 5.202034184005205e-06, "loss": 0.3323, "step": 24484 }, { "epoch": 0.6695744913585648, "grad_norm": 1.1018394231796265, "learning_rate": 5.201257103924213e-06, "loss": 0.4589, "step": 24485 }, { "epoch": 0.6696018376722818, "grad_norm": 1.7612212896347046, "learning_rate": 5.200480061488007e-06, "loss": 0.3165, "step": 24486 }, { "epoch": 0.6696291839859987, "grad_norm": 1.3001506328582764, "learning_rate": 5.1997030567026854e-06, "loss": 0.4671, "step": 24487 }, { "epoch": 0.6696565302997156, "grad_norm": 1.348524808883667, "learning_rate": 5.198926089574343e-06, "loss": 0.4874, "step": 24488 }, { "epoch": 0.6696838766134325, "grad_norm": 1.4362597465515137, "learning_rate": 5.1981491601090714e-06, "loss": 0.5066, "step": 24489 }, { "epoch": 0.6697112229271495, "grad_norm": 1.377374291419983, "learning_rate": 5.197372268312972e-06, "loss": 0.4859, "step": 24490 }, { "epoch": 0.6697385692408663, "grad_norm": 1.182010531425476, "learning_rate": 5.196595414192136e-06, "loss": 0.3608, "step": 24491 }, { "epoch": 0.6697659155545832, "grad_norm": 1.4381307363510132, "learning_rate": 5.195818597752652e-06, "loss": 0.4939, "step": 24492 }, { "epoch": 0.6697932618683001, "grad_norm": 1.284193992614746, "learning_rate": 5.195041819000624e-06, "loss": 0.4905, "step": 24493 }, { "epoch": 0.669820608182017, "grad_norm": 1.2003860473632812, "learning_rate": 5.19426507794214e-06, "loss": 0.4616, "step": 24494 }, { "epoch": 0.669847954495734, "grad_norm": 1.195212960243225, "learning_rate": 5.193488374583294e-06, "loss": 0.5033, "step": 24495 }, { "epoch": 0.6698753008094509, "grad_norm": 1.2147239446640015, "learning_rate": 5.192711708930176e-06, "loss": 0.4665, "step": 24496 }, { "epoch": 0.6699026471231678, "grad_norm": 1.4458189010620117, "learning_rate": 5.191935080988884e-06, "loss": 0.4639, "step": 24497 }, { "epoch": 0.6699299934368848, "grad_norm": 1.3300082683563232, "learning_rate": 5.19115849076551e-06, "loss": 0.4929, "step": 24498 }, { "epoch": 0.6699573397506016, "grad_norm": 1.609581470489502, "learning_rate": 5.190381938266139e-06, "loss": 0.367, "step": 24499 }, { "epoch": 0.6699846860643185, "grad_norm": 1.2301268577575684, "learning_rate": 5.189605423496873e-06, "loss": 0.429, "step": 24500 }, { "epoch": 0.6700120323780354, "grad_norm": 1.3674076795578003, "learning_rate": 5.188828946463799e-06, "loss": 0.3636, "step": 24501 }, { "epoch": 0.6700393786917523, "grad_norm": 1.2972642183303833, "learning_rate": 5.188052507173002e-06, "loss": 0.4887, "step": 24502 }, { "epoch": 0.6700667250054693, "grad_norm": 1.844107747077942, "learning_rate": 5.187276105630585e-06, "loss": 0.4843, "step": 24503 }, { "epoch": 0.6700940713191862, "grad_norm": 1.284331202507019, "learning_rate": 5.186499741842632e-06, "loss": 0.4622, "step": 24504 }, { "epoch": 0.6701214176329031, "grad_norm": 1.19496750831604, "learning_rate": 5.1857234158152295e-06, "loss": 0.4494, "step": 24505 }, { "epoch": 0.67014876394662, "grad_norm": 1.1802302598953247, "learning_rate": 5.184947127554476e-06, "loss": 0.4657, "step": 24506 }, { "epoch": 0.6701761102603369, "grad_norm": 1.224480390548706, "learning_rate": 5.184170877066457e-06, "loss": 0.4927, "step": 24507 }, { "epoch": 0.6702034565740538, "grad_norm": 1.2150071859359741, "learning_rate": 5.183394664357262e-06, "loss": 0.4794, "step": 24508 }, { "epoch": 0.6702308028877707, "grad_norm": 1.288061499595642, "learning_rate": 5.182618489432978e-06, "loss": 0.4916, "step": 24509 }, { "epoch": 0.6702581492014876, "grad_norm": 2.8479623794555664, "learning_rate": 5.1818423522996985e-06, "loss": 0.3613, "step": 24510 }, { "epoch": 0.6702854955152046, "grad_norm": 1.5040159225463867, "learning_rate": 5.18106625296351e-06, "loss": 0.4506, "step": 24511 }, { "epoch": 0.6703128418289215, "grad_norm": 1.371388554573059, "learning_rate": 5.180290191430497e-06, "loss": 0.5028, "step": 24512 }, { "epoch": 0.6703401881426384, "grad_norm": 1.674027681350708, "learning_rate": 5.179514167706756e-06, "loss": 0.4692, "step": 24513 }, { "epoch": 0.6703675344563553, "grad_norm": 1.2997074127197266, "learning_rate": 5.178738181798369e-06, "loss": 0.4341, "step": 24514 }, { "epoch": 0.6703948807700721, "grad_norm": 1.3814448118209839, "learning_rate": 5.17796223371142e-06, "loss": 0.4676, "step": 24515 }, { "epoch": 0.6704222270837891, "grad_norm": 1.845412015914917, "learning_rate": 5.177186323452004e-06, "loss": 0.3318, "step": 24516 }, { "epoch": 0.670449573397506, "grad_norm": 1.3938149213790894, "learning_rate": 5.176410451026207e-06, "loss": 0.3207, "step": 24517 }, { "epoch": 0.6704769197112229, "grad_norm": 1.235870122909546, "learning_rate": 5.175634616440106e-06, "loss": 0.4555, "step": 24518 }, { "epoch": 0.6705042660249398, "grad_norm": 1.6718099117279053, "learning_rate": 5.1748588196997975e-06, "loss": 0.4697, "step": 24519 }, { "epoch": 0.6705316123386568, "grad_norm": 1.359391689300537, "learning_rate": 5.174083060811364e-06, "loss": 0.4581, "step": 24520 }, { "epoch": 0.6705589586523737, "grad_norm": 1.4666403532028198, "learning_rate": 5.173307339780886e-06, "loss": 0.4783, "step": 24521 }, { "epoch": 0.6705863049660906, "grad_norm": 1.2269891500473022, "learning_rate": 5.172531656614458e-06, "loss": 0.4605, "step": 24522 }, { "epoch": 0.6706136512798074, "grad_norm": 1.222176432609558, "learning_rate": 5.171756011318161e-06, "loss": 0.4943, "step": 24523 }, { "epoch": 0.6706409975935244, "grad_norm": 1.7403844594955444, "learning_rate": 5.170980403898074e-06, "loss": 0.4152, "step": 24524 }, { "epoch": 0.6706683439072413, "grad_norm": 1.2194275856018066, "learning_rate": 5.170204834360292e-06, "loss": 0.4966, "step": 24525 }, { "epoch": 0.6706956902209582, "grad_norm": 1.5534858703613281, "learning_rate": 5.169429302710893e-06, "loss": 0.3739, "step": 24526 }, { "epoch": 0.6707230365346751, "grad_norm": 1.3865342140197754, "learning_rate": 5.168653808955958e-06, "loss": 0.7377, "step": 24527 }, { "epoch": 0.6707503828483921, "grad_norm": 1.3562114238739014, "learning_rate": 5.1678783531015786e-06, "loss": 0.501, "step": 24528 }, { "epoch": 0.670777729162109, "grad_norm": 1.5868926048278809, "learning_rate": 5.167102935153834e-06, "loss": 0.3572, "step": 24529 }, { "epoch": 0.6708050754758259, "grad_norm": 1.5373644828796387, "learning_rate": 5.166327555118805e-06, "loss": 0.4578, "step": 24530 }, { "epoch": 0.6708324217895427, "grad_norm": 2.7836525440216064, "learning_rate": 5.165552213002574e-06, "loss": 0.327, "step": 24531 }, { "epoch": 0.6708597681032596, "grad_norm": 1.3723632097244263, "learning_rate": 5.164776908811228e-06, "loss": 0.7385, "step": 24532 }, { "epoch": 0.6708871144169766, "grad_norm": 1.2550246715545654, "learning_rate": 5.1640016425508465e-06, "loss": 0.4812, "step": 24533 }, { "epoch": 0.6709144607306935, "grad_norm": 1.4248982667922974, "learning_rate": 5.163226414227508e-06, "loss": 0.4584, "step": 24534 }, { "epoch": 0.6709418070444104, "grad_norm": 1.323758602142334, "learning_rate": 5.1624512238473005e-06, "loss": 0.4694, "step": 24535 }, { "epoch": 0.6709691533581273, "grad_norm": 1.3318125009536743, "learning_rate": 5.161676071416302e-06, "loss": 0.495, "step": 24536 }, { "epoch": 0.6709964996718443, "grad_norm": 1.3458843231201172, "learning_rate": 5.160900956940589e-06, "loss": 0.4843, "step": 24537 }, { "epoch": 0.6710238459855612, "grad_norm": 1.3231953382492065, "learning_rate": 5.16012588042625e-06, "loss": 0.3895, "step": 24538 }, { "epoch": 0.671051192299278, "grad_norm": 1.2825947999954224, "learning_rate": 5.159350841879362e-06, "loss": 0.496, "step": 24539 }, { "epoch": 0.6710785386129949, "grad_norm": 1.4773547649383545, "learning_rate": 5.158575841305999e-06, "loss": 0.3147, "step": 24540 }, { "epoch": 0.6711058849267119, "grad_norm": 1.3911091089248657, "learning_rate": 5.157800878712251e-06, "loss": 0.4961, "step": 24541 }, { "epoch": 0.6711332312404288, "grad_norm": 1.4749473333358765, "learning_rate": 5.1570259541041925e-06, "loss": 0.4854, "step": 24542 }, { "epoch": 0.6711605775541457, "grad_norm": 1.3241068124771118, "learning_rate": 5.1562510674879025e-06, "loss": 0.4977, "step": 24543 }, { "epoch": 0.6711879238678626, "grad_norm": 1.3036255836486816, "learning_rate": 5.1554762188694555e-06, "loss": 0.4483, "step": 24544 }, { "epoch": 0.6712152701815796, "grad_norm": 1.9744004011154175, "learning_rate": 5.154701408254938e-06, "loss": 0.7531, "step": 24545 }, { "epoch": 0.6712426164952965, "grad_norm": 1.1901189088821411, "learning_rate": 5.153926635650424e-06, "loss": 0.4757, "step": 24546 }, { "epoch": 0.6712699628090133, "grad_norm": 1.5077382326126099, "learning_rate": 5.153151901061989e-06, "loss": 0.5011, "step": 24547 }, { "epoch": 0.6712973091227302, "grad_norm": 1.2420666217803955, "learning_rate": 5.152377204495717e-06, "loss": 0.4828, "step": 24548 }, { "epoch": 0.6713246554364471, "grad_norm": 1.297661304473877, "learning_rate": 5.151602545957681e-06, "loss": 0.4698, "step": 24549 }, { "epoch": 0.6713520017501641, "grad_norm": 1.4468516111373901, "learning_rate": 5.150827925453954e-06, "loss": 0.4585, "step": 24550 }, { "epoch": 0.671379348063881, "grad_norm": 2.835758686065674, "learning_rate": 5.1500533429906215e-06, "loss": 0.4665, "step": 24551 }, { "epoch": 0.6714066943775979, "grad_norm": 2.004460573196411, "learning_rate": 5.149278798573757e-06, "loss": 0.3241, "step": 24552 }, { "epoch": 0.6714340406913148, "grad_norm": 1.3172922134399414, "learning_rate": 5.148504292209434e-06, "loss": 0.4807, "step": 24553 }, { "epoch": 0.6714613870050318, "grad_norm": 1.4609161615371704, "learning_rate": 5.147729823903731e-06, "loss": 0.5231, "step": 24554 }, { "epoch": 0.6714887333187486, "grad_norm": 1.1651806831359863, "learning_rate": 5.146955393662721e-06, "loss": 0.4623, "step": 24555 }, { "epoch": 0.6715160796324655, "grad_norm": 1.3939763307571411, "learning_rate": 5.146181001492475e-06, "loss": 0.4993, "step": 24556 }, { "epoch": 0.6715434259461824, "grad_norm": 1.3471038341522217, "learning_rate": 5.145406647399078e-06, "loss": 0.4883, "step": 24557 }, { "epoch": 0.6715707722598994, "grad_norm": 1.2937171459197998, "learning_rate": 5.1446323313885996e-06, "loss": 0.6933, "step": 24558 }, { "epoch": 0.6715981185736163, "grad_norm": 1.431347370147705, "learning_rate": 5.14385805346711e-06, "loss": 0.4727, "step": 24559 }, { "epoch": 0.6716254648873332, "grad_norm": 1.5770171880722046, "learning_rate": 5.143083813640693e-06, "loss": 0.4281, "step": 24560 }, { "epoch": 0.6716528112010501, "grad_norm": 1.2151390314102173, "learning_rate": 5.142309611915415e-06, "loss": 0.4855, "step": 24561 }, { "epoch": 0.6716801575147671, "grad_norm": 1.613263487815857, "learning_rate": 5.141535448297346e-06, "loss": 0.3585, "step": 24562 }, { "epoch": 0.6717075038284839, "grad_norm": 1.3773318529129028, "learning_rate": 5.1407613227925705e-06, "loss": 0.7465, "step": 24563 }, { "epoch": 0.6717348501422008, "grad_norm": 1.4833353757858276, "learning_rate": 5.139987235407154e-06, "loss": 0.4882, "step": 24564 }, { "epoch": 0.6717621964559177, "grad_norm": 1.5197272300720215, "learning_rate": 5.1392131861471695e-06, "loss": 0.451, "step": 24565 }, { "epoch": 0.6717895427696347, "grad_norm": 1.170658826828003, "learning_rate": 5.138439175018686e-06, "loss": 0.4817, "step": 24566 }, { "epoch": 0.6718168890833516, "grad_norm": 1.294312834739685, "learning_rate": 5.1376652020277816e-06, "loss": 0.4577, "step": 24567 }, { "epoch": 0.6718442353970685, "grad_norm": 1.4185101985931396, "learning_rate": 5.136891267180527e-06, "loss": 0.4539, "step": 24568 }, { "epoch": 0.6718715817107854, "grad_norm": 1.280059814453125, "learning_rate": 5.136117370482987e-06, "loss": 0.4544, "step": 24569 }, { "epoch": 0.6718989280245024, "grad_norm": 1.4613052606582642, "learning_rate": 5.135343511941242e-06, "loss": 0.4909, "step": 24570 }, { "epoch": 0.6719262743382192, "grad_norm": 1.224328875541687, "learning_rate": 5.134569691561358e-06, "loss": 0.4533, "step": 24571 }, { "epoch": 0.6719536206519361, "grad_norm": 1.3544515371322632, "learning_rate": 5.133795909349401e-06, "loss": 0.454, "step": 24572 }, { "epoch": 0.671980966965653, "grad_norm": 2.1540608406066895, "learning_rate": 5.133022165311451e-06, "loss": 0.4601, "step": 24573 }, { "epoch": 0.6720083132793699, "grad_norm": 1.6262576580047607, "learning_rate": 5.13224845945357e-06, "loss": 0.4448, "step": 24574 }, { "epoch": 0.6720356595930869, "grad_norm": 1.3664547204971313, "learning_rate": 5.131474791781829e-06, "loss": 0.4485, "step": 24575 }, { "epoch": 0.6720630059068038, "grad_norm": 1.5288499593734741, "learning_rate": 5.130701162302301e-06, "loss": 0.3038, "step": 24576 }, { "epoch": 0.6720903522205207, "grad_norm": 1.4744867086410522, "learning_rate": 5.1299275710210504e-06, "loss": 0.484, "step": 24577 }, { "epoch": 0.6721176985342376, "grad_norm": 1.2606961727142334, "learning_rate": 5.129154017944149e-06, "loss": 0.4976, "step": 24578 }, { "epoch": 0.6721450448479545, "grad_norm": 1.8026435375213623, "learning_rate": 5.128380503077659e-06, "loss": 0.3493, "step": 24579 }, { "epoch": 0.6721723911616714, "grad_norm": 1.5356602668762207, "learning_rate": 5.1276070264276565e-06, "loss": 0.4742, "step": 24580 }, { "epoch": 0.6721997374753883, "grad_norm": 1.7450522184371948, "learning_rate": 5.126833588000206e-06, "loss": 0.3336, "step": 24581 }, { "epoch": 0.6722270837891052, "grad_norm": 1.2899541854858398, "learning_rate": 5.1260601878013716e-06, "loss": 0.7089, "step": 24582 }, { "epoch": 0.6722544301028222, "grad_norm": 1.5130059719085693, "learning_rate": 5.125286825837227e-06, "loss": 0.4601, "step": 24583 }, { "epoch": 0.6722817764165391, "grad_norm": 3.9871163368225098, "learning_rate": 5.124513502113835e-06, "loss": 0.3693, "step": 24584 }, { "epoch": 0.672309122730256, "grad_norm": 1.4821619987487793, "learning_rate": 5.123740216637258e-06, "loss": 0.4659, "step": 24585 }, { "epoch": 0.6723364690439728, "grad_norm": 1.3133692741394043, "learning_rate": 5.122966969413572e-06, "loss": 0.4811, "step": 24586 }, { "epoch": 0.6723638153576897, "grad_norm": 1.3164396286010742, "learning_rate": 5.122193760448837e-06, "loss": 0.4745, "step": 24587 }, { "epoch": 0.6723911616714067, "grad_norm": 1.5854339599609375, "learning_rate": 5.121420589749118e-06, "loss": 0.5061, "step": 24588 }, { "epoch": 0.6724185079851236, "grad_norm": 1.2459275722503662, "learning_rate": 5.120647457320482e-06, "loss": 0.4678, "step": 24589 }, { "epoch": 0.6724458542988405, "grad_norm": 1.2983753681182861, "learning_rate": 5.119874363168995e-06, "loss": 0.4713, "step": 24590 }, { "epoch": 0.6724732006125574, "grad_norm": 1.5197972059249878, "learning_rate": 5.119101307300716e-06, "loss": 0.4543, "step": 24591 }, { "epoch": 0.6725005469262744, "grad_norm": 1.6941176652908325, "learning_rate": 5.118328289721717e-06, "loss": 0.33, "step": 24592 }, { "epoch": 0.6725278932399913, "grad_norm": 1.189235806465149, "learning_rate": 5.117555310438058e-06, "loss": 0.7077, "step": 24593 }, { "epoch": 0.6725552395537081, "grad_norm": 1.3121267557144165, "learning_rate": 5.116782369455801e-06, "loss": 0.4636, "step": 24594 }, { "epoch": 0.672582585867425, "grad_norm": 1.552978277206421, "learning_rate": 5.116009466781015e-06, "loss": 0.7341, "step": 24595 }, { "epoch": 0.672609932181142, "grad_norm": 1.5892348289489746, "learning_rate": 5.11523660241976e-06, "loss": 0.3301, "step": 24596 }, { "epoch": 0.6726372784948589, "grad_norm": 1.4190247058868408, "learning_rate": 5.114463776378096e-06, "loss": 0.4761, "step": 24597 }, { "epoch": 0.6726646248085758, "grad_norm": 1.40321683883667, "learning_rate": 5.113690988662092e-06, "loss": 0.457, "step": 24598 }, { "epoch": 0.6726919711222927, "grad_norm": 1.2455973625183105, "learning_rate": 5.112918239277809e-06, "loss": 0.4899, "step": 24599 }, { "epoch": 0.6727193174360097, "grad_norm": 1.3255665302276611, "learning_rate": 5.112145528231305e-06, "loss": 0.4873, "step": 24600 }, { "epoch": 0.6727466637497266, "grad_norm": 1.1638270616531372, "learning_rate": 5.11137285552864e-06, "loss": 0.7235, "step": 24601 }, { "epoch": 0.6727740100634434, "grad_norm": 1.5530822277069092, "learning_rate": 5.110600221175884e-06, "loss": 0.4696, "step": 24602 }, { "epoch": 0.6728013563771603, "grad_norm": 1.4137786626815796, "learning_rate": 5.109827625179094e-06, "loss": 0.5055, "step": 24603 }, { "epoch": 0.6728287026908772, "grad_norm": 1.4021023511886597, "learning_rate": 5.109055067544324e-06, "loss": 0.4799, "step": 24604 }, { "epoch": 0.6728560490045942, "grad_norm": 1.5152541399002075, "learning_rate": 5.108282548277645e-06, "loss": 0.456, "step": 24605 }, { "epoch": 0.6728833953183111, "grad_norm": 1.5974568128585815, "learning_rate": 5.107510067385113e-06, "loss": 0.4527, "step": 24606 }, { "epoch": 0.672910741632028, "grad_norm": 1.6933714151382446, "learning_rate": 5.106737624872784e-06, "loss": 0.3493, "step": 24607 }, { "epoch": 0.672938087945745, "grad_norm": 1.3791643381118774, "learning_rate": 5.105965220746726e-06, "loss": 0.4645, "step": 24608 }, { "epoch": 0.6729654342594619, "grad_norm": 1.4310681819915771, "learning_rate": 5.1051928550129925e-06, "loss": 0.3748, "step": 24609 }, { "epoch": 0.6729927805731787, "grad_norm": 1.5825225114822388, "learning_rate": 5.10442052767764e-06, "loss": 0.4446, "step": 24610 }, { "epoch": 0.6730201268868956, "grad_norm": 1.6329561471939087, "learning_rate": 5.1036482387467345e-06, "loss": 0.3445, "step": 24611 }, { "epoch": 0.6730474732006125, "grad_norm": 1.677371621131897, "learning_rate": 5.10287598822633e-06, "loss": 0.492, "step": 24612 }, { "epoch": 0.6730748195143295, "grad_norm": 1.376569390296936, "learning_rate": 5.102103776122483e-06, "loss": 0.4637, "step": 24613 }, { "epoch": 0.6731021658280464, "grad_norm": 1.8043993711471558, "learning_rate": 5.101331602441255e-06, "loss": 0.4767, "step": 24614 }, { "epoch": 0.6731295121417633, "grad_norm": 1.2907696962356567, "learning_rate": 5.100559467188704e-06, "loss": 0.476, "step": 24615 }, { "epoch": 0.6731568584554802, "grad_norm": 1.2561122179031372, "learning_rate": 5.099787370370883e-06, "loss": 0.4466, "step": 24616 }, { "epoch": 0.6731842047691972, "grad_norm": 1.5646944046020508, "learning_rate": 5.099015311993849e-06, "loss": 0.4789, "step": 24617 }, { "epoch": 0.673211551082914, "grad_norm": 1.4566363096237183, "learning_rate": 5.098243292063665e-06, "loss": 0.4956, "step": 24618 }, { "epoch": 0.6732388973966309, "grad_norm": 1.3012794256210327, "learning_rate": 5.097471310586382e-06, "loss": 0.4753, "step": 24619 }, { "epoch": 0.6732662437103478, "grad_norm": 1.1749000549316406, "learning_rate": 5.096699367568052e-06, "loss": 0.4713, "step": 24620 }, { "epoch": 0.6732935900240647, "grad_norm": 2.2867820262908936, "learning_rate": 5.09592746301474e-06, "loss": 0.4636, "step": 24621 }, { "epoch": 0.6733209363377817, "grad_norm": 1.3051198720932007, "learning_rate": 5.095155596932497e-06, "loss": 0.4777, "step": 24622 }, { "epoch": 0.6733482826514986, "grad_norm": 1.2549124956130981, "learning_rate": 5.09438376932738e-06, "loss": 0.4794, "step": 24623 }, { "epoch": 0.6733756289652155, "grad_norm": 1.5536285638809204, "learning_rate": 5.09361198020544e-06, "loss": 0.452, "step": 24624 }, { "epoch": 0.6734029752789324, "grad_norm": 1.6689213514328003, "learning_rate": 5.092840229572733e-06, "loss": 0.3485, "step": 24625 }, { "epoch": 0.6734303215926493, "grad_norm": 1.2624695301055908, "learning_rate": 5.092068517435309e-06, "loss": 0.4815, "step": 24626 }, { "epoch": 0.6734576679063662, "grad_norm": 1.4634093046188354, "learning_rate": 5.091296843799232e-06, "loss": 0.4574, "step": 24627 }, { "epoch": 0.6734850142200831, "grad_norm": 1.587005376815796, "learning_rate": 5.090525208670548e-06, "loss": 0.4488, "step": 24628 }, { "epoch": 0.6735123605338, "grad_norm": 1.2106759548187256, "learning_rate": 5.089753612055309e-06, "loss": 0.4949, "step": 24629 }, { "epoch": 0.673539706847517, "grad_norm": 1.600404977798462, "learning_rate": 5.088982053959575e-06, "loss": 0.3481, "step": 24630 }, { "epoch": 0.6735670531612339, "grad_norm": 1.4013698101043701, "learning_rate": 5.088210534389395e-06, "loss": 0.4925, "step": 24631 }, { "epoch": 0.6735943994749508, "grad_norm": 1.2701292037963867, "learning_rate": 5.087439053350816e-06, "loss": 0.5088, "step": 24632 }, { "epoch": 0.6736217457886677, "grad_norm": 1.4210350513458252, "learning_rate": 5.0866676108499e-06, "loss": 0.4563, "step": 24633 }, { "epoch": 0.6736490921023846, "grad_norm": 1.2778339385986328, "learning_rate": 5.085896206892695e-06, "loss": 0.4625, "step": 24634 }, { "epoch": 0.6736764384161015, "grad_norm": 1.2504856586456299, "learning_rate": 5.085124841485245e-06, "loss": 0.4819, "step": 24635 }, { "epoch": 0.6737037847298184, "grad_norm": 1.449331283569336, "learning_rate": 5.084353514633613e-06, "loss": 0.447, "step": 24636 }, { "epoch": 0.6737311310435353, "grad_norm": 1.5088294744491577, "learning_rate": 5.083582226343844e-06, "loss": 0.5124, "step": 24637 }, { "epoch": 0.6737584773572523, "grad_norm": 1.3799926042556763, "learning_rate": 5.082810976621988e-06, "loss": 0.4686, "step": 24638 }, { "epoch": 0.6737858236709692, "grad_norm": 1.3126397132873535, "learning_rate": 5.082039765474093e-06, "loss": 0.451, "step": 24639 }, { "epoch": 0.6738131699846861, "grad_norm": 1.3031096458435059, "learning_rate": 5.0812685929062155e-06, "loss": 0.4947, "step": 24640 }, { "epoch": 0.673840516298403, "grad_norm": 1.1008085012435913, "learning_rate": 5.0804974589244016e-06, "loss": 0.33, "step": 24641 }, { "epoch": 0.6738678626121198, "grad_norm": 1.2677204608917236, "learning_rate": 5.079726363534697e-06, "loss": 0.4697, "step": 24642 }, { "epoch": 0.6738952089258368, "grad_norm": 1.5769492387771606, "learning_rate": 5.078955306743158e-06, "loss": 0.451, "step": 24643 }, { "epoch": 0.6739225552395537, "grad_norm": 1.461408257484436, "learning_rate": 5.0781842885558295e-06, "loss": 0.5024, "step": 24644 }, { "epoch": 0.6739499015532706, "grad_norm": 1.370091438293457, "learning_rate": 5.077413308978758e-06, "loss": 0.4635, "step": 24645 }, { "epoch": 0.6739772478669875, "grad_norm": 1.2528104782104492, "learning_rate": 5.076642368017996e-06, "loss": 0.4879, "step": 24646 }, { "epoch": 0.6740045941807045, "grad_norm": 1.4628653526306152, "learning_rate": 5.075871465679589e-06, "loss": 0.4677, "step": 24647 }, { "epoch": 0.6740319404944214, "grad_norm": 1.349505066871643, "learning_rate": 5.0751006019695815e-06, "loss": 0.4673, "step": 24648 }, { "epoch": 0.6740592868081383, "grad_norm": 1.325904130935669, "learning_rate": 5.074329776894027e-06, "loss": 0.4588, "step": 24649 }, { "epoch": 0.6740866331218551, "grad_norm": 1.1925314664840698, "learning_rate": 5.0735589904589696e-06, "loss": 0.4904, "step": 24650 }, { "epoch": 0.674113979435572, "grad_norm": 1.8324705362319946, "learning_rate": 5.072788242670456e-06, "loss": 0.4683, "step": 24651 }, { "epoch": 0.674141325749289, "grad_norm": 1.324522852897644, "learning_rate": 5.072017533534528e-06, "loss": 0.472, "step": 24652 }, { "epoch": 0.6741686720630059, "grad_norm": 1.36262047290802, "learning_rate": 5.0712468630572375e-06, "loss": 0.4846, "step": 24653 }, { "epoch": 0.6741960183767228, "grad_norm": 1.2628488540649414, "learning_rate": 5.07047623124463e-06, "loss": 0.4655, "step": 24654 }, { "epoch": 0.6742233646904398, "grad_norm": 1.798977017402649, "learning_rate": 5.069705638102745e-06, "loss": 0.4579, "step": 24655 }, { "epoch": 0.6742507110041567, "grad_norm": 1.4373072385787964, "learning_rate": 5.068935083637636e-06, "loss": 0.4865, "step": 24656 }, { "epoch": 0.6742780573178736, "grad_norm": 1.4207112789154053, "learning_rate": 5.068164567855344e-06, "loss": 0.7234, "step": 24657 }, { "epoch": 0.6743054036315904, "grad_norm": 1.641518235206604, "learning_rate": 5.067394090761912e-06, "loss": 0.3604, "step": 24658 }, { "epoch": 0.6743327499453073, "grad_norm": 1.2295618057250977, "learning_rate": 5.066623652363385e-06, "loss": 0.4595, "step": 24659 }, { "epoch": 0.6743600962590243, "grad_norm": 1.318874716758728, "learning_rate": 5.065853252665808e-06, "loss": 0.7099, "step": 24660 }, { "epoch": 0.6743874425727412, "grad_norm": 1.4910629987716675, "learning_rate": 5.065082891675219e-06, "loss": 0.453, "step": 24661 }, { "epoch": 0.6744147888864581, "grad_norm": 1.1726183891296387, "learning_rate": 5.06431256939767e-06, "loss": 0.4484, "step": 24662 }, { "epoch": 0.674442135200175, "grad_norm": 1.3180441856384277, "learning_rate": 5.0635422858392e-06, "loss": 0.4617, "step": 24663 }, { "epoch": 0.674469481513892, "grad_norm": 1.2096943855285645, "learning_rate": 5.062772041005848e-06, "loss": 0.4718, "step": 24664 }, { "epoch": 0.6744968278276089, "grad_norm": 1.3022271394729614, "learning_rate": 5.0620018349036635e-06, "loss": 0.4507, "step": 24665 }, { "epoch": 0.6745241741413257, "grad_norm": 1.1185481548309326, "learning_rate": 5.061231667538684e-06, "loss": 0.354, "step": 24666 }, { "epoch": 0.6745515204550426, "grad_norm": 1.3454444408416748, "learning_rate": 5.06046153891695e-06, "loss": 0.7287, "step": 24667 }, { "epoch": 0.6745788667687596, "grad_norm": 1.3061684370040894, "learning_rate": 5.059691449044508e-06, "loss": 0.4609, "step": 24668 }, { "epoch": 0.6746062130824765, "grad_norm": 1.326151728630066, "learning_rate": 5.0589213979273964e-06, "loss": 0.4396, "step": 24669 }, { "epoch": 0.6746335593961934, "grad_norm": 1.1922622919082642, "learning_rate": 5.058151385571653e-06, "loss": 0.474, "step": 24670 }, { "epoch": 0.6746609057099103, "grad_norm": 1.517638087272644, "learning_rate": 5.057381411983323e-06, "loss": 0.4967, "step": 24671 }, { "epoch": 0.6746882520236273, "grad_norm": 1.2911477088928223, "learning_rate": 5.056611477168446e-06, "loss": 0.4994, "step": 24672 }, { "epoch": 0.6747155983373442, "grad_norm": 1.3285197019577026, "learning_rate": 5.055841581133061e-06, "loss": 0.4777, "step": 24673 }, { "epoch": 0.674742944651061, "grad_norm": 1.5093357563018799, "learning_rate": 5.055071723883202e-06, "loss": 0.47, "step": 24674 }, { "epoch": 0.6747702909647779, "grad_norm": 1.3204063177108765, "learning_rate": 5.054301905424921e-06, "loss": 0.4464, "step": 24675 }, { "epoch": 0.6747976372784948, "grad_norm": 1.3303661346435547, "learning_rate": 5.053532125764247e-06, "loss": 0.4537, "step": 24676 }, { "epoch": 0.6748249835922118, "grad_norm": 1.3141118288040161, "learning_rate": 5.052762384907217e-06, "loss": 0.4772, "step": 24677 }, { "epoch": 0.6748523299059287, "grad_norm": 1.2782570123672485, "learning_rate": 5.0519926828598795e-06, "loss": 0.4932, "step": 24678 }, { "epoch": 0.6748796762196456, "grad_norm": 1.3217673301696777, "learning_rate": 5.051223019628266e-06, "loss": 0.5031, "step": 24679 }, { "epoch": 0.6749070225333625, "grad_norm": 2.905991792678833, "learning_rate": 5.050453395218411e-06, "loss": 0.4672, "step": 24680 }, { "epoch": 0.6749343688470795, "grad_norm": 1.2670912742614746, "learning_rate": 5.049683809636361e-06, "loss": 0.4617, "step": 24681 }, { "epoch": 0.6749617151607963, "grad_norm": 1.4378982782363892, "learning_rate": 5.048914262888148e-06, "loss": 0.4799, "step": 24682 }, { "epoch": 0.6749890614745132, "grad_norm": 1.2735216617584229, "learning_rate": 5.048144754979806e-06, "loss": 0.503, "step": 24683 }, { "epoch": 0.6750164077882301, "grad_norm": 1.2646968364715576, "learning_rate": 5.0473752859173775e-06, "loss": 0.4261, "step": 24684 }, { "epoch": 0.6750437541019471, "grad_norm": 1.1126463413238525, "learning_rate": 5.046605855706896e-06, "loss": 0.4765, "step": 24685 }, { "epoch": 0.675071100415664, "grad_norm": 1.3220769166946411, "learning_rate": 5.045836464354399e-06, "loss": 0.4663, "step": 24686 }, { "epoch": 0.6750984467293809, "grad_norm": 1.5802605152130127, "learning_rate": 5.045067111865916e-06, "loss": 0.3509, "step": 24687 }, { "epoch": 0.6751257930430978, "grad_norm": 1.2728297710418701, "learning_rate": 5.04429779824749e-06, "loss": 0.4489, "step": 24688 }, { "epoch": 0.6751531393568146, "grad_norm": 1.680328130722046, "learning_rate": 5.043528523505155e-06, "loss": 0.4535, "step": 24689 }, { "epoch": 0.6751804856705316, "grad_norm": 1.4474341869354248, "learning_rate": 5.042759287644939e-06, "loss": 0.4494, "step": 24690 }, { "epoch": 0.6752078319842485, "grad_norm": 1.3103621006011963, "learning_rate": 5.041990090672886e-06, "loss": 0.7592, "step": 24691 }, { "epoch": 0.6752351782979654, "grad_norm": 1.6460212469100952, "learning_rate": 5.041220932595023e-06, "loss": 0.381, "step": 24692 }, { "epoch": 0.6752625246116823, "grad_norm": 1.6323291063308716, "learning_rate": 5.040451813417389e-06, "loss": 0.5003, "step": 24693 }, { "epoch": 0.6752898709253993, "grad_norm": 1.3025094270706177, "learning_rate": 5.039682733146014e-06, "loss": 0.4684, "step": 24694 }, { "epoch": 0.6753172172391162, "grad_norm": 1.385864019393921, "learning_rate": 5.038913691786931e-06, "loss": 0.4733, "step": 24695 }, { "epoch": 0.6753445635528331, "grad_norm": 1.2547162771224976, "learning_rate": 5.038144689346171e-06, "loss": 0.3556, "step": 24696 }, { "epoch": 0.6753719098665499, "grad_norm": 1.5230311155319214, "learning_rate": 5.037375725829774e-06, "loss": 0.4817, "step": 24697 }, { "epoch": 0.6753992561802669, "grad_norm": 1.6872144937515259, "learning_rate": 5.0366068012437665e-06, "loss": 0.4259, "step": 24698 }, { "epoch": 0.6754266024939838, "grad_norm": 1.279015064239502, "learning_rate": 5.035837915594178e-06, "loss": 0.4622, "step": 24699 }, { "epoch": 0.6754539488077007, "grad_norm": 1.2790275812149048, "learning_rate": 5.035069068887049e-06, "loss": 0.4723, "step": 24700 }, { "epoch": 0.6754812951214176, "grad_norm": 1.690053105354309, "learning_rate": 5.0343002611284045e-06, "loss": 0.3406, "step": 24701 }, { "epoch": 0.6755086414351346, "grad_norm": 1.3352460861206055, "learning_rate": 5.033531492324275e-06, "loss": 0.326, "step": 24702 }, { "epoch": 0.6755359877488515, "grad_norm": 1.451130747795105, "learning_rate": 5.032762762480695e-06, "loss": 0.4802, "step": 24703 }, { "epoch": 0.6755633340625684, "grad_norm": 1.6950792074203491, "learning_rate": 5.031994071603694e-06, "loss": 0.3199, "step": 24704 }, { "epoch": 0.6755906803762852, "grad_norm": 1.7808785438537598, "learning_rate": 5.031225419699298e-06, "loss": 0.5203, "step": 24705 }, { "epoch": 0.6756180266900021, "grad_norm": 2.024705171585083, "learning_rate": 5.030456806773545e-06, "loss": 0.4508, "step": 24706 }, { "epoch": 0.6756453730037191, "grad_norm": 1.2376458644866943, "learning_rate": 5.029688232832457e-06, "loss": 0.4909, "step": 24707 }, { "epoch": 0.675672719317436, "grad_norm": 1.2397470474243164, "learning_rate": 5.028919697882069e-06, "loss": 0.3591, "step": 24708 }, { "epoch": 0.6757000656311529, "grad_norm": 1.2150673866271973, "learning_rate": 5.028151201928401e-06, "loss": 0.4924, "step": 24709 }, { "epoch": 0.6757274119448698, "grad_norm": 1.4266674518585205, "learning_rate": 5.0273827449774925e-06, "loss": 0.4505, "step": 24710 }, { "epoch": 0.6757547582585868, "grad_norm": 1.4555665254592896, "learning_rate": 5.0266143270353664e-06, "loss": 0.4909, "step": 24711 }, { "epoch": 0.6757821045723037, "grad_norm": 1.2132312059402466, "learning_rate": 5.025845948108048e-06, "loss": 0.471, "step": 24712 }, { "epoch": 0.6758094508860205, "grad_norm": 1.3415578603744507, "learning_rate": 5.0250776082015714e-06, "loss": 0.4566, "step": 24713 }, { "epoch": 0.6758367971997374, "grad_norm": 1.7029595375061035, "learning_rate": 5.024309307321962e-06, "loss": 0.3569, "step": 24714 }, { "epoch": 0.6758641435134544, "grad_norm": 1.187601089477539, "learning_rate": 5.023541045475242e-06, "loss": 0.7089, "step": 24715 }, { "epoch": 0.6758914898271713, "grad_norm": 1.239288330078125, "learning_rate": 5.022772822667445e-06, "loss": 0.4474, "step": 24716 }, { "epoch": 0.6759188361408882, "grad_norm": 1.2960264682769775, "learning_rate": 5.022004638904594e-06, "loss": 0.4644, "step": 24717 }, { "epoch": 0.6759461824546051, "grad_norm": 1.3999593257904053, "learning_rate": 5.0212364941927135e-06, "loss": 0.471, "step": 24718 }, { "epoch": 0.6759735287683221, "grad_norm": 2.0381197929382324, "learning_rate": 5.0204683885378335e-06, "loss": 0.3558, "step": 24719 }, { "epoch": 0.676000875082039, "grad_norm": 1.1302001476287842, "learning_rate": 5.019700321945979e-06, "loss": 0.7494, "step": 24720 }, { "epoch": 0.6760282213957558, "grad_norm": 1.4950981140136719, "learning_rate": 5.01893229442317e-06, "loss": 0.4219, "step": 24721 }, { "epoch": 0.6760555677094727, "grad_norm": 2.802335500717163, "learning_rate": 5.018164305975439e-06, "loss": 0.7475, "step": 24722 }, { "epoch": 0.6760829140231897, "grad_norm": 1.6556832790374756, "learning_rate": 5.017396356608806e-06, "loss": 0.3435, "step": 24723 }, { "epoch": 0.6761102603369066, "grad_norm": 1.6433135271072388, "learning_rate": 5.016628446329297e-06, "loss": 0.4568, "step": 24724 }, { "epoch": 0.6761376066506235, "grad_norm": 1.283872127532959, "learning_rate": 5.015860575142932e-06, "loss": 0.4738, "step": 24725 }, { "epoch": 0.6761649529643404, "grad_norm": 1.2235348224639893, "learning_rate": 5.0150927430557425e-06, "loss": 0.45, "step": 24726 }, { "epoch": 0.6761922992780574, "grad_norm": 1.396354079246521, "learning_rate": 5.014324950073746e-06, "loss": 0.4783, "step": 24727 }, { "epoch": 0.6762196455917743, "grad_norm": 1.4908604621887207, "learning_rate": 5.013557196202965e-06, "loss": 0.5006, "step": 24728 }, { "epoch": 0.6762469919054911, "grad_norm": 1.4604179859161377, "learning_rate": 5.012789481449431e-06, "loss": 0.5102, "step": 24729 }, { "epoch": 0.676274338219208, "grad_norm": 1.4613605737686157, "learning_rate": 5.012021805819156e-06, "loss": 0.4783, "step": 24730 }, { "epoch": 0.6763016845329249, "grad_norm": 1.479872465133667, "learning_rate": 5.011254169318162e-06, "loss": 0.3534, "step": 24731 }, { "epoch": 0.6763290308466419, "grad_norm": 1.3239139318466187, "learning_rate": 5.010486571952481e-06, "loss": 0.4674, "step": 24732 }, { "epoch": 0.6763563771603588, "grad_norm": 1.6966806650161743, "learning_rate": 5.009719013728127e-06, "loss": 0.3186, "step": 24733 }, { "epoch": 0.6763837234740757, "grad_norm": 1.4210214614868164, "learning_rate": 5.0089514946511196e-06, "loss": 0.7208, "step": 24734 }, { "epoch": 0.6764110697877926, "grad_norm": 1.7111048698425293, "learning_rate": 5.0081840147274865e-06, "loss": 0.363, "step": 24735 }, { "epoch": 0.6764384161015096, "grad_norm": 1.2354971170425415, "learning_rate": 5.007416573963246e-06, "loss": 0.4723, "step": 24736 }, { "epoch": 0.6764657624152264, "grad_norm": 1.1146889925003052, "learning_rate": 5.006649172364413e-06, "loss": 0.4655, "step": 24737 }, { "epoch": 0.6764931087289433, "grad_norm": 1.1985459327697754, "learning_rate": 5.005881809937015e-06, "loss": 0.4756, "step": 24738 }, { "epoch": 0.6765204550426602, "grad_norm": 1.2578530311584473, "learning_rate": 5.005114486687071e-06, "loss": 0.4819, "step": 24739 }, { "epoch": 0.6765478013563772, "grad_norm": 1.471184492111206, "learning_rate": 5.004347202620592e-06, "loss": 0.439, "step": 24740 }, { "epoch": 0.6765751476700941, "grad_norm": 1.47281014919281, "learning_rate": 5.0035799577436075e-06, "loss": 0.452, "step": 24741 }, { "epoch": 0.676602493983811, "grad_norm": 1.3327515125274658, "learning_rate": 5.002812752062133e-06, "loss": 0.4885, "step": 24742 }, { "epoch": 0.6766298402975279, "grad_norm": 1.224707007408142, "learning_rate": 5.002045585582183e-06, "loss": 0.4751, "step": 24743 }, { "epoch": 0.6766571866112449, "grad_norm": 1.2418842315673828, "learning_rate": 5.0012784583097806e-06, "loss": 0.4744, "step": 24744 }, { "epoch": 0.6766845329249617, "grad_norm": 1.565072774887085, "learning_rate": 5.000511370250942e-06, "loss": 0.3104, "step": 24745 }, { "epoch": 0.6767118792386786, "grad_norm": 1.198522925376892, "learning_rate": 4.999744321411686e-06, "loss": 0.442, "step": 24746 }, { "epoch": 0.6767392255523955, "grad_norm": 1.328437328338623, "learning_rate": 4.998977311798025e-06, "loss": 0.4794, "step": 24747 }, { "epoch": 0.6767665718661124, "grad_norm": 1.199318289756775, "learning_rate": 4.998210341415982e-06, "loss": 0.4791, "step": 24748 }, { "epoch": 0.6767939181798294, "grad_norm": 1.4359936714172363, "learning_rate": 4.997443410271573e-06, "loss": 0.7197, "step": 24749 }, { "epoch": 0.6768212644935463, "grad_norm": 3.839690685272217, "learning_rate": 4.996676518370808e-06, "loss": 0.5092, "step": 24750 }, { "epoch": 0.6768486108072632, "grad_norm": 1.7201659679412842, "learning_rate": 4.99590966571971e-06, "loss": 0.3257, "step": 24751 }, { "epoch": 0.6768759571209801, "grad_norm": 1.317090392112732, "learning_rate": 4.995142852324294e-06, "loss": 0.4777, "step": 24752 }, { "epoch": 0.676903303434697, "grad_norm": 1.2148631811141968, "learning_rate": 4.99437607819057e-06, "loss": 0.3419, "step": 24753 }, { "epoch": 0.6769306497484139, "grad_norm": 1.8379557132720947, "learning_rate": 4.993609343324561e-06, "loss": 0.4513, "step": 24754 }, { "epoch": 0.6769579960621308, "grad_norm": 1.2382317781448364, "learning_rate": 4.992842647732277e-06, "loss": 0.4891, "step": 24755 }, { "epoch": 0.6769853423758477, "grad_norm": 1.3143510818481445, "learning_rate": 4.992075991419729e-06, "loss": 0.4477, "step": 24756 }, { "epoch": 0.6770126886895647, "grad_norm": 1.6276065111160278, "learning_rate": 4.991309374392941e-06, "loss": 0.4842, "step": 24757 }, { "epoch": 0.6770400350032816, "grad_norm": 1.3413631916046143, "learning_rate": 4.99054279665792e-06, "loss": 0.4863, "step": 24758 }, { "epoch": 0.6770673813169985, "grad_norm": 1.282776117324829, "learning_rate": 4.989776258220682e-06, "loss": 0.4934, "step": 24759 }, { "epoch": 0.6770947276307154, "grad_norm": 1.2723276615142822, "learning_rate": 4.989009759087235e-06, "loss": 0.4759, "step": 24760 }, { "epoch": 0.6771220739444322, "grad_norm": 1.3023192882537842, "learning_rate": 4.9882432992636e-06, "loss": 0.4728, "step": 24761 }, { "epoch": 0.6771494202581492, "grad_norm": 1.1381280422210693, "learning_rate": 4.987476878755785e-06, "loss": 0.3547, "step": 24762 }, { "epoch": 0.6771767665718661, "grad_norm": 1.3447721004486084, "learning_rate": 4.986710497569801e-06, "loss": 0.4667, "step": 24763 }, { "epoch": 0.677204112885583, "grad_norm": 1.417311191558838, "learning_rate": 4.985944155711668e-06, "loss": 0.4764, "step": 24764 }, { "epoch": 0.6772314591993, "grad_norm": 1.7390397787094116, "learning_rate": 4.985177853187386e-06, "loss": 0.4405, "step": 24765 }, { "epoch": 0.6772588055130169, "grad_norm": 1.3622323274612427, "learning_rate": 4.9844115900029765e-06, "loss": 0.7304, "step": 24766 }, { "epoch": 0.6772861518267338, "grad_norm": 1.1630221605300903, "learning_rate": 4.983645366164446e-06, "loss": 0.5019, "step": 24767 }, { "epoch": 0.6773134981404507, "grad_norm": 1.333882212638855, "learning_rate": 4.982879181677806e-06, "loss": 0.5064, "step": 24768 }, { "epoch": 0.6773408444541675, "grad_norm": 1.8009322881698608, "learning_rate": 4.982113036549062e-06, "loss": 0.7116, "step": 24769 }, { "epoch": 0.6773681907678845, "grad_norm": 1.2943230867385864, "learning_rate": 4.981346930784233e-06, "loss": 0.4439, "step": 24770 }, { "epoch": 0.6773955370816014, "grad_norm": 1.8605067729949951, "learning_rate": 4.980580864389326e-06, "loss": 0.3383, "step": 24771 }, { "epoch": 0.6774228833953183, "grad_norm": 1.5022978782653809, "learning_rate": 4.979814837370346e-06, "loss": 0.4854, "step": 24772 }, { "epoch": 0.6774502297090352, "grad_norm": 1.307768702507019, "learning_rate": 4.979048849733309e-06, "loss": 0.5009, "step": 24773 }, { "epoch": 0.6774775760227522, "grad_norm": 4.84666633605957, "learning_rate": 4.978282901484221e-06, "loss": 0.4599, "step": 24774 }, { "epoch": 0.6775049223364691, "grad_norm": 1.5294239521026611, "learning_rate": 4.9775169926290854e-06, "loss": 0.3707, "step": 24775 }, { "epoch": 0.677532268650186, "grad_norm": 1.653944492340088, "learning_rate": 4.976751123173921e-06, "loss": 0.3172, "step": 24776 }, { "epoch": 0.6775596149639028, "grad_norm": 1.6625325679779053, "learning_rate": 4.97598529312473e-06, "loss": 0.4477, "step": 24777 }, { "epoch": 0.6775869612776197, "grad_norm": 1.2919591665267944, "learning_rate": 4.975219502487516e-06, "loss": 0.7378, "step": 24778 }, { "epoch": 0.6776143075913367, "grad_norm": 1.5541349649429321, "learning_rate": 4.974453751268294e-06, "loss": 0.5019, "step": 24779 }, { "epoch": 0.6776416539050536, "grad_norm": 1.70781672000885, "learning_rate": 4.97368803947307e-06, "loss": 0.4691, "step": 24780 }, { "epoch": 0.6776690002187705, "grad_norm": 1.4347991943359375, "learning_rate": 4.972922367107847e-06, "loss": 0.4781, "step": 24781 }, { "epoch": 0.6776963465324874, "grad_norm": 1.3933824300765991, "learning_rate": 4.972156734178631e-06, "loss": 0.4861, "step": 24782 }, { "epoch": 0.6777236928462044, "grad_norm": 1.338650107383728, "learning_rate": 4.971391140691434e-06, "loss": 0.4695, "step": 24783 }, { "epoch": 0.6777510391599213, "grad_norm": 1.4385040998458862, "learning_rate": 4.970625586652258e-06, "loss": 0.4825, "step": 24784 }, { "epoch": 0.6777783854736381, "grad_norm": 1.379011869430542, "learning_rate": 4.969860072067104e-06, "loss": 0.3571, "step": 24785 }, { "epoch": 0.677805731787355, "grad_norm": 1.5200307369232178, "learning_rate": 4.969094596941988e-06, "loss": 0.448, "step": 24786 }, { "epoch": 0.677833078101072, "grad_norm": 1.4699615240097046, "learning_rate": 4.968329161282908e-06, "loss": 0.3313, "step": 24787 }, { "epoch": 0.6778604244147889, "grad_norm": 1.4544459581375122, "learning_rate": 4.967563765095865e-06, "loss": 0.5103, "step": 24788 }, { "epoch": 0.6778877707285058, "grad_norm": 1.2074939012527466, "learning_rate": 4.966798408386874e-06, "loss": 0.4961, "step": 24789 }, { "epoch": 0.6779151170422227, "grad_norm": 1.4267895221710205, "learning_rate": 4.966033091161932e-06, "loss": 0.4389, "step": 24790 }, { "epoch": 0.6779424633559397, "grad_norm": 1.1141257286071777, "learning_rate": 4.965267813427039e-06, "loss": 0.3254, "step": 24791 }, { "epoch": 0.6779698096696565, "grad_norm": 1.4053508043289185, "learning_rate": 4.964502575188207e-06, "loss": 0.4776, "step": 24792 }, { "epoch": 0.6779971559833734, "grad_norm": 2.006777286529541, "learning_rate": 4.963737376451436e-06, "loss": 0.4584, "step": 24793 }, { "epoch": 0.6780245022970903, "grad_norm": 1.8201583623886108, "learning_rate": 4.962972217222728e-06, "loss": 0.3618, "step": 24794 }, { "epoch": 0.6780518486108073, "grad_norm": 1.4556387662887573, "learning_rate": 4.962207097508081e-06, "loss": 0.4967, "step": 24795 }, { "epoch": 0.6780791949245242, "grad_norm": 1.3282307386398315, "learning_rate": 4.961442017313505e-06, "loss": 0.4683, "step": 24796 }, { "epoch": 0.6781065412382411, "grad_norm": 1.2708407640457153, "learning_rate": 4.9606769766450005e-06, "loss": 0.4842, "step": 24797 }, { "epoch": 0.678133887551958, "grad_norm": 1.3801790475845337, "learning_rate": 4.959911975508561e-06, "loss": 0.4817, "step": 24798 }, { "epoch": 0.678161233865675, "grad_norm": 1.331434726715088, "learning_rate": 4.959147013910203e-06, "loss": 0.4526, "step": 24799 }, { "epoch": 0.6781885801793918, "grad_norm": 1.1087294816970825, "learning_rate": 4.958382091855909e-06, "loss": 0.4675, "step": 24800 }, { "epoch": 0.6782159264931087, "grad_norm": 1.2712281942367554, "learning_rate": 4.957617209351693e-06, "loss": 0.4719, "step": 24801 }, { "epoch": 0.6782432728068256, "grad_norm": 1.2772843837738037, "learning_rate": 4.95685236640355e-06, "loss": 0.4468, "step": 24802 }, { "epoch": 0.6782706191205425, "grad_norm": 1.3344788551330566, "learning_rate": 4.956087563017482e-06, "loss": 0.4673, "step": 24803 }, { "epoch": 0.6782979654342595, "grad_norm": 1.3995599746704102, "learning_rate": 4.955322799199485e-06, "loss": 0.4712, "step": 24804 }, { "epoch": 0.6783253117479764, "grad_norm": 1.466914415359497, "learning_rate": 4.9545580749555626e-06, "loss": 0.7409, "step": 24805 }, { "epoch": 0.6783526580616933, "grad_norm": 1.6079668998718262, "learning_rate": 4.953793390291712e-06, "loss": 0.3161, "step": 24806 }, { "epoch": 0.6783800043754102, "grad_norm": 1.9034367799758911, "learning_rate": 4.95302874521393e-06, "loss": 0.4476, "step": 24807 }, { "epoch": 0.678407350689127, "grad_norm": 1.6282750368118286, "learning_rate": 4.95226413972822e-06, "loss": 0.4396, "step": 24808 }, { "epoch": 0.678434697002844, "grad_norm": 1.4521013498306274, "learning_rate": 4.951499573840578e-06, "loss": 0.4645, "step": 24809 }, { "epoch": 0.6784620433165609, "grad_norm": 1.476035714149475, "learning_rate": 4.950735047556996e-06, "loss": 0.4467, "step": 24810 }, { "epoch": 0.6784893896302778, "grad_norm": 1.5421631336212158, "learning_rate": 4.949970560883482e-06, "loss": 0.3326, "step": 24811 }, { "epoch": 0.6785167359439948, "grad_norm": 1.4148499965667725, "learning_rate": 4.949206113826026e-06, "loss": 0.4694, "step": 24812 }, { "epoch": 0.6785440822577117, "grad_norm": 1.28174889087677, "learning_rate": 4.948441706390624e-06, "loss": 0.4833, "step": 24813 }, { "epoch": 0.6785714285714286, "grad_norm": 1.3743317127227783, "learning_rate": 4.9476773385832786e-06, "loss": 0.4886, "step": 24814 }, { "epoch": 0.6785987748851455, "grad_norm": 1.4046568870544434, "learning_rate": 4.946913010409983e-06, "loss": 0.4986, "step": 24815 }, { "epoch": 0.6786261211988623, "grad_norm": 1.7019821405410767, "learning_rate": 4.9461487218767324e-06, "loss": 0.3669, "step": 24816 }, { "epoch": 0.6786534675125793, "grad_norm": 1.334228754043579, "learning_rate": 4.945384472989518e-06, "loss": 0.73, "step": 24817 }, { "epoch": 0.6786808138262962, "grad_norm": 1.3215446472167969, "learning_rate": 4.944620263754345e-06, "loss": 0.4502, "step": 24818 }, { "epoch": 0.6787081601400131, "grad_norm": 1.3724534511566162, "learning_rate": 4.943856094177203e-06, "loss": 0.464, "step": 24819 }, { "epoch": 0.67873550645373, "grad_norm": 1.3488906621932983, "learning_rate": 4.943091964264084e-06, "loss": 0.4866, "step": 24820 }, { "epoch": 0.678762852767447, "grad_norm": 1.3593708276748657, "learning_rate": 4.942327874020988e-06, "loss": 0.468, "step": 24821 }, { "epoch": 0.6787901990811639, "grad_norm": 1.478423833847046, "learning_rate": 4.941563823453908e-06, "loss": 0.5107, "step": 24822 }, { "epoch": 0.6788175453948808, "grad_norm": 1.3389192819595337, "learning_rate": 4.940799812568831e-06, "loss": 0.3364, "step": 24823 }, { "epoch": 0.6788448917085976, "grad_norm": 1.157379150390625, "learning_rate": 4.940035841371759e-06, "loss": 0.4409, "step": 24824 }, { "epoch": 0.6788722380223146, "grad_norm": 1.2637614011764526, "learning_rate": 4.939271909868684e-06, "loss": 0.4388, "step": 24825 }, { "epoch": 0.6788995843360315, "grad_norm": 1.7849329710006714, "learning_rate": 4.938508018065591e-06, "loss": 0.3207, "step": 24826 }, { "epoch": 0.6789269306497484, "grad_norm": 1.5236518383026123, "learning_rate": 4.937744165968482e-06, "loss": 0.7009, "step": 24827 }, { "epoch": 0.6789542769634653, "grad_norm": 1.1814712285995483, "learning_rate": 4.936980353583346e-06, "loss": 0.4747, "step": 24828 }, { "epoch": 0.6789816232771823, "grad_norm": 1.317155361175537, "learning_rate": 4.936216580916174e-06, "loss": 0.4592, "step": 24829 }, { "epoch": 0.6790089695908992, "grad_norm": 1.3649603128433228, "learning_rate": 4.935452847972955e-06, "loss": 0.4561, "step": 24830 }, { "epoch": 0.6790363159046161, "grad_norm": 1.2410032749176025, "learning_rate": 4.934689154759685e-06, "loss": 0.4662, "step": 24831 }, { "epoch": 0.6790636622183329, "grad_norm": 1.5946356058120728, "learning_rate": 4.9339255012823554e-06, "loss": 0.4668, "step": 24832 }, { "epoch": 0.6790910085320498, "grad_norm": 1.4589117765426636, "learning_rate": 4.933161887546949e-06, "loss": 0.4676, "step": 24833 }, { "epoch": 0.6791183548457668, "grad_norm": 1.3921271562576294, "learning_rate": 4.93239831355947e-06, "loss": 0.4711, "step": 24834 }, { "epoch": 0.6791457011594837, "grad_norm": 1.2438161373138428, "learning_rate": 4.931634779325892e-06, "loss": 0.4988, "step": 24835 }, { "epoch": 0.6791730474732006, "grad_norm": 1.3560755252838135, "learning_rate": 4.930871284852218e-06, "loss": 0.4929, "step": 24836 }, { "epoch": 0.6792003937869175, "grad_norm": 1.4000613689422607, "learning_rate": 4.93010783014443e-06, "loss": 0.433, "step": 24837 }, { "epoch": 0.6792277401006345, "grad_norm": 1.1993399858474731, "learning_rate": 4.929344415208521e-06, "loss": 0.7213, "step": 24838 }, { "epoch": 0.6792550864143514, "grad_norm": 1.2693583965301514, "learning_rate": 4.928581040050474e-06, "loss": 0.4628, "step": 24839 }, { "epoch": 0.6792824327280682, "grad_norm": 1.1898452043533325, "learning_rate": 4.927817704676284e-06, "loss": 0.4843, "step": 24840 }, { "epoch": 0.6793097790417851, "grad_norm": 1.2467557191848755, "learning_rate": 4.927054409091938e-06, "loss": 0.455, "step": 24841 }, { "epoch": 0.6793371253555021, "grad_norm": 1.3273353576660156, "learning_rate": 4.926291153303418e-06, "loss": 0.4651, "step": 24842 }, { "epoch": 0.679364471669219, "grad_norm": 1.1956369876861572, "learning_rate": 4.9255279373167206e-06, "loss": 0.4032, "step": 24843 }, { "epoch": 0.6793918179829359, "grad_norm": 1.2889220714569092, "learning_rate": 4.924764761137828e-06, "loss": 0.4752, "step": 24844 }, { "epoch": 0.6794191642966528, "grad_norm": 1.188118577003479, "learning_rate": 4.924001624772724e-06, "loss": 0.4455, "step": 24845 }, { "epoch": 0.6794465106103698, "grad_norm": 1.3484909534454346, "learning_rate": 4.923238528227402e-06, "loss": 0.4768, "step": 24846 }, { "epoch": 0.6794738569240867, "grad_norm": 1.4900401830673218, "learning_rate": 4.922475471507846e-06, "loss": 0.7241, "step": 24847 }, { "epoch": 0.6795012032378035, "grad_norm": 1.5794042348861694, "learning_rate": 4.921712454620036e-06, "loss": 0.5053, "step": 24848 }, { "epoch": 0.6795285495515204, "grad_norm": 1.5301469564437866, "learning_rate": 4.920949477569967e-06, "loss": 0.491, "step": 24849 }, { "epoch": 0.6795558958652373, "grad_norm": 1.2975873947143555, "learning_rate": 4.920186540363621e-06, "loss": 0.4749, "step": 24850 }, { "epoch": 0.6795832421789543, "grad_norm": 1.297890067100525, "learning_rate": 4.91942364300698e-06, "loss": 0.4799, "step": 24851 }, { "epoch": 0.6796105884926712, "grad_norm": 1.7683757543563843, "learning_rate": 4.918660785506027e-06, "loss": 0.3427, "step": 24852 }, { "epoch": 0.6796379348063881, "grad_norm": 2.1117637157440186, "learning_rate": 4.917897967866756e-06, "loss": 0.6939, "step": 24853 }, { "epoch": 0.679665281120105, "grad_norm": 1.2784332036972046, "learning_rate": 4.917135190095144e-06, "loss": 0.7003, "step": 24854 }, { "epoch": 0.679692627433822, "grad_norm": 1.722460150718689, "learning_rate": 4.916372452197172e-06, "loss": 0.3533, "step": 24855 }, { "epoch": 0.6797199737475388, "grad_norm": 1.79764986038208, "learning_rate": 4.915609754178832e-06, "loss": 0.7189, "step": 24856 }, { "epoch": 0.6797473200612557, "grad_norm": 1.2339845895767212, "learning_rate": 4.914847096046101e-06, "loss": 0.7098, "step": 24857 }, { "epoch": 0.6797746663749726, "grad_norm": 1.6901551485061646, "learning_rate": 4.914084477804961e-06, "loss": 0.3598, "step": 24858 }, { "epoch": 0.6798020126886896, "grad_norm": 1.1672937870025635, "learning_rate": 4.913321899461401e-06, "loss": 0.4425, "step": 24859 }, { "epoch": 0.6798293590024065, "grad_norm": 1.2790955305099487, "learning_rate": 4.912559361021398e-06, "loss": 0.4451, "step": 24860 }, { "epoch": 0.6798567053161234, "grad_norm": 1.577545166015625, "learning_rate": 4.911796862490933e-06, "loss": 0.314, "step": 24861 }, { "epoch": 0.6798840516298403, "grad_norm": 1.5855690240859985, "learning_rate": 4.911034403875992e-06, "loss": 0.3061, "step": 24862 }, { "epoch": 0.6799113979435573, "grad_norm": 1.3210829496383667, "learning_rate": 4.910271985182554e-06, "loss": 0.4935, "step": 24863 }, { "epoch": 0.6799387442572741, "grad_norm": 1.3048129081726074, "learning_rate": 4.909509606416597e-06, "loss": 0.5143, "step": 24864 }, { "epoch": 0.679966090570991, "grad_norm": 1.3050390481948853, "learning_rate": 4.908747267584108e-06, "loss": 0.4972, "step": 24865 }, { "epoch": 0.6799934368847079, "grad_norm": 1.185204267501831, "learning_rate": 4.907984968691063e-06, "loss": 0.4716, "step": 24866 }, { "epoch": 0.6800207831984248, "grad_norm": 1.1991053819656372, "learning_rate": 4.9072227097434445e-06, "loss": 0.4561, "step": 24867 }, { "epoch": 0.6800481295121418, "grad_norm": 1.16571044921875, "learning_rate": 4.906460490747225e-06, "loss": 0.4372, "step": 24868 }, { "epoch": 0.6800754758258587, "grad_norm": 1.210863709449768, "learning_rate": 4.905698311708399e-06, "loss": 0.5016, "step": 24869 }, { "epoch": 0.6801028221395756, "grad_norm": 1.5526269674301147, "learning_rate": 4.904936172632927e-06, "loss": 0.3173, "step": 24870 }, { "epoch": 0.6801301684532925, "grad_norm": 1.4483267068862915, "learning_rate": 4.904174073526802e-06, "loss": 0.4551, "step": 24871 }, { "epoch": 0.6801575147670094, "grad_norm": 1.2711328268051147, "learning_rate": 4.9034120143959965e-06, "loss": 0.5018, "step": 24872 }, { "epoch": 0.6801848610807263, "grad_norm": 1.2658612728118896, "learning_rate": 4.9026499952464845e-06, "loss": 0.4561, "step": 24873 }, { "epoch": 0.6802122073944432, "grad_norm": 1.2422131299972534, "learning_rate": 4.9018880160842545e-06, "loss": 0.4564, "step": 24874 }, { "epoch": 0.6802395537081601, "grad_norm": 1.3112645149230957, "learning_rate": 4.901126076915277e-06, "loss": 0.4975, "step": 24875 }, { "epoch": 0.6802669000218771, "grad_norm": 1.3084716796875, "learning_rate": 4.900364177745531e-06, "loss": 0.491, "step": 24876 }, { "epoch": 0.680294246335594, "grad_norm": 1.427595853805542, "learning_rate": 4.899602318580989e-06, "loss": 0.498, "step": 24877 }, { "epoch": 0.6803215926493109, "grad_norm": 1.237999677658081, "learning_rate": 4.898840499427635e-06, "loss": 0.4557, "step": 24878 }, { "epoch": 0.6803489389630278, "grad_norm": 1.621454119682312, "learning_rate": 4.8980787202914425e-06, "loss": 0.4649, "step": 24879 }, { "epoch": 0.6803762852767447, "grad_norm": 1.4575676918029785, "learning_rate": 4.897316981178382e-06, "loss": 0.7197, "step": 24880 }, { "epoch": 0.6804036315904616, "grad_norm": 1.7586075067520142, "learning_rate": 4.8965552820944374e-06, "loss": 0.4465, "step": 24881 }, { "epoch": 0.6804309779041785, "grad_norm": 1.8585060834884644, "learning_rate": 4.8957936230455814e-06, "loss": 0.4381, "step": 24882 }, { "epoch": 0.6804583242178954, "grad_norm": 1.3096463680267334, "learning_rate": 4.895032004037783e-06, "loss": 0.4436, "step": 24883 }, { "epoch": 0.6804856705316124, "grad_norm": 1.7845786809921265, "learning_rate": 4.894270425077027e-06, "loss": 0.3586, "step": 24884 }, { "epoch": 0.6805130168453293, "grad_norm": 1.2282928228378296, "learning_rate": 4.893508886169281e-06, "loss": 0.4308, "step": 24885 }, { "epoch": 0.6805403631590462, "grad_norm": 1.326462984085083, "learning_rate": 4.892747387320519e-06, "loss": 0.47, "step": 24886 }, { "epoch": 0.680567709472763, "grad_norm": 1.3375163078308105, "learning_rate": 4.891985928536719e-06, "loss": 0.5002, "step": 24887 }, { "epoch": 0.6805950557864799, "grad_norm": 1.3384231328964233, "learning_rate": 4.891224509823851e-06, "loss": 0.4896, "step": 24888 }, { "epoch": 0.6806224021001969, "grad_norm": 1.7841980457305908, "learning_rate": 4.89046313118789e-06, "loss": 0.3545, "step": 24889 }, { "epoch": 0.6806497484139138, "grad_norm": 1.2189875841140747, "learning_rate": 4.889701792634804e-06, "loss": 0.4977, "step": 24890 }, { "epoch": 0.6806770947276307, "grad_norm": 1.0667699575424194, "learning_rate": 4.888940494170573e-06, "loss": 0.4421, "step": 24891 }, { "epoch": 0.6807044410413476, "grad_norm": 1.8274723291397095, "learning_rate": 4.888179235801166e-06, "loss": 0.4774, "step": 24892 }, { "epoch": 0.6807317873550646, "grad_norm": 1.940161108970642, "learning_rate": 4.8874180175325495e-06, "loss": 0.7202, "step": 24893 }, { "epoch": 0.6807591336687815, "grad_norm": 1.3877756595611572, "learning_rate": 4.886656839370704e-06, "loss": 0.4434, "step": 24894 }, { "epoch": 0.6807864799824983, "grad_norm": 1.382799744606018, "learning_rate": 4.885895701321597e-06, "loss": 0.4584, "step": 24895 }, { "epoch": 0.6808138262962152, "grad_norm": 2.266244888305664, "learning_rate": 4.885134603391194e-06, "loss": 0.7017, "step": 24896 }, { "epoch": 0.6808411726099322, "grad_norm": 1.258328914642334, "learning_rate": 4.884373545585475e-06, "loss": 0.6879, "step": 24897 }, { "epoch": 0.6808685189236491, "grad_norm": 1.4373642206192017, "learning_rate": 4.883612527910407e-06, "loss": 0.4657, "step": 24898 }, { "epoch": 0.680895865237366, "grad_norm": 1.3560320138931274, "learning_rate": 4.882851550371954e-06, "loss": 0.448, "step": 24899 }, { "epoch": 0.6809232115510829, "grad_norm": 1.256790280342102, "learning_rate": 4.882090612976095e-06, "loss": 0.477, "step": 24900 }, { "epoch": 0.6809505578647999, "grad_norm": 1.2831696271896362, "learning_rate": 4.881329715728793e-06, "loss": 0.4321, "step": 24901 }, { "epoch": 0.6809779041785168, "grad_norm": 1.431820034980774, "learning_rate": 4.880568858636021e-06, "loss": 0.4932, "step": 24902 }, { "epoch": 0.6810052504922336, "grad_norm": 1.2783565521240234, "learning_rate": 4.8798080417037405e-06, "loss": 0.6899, "step": 24903 }, { "epoch": 0.6810325968059505, "grad_norm": 1.4189386367797852, "learning_rate": 4.8790472649379295e-06, "loss": 0.4584, "step": 24904 }, { "epoch": 0.6810599431196674, "grad_norm": 1.413580060005188, "learning_rate": 4.878286528344552e-06, "loss": 0.4955, "step": 24905 }, { "epoch": 0.6810872894333844, "grad_norm": 1.3781495094299316, "learning_rate": 4.8775258319295734e-06, "loss": 0.7214, "step": 24906 }, { "epoch": 0.6811146357471013, "grad_norm": 1.7010046243667603, "learning_rate": 4.876765175698964e-06, "loss": 0.5044, "step": 24907 }, { "epoch": 0.6811419820608182, "grad_norm": 1.1977431774139404, "learning_rate": 4.876004559658687e-06, "loss": 0.7169, "step": 24908 }, { "epoch": 0.6811693283745351, "grad_norm": 1.3182488679885864, "learning_rate": 4.875243983814715e-06, "loss": 0.498, "step": 24909 }, { "epoch": 0.6811966746882521, "grad_norm": 1.153938889503479, "learning_rate": 4.874483448173012e-06, "loss": 0.4531, "step": 24910 }, { "epoch": 0.6812240210019689, "grad_norm": 1.2527844905853271, "learning_rate": 4.8737229527395435e-06, "loss": 0.4925, "step": 24911 }, { "epoch": 0.6812513673156858, "grad_norm": 1.1856924295425415, "learning_rate": 4.872962497520273e-06, "loss": 0.7246, "step": 24912 }, { "epoch": 0.6812787136294027, "grad_norm": 1.393515944480896, "learning_rate": 4.872202082521172e-06, "loss": 0.4759, "step": 24913 }, { "epoch": 0.6813060599431197, "grad_norm": 1.2704678773880005, "learning_rate": 4.871441707748202e-06, "loss": 0.4718, "step": 24914 }, { "epoch": 0.6813334062568366, "grad_norm": 1.360910177230835, "learning_rate": 4.8706813732073264e-06, "loss": 0.3309, "step": 24915 }, { "epoch": 0.6813607525705535, "grad_norm": 1.4381402730941772, "learning_rate": 4.8699210789045145e-06, "loss": 0.4641, "step": 24916 }, { "epoch": 0.6813880988842704, "grad_norm": 1.2995907068252563, "learning_rate": 4.869160824845728e-06, "loss": 0.4611, "step": 24917 }, { "epoch": 0.6814154451979874, "grad_norm": 1.3246110677719116, "learning_rate": 4.868400611036928e-06, "loss": 0.4649, "step": 24918 }, { "epoch": 0.6814427915117042, "grad_norm": 1.544144868850708, "learning_rate": 4.867640437484083e-06, "loss": 0.4245, "step": 24919 }, { "epoch": 0.6814701378254211, "grad_norm": 1.2213956117630005, "learning_rate": 4.866880304193157e-06, "loss": 0.4634, "step": 24920 }, { "epoch": 0.681497484139138, "grad_norm": 1.8089027404785156, "learning_rate": 4.866120211170104e-06, "loss": 0.361, "step": 24921 }, { "epoch": 0.681524830452855, "grad_norm": 1.3178945779800415, "learning_rate": 4.865360158420899e-06, "loss": 0.468, "step": 24922 }, { "epoch": 0.6815521767665719, "grad_norm": 1.3731549978256226, "learning_rate": 4.864600145951497e-06, "loss": 0.4677, "step": 24923 }, { "epoch": 0.6815795230802888, "grad_norm": 1.4571475982666016, "learning_rate": 4.863840173767862e-06, "loss": 0.4509, "step": 24924 }, { "epoch": 0.6816068693940057, "grad_norm": 1.616565465927124, "learning_rate": 4.863080241875952e-06, "loss": 0.4731, "step": 24925 }, { "epoch": 0.6816342157077226, "grad_norm": 1.3239279985427856, "learning_rate": 4.862320350281736e-06, "loss": 0.455, "step": 24926 }, { "epoch": 0.6816615620214395, "grad_norm": 1.573945164680481, "learning_rate": 4.86156049899117e-06, "loss": 0.451, "step": 24927 }, { "epoch": 0.6816889083351564, "grad_norm": 1.6698328256607056, "learning_rate": 4.860800688010214e-06, "loss": 0.3738, "step": 24928 }, { "epoch": 0.6817162546488733, "grad_norm": 1.5096136331558228, "learning_rate": 4.8600409173448315e-06, "loss": 0.498, "step": 24929 }, { "epoch": 0.6817436009625902, "grad_norm": 1.2456481456756592, "learning_rate": 4.859281187000982e-06, "loss": 0.3157, "step": 24930 }, { "epoch": 0.6817709472763072, "grad_norm": 1.2233049869537354, "learning_rate": 4.858521496984623e-06, "loss": 0.4774, "step": 24931 }, { "epoch": 0.6817982935900241, "grad_norm": 1.1017217636108398, "learning_rate": 4.857761847301717e-06, "loss": 0.4209, "step": 24932 }, { "epoch": 0.681825639903741, "grad_norm": 1.750599980354309, "learning_rate": 4.857002237958224e-06, "loss": 0.3587, "step": 24933 }, { "epoch": 0.6818529862174579, "grad_norm": 1.7088767290115356, "learning_rate": 4.856242668960097e-06, "loss": 0.3331, "step": 24934 }, { "epoch": 0.6818803325311747, "grad_norm": 1.348885416984558, "learning_rate": 4.855483140313303e-06, "loss": 0.4825, "step": 24935 }, { "epoch": 0.6819076788448917, "grad_norm": 1.259704828262329, "learning_rate": 4.854723652023795e-06, "loss": 0.7336, "step": 24936 }, { "epoch": 0.6819350251586086, "grad_norm": 2.2157766819000244, "learning_rate": 4.853964204097532e-06, "loss": 0.7703, "step": 24937 }, { "epoch": 0.6819623714723255, "grad_norm": 1.3069028854370117, "learning_rate": 4.8532047965404684e-06, "loss": 0.4572, "step": 24938 }, { "epoch": 0.6819897177860424, "grad_norm": 1.326117992401123, "learning_rate": 4.8524454293585675e-06, "loss": 0.4776, "step": 24939 }, { "epoch": 0.6820170640997594, "grad_norm": 1.5633388757705688, "learning_rate": 4.851686102557784e-06, "loss": 0.4606, "step": 24940 }, { "epoch": 0.6820444104134763, "grad_norm": 1.3562037944793701, "learning_rate": 4.850926816144075e-06, "loss": 0.4763, "step": 24941 }, { "epoch": 0.6820717567271932, "grad_norm": 1.7176823616027832, "learning_rate": 4.850167570123395e-06, "loss": 0.4765, "step": 24942 }, { "epoch": 0.68209910304091, "grad_norm": 1.2879836559295654, "learning_rate": 4.8494083645016976e-06, "loss": 0.4774, "step": 24943 }, { "epoch": 0.682126449354627, "grad_norm": 1.2844411134719849, "learning_rate": 4.848649199284947e-06, "loss": 0.4779, "step": 24944 }, { "epoch": 0.6821537956683439, "grad_norm": 1.1569105386734009, "learning_rate": 4.847890074479093e-06, "loss": 0.7219, "step": 24945 }, { "epoch": 0.6821811419820608, "grad_norm": 1.6072301864624023, "learning_rate": 4.847130990090091e-06, "loss": 0.4317, "step": 24946 }, { "epoch": 0.6822084882957777, "grad_norm": 1.6496199369430542, "learning_rate": 4.8463719461238925e-06, "loss": 0.4675, "step": 24947 }, { "epoch": 0.6822358346094947, "grad_norm": 1.3825818300247192, "learning_rate": 4.84561294258646e-06, "loss": 0.4497, "step": 24948 }, { "epoch": 0.6822631809232116, "grad_norm": 1.3265581130981445, "learning_rate": 4.844853979483742e-06, "loss": 0.5018, "step": 24949 }, { "epoch": 0.6822905272369285, "grad_norm": 1.4461991786956787, "learning_rate": 4.844095056821691e-06, "loss": 0.4925, "step": 24950 }, { "epoch": 0.6823178735506453, "grad_norm": 1.371590256690979, "learning_rate": 4.843336174606268e-06, "loss": 0.4416, "step": 24951 }, { "epoch": 0.6823452198643623, "grad_norm": 1.3594876527786255, "learning_rate": 4.84257733284342e-06, "loss": 0.3607, "step": 24952 }, { "epoch": 0.6823725661780792, "grad_norm": 1.2457258701324463, "learning_rate": 4.841818531539099e-06, "loss": 0.4794, "step": 24953 }, { "epoch": 0.6823999124917961, "grad_norm": 1.2072292566299438, "learning_rate": 4.841059770699263e-06, "loss": 0.4522, "step": 24954 }, { "epoch": 0.682427258805513, "grad_norm": 1.4043976068496704, "learning_rate": 4.840301050329861e-06, "loss": 0.4727, "step": 24955 }, { "epoch": 0.68245460511923, "grad_norm": 1.771404504776001, "learning_rate": 4.839542370436841e-06, "loss": 0.477, "step": 24956 }, { "epoch": 0.6824819514329469, "grad_norm": 1.1983706951141357, "learning_rate": 4.838783731026163e-06, "loss": 0.4668, "step": 24957 }, { "epoch": 0.6825092977466638, "grad_norm": 1.5215567350387573, "learning_rate": 4.838025132103775e-06, "loss": 0.3257, "step": 24958 }, { "epoch": 0.6825366440603806, "grad_norm": 1.2538824081420898, "learning_rate": 4.8372665736756265e-06, "loss": 0.4905, "step": 24959 }, { "epoch": 0.6825639903740975, "grad_norm": 1.4018748998641968, "learning_rate": 4.836508055747665e-06, "loss": 0.4347, "step": 24960 }, { "epoch": 0.6825913366878145, "grad_norm": 1.6703803539276123, "learning_rate": 4.835749578325848e-06, "loss": 0.3629, "step": 24961 }, { "epoch": 0.6826186830015314, "grad_norm": 1.1102203130722046, "learning_rate": 4.834991141416123e-06, "loss": 0.4794, "step": 24962 }, { "epoch": 0.6826460293152483, "grad_norm": 1.4463557004928589, "learning_rate": 4.834232745024435e-06, "loss": 0.7143, "step": 24963 }, { "epoch": 0.6826733756289652, "grad_norm": 1.2165249586105347, "learning_rate": 4.833474389156741e-06, "loss": 0.4887, "step": 24964 }, { "epoch": 0.6827007219426822, "grad_norm": 1.1607314348220825, "learning_rate": 4.832716073818986e-06, "loss": 0.4672, "step": 24965 }, { "epoch": 0.6827280682563991, "grad_norm": 1.294227957725525, "learning_rate": 4.831957799017116e-06, "loss": 0.4735, "step": 24966 }, { "epoch": 0.6827554145701159, "grad_norm": 1.4292770624160767, "learning_rate": 4.831199564757087e-06, "loss": 0.3085, "step": 24967 }, { "epoch": 0.6827827608838328, "grad_norm": 1.2825692892074585, "learning_rate": 4.830441371044842e-06, "loss": 0.4921, "step": 24968 }, { "epoch": 0.6828101071975498, "grad_norm": 1.5275847911834717, "learning_rate": 4.829683217886325e-06, "loss": 0.4522, "step": 24969 }, { "epoch": 0.6828374535112667, "grad_norm": 1.3632150888442993, "learning_rate": 4.828925105287493e-06, "loss": 0.4661, "step": 24970 }, { "epoch": 0.6828647998249836, "grad_norm": 1.2849466800689697, "learning_rate": 4.8281670332542886e-06, "loss": 0.4557, "step": 24971 }, { "epoch": 0.6828921461387005, "grad_norm": 1.5385303497314453, "learning_rate": 4.827409001792653e-06, "loss": 0.3178, "step": 24972 }, { "epoch": 0.6829194924524175, "grad_norm": 1.5080987215042114, "learning_rate": 4.826651010908543e-06, "loss": 0.3449, "step": 24973 }, { "epoch": 0.6829468387661344, "grad_norm": 1.3127543926239014, "learning_rate": 4.8258930606079e-06, "loss": 0.4578, "step": 24974 }, { "epoch": 0.6829741850798512, "grad_norm": 1.2466175556182861, "learning_rate": 4.82513515089667e-06, "loss": 0.428, "step": 24975 }, { "epoch": 0.6830015313935681, "grad_norm": 1.6286245584487915, "learning_rate": 4.8243772817807975e-06, "loss": 0.4652, "step": 24976 }, { "epoch": 0.683028877707285, "grad_norm": 1.2518339157104492, "learning_rate": 4.823619453266229e-06, "loss": 0.3732, "step": 24977 }, { "epoch": 0.683056224021002, "grad_norm": 1.1291886568069458, "learning_rate": 4.822861665358907e-06, "loss": 0.4702, "step": 24978 }, { "epoch": 0.6830835703347189, "grad_norm": 1.243668794631958, "learning_rate": 4.82210391806478e-06, "loss": 0.7301, "step": 24979 }, { "epoch": 0.6831109166484358, "grad_norm": 2.203202962875366, "learning_rate": 4.821346211389791e-06, "loss": 0.3417, "step": 24980 }, { "epoch": 0.6831382629621527, "grad_norm": 1.1738946437835693, "learning_rate": 4.820588545339884e-06, "loss": 0.4488, "step": 24981 }, { "epoch": 0.6831656092758697, "grad_norm": 1.3966684341430664, "learning_rate": 4.8198309199209984e-06, "loss": 0.7263, "step": 24982 }, { "epoch": 0.6831929555895865, "grad_norm": 1.2778449058532715, "learning_rate": 4.819073335139084e-06, "loss": 0.4181, "step": 24983 }, { "epoch": 0.6832203019033034, "grad_norm": 1.5069353580474854, "learning_rate": 4.8183157910000825e-06, "loss": 0.466, "step": 24984 }, { "epoch": 0.6832476482170203, "grad_norm": 1.4680284261703491, "learning_rate": 4.817558287509931e-06, "loss": 0.476, "step": 24985 }, { "epoch": 0.6832749945307373, "grad_norm": 1.4511414766311646, "learning_rate": 4.816800824674581e-06, "loss": 0.4744, "step": 24986 }, { "epoch": 0.6833023408444542, "grad_norm": 1.1228325366973877, "learning_rate": 4.8160434024999695e-06, "loss": 0.4775, "step": 24987 }, { "epoch": 0.6833296871581711, "grad_norm": 1.425050973892212, "learning_rate": 4.815286020992034e-06, "loss": 0.463, "step": 24988 }, { "epoch": 0.683357033471888, "grad_norm": 1.4692500829696655, "learning_rate": 4.814528680156725e-06, "loss": 0.4747, "step": 24989 }, { "epoch": 0.6833843797856048, "grad_norm": 1.19050133228302, "learning_rate": 4.81377137999998e-06, "loss": 0.4841, "step": 24990 }, { "epoch": 0.6834117260993218, "grad_norm": 1.3160649538040161, "learning_rate": 4.813014120527734e-06, "loss": 0.4864, "step": 24991 }, { "epoch": 0.6834390724130387, "grad_norm": 1.6414165496826172, "learning_rate": 4.812256901745936e-06, "loss": 0.3447, "step": 24992 }, { "epoch": 0.6834664187267556, "grad_norm": 1.59601628780365, "learning_rate": 4.8114997236605235e-06, "loss": 0.4298, "step": 24993 }, { "epoch": 0.6834937650404725, "grad_norm": 1.5627529621124268, "learning_rate": 4.810742586277432e-06, "loss": 0.4644, "step": 24994 }, { "epoch": 0.6835211113541895, "grad_norm": 1.5673799514770508, "learning_rate": 4.809985489602606e-06, "loss": 0.3119, "step": 24995 }, { "epoch": 0.6835484576679064, "grad_norm": 1.3375434875488281, "learning_rate": 4.8092284336419855e-06, "loss": 0.4627, "step": 24996 }, { "epoch": 0.6835758039816233, "grad_norm": 1.2304160594940186, "learning_rate": 4.808471418401507e-06, "loss": 0.4537, "step": 24997 }, { "epoch": 0.6836031502953401, "grad_norm": 1.4060213565826416, "learning_rate": 4.807714443887105e-06, "loss": 0.4629, "step": 24998 }, { "epoch": 0.6836304966090571, "grad_norm": 1.2562075853347778, "learning_rate": 4.8069575101047264e-06, "loss": 0.4822, "step": 24999 }, { "epoch": 0.683657842922774, "grad_norm": 1.1801332235336304, "learning_rate": 4.8062006170603035e-06, "loss": 0.444, "step": 25000 }, { "epoch": 0.6836851892364909, "grad_norm": 1.636942744255066, "learning_rate": 4.805443764759772e-06, "loss": 0.3524, "step": 25001 }, { "epoch": 0.6837125355502078, "grad_norm": 1.38789701461792, "learning_rate": 4.804686953209076e-06, "loss": 0.5002, "step": 25002 }, { "epoch": 0.6837398818639248, "grad_norm": 1.3287791013717651, "learning_rate": 4.803930182414149e-06, "loss": 0.484, "step": 25003 }, { "epoch": 0.6837672281776417, "grad_norm": 1.1345947980880737, "learning_rate": 4.8031734523809224e-06, "loss": 0.452, "step": 25004 }, { "epoch": 0.6837945744913586, "grad_norm": 1.5747653245925903, "learning_rate": 4.802416763115343e-06, "loss": 0.47, "step": 25005 }, { "epoch": 0.6838219208050754, "grad_norm": 1.732642412185669, "learning_rate": 4.80166011462334e-06, "loss": 0.3778, "step": 25006 }, { "epoch": 0.6838492671187923, "grad_norm": 1.560639500617981, "learning_rate": 4.8009035069108474e-06, "loss": 0.3495, "step": 25007 }, { "epoch": 0.6838766134325093, "grad_norm": 1.4770252704620361, "learning_rate": 4.800146939983808e-06, "loss": 0.3262, "step": 25008 }, { "epoch": 0.6839039597462262, "grad_norm": 1.592971920967102, "learning_rate": 4.799390413848152e-06, "loss": 0.314, "step": 25009 }, { "epoch": 0.6839313060599431, "grad_norm": 1.5290054082870483, "learning_rate": 4.798633928509813e-06, "loss": 0.4614, "step": 25010 }, { "epoch": 0.68395865237366, "grad_norm": 1.4658299684524536, "learning_rate": 4.797877483974729e-06, "loss": 0.4508, "step": 25011 }, { "epoch": 0.683985998687377, "grad_norm": 1.22267746925354, "learning_rate": 4.7971210802488325e-06, "loss": 0.4821, "step": 25012 }, { "epoch": 0.6840133450010939, "grad_norm": 1.2584187984466553, "learning_rate": 4.796364717338051e-06, "loss": 0.4439, "step": 25013 }, { "epoch": 0.6840406913148107, "grad_norm": 1.3678505420684814, "learning_rate": 4.79560839524833e-06, "loss": 0.7064, "step": 25014 }, { "epoch": 0.6840680376285276, "grad_norm": 1.3421367406845093, "learning_rate": 4.7948521139855955e-06, "loss": 0.4711, "step": 25015 }, { "epoch": 0.6840953839422446, "grad_norm": 1.831657886505127, "learning_rate": 4.794095873555777e-06, "loss": 0.4169, "step": 25016 }, { "epoch": 0.6841227302559615, "grad_norm": 1.4474132061004639, "learning_rate": 4.793339673964815e-06, "loss": 0.4616, "step": 25017 }, { "epoch": 0.6841500765696784, "grad_norm": 1.5507298707962036, "learning_rate": 4.792583515218639e-06, "loss": 0.4156, "step": 25018 }, { "epoch": 0.6841774228833953, "grad_norm": 1.2999022006988525, "learning_rate": 4.791827397323178e-06, "loss": 0.4715, "step": 25019 }, { "epoch": 0.6842047691971123, "grad_norm": 1.5425059795379639, "learning_rate": 4.791071320284364e-06, "loss": 0.3443, "step": 25020 }, { "epoch": 0.6842321155108292, "grad_norm": 1.3563110828399658, "learning_rate": 4.790315284108133e-06, "loss": 0.4807, "step": 25021 }, { "epoch": 0.684259461824546, "grad_norm": 1.268467664718628, "learning_rate": 4.789559288800411e-06, "loss": 0.4696, "step": 25022 }, { "epoch": 0.6842868081382629, "grad_norm": 1.2962324619293213, "learning_rate": 4.788803334367127e-06, "loss": 0.6939, "step": 25023 }, { "epoch": 0.6843141544519799, "grad_norm": 1.3384255170822144, "learning_rate": 4.788047420814218e-06, "loss": 0.4375, "step": 25024 }, { "epoch": 0.6843415007656968, "grad_norm": 1.1964718103408813, "learning_rate": 4.78729154814761e-06, "loss": 0.4726, "step": 25025 }, { "epoch": 0.6843688470794137, "grad_norm": 1.207380771636963, "learning_rate": 4.786535716373229e-06, "loss": 0.4814, "step": 25026 }, { "epoch": 0.6843961933931306, "grad_norm": 1.3085445165634155, "learning_rate": 4.785779925497013e-06, "loss": 0.4841, "step": 25027 }, { "epoch": 0.6844235397068476, "grad_norm": 1.3144112825393677, "learning_rate": 4.785024175524885e-06, "loss": 0.698, "step": 25028 }, { "epoch": 0.6844508860205645, "grad_norm": 1.5545854568481445, "learning_rate": 4.784268466462771e-06, "loss": 0.4799, "step": 25029 }, { "epoch": 0.6844782323342813, "grad_norm": 1.5877258777618408, "learning_rate": 4.7835127983166074e-06, "loss": 0.4465, "step": 25030 }, { "epoch": 0.6845055786479982, "grad_norm": 1.338574767112732, "learning_rate": 4.782757171092317e-06, "loss": 0.5139, "step": 25031 }, { "epoch": 0.6845329249617151, "grad_norm": 1.7853730916976929, "learning_rate": 4.782001584795829e-06, "loss": 0.3806, "step": 25032 }, { "epoch": 0.6845602712754321, "grad_norm": 2.564127206802368, "learning_rate": 4.781246039433066e-06, "loss": 0.3258, "step": 25033 }, { "epoch": 0.684587617589149, "grad_norm": 1.2567484378814697, "learning_rate": 4.780490535009963e-06, "loss": 0.4582, "step": 25034 }, { "epoch": 0.6846149639028659, "grad_norm": 1.324777603149414, "learning_rate": 4.779735071532443e-06, "loss": 0.4448, "step": 25035 }, { "epoch": 0.6846423102165828, "grad_norm": 1.3658682107925415, "learning_rate": 4.778979649006429e-06, "loss": 0.7325, "step": 25036 }, { "epoch": 0.6846696565302998, "grad_norm": 1.2937986850738525, "learning_rate": 4.778224267437853e-06, "loss": 0.4856, "step": 25037 }, { "epoch": 0.6846970028440166, "grad_norm": 1.3372571468353271, "learning_rate": 4.777468926832638e-06, "loss": 0.4836, "step": 25038 }, { "epoch": 0.6847243491577335, "grad_norm": 1.2993321418762207, "learning_rate": 4.776713627196706e-06, "loss": 0.4483, "step": 25039 }, { "epoch": 0.6847516954714504, "grad_norm": 1.4096843004226685, "learning_rate": 4.77595836853599e-06, "loss": 0.4717, "step": 25040 }, { "epoch": 0.6847790417851674, "grad_norm": 0.9235270619392395, "learning_rate": 4.77520315085641e-06, "loss": 0.3237, "step": 25041 }, { "epoch": 0.6848063880988843, "grad_norm": 1.274994969367981, "learning_rate": 4.774447974163888e-06, "loss": 0.4748, "step": 25042 }, { "epoch": 0.6848337344126012, "grad_norm": 1.185494303703308, "learning_rate": 4.773692838464355e-06, "loss": 0.481, "step": 25043 }, { "epoch": 0.6848610807263181, "grad_norm": 1.2991795539855957, "learning_rate": 4.772937743763729e-06, "loss": 0.4451, "step": 25044 }, { "epoch": 0.684888427040035, "grad_norm": 1.3800939321517944, "learning_rate": 4.772182690067936e-06, "loss": 0.4698, "step": 25045 }, { "epoch": 0.6849157733537519, "grad_norm": 1.358887791633606, "learning_rate": 4.7714276773829e-06, "loss": 0.4642, "step": 25046 }, { "epoch": 0.6849431196674688, "grad_norm": 1.4016170501708984, "learning_rate": 4.77067270571454e-06, "loss": 0.4923, "step": 25047 }, { "epoch": 0.6849704659811857, "grad_norm": 1.4781980514526367, "learning_rate": 4.769917775068779e-06, "loss": 0.4586, "step": 25048 }, { "epoch": 0.6849978122949026, "grad_norm": 1.2486884593963623, "learning_rate": 4.769162885451545e-06, "loss": 0.4488, "step": 25049 }, { "epoch": 0.6850251586086196, "grad_norm": 1.2489265203475952, "learning_rate": 4.768408036868755e-06, "loss": 0.4952, "step": 25050 }, { "epoch": 0.6850525049223365, "grad_norm": 1.3997498750686646, "learning_rate": 4.767653229326328e-06, "loss": 0.5029, "step": 25051 }, { "epoch": 0.6850798512360534, "grad_norm": 1.2719568014144897, "learning_rate": 4.766898462830194e-06, "loss": 0.4859, "step": 25052 }, { "epoch": 0.6851071975497703, "grad_norm": 1.505835771560669, "learning_rate": 4.766143737386267e-06, "loss": 0.437, "step": 25053 }, { "epoch": 0.6851345438634872, "grad_norm": 1.5589174032211304, "learning_rate": 4.765389053000471e-06, "loss": 0.4763, "step": 25054 }, { "epoch": 0.6851618901772041, "grad_norm": 1.2243382930755615, "learning_rate": 4.76463440967872e-06, "loss": 0.4556, "step": 25055 }, { "epoch": 0.685189236490921, "grad_norm": 1.4866483211517334, "learning_rate": 4.763879807426943e-06, "loss": 0.4891, "step": 25056 }, { "epoch": 0.6852165828046379, "grad_norm": 1.322913408279419, "learning_rate": 4.763125246251054e-06, "loss": 0.4847, "step": 25057 }, { "epoch": 0.6852439291183549, "grad_norm": 1.4224036931991577, "learning_rate": 4.7623707261569715e-06, "loss": 0.4977, "step": 25058 }, { "epoch": 0.6852712754320718, "grad_norm": 1.3065265417099, "learning_rate": 4.761616247150621e-06, "loss": 0.5019, "step": 25059 }, { "epoch": 0.6852986217457887, "grad_norm": 1.224989414215088, "learning_rate": 4.760861809237916e-06, "loss": 0.4459, "step": 25060 }, { "epoch": 0.6853259680595056, "grad_norm": 1.2429704666137695, "learning_rate": 4.7601074124247715e-06, "loss": 0.4714, "step": 25061 }, { "epoch": 0.6853533143732224, "grad_norm": 1.56398606300354, "learning_rate": 4.759353056717113e-06, "loss": 0.4508, "step": 25062 }, { "epoch": 0.6853806606869394, "grad_norm": 1.2332203388214111, "learning_rate": 4.758598742120856e-06, "loss": 0.4966, "step": 25063 }, { "epoch": 0.6854080070006563, "grad_norm": 1.6158112287521362, "learning_rate": 4.7578444686419124e-06, "loss": 0.3239, "step": 25064 }, { "epoch": 0.6854353533143732, "grad_norm": 1.5648037195205688, "learning_rate": 4.7570902362862085e-06, "loss": 0.4425, "step": 25065 }, { "epoch": 0.6854626996280901, "grad_norm": 1.4569789171218872, "learning_rate": 4.7563360450596556e-06, "loss": 0.4676, "step": 25066 }, { "epoch": 0.6854900459418071, "grad_norm": 1.7220618724822998, "learning_rate": 4.75558189496817e-06, "loss": 0.3144, "step": 25067 }, { "epoch": 0.685517392255524, "grad_norm": 1.2552940845489502, "learning_rate": 4.754827786017665e-06, "loss": 0.479, "step": 25068 }, { "epoch": 0.6855447385692409, "grad_norm": 1.271765947341919, "learning_rate": 4.7540737182140635e-06, "loss": 0.7158, "step": 25069 }, { "epoch": 0.6855720848829577, "grad_norm": 1.429865837097168, "learning_rate": 4.753319691563278e-06, "loss": 0.4663, "step": 25070 }, { "epoch": 0.6855994311966747, "grad_norm": 1.5075219869613647, "learning_rate": 4.752565706071219e-06, "loss": 0.3494, "step": 25071 }, { "epoch": 0.6856267775103916, "grad_norm": 1.3367431163787842, "learning_rate": 4.75181176174381e-06, "loss": 0.3613, "step": 25072 }, { "epoch": 0.6856541238241085, "grad_norm": 1.2231124639511108, "learning_rate": 4.7510578585869595e-06, "loss": 0.676, "step": 25073 }, { "epoch": 0.6856814701378254, "grad_norm": 1.277562141418457, "learning_rate": 4.750303996606579e-06, "loss": 0.4508, "step": 25074 }, { "epoch": 0.6857088164515424, "grad_norm": 1.242859125137329, "learning_rate": 4.749550175808591e-06, "loss": 0.4812, "step": 25075 }, { "epoch": 0.6857361627652593, "grad_norm": 1.2722554206848145, "learning_rate": 4.748796396198904e-06, "loss": 0.442, "step": 25076 }, { "epoch": 0.6857635090789762, "grad_norm": 1.1266334056854248, "learning_rate": 4.748042657783427e-06, "loss": 0.4696, "step": 25077 }, { "epoch": 0.685790855392693, "grad_norm": 1.2248996496200562, "learning_rate": 4.747288960568081e-06, "loss": 0.4451, "step": 25078 }, { "epoch": 0.68581820170641, "grad_norm": 1.4942973852157593, "learning_rate": 4.746535304558776e-06, "loss": 0.3369, "step": 25079 }, { "epoch": 0.6858455480201269, "grad_norm": 1.7859091758728027, "learning_rate": 4.745781689761423e-06, "loss": 0.4811, "step": 25080 }, { "epoch": 0.6858728943338438, "grad_norm": 1.5241029262542725, "learning_rate": 4.745028116181934e-06, "loss": 0.4694, "step": 25081 }, { "epoch": 0.6859002406475607, "grad_norm": 1.1495356559753418, "learning_rate": 4.744274583826219e-06, "loss": 0.7056, "step": 25082 }, { "epoch": 0.6859275869612776, "grad_norm": 1.66530442237854, "learning_rate": 4.743521092700188e-06, "loss": 0.4598, "step": 25083 }, { "epoch": 0.6859549332749946, "grad_norm": 1.2892266511917114, "learning_rate": 4.742767642809759e-06, "loss": 0.4709, "step": 25084 }, { "epoch": 0.6859822795887115, "grad_norm": 1.475504755973816, "learning_rate": 4.742014234160837e-06, "loss": 0.4611, "step": 25085 }, { "epoch": 0.6860096259024283, "grad_norm": 1.307367205619812, "learning_rate": 4.741260866759332e-06, "loss": 0.7207, "step": 25086 }, { "epoch": 0.6860369722161452, "grad_norm": 1.5751062631607056, "learning_rate": 4.7405075406111576e-06, "loss": 0.5203, "step": 25087 }, { "epoch": 0.6860643185298622, "grad_norm": 1.436544418334961, "learning_rate": 4.739754255722222e-06, "loss": 0.4643, "step": 25088 }, { "epoch": 0.6860916648435791, "grad_norm": 1.4758143424987793, "learning_rate": 4.739001012098433e-06, "loss": 0.7241, "step": 25089 }, { "epoch": 0.686119011157296, "grad_norm": 1.356537103652954, "learning_rate": 4.738247809745697e-06, "loss": 0.4719, "step": 25090 }, { "epoch": 0.6861463574710129, "grad_norm": 1.3448312282562256, "learning_rate": 4.737494648669931e-06, "loss": 0.4652, "step": 25091 }, { "epoch": 0.6861737037847299, "grad_norm": 1.2110178470611572, "learning_rate": 4.736741528877038e-06, "loss": 0.424, "step": 25092 }, { "epoch": 0.6862010500984467, "grad_norm": 2.163322925567627, "learning_rate": 4.735988450372923e-06, "loss": 0.4372, "step": 25093 }, { "epoch": 0.6862283964121636, "grad_norm": 1.8495405912399292, "learning_rate": 4.735235413163501e-06, "loss": 0.4762, "step": 25094 }, { "epoch": 0.6862557427258805, "grad_norm": 1.4230282306671143, "learning_rate": 4.734482417254674e-06, "loss": 0.476, "step": 25095 }, { "epoch": 0.6862830890395974, "grad_norm": 1.2889957427978516, "learning_rate": 4.733729462652349e-06, "loss": 0.4806, "step": 25096 }, { "epoch": 0.6863104353533144, "grad_norm": 1.3976212739944458, "learning_rate": 4.732976549362437e-06, "loss": 0.4694, "step": 25097 }, { "epoch": 0.6863377816670313, "grad_norm": 1.3151553869247437, "learning_rate": 4.732223677390842e-06, "loss": 0.4404, "step": 25098 }, { "epoch": 0.6863651279807482, "grad_norm": 1.720499038696289, "learning_rate": 4.731470846743464e-06, "loss": 0.4455, "step": 25099 }, { "epoch": 0.6863924742944651, "grad_norm": 1.3487069606781006, "learning_rate": 4.730718057426221e-06, "loss": 0.4634, "step": 25100 }, { "epoch": 0.686419820608182, "grad_norm": 1.647554636001587, "learning_rate": 4.729965309445011e-06, "loss": 0.4571, "step": 25101 }, { "epoch": 0.6864471669218989, "grad_norm": 1.3761179447174072, "learning_rate": 4.729212602805736e-06, "loss": 0.485, "step": 25102 }, { "epoch": 0.6864745132356158, "grad_norm": 1.3282970190048218, "learning_rate": 4.7284599375143095e-06, "loss": 0.4938, "step": 25103 }, { "epoch": 0.6865018595493327, "grad_norm": 1.3445719480514526, "learning_rate": 4.72770731357663e-06, "loss": 0.4653, "step": 25104 }, { "epoch": 0.6865292058630497, "grad_norm": 1.3251148462295532, "learning_rate": 4.726954730998603e-06, "loss": 0.4659, "step": 25105 }, { "epoch": 0.6865565521767666, "grad_norm": 1.2646119594573975, "learning_rate": 4.726202189786129e-06, "loss": 0.4388, "step": 25106 }, { "epoch": 0.6865838984904835, "grad_norm": 1.150535225868225, "learning_rate": 4.725449689945118e-06, "loss": 0.4437, "step": 25107 }, { "epoch": 0.6866112448042004, "grad_norm": 1.589235782623291, "learning_rate": 4.72469723148147e-06, "loss": 0.4287, "step": 25108 }, { "epoch": 0.6866385911179173, "grad_norm": 1.7531819343566895, "learning_rate": 4.723944814401084e-06, "loss": 0.3302, "step": 25109 }, { "epoch": 0.6866659374316342, "grad_norm": 1.2307186126708984, "learning_rate": 4.723192438709869e-06, "loss": 0.4969, "step": 25110 }, { "epoch": 0.6866932837453511, "grad_norm": 1.22451651096344, "learning_rate": 4.722440104413726e-06, "loss": 0.4704, "step": 25111 }, { "epoch": 0.686720630059068, "grad_norm": 1.24177885055542, "learning_rate": 4.72168781151855e-06, "loss": 0.4474, "step": 25112 }, { "epoch": 0.686747976372785, "grad_norm": 1.2780393362045288, "learning_rate": 4.720935560030252e-06, "loss": 0.4666, "step": 25113 }, { "epoch": 0.6867753226865019, "grad_norm": 1.3854764699935913, "learning_rate": 4.720183349954729e-06, "loss": 0.4604, "step": 25114 }, { "epoch": 0.6868026690002188, "grad_norm": 1.5294737815856934, "learning_rate": 4.719431181297877e-06, "loss": 0.4625, "step": 25115 }, { "epoch": 0.6868300153139357, "grad_norm": 1.2095668315887451, "learning_rate": 4.71867905406561e-06, "loss": 0.4802, "step": 25116 }, { "epoch": 0.6868573616276525, "grad_norm": 1.4103457927703857, "learning_rate": 4.717926968263815e-06, "loss": 0.481, "step": 25117 }, { "epoch": 0.6868847079413695, "grad_norm": 1.336797833442688, "learning_rate": 4.717174923898393e-06, "loss": 0.4729, "step": 25118 }, { "epoch": 0.6869120542550864, "grad_norm": 1.3197236061096191, "learning_rate": 4.716422920975251e-06, "loss": 0.4661, "step": 25119 }, { "epoch": 0.6869394005688033, "grad_norm": 1.5847086906433105, "learning_rate": 4.715670959500284e-06, "loss": 0.3483, "step": 25120 }, { "epoch": 0.6869667468825202, "grad_norm": 1.3511232137680054, "learning_rate": 4.714919039479387e-06, "loss": 0.4956, "step": 25121 }, { "epoch": 0.6869940931962372, "grad_norm": 1.343075156211853, "learning_rate": 4.714167160918468e-06, "loss": 0.4754, "step": 25122 }, { "epoch": 0.6870214395099541, "grad_norm": 1.6137397289276123, "learning_rate": 4.713415323823418e-06, "loss": 0.3385, "step": 25123 }, { "epoch": 0.687048785823671, "grad_norm": 1.6538830995559692, "learning_rate": 4.712663528200136e-06, "loss": 0.4773, "step": 25124 }, { "epoch": 0.6870761321373878, "grad_norm": 1.5145232677459717, "learning_rate": 4.711911774054522e-06, "loss": 0.4712, "step": 25125 }, { "epoch": 0.6871034784511048, "grad_norm": 1.6755362749099731, "learning_rate": 4.7111600613924725e-06, "loss": 0.3446, "step": 25126 }, { "epoch": 0.6871308247648217, "grad_norm": 1.4218274354934692, "learning_rate": 4.710408390219884e-06, "loss": 0.732, "step": 25127 }, { "epoch": 0.6871581710785386, "grad_norm": 1.256890892982483, "learning_rate": 4.709656760542649e-06, "loss": 0.4808, "step": 25128 }, { "epoch": 0.6871855173922555, "grad_norm": 1.2151548862457275, "learning_rate": 4.7089051723666726e-06, "loss": 0.4513, "step": 25129 }, { "epoch": 0.6872128637059725, "grad_norm": 1.150455117225647, "learning_rate": 4.708153625697844e-06, "loss": 0.6839, "step": 25130 }, { "epoch": 0.6872402100196894, "grad_norm": 1.4699589014053345, "learning_rate": 4.70740212054206e-06, "loss": 0.4518, "step": 25131 }, { "epoch": 0.6872675563334063, "grad_norm": 1.29515540599823, "learning_rate": 4.706650656905218e-06, "loss": 0.4906, "step": 25132 }, { "epoch": 0.6872949026471231, "grad_norm": 1.2626594305038452, "learning_rate": 4.705899234793214e-06, "loss": 0.4783, "step": 25133 }, { "epoch": 0.68732224896084, "grad_norm": 1.2556955814361572, "learning_rate": 4.705147854211936e-06, "loss": 0.4581, "step": 25134 }, { "epoch": 0.687349595274557, "grad_norm": 1.6520328521728516, "learning_rate": 4.704396515167287e-06, "loss": 0.3268, "step": 25135 }, { "epoch": 0.6873769415882739, "grad_norm": 1.2675786018371582, "learning_rate": 4.703645217665157e-06, "loss": 0.3531, "step": 25136 }, { "epoch": 0.6874042879019908, "grad_norm": 1.32809317111969, "learning_rate": 4.702893961711436e-06, "loss": 0.7303, "step": 25137 }, { "epoch": 0.6874316342157077, "grad_norm": 1.3599246740341187, "learning_rate": 4.702142747312024e-06, "loss": 0.4717, "step": 25138 }, { "epoch": 0.6874589805294247, "grad_norm": 1.202743649482727, "learning_rate": 4.701391574472813e-06, "loss": 0.4714, "step": 25139 }, { "epoch": 0.6874863268431416, "grad_norm": 1.1656837463378906, "learning_rate": 4.700640443199692e-06, "loss": 0.3407, "step": 25140 }, { "epoch": 0.6875136731568584, "grad_norm": 1.2864420413970947, "learning_rate": 4.699889353498552e-06, "loss": 0.6995, "step": 25141 }, { "epoch": 0.6875410194705753, "grad_norm": 1.3706293106079102, "learning_rate": 4.699138305375294e-06, "loss": 0.4797, "step": 25142 }, { "epoch": 0.6875683657842923, "grad_norm": 1.5217548608779907, "learning_rate": 4.698387298835802e-06, "loss": 0.7362, "step": 25143 }, { "epoch": 0.6875957120980092, "grad_norm": 1.3503507375717163, "learning_rate": 4.697636333885967e-06, "loss": 0.4661, "step": 25144 }, { "epoch": 0.6876230584117261, "grad_norm": 1.225575566291809, "learning_rate": 4.696885410531686e-06, "loss": 0.3733, "step": 25145 }, { "epoch": 0.687650404725443, "grad_norm": 1.3822813034057617, "learning_rate": 4.696134528778847e-06, "loss": 0.4809, "step": 25146 }, { "epoch": 0.68767775103916, "grad_norm": 1.4135438203811646, "learning_rate": 4.695383688633336e-06, "loss": 0.4733, "step": 25147 }, { "epoch": 0.6877050973528769, "grad_norm": 1.2584280967712402, "learning_rate": 4.694632890101051e-06, "loss": 0.455, "step": 25148 }, { "epoch": 0.6877324436665937, "grad_norm": 1.2866171598434448, "learning_rate": 4.693882133187878e-06, "loss": 0.4611, "step": 25149 }, { "epoch": 0.6877597899803106, "grad_norm": 1.3808703422546387, "learning_rate": 4.693131417899702e-06, "loss": 0.4589, "step": 25150 }, { "epoch": 0.6877871362940275, "grad_norm": 1.3570772409439087, "learning_rate": 4.692380744242424e-06, "loss": 0.4531, "step": 25151 }, { "epoch": 0.6878144826077445, "grad_norm": 3.028839349746704, "learning_rate": 4.6916301122219236e-06, "loss": 0.3336, "step": 25152 }, { "epoch": 0.6878418289214614, "grad_norm": 1.4534015655517578, "learning_rate": 4.690879521844085e-06, "loss": 0.3434, "step": 25153 }, { "epoch": 0.6878691752351783, "grad_norm": 1.5991547107696533, "learning_rate": 4.690128973114807e-06, "loss": 0.4682, "step": 25154 }, { "epoch": 0.6878965215488952, "grad_norm": 1.2221142053604126, "learning_rate": 4.689378466039973e-06, "loss": 0.4696, "step": 25155 }, { "epoch": 0.6879238678626122, "grad_norm": 1.192549228668213, "learning_rate": 4.6886280006254655e-06, "loss": 0.4612, "step": 25156 }, { "epoch": 0.687951214176329, "grad_norm": 1.260317087173462, "learning_rate": 4.687877576877183e-06, "loss": 0.4688, "step": 25157 }, { "epoch": 0.6879785604900459, "grad_norm": 1.0934178829193115, "learning_rate": 4.687127194801003e-06, "loss": 0.4275, "step": 25158 }, { "epoch": 0.6880059068037628, "grad_norm": 1.3665351867675781, "learning_rate": 4.686376854402813e-06, "loss": 0.4566, "step": 25159 }, { "epoch": 0.6880332531174798, "grad_norm": 2.2307863235473633, "learning_rate": 4.685626555688505e-06, "loss": 0.4832, "step": 25160 }, { "epoch": 0.6880605994311967, "grad_norm": 1.4154597520828247, "learning_rate": 4.6848762986639595e-06, "loss": 0.4818, "step": 25161 }, { "epoch": 0.6880879457449136, "grad_norm": 2.827397346496582, "learning_rate": 4.6841260833350655e-06, "loss": 0.7258, "step": 25162 }, { "epoch": 0.6881152920586305, "grad_norm": 1.6660453081130981, "learning_rate": 4.683375909707702e-06, "loss": 0.462, "step": 25163 }, { "epoch": 0.6881426383723475, "grad_norm": 1.1861153841018677, "learning_rate": 4.6826257777877615e-06, "loss": 0.4667, "step": 25164 }, { "epoch": 0.6881699846860643, "grad_norm": 1.2325133085250854, "learning_rate": 4.681875687581125e-06, "loss": 0.4315, "step": 25165 }, { "epoch": 0.6881973309997812, "grad_norm": 1.537800669670105, "learning_rate": 4.6811256390936745e-06, "loss": 0.4713, "step": 25166 }, { "epoch": 0.6882246773134981, "grad_norm": 1.2963289022445679, "learning_rate": 4.6803756323313e-06, "loss": 0.4498, "step": 25167 }, { "epoch": 0.688252023627215, "grad_norm": 1.6177475452423096, "learning_rate": 4.679625667299881e-06, "loss": 0.6855, "step": 25168 }, { "epoch": 0.688279369940932, "grad_norm": 1.3489755392074585, "learning_rate": 4.678875744005298e-06, "loss": 0.4492, "step": 25169 }, { "epoch": 0.6883067162546489, "grad_norm": 1.341111421585083, "learning_rate": 4.678125862453441e-06, "loss": 0.5027, "step": 25170 }, { "epoch": 0.6883340625683658, "grad_norm": 1.9321987628936768, "learning_rate": 4.677376022650188e-06, "loss": 0.3579, "step": 25171 }, { "epoch": 0.6883614088820827, "grad_norm": 1.6058831214904785, "learning_rate": 4.67662622460142e-06, "loss": 0.322, "step": 25172 }, { "epoch": 0.6883887551957996, "grad_norm": 1.9221680164337158, "learning_rate": 4.675876468313025e-06, "loss": 0.3697, "step": 25173 }, { "epoch": 0.6884161015095165, "grad_norm": 1.1717523336410522, "learning_rate": 4.675126753790879e-06, "loss": 0.4632, "step": 25174 }, { "epoch": 0.6884434478232334, "grad_norm": 1.2973524332046509, "learning_rate": 4.674377081040866e-06, "loss": 0.4655, "step": 25175 }, { "epoch": 0.6884707941369503, "grad_norm": 1.162348985671997, "learning_rate": 4.673627450068863e-06, "loss": 0.459, "step": 25176 }, { "epoch": 0.6884981404506673, "grad_norm": 1.3965007066726685, "learning_rate": 4.672877860880757e-06, "loss": 0.4765, "step": 25177 }, { "epoch": 0.6885254867643842, "grad_norm": 1.3902517557144165, "learning_rate": 4.6721283134824245e-06, "loss": 0.4869, "step": 25178 }, { "epoch": 0.6885528330781011, "grad_norm": 1.5957062244415283, "learning_rate": 4.6713788078797426e-06, "loss": 0.4398, "step": 25179 }, { "epoch": 0.688580179391818, "grad_norm": 1.2436888217926025, "learning_rate": 4.670629344078597e-06, "loss": 0.5002, "step": 25180 }, { "epoch": 0.6886075257055349, "grad_norm": 1.3326398134231567, "learning_rate": 4.669879922084866e-06, "loss": 0.4503, "step": 25181 }, { "epoch": 0.6886348720192518, "grad_norm": 1.525697112083435, "learning_rate": 4.669130541904423e-06, "loss": 0.3348, "step": 25182 }, { "epoch": 0.6886622183329687, "grad_norm": 1.1142932176589966, "learning_rate": 4.6683812035431545e-06, "loss": 0.4799, "step": 25183 }, { "epoch": 0.6886895646466856, "grad_norm": 1.2496777772903442, "learning_rate": 4.6676319070069345e-06, "loss": 0.4644, "step": 25184 }, { "epoch": 0.6887169109604026, "grad_norm": 1.6045098304748535, "learning_rate": 4.666882652301637e-06, "loss": 0.3329, "step": 25185 }, { "epoch": 0.6887442572741195, "grad_norm": 1.2795569896697998, "learning_rate": 4.666133439433152e-06, "loss": 0.489, "step": 25186 }, { "epoch": 0.6887716035878364, "grad_norm": 1.6836930513381958, "learning_rate": 4.665384268407347e-06, "loss": 0.4686, "step": 25187 }, { "epoch": 0.6887989499015533, "grad_norm": 1.255860686302185, "learning_rate": 4.6646351392300955e-06, "loss": 0.476, "step": 25188 }, { "epoch": 0.6888262962152701, "grad_norm": 1.3741132020950317, "learning_rate": 4.663886051907285e-06, "loss": 0.4112, "step": 25189 }, { "epoch": 0.6888536425289871, "grad_norm": 1.1421946287155151, "learning_rate": 4.6631370064447855e-06, "loss": 0.4825, "step": 25190 }, { "epoch": 0.688880988842704, "grad_norm": 1.2845779657363892, "learning_rate": 4.662388002848471e-06, "loss": 0.4236, "step": 25191 }, { "epoch": 0.6889083351564209, "grad_norm": 1.8557424545288086, "learning_rate": 4.661639041124224e-06, "loss": 0.3276, "step": 25192 }, { "epoch": 0.6889356814701378, "grad_norm": 1.364208698272705, "learning_rate": 4.660890121277916e-06, "loss": 0.4659, "step": 25193 }, { "epoch": 0.6889630277838548, "grad_norm": 1.3085849285125732, "learning_rate": 4.66014124331542e-06, "loss": 0.4815, "step": 25194 }, { "epoch": 0.6889903740975717, "grad_norm": 1.2518447637557983, "learning_rate": 4.659392407242616e-06, "loss": 0.448, "step": 25195 }, { "epoch": 0.6890177204112885, "grad_norm": 2.6357665061950684, "learning_rate": 4.658643613065377e-06, "loss": 0.4823, "step": 25196 }, { "epoch": 0.6890450667250054, "grad_norm": 1.3113226890563965, "learning_rate": 4.657894860789575e-06, "loss": 0.4892, "step": 25197 }, { "epoch": 0.6890724130387224, "grad_norm": 1.4027869701385498, "learning_rate": 4.657146150421079e-06, "loss": 0.5097, "step": 25198 }, { "epoch": 0.6890997593524393, "grad_norm": 1.262738585472107, "learning_rate": 4.656397481965773e-06, "loss": 0.4535, "step": 25199 }, { "epoch": 0.6891271056661562, "grad_norm": 1.2765545845031738, "learning_rate": 4.655648855429525e-06, "loss": 0.4652, "step": 25200 }, { "epoch": 0.6891544519798731, "grad_norm": 1.5047669410705566, "learning_rate": 4.654900270818204e-06, "loss": 0.4488, "step": 25201 }, { "epoch": 0.68918179829359, "grad_norm": 1.2427195310592651, "learning_rate": 4.654151728137689e-06, "loss": 0.4587, "step": 25202 }, { "epoch": 0.689209144607307, "grad_norm": 1.593082308769226, "learning_rate": 4.65340322739385e-06, "loss": 0.4538, "step": 25203 }, { "epoch": 0.6892364909210238, "grad_norm": 1.1982910633087158, "learning_rate": 4.6526547685925535e-06, "loss": 0.4841, "step": 25204 }, { "epoch": 0.6892638372347407, "grad_norm": 1.4693458080291748, "learning_rate": 4.651906351739681e-06, "loss": 0.4688, "step": 25205 }, { "epoch": 0.6892911835484576, "grad_norm": 1.4671556949615479, "learning_rate": 4.651157976841097e-06, "loss": 0.4465, "step": 25206 }, { "epoch": 0.6893185298621746, "grad_norm": 1.3383073806762695, "learning_rate": 4.650409643902669e-06, "loss": 0.4481, "step": 25207 }, { "epoch": 0.6893458761758915, "grad_norm": 1.1692664623260498, "learning_rate": 4.649661352930276e-06, "loss": 0.4753, "step": 25208 }, { "epoch": 0.6893732224896084, "grad_norm": 1.5679432153701782, "learning_rate": 4.648913103929784e-06, "loss": 0.4963, "step": 25209 }, { "epoch": 0.6894005688033253, "grad_norm": 1.4075579643249512, "learning_rate": 4.648164896907063e-06, "loss": 0.4794, "step": 25210 }, { "epoch": 0.6894279151170423, "grad_norm": 1.4740723371505737, "learning_rate": 4.647416731867979e-06, "loss": 0.4617, "step": 25211 }, { "epoch": 0.6894552614307591, "grad_norm": 1.2317962646484375, "learning_rate": 4.646668608818406e-06, "loss": 0.3224, "step": 25212 }, { "epoch": 0.689482607744476, "grad_norm": 1.3902193307876587, "learning_rate": 4.645920527764214e-06, "loss": 0.4606, "step": 25213 }, { "epoch": 0.6895099540581929, "grad_norm": 1.246699333190918, "learning_rate": 4.645172488711264e-06, "loss": 0.4609, "step": 25214 }, { "epoch": 0.6895373003719099, "grad_norm": 1.202996015548706, "learning_rate": 4.644424491665432e-06, "loss": 0.4757, "step": 25215 }, { "epoch": 0.6895646466856268, "grad_norm": 1.5537145137786865, "learning_rate": 4.643676536632582e-06, "loss": 0.424, "step": 25216 }, { "epoch": 0.6895919929993437, "grad_norm": 1.4563130140304565, "learning_rate": 4.64292862361858e-06, "loss": 0.3279, "step": 25217 }, { "epoch": 0.6896193393130606, "grad_norm": 1.3419408798217773, "learning_rate": 4.642180752629299e-06, "loss": 0.4003, "step": 25218 }, { "epoch": 0.6896466856267776, "grad_norm": 1.5067616701126099, "learning_rate": 4.641432923670601e-06, "loss": 0.4104, "step": 25219 }, { "epoch": 0.6896740319404944, "grad_norm": 1.7674332857131958, "learning_rate": 4.640685136748351e-06, "loss": 0.3706, "step": 25220 }, { "epoch": 0.6897013782542113, "grad_norm": 1.3492416143417358, "learning_rate": 4.639937391868424e-06, "loss": 0.4858, "step": 25221 }, { "epoch": 0.6897287245679282, "grad_norm": 1.7823216915130615, "learning_rate": 4.639189689036677e-06, "loss": 0.3311, "step": 25222 }, { "epoch": 0.6897560708816451, "grad_norm": 1.4079033136367798, "learning_rate": 4.638442028258975e-06, "loss": 0.4686, "step": 25223 }, { "epoch": 0.6897834171953621, "grad_norm": 1.288098931312561, "learning_rate": 4.637694409541189e-06, "loss": 0.4981, "step": 25224 }, { "epoch": 0.689810763509079, "grad_norm": 1.2108943462371826, "learning_rate": 4.636946832889182e-06, "loss": 0.6628, "step": 25225 }, { "epoch": 0.6898381098227959, "grad_norm": 1.2748667001724243, "learning_rate": 4.636199298308813e-06, "loss": 0.4828, "step": 25226 }, { "epoch": 0.6898654561365128, "grad_norm": 1.198813796043396, "learning_rate": 4.635451805805956e-06, "loss": 0.7069, "step": 25227 }, { "epoch": 0.6898928024502297, "grad_norm": 1.2502176761627197, "learning_rate": 4.634704355386469e-06, "loss": 0.4207, "step": 25228 }, { "epoch": 0.6899201487639466, "grad_norm": 1.2908689975738525, "learning_rate": 4.633956947056213e-06, "loss": 0.4512, "step": 25229 }, { "epoch": 0.6899474950776635, "grad_norm": 1.3369638919830322, "learning_rate": 4.633209580821058e-06, "loss": 0.4625, "step": 25230 }, { "epoch": 0.6899748413913804, "grad_norm": 1.3921973705291748, "learning_rate": 4.6324622566868645e-06, "loss": 0.5002, "step": 25231 }, { "epoch": 0.6900021877050974, "grad_norm": 1.2426966428756714, "learning_rate": 4.6317149746594935e-06, "loss": 0.4713, "step": 25232 }, { "epoch": 0.6900295340188143, "grad_norm": 1.7025777101516724, "learning_rate": 4.630967734744805e-06, "loss": 0.3602, "step": 25233 }, { "epoch": 0.6900568803325312, "grad_norm": 1.8411725759506226, "learning_rate": 4.630220536948665e-06, "loss": 0.3587, "step": 25234 }, { "epoch": 0.6900842266462481, "grad_norm": 1.446608543395996, "learning_rate": 4.629473381276935e-06, "loss": 0.4612, "step": 25235 }, { "epoch": 0.690111572959965, "grad_norm": 1.25681471824646, "learning_rate": 4.628726267735472e-06, "loss": 0.4657, "step": 25236 }, { "epoch": 0.6901389192736819, "grad_norm": 1.217933177947998, "learning_rate": 4.627979196330142e-06, "loss": 0.4488, "step": 25237 }, { "epoch": 0.6901662655873988, "grad_norm": 2.5596373081207275, "learning_rate": 4.627232167066805e-06, "loss": 0.4913, "step": 25238 }, { "epoch": 0.6901936119011157, "grad_norm": 1.2199615240097046, "learning_rate": 4.626485179951316e-06, "loss": 0.4828, "step": 25239 }, { "epoch": 0.6902209582148326, "grad_norm": 1.727656602859497, "learning_rate": 4.625738234989541e-06, "loss": 0.3708, "step": 25240 }, { "epoch": 0.6902483045285496, "grad_norm": 1.2344635725021362, "learning_rate": 4.6249913321873375e-06, "loss": 0.4603, "step": 25241 }, { "epoch": 0.6902756508422665, "grad_norm": 1.3440678119659424, "learning_rate": 4.624244471550559e-06, "loss": 0.4563, "step": 25242 }, { "epoch": 0.6903029971559834, "grad_norm": 1.3490750789642334, "learning_rate": 4.6234976530850766e-06, "loss": 0.4138, "step": 25243 }, { "epoch": 0.6903303434697002, "grad_norm": 1.5534663200378418, "learning_rate": 4.62275087679674e-06, "loss": 0.5312, "step": 25244 }, { "epoch": 0.6903576897834172, "grad_norm": 1.6568152904510498, "learning_rate": 4.622004142691407e-06, "loss": 0.3198, "step": 25245 }, { "epoch": 0.6903850360971341, "grad_norm": 1.4294071197509766, "learning_rate": 4.621257450774941e-06, "loss": 0.4675, "step": 25246 }, { "epoch": 0.690412382410851, "grad_norm": 1.3599809408187866, "learning_rate": 4.620510801053196e-06, "loss": 0.449, "step": 25247 }, { "epoch": 0.6904397287245679, "grad_norm": 1.1438730955123901, "learning_rate": 4.61976419353203e-06, "loss": 0.7241, "step": 25248 }, { "epoch": 0.6904670750382849, "grad_norm": 1.4966213703155518, "learning_rate": 4.619017628217297e-06, "loss": 0.4748, "step": 25249 }, { "epoch": 0.6904944213520018, "grad_norm": 1.332873821258545, "learning_rate": 4.6182711051148585e-06, "loss": 0.5028, "step": 25250 }, { "epoch": 0.6905217676657187, "grad_norm": 1.3516000509262085, "learning_rate": 4.617524624230571e-06, "loss": 0.7038, "step": 25251 }, { "epoch": 0.6905491139794355, "grad_norm": 1.5767625570297241, "learning_rate": 4.616778185570282e-06, "loss": 0.3374, "step": 25252 }, { "epoch": 0.6905764602931524, "grad_norm": 1.589795708656311, "learning_rate": 4.616031789139859e-06, "loss": 0.3453, "step": 25253 }, { "epoch": 0.6906038066068694, "grad_norm": 1.345805048942566, "learning_rate": 4.615285434945149e-06, "loss": 0.4577, "step": 25254 }, { "epoch": 0.6906311529205863, "grad_norm": 1.1288819313049316, "learning_rate": 4.614539122992008e-06, "loss": 0.3488, "step": 25255 }, { "epoch": 0.6906584992343032, "grad_norm": 1.3695127964019775, "learning_rate": 4.613792853286299e-06, "loss": 0.4679, "step": 25256 }, { "epoch": 0.6906858455480201, "grad_norm": 1.405346155166626, "learning_rate": 4.613046625833865e-06, "loss": 0.4265, "step": 25257 }, { "epoch": 0.6907131918617371, "grad_norm": 1.3414307832717896, "learning_rate": 4.612300440640559e-06, "loss": 0.4683, "step": 25258 }, { "epoch": 0.690740538175454, "grad_norm": 1.303228735923767, "learning_rate": 4.611554297712245e-06, "loss": 0.4561, "step": 25259 }, { "epoch": 0.6907678844891708, "grad_norm": 1.4241811037063599, "learning_rate": 4.610808197054772e-06, "loss": 0.4804, "step": 25260 }, { "epoch": 0.6907952308028877, "grad_norm": 1.2564027309417725, "learning_rate": 4.610062138673987e-06, "loss": 0.4976, "step": 25261 }, { "epoch": 0.6908225771166047, "grad_norm": 1.4085299968719482, "learning_rate": 4.609316122575752e-06, "loss": 0.4152, "step": 25262 }, { "epoch": 0.6908499234303216, "grad_norm": 1.6057285070419312, "learning_rate": 4.608570148765914e-06, "loss": 0.4851, "step": 25263 }, { "epoch": 0.6908772697440385, "grad_norm": 1.2348124980926514, "learning_rate": 4.607824217250323e-06, "loss": 0.4687, "step": 25264 }, { "epoch": 0.6909046160577554, "grad_norm": 1.33854079246521, "learning_rate": 4.607078328034837e-06, "loss": 0.4597, "step": 25265 }, { "epoch": 0.6909319623714724, "grad_norm": 1.3461756706237793, "learning_rate": 4.606332481125304e-06, "loss": 0.4707, "step": 25266 }, { "epoch": 0.6909593086851893, "grad_norm": 1.5391446352005005, "learning_rate": 4.605586676527571e-06, "loss": 0.3358, "step": 25267 }, { "epoch": 0.6909866549989061, "grad_norm": 1.7013702392578125, "learning_rate": 4.6048409142474974e-06, "loss": 0.3589, "step": 25268 }, { "epoch": 0.691014001312623, "grad_norm": 1.5153231620788574, "learning_rate": 4.604095194290926e-06, "loss": 0.4219, "step": 25269 }, { "epoch": 0.69104134762634, "grad_norm": 1.5014222860336304, "learning_rate": 4.603349516663711e-06, "loss": 0.4603, "step": 25270 }, { "epoch": 0.6910686939400569, "grad_norm": 1.311290979385376, "learning_rate": 4.602603881371696e-06, "loss": 0.455, "step": 25271 }, { "epoch": 0.6910960402537738, "grad_norm": 1.168303370475769, "learning_rate": 4.6018582884207384e-06, "loss": 0.4502, "step": 25272 }, { "epoch": 0.6911233865674907, "grad_norm": 1.1462388038635254, "learning_rate": 4.601112737816683e-06, "loss": 0.3211, "step": 25273 }, { "epoch": 0.6911507328812077, "grad_norm": 1.8161593675613403, "learning_rate": 4.6003672295653755e-06, "loss": 0.3064, "step": 25274 }, { "epoch": 0.6911780791949246, "grad_norm": 1.1051549911499023, "learning_rate": 4.5996217636726715e-06, "loss": 0.3732, "step": 25275 }, { "epoch": 0.6912054255086414, "grad_norm": 1.7214088439941406, "learning_rate": 4.598876340144415e-06, "loss": 0.3373, "step": 25276 }, { "epoch": 0.6912327718223583, "grad_norm": 1.2809548377990723, "learning_rate": 4.5981309589864486e-06, "loss": 0.4852, "step": 25277 }, { "epoch": 0.6912601181360752, "grad_norm": 1.314764380455017, "learning_rate": 4.59738562020463e-06, "loss": 0.4609, "step": 25278 }, { "epoch": 0.6912874644497922, "grad_norm": 1.467910885810852, "learning_rate": 4.5966403238048e-06, "loss": 0.4584, "step": 25279 }, { "epoch": 0.6913148107635091, "grad_norm": 1.604104995727539, "learning_rate": 4.595895069792802e-06, "loss": 0.485, "step": 25280 }, { "epoch": 0.691342157077226, "grad_norm": 1.1143461465835571, "learning_rate": 4.59514985817449e-06, "loss": 0.4713, "step": 25281 }, { "epoch": 0.6913695033909429, "grad_norm": 1.3392010927200317, "learning_rate": 4.594404688955707e-06, "loss": 0.5089, "step": 25282 }, { "epoch": 0.6913968497046599, "grad_norm": 1.139788031578064, "learning_rate": 4.5936595621422975e-06, "loss": 0.4541, "step": 25283 }, { "epoch": 0.6914241960183767, "grad_norm": 1.230902910232544, "learning_rate": 4.592914477740104e-06, "loss": 0.445, "step": 25284 }, { "epoch": 0.6914515423320936, "grad_norm": 1.4259313344955444, "learning_rate": 4.5921694357549774e-06, "loss": 0.4622, "step": 25285 }, { "epoch": 0.6914788886458105, "grad_norm": 2.054630756378174, "learning_rate": 4.591424436192761e-06, "loss": 0.475, "step": 25286 }, { "epoch": 0.6915062349595275, "grad_norm": 2.162787675857544, "learning_rate": 4.590679479059294e-06, "loss": 0.4817, "step": 25287 }, { "epoch": 0.6915335812732444, "grad_norm": 1.1842674016952515, "learning_rate": 4.589934564360426e-06, "loss": 0.4155, "step": 25288 }, { "epoch": 0.6915609275869613, "grad_norm": 1.2093756198883057, "learning_rate": 4.589189692102001e-06, "loss": 0.4777, "step": 25289 }, { "epoch": 0.6915882739006782, "grad_norm": 1.295800805091858, "learning_rate": 4.588444862289855e-06, "loss": 0.4595, "step": 25290 }, { "epoch": 0.6916156202143952, "grad_norm": 1.3021397590637207, "learning_rate": 4.587700074929841e-06, "loss": 0.4501, "step": 25291 }, { "epoch": 0.691642966528112, "grad_norm": 1.3042933940887451, "learning_rate": 4.5869553300277995e-06, "loss": 0.4813, "step": 25292 }, { "epoch": 0.6916703128418289, "grad_norm": 1.3068665266036987, "learning_rate": 4.586210627589564e-06, "loss": 0.4712, "step": 25293 }, { "epoch": 0.6916976591555458, "grad_norm": 1.3363144397735596, "learning_rate": 4.585465967620985e-06, "loss": 0.4922, "step": 25294 }, { "epoch": 0.6917250054692627, "grad_norm": 1.2028989791870117, "learning_rate": 4.584721350127901e-06, "loss": 0.4538, "step": 25295 }, { "epoch": 0.6917523517829797, "grad_norm": 1.325571060180664, "learning_rate": 4.583976775116151e-06, "loss": 0.4685, "step": 25296 }, { "epoch": 0.6917796980966966, "grad_norm": 1.3787726163864136, "learning_rate": 4.583232242591583e-06, "loss": 0.4699, "step": 25297 }, { "epoch": 0.6918070444104135, "grad_norm": 1.2708313465118408, "learning_rate": 4.582487752560034e-06, "loss": 0.4535, "step": 25298 }, { "epoch": 0.6918343907241303, "grad_norm": 1.5861541032791138, "learning_rate": 4.58174330502734e-06, "loss": 0.3279, "step": 25299 }, { "epoch": 0.6918617370378473, "grad_norm": 1.3433494567871094, "learning_rate": 4.580998899999348e-06, "loss": 0.7317, "step": 25300 }, { "epoch": 0.6918890833515642, "grad_norm": 1.6235508918762207, "learning_rate": 4.580254537481896e-06, "loss": 0.3253, "step": 25301 }, { "epoch": 0.6919164296652811, "grad_norm": 1.2056899070739746, "learning_rate": 4.579510217480817e-06, "loss": 0.4649, "step": 25302 }, { "epoch": 0.691943775978998, "grad_norm": 1.389096736907959, "learning_rate": 4.578765940001959e-06, "loss": 0.484, "step": 25303 }, { "epoch": 0.691971122292715, "grad_norm": 2.3780338764190674, "learning_rate": 4.578021705051157e-06, "loss": 0.3366, "step": 25304 }, { "epoch": 0.6919984686064319, "grad_norm": 1.4368610382080078, "learning_rate": 4.577277512634248e-06, "loss": 0.7101, "step": 25305 }, { "epoch": 0.6920258149201488, "grad_norm": 1.3102049827575684, "learning_rate": 4.576533362757068e-06, "loss": 0.4711, "step": 25306 }, { "epoch": 0.6920531612338656, "grad_norm": 1.774876356124878, "learning_rate": 4.57578925542546e-06, "loss": 0.4109, "step": 25307 }, { "epoch": 0.6920805075475825, "grad_norm": 1.282210350036621, "learning_rate": 4.57504519064526e-06, "loss": 0.4707, "step": 25308 }, { "epoch": 0.6921078538612995, "grad_norm": 1.1785213947296143, "learning_rate": 4.5743011684223e-06, "loss": 0.4764, "step": 25309 }, { "epoch": 0.6921352001750164, "grad_norm": 1.428842544555664, "learning_rate": 4.573557188762423e-06, "loss": 0.4578, "step": 25310 }, { "epoch": 0.6921625464887333, "grad_norm": 1.9937753677368164, "learning_rate": 4.572813251671463e-06, "loss": 0.6985, "step": 25311 }, { "epoch": 0.6921898928024502, "grad_norm": 3.8876113891601562, "learning_rate": 4.572069357155252e-06, "loss": 0.3364, "step": 25312 }, { "epoch": 0.6922172391161672, "grad_norm": 1.5482544898986816, "learning_rate": 4.571325505219633e-06, "loss": 0.4886, "step": 25313 }, { "epoch": 0.6922445854298841, "grad_norm": 1.3820600509643555, "learning_rate": 4.570581695870438e-06, "loss": 0.4778, "step": 25314 }, { "epoch": 0.6922719317436009, "grad_norm": 1.2090229988098145, "learning_rate": 4.569837929113499e-06, "loss": 0.4786, "step": 25315 }, { "epoch": 0.6922992780573178, "grad_norm": 1.1773089170455933, "learning_rate": 4.569094204954655e-06, "loss": 0.447, "step": 25316 }, { "epoch": 0.6923266243710348, "grad_norm": 1.397570252418518, "learning_rate": 4.56835052339974e-06, "loss": 0.4813, "step": 25317 }, { "epoch": 0.6923539706847517, "grad_norm": 1.4673857688903809, "learning_rate": 4.567606884454587e-06, "loss": 0.7197, "step": 25318 }, { "epoch": 0.6923813169984686, "grad_norm": 1.7201448678970337, "learning_rate": 4.566863288125023e-06, "loss": 0.476, "step": 25319 }, { "epoch": 0.6924086633121855, "grad_norm": 1.2270748615264893, "learning_rate": 4.566119734416893e-06, "loss": 0.4873, "step": 25320 }, { "epoch": 0.6924360096259025, "grad_norm": 1.4038680791854858, "learning_rate": 4.565376223336024e-06, "loss": 0.4544, "step": 25321 }, { "epoch": 0.6924633559396194, "grad_norm": 1.9365315437316895, "learning_rate": 4.564632754888245e-06, "loss": 0.4633, "step": 25322 }, { "epoch": 0.6924907022533362, "grad_norm": 1.269883155822754, "learning_rate": 4.563889329079398e-06, "loss": 0.4657, "step": 25323 }, { "epoch": 0.6925180485670531, "grad_norm": 1.3521674871444702, "learning_rate": 4.563145945915307e-06, "loss": 0.5331, "step": 25324 }, { "epoch": 0.69254539488077, "grad_norm": 2.1007399559020996, "learning_rate": 4.562402605401803e-06, "loss": 0.3452, "step": 25325 }, { "epoch": 0.692572741194487, "grad_norm": 1.3796944618225098, "learning_rate": 4.561659307544725e-06, "loss": 0.4424, "step": 25326 }, { "epoch": 0.6926000875082039, "grad_norm": 1.1373008489608765, "learning_rate": 4.560916052349902e-06, "loss": 0.4531, "step": 25327 }, { "epoch": 0.6926274338219208, "grad_norm": 1.6928573846817017, "learning_rate": 4.5601728398231545e-06, "loss": 0.3437, "step": 25328 }, { "epoch": 0.6926547801356377, "grad_norm": 1.226871132850647, "learning_rate": 4.559429669970324e-06, "loss": 0.3352, "step": 25329 }, { "epoch": 0.6926821264493547, "grad_norm": 1.2306010723114014, "learning_rate": 4.558686542797237e-06, "loss": 0.4596, "step": 25330 }, { "epoch": 0.6927094727630715, "grad_norm": 1.2987947463989258, "learning_rate": 4.557943458309718e-06, "loss": 0.3267, "step": 25331 }, { "epoch": 0.6927368190767884, "grad_norm": 1.4262189865112305, "learning_rate": 4.557200416513606e-06, "loss": 0.4636, "step": 25332 }, { "epoch": 0.6927641653905053, "grad_norm": 1.3901745080947876, "learning_rate": 4.556457417414725e-06, "loss": 0.4647, "step": 25333 }, { "epoch": 0.6927915117042223, "grad_norm": 1.5901275873184204, "learning_rate": 4.555714461018898e-06, "loss": 0.337, "step": 25334 }, { "epoch": 0.6928188580179392, "grad_norm": 1.3288867473602295, "learning_rate": 4.554971547331965e-06, "loss": 0.4717, "step": 25335 }, { "epoch": 0.6928462043316561, "grad_norm": 1.3543075323104858, "learning_rate": 4.5542286763597465e-06, "loss": 0.4996, "step": 25336 }, { "epoch": 0.692873550645373, "grad_norm": 1.5184074640274048, "learning_rate": 4.553485848108068e-06, "loss": 0.3177, "step": 25337 }, { "epoch": 0.69290089695909, "grad_norm": 1.6286555528640747, "learning_rate": 4.552743062582764e-06, "loss": 0.4689, "step": 25338 }, { "epoch": 0.6929282432728068, "grad_norm": 1.6039741039276123, "learning_rate": 4.552000319789658e-06, "loss": 0.3374, "step": 25339 }, { "epoch": 0.6929555895865237, "grad_norm": 1.552091360092163, "learning_rate": 4.551257619734576e-06, "loss": 0.4249, "step": 25340 }, { "epoch": 0.6929829359002406, "grad_norm": 1.4395948648452759, "learning_rate": 4.550514962423342e-06, "loss": 0.4727, "step": 25341 }, { "epoch": 0.6930102822139576, "grad_norm": 1.277320146560669, "learning_rate": 4.549772347861786e-06, "loss": 0.4756, "step": 25342 }, { "epoch": 0.6930376285276745, "grad_norm": 1.3870965242385864, "learning_rate": 4.5490297760557335e-06, "loss": 0.4619, "step": 25343 }, { "epoch": 0.6930649748413914, "grad_norm": 1.3183186054229736, "learning_rate": 4.548287247011004e-06, "loss": 0.4749, "step": 25344 }, { "epoch": 0.6930923211551083, "grad_norm": 1.6951770782470703, "learning_rate": 4.547544760733431e-06, "loss": 0.318, "step": 25345 }, { "epoch": 0.6931196674688253, "grad_norm": 1.32618248462677, "learning_rate": 4.5468023172288345e-06, "loss": 0.4775, "step": 25346 }, { "epoch": 0.6931470137825421, "grad_norm": 1.3698844909667969, "learning_rate": 4.546059916503036e-06, "loss": 0.4683, "step": 25347 }, { "epoch": 0.693174360096259, "grad_norm": 1.2874959707260132, "learning_rate": 4.545317558561866e-06, "loss": 0.7395, "step": 25348 }, { "epoch": 0.6932017064099759, "grad_norm": 1.1094545125961304, "learning_rate": 4.5445752434111444e-06, "loss": 0.4463, "step": 25349 }, { "epoch": 0.6932290527236928, "grad_norm": 1.16322660446167, "learning_rate": 4.543832971056692e-06, "loss": 0.4753, "step": 25350 }, { "epoch": 0.6932563990374098, "grad_norm": 1.6907919645309448, "learning_rate": 4.543090741504337e-06, "loss": 0.7203, "step": 25351 }, { "epoch": 0.6932837453511267, "grad_norm": 1.5759248733520508, "learning_rate": 4.5423485547599e-06, "loss": 0.7194, "step": 25352 }, { "epoch": 0.6933110916648436, "grad_norm": 1.6018683910369873, "learning_rate": 4.541606410829199e-06, "loss": 0.3453, "step": 25353 }, { "epoch": 0.6933384379785605, "grad_norm": 1.190495491027832, "learning_rate": 4.540864309718063e-06, "loss": 0.4712, "step": 25354 }, { "epoch": 0.6933657842922774, "grad_norm": 1.6256799697875977, "learning_rate": 4.54012225143231e-06, "loss": 0.4272, "step": 25355 }, { "epoch": 0.6933931306059943, "grad_norm": 1.283286213874817, "learning_rate": 4.539380235977762e-06, "loss": 0.4448, "step": 25356 }, { "epoch": 0.6934204769197112, "grad_norm": 1.5837100744247437, "learning_rate": 4.538638263360235e-06, "loss": 0.3062, "step": 25357 }, { "epoch": 0.6934478232334281, "grad_norm": 1.2180238962173462, "learning_rate": 4.537896333585558e-06, "loss": 0.4803, "step": 25358 }, { "epoch": 0.693475169547145, "grad_norm": 1.3807337284088135, "learning_rate": 4.537154446659546e-06, "loss": 0.4531, "step": 25359 }, { "epoch": 0.693502515860862, "grad_norm": 1.1440260410308838, "learning_rate": 4.536412602588019e-06, "loss": 0.4386, "step": 25360 }, { "epoch": 0.6935298621745789, "grad_norm": 1.4322619438171387, "learning_rate": 4.535670801376798e-06, "loss": 0.34, "step": 25361 }, { "epoch": 0.6935572084882958, "grad_norm": 1.4600131511688232, "learning_rate": 4.534929043031707e-06, "loss": 0.4822, "step": 25362 }, { "epoch": 0.6935845548020126, "grad_norm": 1.2769380807876587, "learning_rate": 4.534187327558552e-06, "loss": 0.47, "step": 25363 }, { "epoch": 0.6936119011157296, "grad_norm": 1.315462350845337, "learning_rate": 4.533445654963163e-06, "loss": 0.4844, "step": 25364 }, { "epoch": 0.6936392474294465, "grad_norm": 1.6274772882461548, "learning_rate": 4.532704025251356e-06, "loss": 0.4462, "step": 25365 }, { "epoch": 0.6936665937431634, "grad_norm": 1.2961652278900146, "learning_rate": 4.53196243842894e-06, "loss": 0.4511, "step": 25366 }, { "epoch": 0.6936939400568803, "grad_norm": 1.4430259466171265, "learning_rate": 4.5312208945017464e-06, "loss": 0.4802, "step": 25367 }, { "epoch": 0.6937212863705973, "grad_norm": 1.5193771123886108, "learning_rate": 4.5304793934755855e-06, "loss": 0.5044, "step": 25368 }, { "epoch": 0.6937486326843142, "grad_norm": 1.4516828060150146, "learning_rate": 4.52973793535627e-06, "loss": 0.4876, "step": 25369 }, { "epoch": 0.6937759789980311, "grad_norm": 1.1488910913467407, "learning_rate": 4.528996520149625e-06, "loss": 0.4807, "step": 25370 }, { "epoch": 0.6938033253117479, "grad_norm": 1.579312801361084, "learning_rate": 4.528255147861463e-06, "loss": 0.4713, "step": 25371 }, { "epoch": 0.6938306716254649, "grad_norm": 1.304198980331421, "learning_rate": 4.527513818497596e-06, "loss": 0.4387, "step": 25372 }, { "epoch": 0.6938580179391818, "grad_norm": 1.3141915798187256, "learning_rate": 4.5267725320638454e-06, "loss": 0.4709, "step": 25373 }, { "epoch": 0.6938853642528987, "grad_norm": 1.2422425746917725, "learning_rate": 4.526031288566025e-06, "loss": 0.4741, "step": 25374 }, { "epoch": 0.6939127105666156, "grad_norm": 1.8271416425704956, "learning_rate": 4.525290088009945e-06, "loss": 0.369, "step": 25375 }, { "epoch": 0.6939400568803326, "grad_norm": 1.4395418167114258, "learning_rate": 4.524548930401427e-06, "loss": 0.4651, "step": 25376 }, { "epoch": 0.6939674031940495, "grad_norm": 1.2304270267486572, "learning_rate": 4.523807815746281e-06, "loss": 0.4509, "step": 25377 }, { "epoch": 0.6939947495077664, "grad_norm": 1.2238932847976685, "learning_rate": 4.523066744050322e-06, "loss": 0.47, "step": 25378 }, { "epoch": 0.6940220958214832, "grad_norm": 1.4999372959136963, "learning_rate": 4.522325715319359e-06, "loss": 0.4795, "step": 25379 }, { "epoch": 0.6940494421352001, "grad_norm": 1.8689498901367188, "learning_rate": 4.521584729559214e-06, "loss": 0.3601, "step": 25380 }, { "epoch": 0.6940767884489171, "grad_norm": 1.6830962896347046, "learning_rate": 4.5208437867756935e-06, "loss": 0.4632, "step": 25381 }, { "epoch": 0.694104134762634, "grad_norm": 1.2133269309997559, "learning_rate": 4.52010288697461e-06, "loss": 0.4506, "step": 25382 }, { "epoch": 0.6941314810763509, "grad_norm": 1.1595346927642822, "learning_rate": 4.519362030161779e-06, "loss": 0.4655, "step": 25383 }, { "epoch": 0.6941588273900678, "grad_norm": 1.2253557443618774, "learning_rate": 4.518621216343011e-06, "loss": 0.4712, "step": 25384 }, { "epoch": 0.6941861737037848, "grad_norm": 1.512802243232727, "learning_rate": 4.517880445524113e-06, "loss": 0.4886, "step": 25385 }, { "epoch": 0.6942135200175017, "grad_norm": 1.5113675594329834, "learning_rate": 4.517139717710904e-06, "loss": 0.4532, "step": 25386 }, { "epoch": 0.6942408663312185, "grad_norm": 1.25710928440094, "learning_rate": 4.516399032909192e-06, "loss": 0.4685, "step": 25387 }, { "epoch": 0.6942682126449354, "grad_norm": 1.3045103549957275, "learning_rate": 4.515658391124782e-06, "loss": 0.4637, "step": 25388 }, { "epoch": 0.6942955589586524, "grad_norm": 1.281803011894226, "learning_rate": 4.514917792363491e-06, "loss": 0.7189, "step": 25389 }, { "epoch": 0.6943229052723693, "grad_norm": 1.2249259948730469, "learning_rate": 4.5141772366311276e-06, "loss": 0.4356, "step": 25390 }, { "epoch": 0.6943502515860862, "grad_norm": 1.282753348350525, "learning_rate": 4.5134367239335e-06, "loss": 0.6981, "step": 25391 }, { "epoch": 0.6943775978998031, "grad_norm": 1.3115670680999756, "learning_rate": 4.512696254276413e-06, "loss": 0.4552, "step": 25392 }, { "epoch": 0.6944049442135201, "grad_norm": 1.237218976020813, "learning_rate": 4.511955827665684e-06, "loss": 0.4962, "step": 25393 }, { "epoch": 0.694432290527237, "grad_norm": 1.319298505783081, "learning_rate": 4.5112154441071164e-06, "loss": 0.4678, "step": 25394 }, { "epoch": 0.6944596368409538, "grad_norm": 1.5807594060897827, "learning_rate": 4.510475103606516e-06, "loss": 0.3467, "step": 25395 }, { "epoch": 0.6944869831546707, "grad_norm": 1.1644526720046997, "learning_rate": 4.509734806169696e-06, "loss": 0.4256, "step": 25396 }, { "epoch": 0.6945143294683876, "grad_norm": 1.454728364944458, "learning_rate": 4.508994551802461e-06, "loss": 0.4924, "step": 25397 }, { "epoch": 0.6945416757821046, "grad_norm": 1.1685460805892944, "learning_rate": 4.508254340510619e-06, "loss": 0.4779, "step": 25398 }, { "epoch": 0.6945690220958215, "grad_norm": 1.3004566431045532, "learning_rate": 4.507514172299977e-06, "loss": 0.4802, "step": 25399 }, { "epoch": 0.6945963684095384, "grad_norm": 1.2316031455993652, "learning_rate": 4.5067740471763414e-06, "loss": 0.4565, "step": 25400 }, { "epoch": 0.6946237147232553, "grad_norm": 1.3210539817810059, "learning_rate": 4.506033965145512e-06, "loss": 0.4369, "step": 25401 }, { "epoch": 0.6946510610369722, "grad_norm": 1.3933428525924683, "learning_rate": 4.505293926213304e-06, "loss": 0.4356, "step": 25402 }, { "epoch": 0.6946784073506891, "grad_norm": 1.6459656953811646, "learning_rate": 4.504553930385519e-06, "loss": 0.3473, "step": 25403 }, { "epoch": 0.694705753664406, "grad_norm": 1.6729623079299927, "learning_rate": 4.503813977667958e-06, "loss": 0.3129, "step": 25404 }, { "epoch": 0.6947330999781229, "grad_norm": 1.432968258857727, "learning_rate": 4.503074068066434e-06, "loss": 0.4561, "step": 25405 }, { "epoch": 0.6947604462918399, "grad_norm": 1.2773106098175049, "learning_rate": 4.5023342015867464e-06, "loss": 0.4865, "step": 25406 }, { "epoch": 0.6947877926055568, "grad_norm": 1.5468400716781616, "learning_rate": 4.501594378234696e-06, "loss": 0.4608, "step": 25407 }, { "epoch": 0.6948151389192737, "grad_norm": 1.1978050470352173, "learning_rate": 4.500854598016095e-06, "loss": 0.4706, "step": 25408 }, { "epoch": 0.6948424852329906, "grad_norm": 1.4030834436416626, "learning_rate": 4.500114860936741e-06, "loss": 0.463, "step": 25409 }, { "epoch": 0.6948698315467075, "grad_norm": 1.2458747625350952, "learning_rate": 4.499375167002434e-06, "loss": 0.4939, "step": 25410 }, { "epoch": 0.6948971778604244, "grad_norm": 2.483907699584961, "learning_rate": 4.498635516218985e-06, "loss": 0.4378, "step": 25411 }, { "epoch": 0.6949245241741413, "grad_norm": 1.489068865776062, "learning_rate": 4.497895908592193e-06, "loss": 0.3247, "step": 25412 }, { "epoch": 0.6949518704878582, "grad_norm": 1.408280372619629, "learning_rate": 4.497156344127859e-06, "loss": 0.4913, "step": 25413 }, { "epoch": 0.6949792168015752, "grad_norm": 1.1979089975357056, "learning_rate": 4.4964168228317805e-06, "loss": 0.4916, "step": 25414 }, { "epoch": 0.6950065631152921, "grad_norm": 1.5346695184707642, "learning_rate": 4.495677344709767e-06, "loss": 0.4465, "step": 25415 }, { "epoch": 0.695033909429009, "grad_norm": 1.41631281375885, "learning_rate": 4.494937909767615e-06, "loss": 0.4504, "step": 25416 }, { "epoch": 0.6950612557427259, "grad_norm": 1.2991998195648193, "learning_rate": 4.494198518011123e-06, "loss": 0.4503, "step": 25417 }, { "epoch": 0.6950886020564427, "grad_norm": 1.2260245084762573, "learning_rate": 4.493459169446098e-06, "loss": 0.4763, "step": 25418 }, { "epoch": 0.6951159483701597, "grad_norm": 1.2176966667175293, "learning_rate": 4.492719864078336e-06, "loss": 0.4768, "step": 25419 }, { "epoch": 0.6951432946838766, "grad_norm": 1.4559392929077148, "learning_rate": 4.491980601913632e-06, "loss": 0.4769, "step": 25420 }, { "epoch": 0.6951706409975935, "grad_norm": 1.0975438356399536, "learning_rate": 4.491241382957796e-06, "loss": 0.4562, "step": 25421 }, { "epoch": 0.6951979873113104, "grad_norm": 1.6633280515670776, "learning_rate": 4.490502207216619e-06, "loss": 0.4857, "step": 25422 }, { "epoch": 0.6952253336250274, "grad_norm": 1.3106529712677002, "learning_rate": 4.4897630746959e-06, "loss": 0.4469, "step": 25423 }, { "epoch": 0.6952526799387443, "grad_norm": 1.6729974746704102, "learning_rate": 4.489023985401441e-06, "loss": 0.3218, "step": 25424 }, { "epoch": 0.6952800262524612, "grad_norm": 1.191943645477295, "learning_rate": 4.488284939339038e-06, "loss": 0.3242, "step": 25425 }, { "epoch": 0.695307372566178, "grad_norm": 1.7136566638946533, "learning_rate": 4.487545936514489e-06, "loss": 0.4586, "step": 25426 }, { "epoch": 0.695334718879895, "grad_norm": 1.4448120594024658, "learning_rate": 4.486806976933587e-06, "loss": 0.5046, "step": 25427 }, { "epoch": 0.6953620651936119, "grad_norm": 1.52643620967865, "learning_rate": 4.486068060602137e-06, "loss": 0.453, "step": 25428 }, { "epoch": 0.6953894115073288, "grad_norm": 1.1826465129852295, "learning_rate": 4.48532918752593e-06, "loss": 0.4669, "step": 25429 }, { "epoch": 0.6954167578210457, "grad_norm": 1.3321508169174194, "learning_rate": 4.4845903577107605e-06, "loss": 0.4827, "step": 25430 }, { "epoch": 0.6954441041347627, "grad_norm": 1.5467520952224731, "learning_rate": 4.483851571162431e-06, "loss": 0.4517, "step": 25431 }, { "epoch": 0.6954714504484796, "grad_norm": 1.2611868381500244, "learning_rate": 4.483112827886735e-06, "loss": 0.4502, "step": 25432 }, { "epoch": 0.6954987967621965, "grad_norm": 1.1859062910079956, "learning_rate": 4.482374127889464e-06, "loss": 0.446, "step": 25433 }, { "epoch": 0.6955261430759133, "grad_norm": 1.3317303657531738, "learning_rate": 4.481635471176417e-06, "loss": 0.4629, "step": 25434 }, { "epoch": 0.6955534893896302, "grad_norm": 1.248166799545288, "learning_rate": 4.4808968577533855e-06, "loss": 0.4603, "step": 25435 }, { "epoch": 0.6955808357033472, "grad_norm": 1.8595551252365112, "learning_rate": 4.480158287626161e-06, "loss": 0.3749, "step": 25436 }, { "epoch": 0.6956081820170641, "grad_norm": 1.4375730752944946, "learning_rate": 4.4794197608005465e-06, "loss": 0.432, "step": 25437 }, { "epoch": 0.695635528330781, "grad_norm": 2.319875955581665, "learning_rate": 4.478681277282329e-06, "loss": 0.7673, "step": 25438 }, { "epoch": 0.6956628746444979, "grad_norm": 1.3428058624267578, "learning_rate": 4.4779428370773004e-06, "loss": 0.3064, "step": 25439 }, { "epoch": 0.6956902209582149, "grad_norm": 1.359405755996704, "learning_rate": 4.477204440191259e-06, "loss": 0.4701, "step": 25440 }, { "epoch": 0.6957175672719318, "grad_norm": 1.3436918258666992, "learning_rate": 4.4764660866299946e-06, "loss": 0.4524, "step": 25441 }, { "epoch": 0.6957449135856486, "grad_norm": 1.3134657144546509, "learning_rate": 4.4757277763992966e-06, "loss": 0.7001, "step": 25442 }, { "epoch": 0.6957722598993655, "grad_norm": 1.6081161499023438, "learning_rate": 4.474989509504962e-06, "loss": 0.4889, "step": 25443 }, { "epoch": 0.6957996062130825, "grad_norm": 1.3567076921463013, "learning_rate": 4.474251285952781e-06, "loss": 0.4785, "step": 25444 }, { "epoch": 0.6958269525267994, "grad_norm": 2.0374433994293213, "learning_rate": 4.473513105748539e-06, "loss": 0.6732, "step": 25445 }, { "epoch": 0.6958542988405163, "grad_norm": 1.36285400390625, "learning_rate": 4.472774968898036e-06, "loss": 0.5018, "step": 25446 }, { "epoch": 0.6958816451542332, "grad_norm": 1.466731309890747, "learning_rate": 4.472036875407057e-06, "loss": 0.504, "step": 25447 }, { "epoch": 0.6959089914679502, "grad_norm": 1.6226081848144531, "learning_rate": 4.471298825281393e-06, "loss": 0.4691, "step": 25448 }, { "epoch": 0.6959363377816671, "grad_norm": 1.7324670553207397, "learning_rate": 4.47056081852683e-06, "loss": 0.466, "step": 25449 }, { "epoch": 0.6959636840953839, "grad_norm": 1.580795407295227, "learning_rate": 4.4698228551491654e-06, "loss": 0.4681, "step": 25450 }, { "epoch": 0.6959910304091008, "grad_norm": 1.2517356872558594, "learning_rate": 4.469084935154184e-06, "loss": 0.4761, "step": 25451 }, { "epoch": 0.6960183767228177, "grad_norm": 1.1332532167434692, "learning_rate": 4.4683470585476715e-06, "loss": 0.4528, "step": 25452 }, { "epoch": 0.6960457230365347, "grad_norm": 1.3713901042938232, "learning_rate": 4.4676092253354235e-06, "loss": 0.4274, "step": 25453 }, { "epoch": 0.6960730693502516, "grad_norm": 1.215440273284912, "learning_rate": 4.4668714355232235e-06, "loss": 0.4361, "step": 25454 }, { "epoch": 0.6961004156639685, "grad_norm": 1.5584790706634521, "learning_rate": 4.466133689116856e-06, "loss": 0.464, "step": 25455 }, { "epoch": 0.6961277619776854, "grad_norm": 4.638485431671143, "learning_rate": 4.465395986122116e-06, "loss": 0.3528, "step": 25456 }, { "epoch": 0.6961551082914024, "grad_norm": 1.2879619598388672, "learning_rate": 4.4646583265447875e-06, "loss": 0.494, "step": 25457 }, { "epoch": 0.6961824546051192, "grad_norm": 1.5672974586486816, "learning_rate": 4.463920710390652e-06, "loss": 0.3025, "step": 25458 }, { "epoch": 0.6962098009188361, "grad_norm": 1.4096852540969849, "learning_rate": 4.463183137665505e-06, "loss": 0.4648, "step": 25459 }, { "epoch": 0.696237147232553, "grad_norm": 1.3553348779678345, "learning_rate": 4.462445608375129e-06, "loss": 0.4588, "step": 25460 }, { "epoch": 0.69626449354627, "grad_norm": 1.4403210878372192, "learning_rate": 4.461708122525307e-06, "loss": 0.433, "step": 25461 }, { "epoch": 0.6962918398599869, "grad_norm": 1.1967899799346924, "learning_rate": 4.460970680121823e-06, "loss": 0.4625, "step": 25462 }, { "epoch": 0.6963191861737038, "grad_norm": 1.3005223274230957, "learning_rate": 4.46023328117047e-06, "loss": 0.4597, "step": 25463 }, { "epoch": 0.6963465324874207, "grad_norm": 1.3256418704986572, "learning_rate": 4.459495925677027e-06, "loss": 0.4672, "step": 25464 }, { "epoch": 0.6963738788011377, "grad_norm": 1.5825843811035156, "learning_rate": 4.458758613647276e-06, "loss": 0.4748, "step": 25465 }, { "epoch": 0.6964012251148545, "grad_norm": 1.3311266899108887, "learning_rate": 4.4580213450870065e-06, "loss": 0.4727, "step": 25466 }, { "epoch": 0.6964285714285714, "grad_norm": 1.215204119682312, "learning_rate": 4.457284120002002e-06, "loss": 0.7276, "step": 25467 }, { "epoch": 0.6964559177422883, "grad_norm": 1.2801730632781982, "learning_rate": 4.456546938398043e-06, "loss": 0.4806, "step": 25468 }, { "epoch": 0.6964832640560052, "grad_norm": 1.34140944480896, "learning_rate": 4.455809800280912e-06, "loss": 0.4848, "step": 25469 }, { "epoch": 0.6965106103697222, "grad_norm": 1.299188494682312, "learning_rate": 4.455072705656395e-06, "loss": 0.4802, "step": 25470 }, { "epoch": 0.6965379566834391, "grad_norm": 1.1527906656265259, "learning_rate": 4.454335654530266e-06, "loss": 0.4506, "step": 25471 }, { "epoch": 0.696565302997156, "grad_norm": 1.1860905885696411, "learning_rate": 4.453598646908318e-06, "loss": 0.4518, "step": 25472 }, { "epoch": 0.696592649310873, "grad_norm": 1.1133054494857788, "learning_rate": 4.452861682796328e-06, "loss": 0.4437, "step": 25473 }, { "epoch": 0.6966199956245898, "grad_norm": 1.567050814628601, "learning_rate": 4.452124762200073e-06, "loss": 0.457, "step": 25474 }, { "epoch": 0.6966473419383067, "grad_norm": 1.1341166496276855, "learning_rate": 4.4513878851253425e-06, "loss": 0.4694, "step": 25475 }, { "epoch": 0.6966746882520236, "grad_norm": 1.4926718473434448, "learning_rate": 4.450651051577911e-06, "loss": 0.478, "step": 25476 }, { "epoch": 0.6967020345657405, "grad_norm": 1.3166691064834595, "learning_rate": 4.449914261563557e-06, "loss": 0.4923, "step": 25477 }, { "epoch": 0.6967293808794575, "grad_norm": 1.415316104888916, "learning_rate": 4.449177515088068e-06, "loss": 0.4588, "step": 25478 }, { "epoch": 0.6967567271931744, "grad_norm": 1.6429322957992554, "learning_rate": 4.4484408121572185e-06, "loss": 0.4572, "step": 25479 }, { "epoch": 0.6967840735068913, "grad_norm": 1.4547590017318726, "learning_rate": 4.447704152776785e-06, "loss": 0.4802, "step": 25480 }, { "epoch": 0.6968114198206082, "grad_norm": 1.4140510559082031, "learning_rate": 4.4469675369525545e-06, "loss": 0.5281, "step": 25481 }, { "epoch": 0.696838766134325, "grad_norm": 1.1935760974884033, "learning_rate": 4.4462309646903005e-06, "loss": 0.4408, "step": 25482 }, { "epoch": 0.696866112448042, "grad_norm": 1.705832839012146, "learning_rate": 4.445494435995801e-06, "loss": 0.3234, "step": 25483 }, { "epoch": 0.6968934587617589, "grad_norm": 1.2109731435775757, "learning_rate": 4.44475795087483e-06, "loss": 0.4567, "step": 25484 }, { "epoch": 0.6969208050754758, "grad_norm": 1.3915314674377441, "learning_rate": 4.4440215093331755e-06, "loss": 0.3264, "step": 25485 }, { "epoch": 0.6969481513891927, "grad_norm": 1.7254537343978882, "learning_rate": 4.443285111376607e-06, "loss": 0.3606, "step": 25486 }, { "epoch": 0.6969754977029097, "grad_norm": 1.2226842641830444, "learning_rate": 4.4425487570109e-06, "loss": 0.4272, "step": 25487 }, { "epoch": 0.6970028440166266, "grad_norm": 1.2108933925628662, "learning_rate": 4.441812446241838e-06, "loss": 0.458, "step": 25488 }, { "epoch": 0.6970301903303435, "grad_norm": 1.2563292980194092, "learning_rate": 4.441076179075192e-06, "loss": 0.4694, "step": 25489 }, { "epoch": 0.6970575366440603, "grad_norm": 1.657409429550171, "learning_rate": 4.440339955516736e-06, "loss": 0.3164, "step": 25490 }, { "epoch": 0.6970848829577773, "grad_norm": 2.115089178085327, "learning_rate": 4.439603775572252e-06, "loss": 0.7185, "step": 25491 }, { "epoch": 0.6971122292714942, "grad_norm": 1.646359920501709, "learning_rate": 4.4388676392475115e-06, "loss": 0.3256, "step": 25492 }, { "epoch": 0.6971395755852111, "grad_norm": 1.3456703424453735, "learning_rate": 4.438131546548286e-06, "loss": 0.4907, "step": 25493 }, { "epoch": 0.697166921898928, "grad_norm": 1.6702808141708374, "learning_rate": 4.437395497480356e-06, "loss": 0.3247, "step": 25494 }, { "epoch": 0.697194268212645, "grad_norm": 1.2496105432510376, "learning_rate": 4.436659492049493e-06, "loss": 0.4593, "step": 25495 }, { "epoch": 0.6972216145263619, "grad_norm": 1.5531572103500366, "learning_rate": 4.435923530261468e-06, "loss": 0.7246, "step": 25496 }, { "epoch": 0.6972489608400787, "grad_norm": 1.4759749174118042, "learning_rate": 4.435187612122061e-06, "loss": 0.4529, "step": 25497 }, { "epoch": 0.6972763071537956, "grad_norm": 1.3446094989776611, "learning_rate": 4.4344517376370385e-06, "loss": 0.4378, "step": 25498 }, { "epoch": 0.6973036534675126, "grad_norm": 1.5607489347457886, "learning_rate": 4.433715906812178e-06, "loss": 0.4572, "step": 25499 }, { "epoch": 0.6973309997812295, "grad_norm": 1.267865538597107, "learning_rate": 4.432980119653246e-06, "loss": 0.4779, "step": 25500 }, { "epoch": 0.6973583460949464, "grad_norm": 1.2111146450042725, "learning_rate": 4.4322443761660216e-06, "loss": 0.4445, "step": 25501 }, { "epoch": 0.6973856924086633, "grad_norm": 1.3987188339233398, "learning_rate": 4.4315086763562735e-06, "loss": 0.4682, "step": 25502 }, { "epoch": 0.6974130387223803, "grad_norm": 1.5733689069747925, "learning_rate": 4.430773020229772e-06, "loss": 0.2857, "step": 25503 }, { "epoch": 0.6974403850360972, "grad_norm": 1.6305574178695679, "learning_rate": 4.430037407792289e-06, "loss": 0.459, "step": 25504 }, { "epoch": 0.697467731349814, "grad_norm": 1.2273552417755127, "learning_rate": 4.429301839049593e-06, "loss": 0.4716, "step": 25505 }, { "epoch": 0.6974950776635309, "grad_norm": 1.2509737014770508, "learning_rate": 4.428566314007457e-06, "loss": 0.476, "step": 25506 }, { "epoch": 0.6975224239772478, "grad_norm": 1.6448581218719482, "learning_rate": 4.4278308326716525e-06, "loss": 0.4888, "step": 25507 }, { "epoch": 0.6975497702909648, "grad_norm": 1.4387485980987549, "learning_rate": 4.427095395047947e-06, "loss": 0.4772, "step": 25508 }, { "epoch": 0.6975771166046817, "grad_norm": 1.5621320009231567, "learning_rate": 4.426360001142105e-06, "loss": 0.3152, "step": 25509 }, { "epoch": 0.6976044629183986, "grad_norm": 1.4841606616973877, "learning_rate": 4.425624650959904e-06, "loss": 0.4544, "step": 25510 }, { "epoch": 0.6976318092321155, "grad_norm": 1.2133634090423584, "learning_rate": 4.424889344507109e-06, "loss": 0.482, "step": 25511 }, { "epoch": 0.6976591555458325, "grad_norm": 1.261096715927124, "learning_rate": 4.424154081789484e-06, "loss": 0.4588, "step": 25512 }, { "epoch": 0.6976865018595493, "grad_norm": 1.3801735639572144, "learning_rate": 4.423418862812806e-06, "loss": 0.4973, "step": 25513 }, { "epoch": 0.6977138481732662, "grad_norm": 1.50166916847229, "learning_rate": 4.422683687582837e-06, "loss": 0.4408, "step": 25514 }, { "epoch": 0.6977411944869831, "grad_norm": 1.2602946758270264, "learning_rate": 4.4219485561053424e-06, "loss": 0.4466, "step": 25515 }, { "epoch": 0.6977685408007, "grad_norm": 1.2804967164993286, "learning_rate": 4.421213468386093e-06, "loss": 0.4657, "step": 25516 }, { "epoch": 0.697795887114417, "grad_norm": 1.632001519203186, "learning_rate": 4.420478424430855e-06, "loss": 0.4911, "step": 25517 }, { "epoch": 0.6978232334281339, "grad_norm": 1.4240422248840332, "learning_rate": 4.419743424245391e-06, "loss": 0.4372, "step": 25518 }, { "epoch": 0.6978505797418508, "grad_norm": 1.2722082138061523, "learning_rate": 4.419008467835472e-06, "loss": 0.4458, "step": 25519 }, { "epoch": 0.6978779260555678, "grad_norm": 1.2549511194229126, "learning_rate": 4.41827355520686e-06, "loss": 0.4869, "step": 25520 }, { "epoch": 0.6979052723692846, "grad_norm": 1.3756238222122192, "learning_rate": 4.417538686365322e-06, "loss": 0.4712, "step": 25521 }, { "epoch": 0.6979326186830015, "grad_norm": 1.2942296266555786, "learning_rate": 4.4168038613166185e-06, "loss": 0.6875, "step": 25522 }, { "epoch": 0.6979599649967184, "grad_norm": 1.1725099086761475, "learning_rate": 4.4160690800665205e-06, "loss": 0.4473, "step": 25523 }, { "epoch": 0.6979873113104353, "grad_norm": 1.3888230323791504, "learning_rate": 4.415334342620791e-06, "loss": 0.4677, "step": 25524 }, { "epoch": 0.6980146576241523, "grad_norm": 1.2488510608673096, "learning_rate": 4.414599648985186e-06, "loss": 0.3245, "step": 25525 }, { "epoch": 0.6980420039378692, "grad_norm": 1.239952564239502, "learning_rate": 4.413864999165479e-06, "loss": 0.4799, "step": 25526 }, { "epoch": 0.6980693502515861, "grad_norm": 1.2564128637313843, "learning_rate": 4.413130393167429e-06, "loss": 0.4532, "step": 25527 }, { "epoch": 0.698096696565303, "grad_norm": 1.4721792936325073, "learning_rate": 4.412395830996797e-06, "loss": 0.4609, "step": 25528 }, { "epoch": 0.6981240428790199, "grad_norm": 1.2494189739227295, "learning_rate": 4.411661312659349e-06, "loss": 0.4525, "step": 25529 }, { "epoch": 0.6981513891927368, "grad_norm": 1.163621425628662, "learning_rate": 4.410926838160846e-06, "loss": 0.4718, "step": 25530 }, { "epoch": 0.6981787355064537, "grad_norm": 1.3154525756835938, "learning_rate": 4.410192407507045e-06, "loss": 0.4563, "step": 25531 }, { "epoch": 0.6982060818201706, "grad_norm": 1.7966705560684204, "learning_rate": 4.4094580207037155e-06, "loss": 0.3706, "step": 25532 }, { "epoch": 0.6982334281338876, "grad_norm": 2.498528242111206, "learning_rate": 4.408723677756613e-06, "loss": 0.4305, "step": 25533 }, { "epoch": 0.6982607744476045, "grad_norm": 2.386586904525757, "learning_rate": 4.4079893786715025e-06, "loss": 0.4384, "step": 25534 }, { "epoch": 0.6982881207613214, "grad_norm": 1.390522837638855, "learning_rate": 4.407255123454136e-06, "loss": 0.4471, "step": 25535 }, { "epoch": 0.6983154670750383, "grad_norm": 1.2187474966049194, "learning_rate": 4.4065209121102825e-06, "loss": 0.7192, "step": 25536 }, { "epoch": 0.6983428133887551, "grad_norm": 1.4640998840332031, "learning_rate": 4.4057867446456996e-06, "loss": 0.4311, "step": 25537 }, { "epoch": 0.6983701597024721, "grad_norm": 1.321254014968872, "learning_rate": 4.405052621066145e-06, "loss": 0.4923, "step": 25538 }, { "epoch": 0.698397506016189, "grad_norm": 1.468677043914795, "learning_rate": 4.404318541377377e-06, "loss": 0.4489, "step": 25539 }, { "epoch": 0.6984248523299059, "grad_norm": 1.2773033380508423, "learning_rate": 4.403584505585152e-06, "loss": 0.4315, "step": 25540 }, { "epoch": 0.6984521986436228, "grad_norm": 1.5874441862106323, "learning_rate": 4.402850513695236e-06, "loss": 0.4712, "step": 25541 }, { "epoch": 0.6984795449573398, "grad_norm": 1.5485225915908813, "learning_rate": 4.402116565713383e-06, "loss": 0.4259, "step": 25542 }, { "epoch": 0.6985068912710567, "grad_norm": 1.0854729413986206, "learning_rate": 4.401382661645349e-06, "loss": 0.4636, "step": 25543 }, { "epoch": 0.6985342375847736, "grad_norm": 1.1561002731323242, "learning_rate": 4.400648801496888e-06, "loss": 0.4433, "step": 25544 }, { "epoch": 0.6985615838984904, "grad_norm": 1.4141892194747925, "learning_rate": 4.399914985273767e-06, "loss": 0.4677, "step": 25545 }, { "epoch": 0.6985889302122074, "grad_norm": 1.1381456851959229, "learning_rate": 4.399181212981736e-06, "loss": 0.6973, "step": 25546 }, { "epoch": 0.6986162765259243, "grad_norm": 1.2950506210327148, "learning_rate": 4.398447484626549e-06, "loss": 0.4728, "step": 25547 }, { "epoch": 0.6986436228396412, "grad_norm": 1.4511427879333496, "learning_rate": 4.3977138002139685e-06, "loss": 0.4793, "step": 25548 }, { "epoch": 0.6986709691533581, "grad_norm": 1.3034483194351196, "learning_rate": 4.396980159749746e-06, "loss": 0.4778, "step": 25549 }, { "epoch": 0.6986983154670751, "grad_norm": 1.4863436222076416, "learning_rate": 4.396246563239635e-06, "loss": 0.4277, "step": 25550 }, { "epoch": 0.698725661780792, "grad_norm": 1.7934626340866089, "learning_rate": 4.395513010689395e-06, "loss": 0.3333, "step": 25551 }, { "epoch": 0.6987530080945089, "grad_norm": 1.3473448753356934, "learning_rate": 4.394779502104778e-06, "loss": 0.4602, "step": 25552 }, { "epoch": 0.6987803544082257, "grad_norm": 1.3799047470092773, "learning_rate": 4.394046037491535e-06, "loss": 0.4785, "step": 25553 }, { "epoch": 0.6988077007219426, "grad_norm": 1.1134963035583496, "learning_rate": 4.393312616855428e-06, "loss": 0.702, "step": 25554 }, { "epoch": 0.6988350470356596, "grad_norm": 1.2427464723587036, "learning_rate": 4.392579240202205e-06, "loss": 0.7102, "step": 25555 }, { "epoch": 0.6988623933493765, "grad_norm": 1.7671875953674316, "learning_rate": 4.391845907537619e-06, "loss": 0.3499, "step": 25556 }, { "epoch": 0.6988897396630934, "grad_norm": 1.326019048690796, "learning_rate": 4.39111261886742e-06, "loss": 0.487, "step": 25557 }, { "epoch": 0.6989170859768103, "grad_norm": 1.306125521659851, "learning_rate": 4.390379374197369e-06, "loss": 0.4951, "step": 25558 }, { "epoch": 0.6989444322905273, "grad_norm": 3.1684415340423584, "learning_rate": 4.389646173533212e-06, "loss": 0.7337, "step": 25559 }, { "epoch": 0.6989717786042442, "grad_norm": 1.2271250486373901, "learning_rate": 4.388913016880699e-06, "loss": 0.4629, "step": 25560 }, { "epoch": 0.698999124917961, "grad_norm": 1.8576186895370483, "learning_rate": 4.388179904245588e-06, "loss": 0.3458, "step": 25561 }, { "epoch": 0.6990264712316779, "grad_norm": 1.2750186920166016, "learning_rate": 4.387446835633625e-06, "loss": 0.4433, "step": 25562 }, { "epoch": 0.6990538175453949, "grad_norm": 1.2940499782562256, "learning_rate": 4.3867138110505595e-06, "loss": 0.4638, "step": 25563 }, { "epoch": 0.6990811638591118, "grad_norm": 1.1550414562225342, "learning_rate": 4.3859808305021475e-06, "loss": 0.7305, "step": 25564 }, { "epoch": 0.6991085101728287, "grad_norm": 1.4350652694702148, "learning_rate": 4.385247893994137e-06, "loss": 0.4314, "step": 25565 }, { "epoch": 0.6991358564865456, "grad_norm": 1.178189754486084, "learning_rate": 4.384515001532273e-06, "loss": 0.7111, "step": 25566 }, { "epoch": 0.6991632028002626, "grad_norm": 1.4022033214569092, "learning_rate": 4.38378215312231e-06, "loss": 0.405, "step": 25567 }, { "epoch": 0.6991905491139795, "grad_norm": 1.6088109016418457, "learning_rate": 4.383049348769998e-06, "loss": 0.4633, "step": 25568 }, { "epoch": 0.6992178954276963, "grad_norm": 1.3513137102127075, "learning_rate": 4.382316588481082e-06, "loss": 0.4493, "step": 25569 }, { "epoch": 0.6992452417414132, "grad_norm": 1.501863718032837, "learning_rate": 4.381583872261307e-06, "loss": 0.4949, "step": 25570 }, { "epoch": 0.6992725880551302, "grad_norm": 1.4885001182556152, "learning_rate": 4.3808512001164305e-06, "loss": 0.4504, "step": 25571 }, { "epoch": 0.6992999343688471, "grad_norm": 1.3664273023605347, "learning_rate": 4.380118572052194e-06, "loss": 0.7269, "step": 25572 }, { "epoch": 0.699327280682564, "grad_norm": 1.3259419202804565, "learning_rate": 4.379385988074346e-06, "loss": 0.4855, "step": 25573 }, { "epoch": 0.6993546269962809, "grad_norm": 1.6284592151641846, "learning_rate": 4.378653448188633e-06, "loss": 0.4262, "step": 25574 }, { "epoch": 0.6993819733099979, "grad_norm": 1.3587970733642578, "learning_rate": 4.377920952400798e-06, "loss": 0.3315, "step": 25575 }, { "epoch": 0.6994093196237148, "grad_norm": 1.5220727920532227, "learning_rate": 4.3771885007165945e-06, "loss": 0.4753, "step": 25576 }, { "epoch": 0.6994366659374316, "grad_norm": 1.7818125486373901, "learning_rate": 4.376456093141766e-06, "loss": 0.3192, "step": 25577 }, { "epoch": 0.6994640122511485, "grad_norm": 1.5532432794570923, "learning_rate": 4.375723729682056e-06, "loss": 0.4625, "step": 25578 }, { "epoch": 0.6994913585648654, "grad_norm": 1.3346948623657227, "learning_rate": 4.374991410343206e-06, "loss": 0.4723, "step": 25579 }, { "epoch": 0.6995187048785824, "grad_norm": 1.2276434898376465, "learning_rate": 4.37425913513097e-06, "loss": 0.3161, "step": 25580 }, { "epoch": 0.6995460511922993, "grad_norm": 1.380707025527954, "learning_rate": 4.373526904051089e-06, "loss": 0.4695, "step": 25581 }, { "epoch": 0.6995733975060162, "grad_norm": 1.3915107250213623, "learning_rate": 4.372794717109301e-06, "loss": 0.4714, "step": 25582 }, { "epoch": 0.6996007438197331, "grad_norm": 1.305321216583252, "learning_rate": 4.3720625743113575e-06, "loss": 0.4702, "step": 25583 }, { "epoch": 0.6996280901334501, "grad_norm": 1.8390823602676392, "learning_rate": 4.371330475663e-06, "loss": 0.47, "step": 25584 }, { "epoch": 0.6996554364471669, "grad_norm": 1.5368767976760864, "learning_rate": 4.370598421169968e-06, "loss": 0.4248, "step": 25585 }, { "epoch": 0.6996827827608838, "grad_norm": 1.287371277809143, "learning_rate": 4.3698664108380106e-06, "loss": 0.4238, "step": 25586 }, { "epoch": 0.6997101290746007, "grad_norm": 1.6036391258239746, "learning_rate": 4.369134444672867e-06, "loss": 0.3281, "step": 25587 }, { "epoch": 0.6997374753883177, "grad_norm": 1.4164329767227173, "learning_rate": 4.368402522680275e-06, "loss": 0.4684, "step": 25588 }, { "epoch": 0.6997648217020346, "grad_norm": 1.276768445968628, "learning_rate": 4.367670644865983e-06, "loss": 0.4723, "step": 25589 }, { "epoch": 0.6997921680157515, "grad_norm": 1.4014036655426025, "learning_rate": 4.366938811235732e-06, "loss": 0.4538, "step": 25590 }, { "epoch": 0.6998195143294684, "grad_norm": 1.1988860368728638, "learning_rate": 4.366207021795258e-06, "loss": 0.4638, "step": 25591 }, { "epoch": 0.6998468606431854, "grad_norm": 1.3065810203552246, "learning_rate": 4.3654752765503025e-06, "loss": 0.5111, "step": 25592 }, { "epoch": 0.6998742069569022, "grad_norm": 1.291572093963623, "learning_rate": 4.364743575506611e-06, "loss": 0.4702, "step": 25593 }, { "epoch": 0.6999015532706191, "grad_norm": 1.2774525880813599, "learning_rate": 4.36401191866992e-06, "loss": 0.456, "step": 25594 }, { "epoch": 0.699928899584336, "grad_norm": 1.312742829322815, "learning_rate": 4.3632803060459665e-06, "loss": 0.4755, "step": 25595 }, { "epoch": 0.6999562458980529, "grad_norm": 1.2409627437591553, "learning_rate": 4.362548737640495e-06, "loss": 0.4729, "step": 25596 }, { "epoch": 0.6999835922117699, "grad_norm": 1.4953138828277588, "learning_rate": 4.361817213459244e-06, "loss": 0.4056, "step": 25597 }, { "epoch": 0.7000109385254868, "grad_norm": 1.3270641565322876, "learning_rate": 4.361085733507945e-06, "loss": 0.4501, "step": 25598 }, { "epoch": 0.7000382848392037, "grad_norm": 1.289028286933899, "learning_rate": 4.360354297792346e-06, "loss": 0.7147, "step": 25599 }, { "epoch": 0.7000656311529205, "grad_norm": 1.807876467704773, "learning_rate": 4.359622906318179e-06, "loss": 0.429, "step": 25600 }, { "epoch": 0.7000929774666375, "grad_norm": 1.8151549100875854, "learning_rate": 4.358891559091182e-06, "loss": 0.442, "step": 25601 }, { "epoch": 0.7001203237803544, "grad_norm": 1.461766242980957, "learning_rate": 4.3581602561170946e-06, "loss": 0.2955, "step": 25602 }, { "epoch": 0.7001476700940713, "grad_norm": 1.0344290733337402, "learning_rate": 4.357428997401653e-06, "loss": 0.3732, "step": 25603 }, { "epoch": 0.7001750164077882, "grad_norm": 1.5765265226364136, "learning_rate": 4.356697782950589e-06, "loss": 0.6794, "step": 25604 }, { "epoch": 0.7002023627215052, "grad_norm": 1.6794147491455078, "learning_rate": 4.355966612769646e-06, "loss": 0.36, "step": 25605 }, { "epoch": 0.7002297090352221, "grad_norm": 1.4095083475112915, "learning_rate": 4.355235486864558e-06, "loss": 0.4566, "step": 25606 }, { "epoch": 0.700257055348939, "grad_norm": 1.336270809173584, "learning_rate": 4.354504405241058e-06, "loss": 0.4916, "step": 25607 }, { "epoch": 0.7002844016626558, "grad_norm": 2.245500326156616, "learning_rate": 4.353773367904882e-06, "loss": 0.6844, "step": 25608 }, { "epoch": 0.7003117479763727, "grad_norm": 1.3016793727874756, "learning_rate": 4.353042374861766e-06, "loss": 0.4617, "step": 25609 }, { "epoch": 0.7003390942900897, "grad_norm": 1.2622122764587402, "learning_rate": 4.352311426117438e-06, "loss": 0.4419, "step": 25610 }, { "epoch": 0.7003664406038066, "grad_norm": 1.2437137365341187, "learning_rate": 4.351580521677643e-06, "loss": 0.4278, "step": 25611 }, { "epoch": 0.7003937869175235, "grad_norm": 1.2290856838226318, "learning_rate": 4.350849661548108e-06, "loss": 0.4849, "step": 25612 }, { "epoch": 0.7004211332312404, "grad_norm": 1.2844719886779785, "learning_rate": 4.350118845734568e-06, "loss": 0.4627, "step": 25613 }, { "epoch": 0.7004484795449574, "grad_norm": 1.1725209951400757, "learning_rate": 4.349388074242752e-06, "loss": 0.3386, "step": 25614 }, { "epoch": 0.7004758258586743, "grad_norm": 1.5418132543563843, "learning_rate": 4.3486573470784e-06, "loss": 0.4898, "step": 25615 }, { "epoch": 0.7005031721723911, "grad_norm": 1.276315689086914, "learning_rate": 4.347926664247241e-06, "loss": 0.4929, "step": 25616 }, { "epoch": 0.700530518486108, "grad_norm": 1.257338523864746, "learning_rate": 4.347196025755002e-06, "loss": 0.7225, "step": 25617 }, { "epoch": 0.700557864799825, "grad_norm": 1.4010355472564697, "learning_rate": 4.346465431607424e-06, "loss": 0.456, "step": 25618 }, { "epoch": 0.7005852111135419, "grad_norm": 1.2814730405807495, "learning_rate": 4.345734881810233e-06, "loss": 0.4705, "step": 25619 }, { "epoch": 0.7006125574272588, "grad_norm": 1.2271051406860352, "learning_rate": 4.345004376369158e-06, "loss": 0.4612, "step": 25620 }, { "epoch": 0.7006399037409757, "grad_norm": 1.4863300323486328, "learning_rate": 4.344273915289934e-06, "loss": 0.4248, "step": 25621 }, { "epoch": 0.7006672500546927, "grad_norm": 1.3714865446090698, "learning_rate": 4.343543498578291e-06, "loss": 0.4838, "step": 25622 }, { "epoch": 0.7006945963684096, "grad_norm": 1.646742820739746, "learning_rate": 4.342813126239953e-06, "loss": 0.4578, "step": 25623 }, { "epoch": 0.7007219426821264, "grad_norm": 1.1594735383987427, "learning_rate": 4.3420827982806576e-06, "loss": 0.6899, "step": 25624 }, { "epoch": 0.7007492889958433, "grad_norm": 1.187043309211731, "learning_rate": 4.341352514706131e-06, "loss": 0.4596, "step": 25625 }, { "epoch": 0.7007766353095602, "grad_norm": 1.324342131614685, "learning_rate": 4.3406222755220976e-06, "loss": 0.4494, "step": 25626 }, { "epoch": 0.7008039816232772, "grad_norm": 1.4991306066513062, "learning_rate": 4.339892080734293e-06, "loss": 0.4806, "step": 25627 }, { "epoch": 0.7008313279369941, "grad_norm": 1.20549738407135, "learning_rate": 4.339161930348442e-06, "loss": 0.4725, "step": 25628 }, { "epoch": 0.700858674250711, "grad_norm": 1.3923734426498413, "learning_rate": 4.338431824370273e-06, "loss": 0.4743, "step": 25629 }, { "epoch": 0.700886020564428, "grad_norm": 1.1921557188034058, "learning_rate": 4.33770176280551e-06, "loss": 0.7237, "step": 25630 }, { "epoch": 0.7009133668781449, "grad_norm": 1.2924830913543701, "learning_rate": 4.336971745659886e-06, "loss": 0.4446, "step": 25631 }, { "epoch": 0.7009407131918617, "grad_norm": 1.1941852569580078, "learning_rate": 4.3362417729391245e-06, "loss": 0.4666, "step": 25632 }, { "epoch": 0.7009680595055786, "grad_norm": 1.2502259016036987, "learning_rate": 4.33551184464895e-06, "loss": 0.4266, "step": 25633 }, { "epoch": 0.7009954058192955, "grad_norm": 1.5337300300598145, "learning_rate": 4.334781960795094e-06, "loss": 0.326, "step": 25634 }, { "epoch": 0.7010227521330125, "grad_norm": 1.700081706047058, "learning_rate": 4.334052121383279e-06, "loss": 0.4463, "step": 25635 }, { "epoch": 0.7010500984467294, "grad_norm": 1.181732177734375, "learning_rate": 4.333322326419227e-06, "loss": 0.4485, "step": 25636 }, { "epoch": 0.7010774447604463, "grad_norm": 1.3251270055770874, "learning_rate": 4.3325925759086695e-06, "loss": 0.4808, "step": 25637 }, { "epoch": 0.7011047910741632, "grad_norm": 1.3838332891464233, "learning_rate": 4.331862869857329e-06, "loss": 0.4916, "step": 25638 }, { "epoch": 0.7011321373878802, "grad_norm": 1.3315914869308472, "learning_rate": 4.331133208270925e-06, "loss": 0.4782, "step": 25639 }, { "epoch": 0.701159483701597, "grad_norm": 1.4626870155334473, "learning_rate": 4.330403591155191e-06, "loss": 0.4803, "step": 25640 }, { "epoch": 0.7011868300153139, "grad_norm": 1.3477157354354858, "learning_rate": 4.329674018515844e-06, "loss": 0.7535, "step": 25641 }, { "epoch": 0.7012141763290308, "grad_norm": 1.8070024251937866, "learning_rate": 4.328944490358609e-06, "loss": 0.4379, "step": 25642 }, { "epoch": 0.7012415226427477, "grad_norm": 1.3174419403076172, "learning_rate": 4.328215006689208e-06, "loss": 0.4942, "step": 25643 }, { "epoch": 0.7012688689564647, "grad_norm": 1.289854645729065, "learning_rate": 4.327485567513364e-06, "loss": 0.4843, "step": 25644 }, { "epoch": 0.7012962152701816, "grad_norm": 1.3180068731307983, "learning_rate": 4.326756172836795e-06, "loss": 0.4762, "step": 25645 }, { "epoch": 0.7013235615838985, "grad_norm": 1.1759957075119019, "learning_rate": 4.326026822665232e-06, "loss": 0.4656, "step": 25646 }, { "epoch": 0.7013509078976154, "grad_norm": 1.3789666891098022, "learning_rate": 4.325297517004392e-06, "loss": 0.4889, "step": 25647 }, { "epoch": 0.7013782542113323, "grad_norm": 1.230899453163147, "learning_rate": 4.324568255859992e-06, "loss": 0.473, "step": 25648 }, { "epoch": 0.7014056005250492, "grad_norm": 1.6879034042358398, "learning_rate": 4.32383903923776e-06, "loss": 0.3504, "step": 25649 }, { "epoch": 0.7014329468387661, "grad_norm": 1.8140119314193726, "learning_rate": 4.323109867143415e-06, "loss": 0.7367, "step": 25650 }, { "epoch": 0.701460293152483, "grad_norm": 1.6203525066375732, "learning_rate": 4.322380739582673e-06, "loss": 0.3268, "step": 25651 }, { "epoch": 0.7014876394662, "grad_norm": 1.1994132995605469, "learning_rate": 4.321651656561255e-06, "loss": 0.4816, "step": 25652 }, { "epoch": 0.7015149857799169, "grad_norm": 1.672540545463562, "learning_rate": 4.320922618084884e-06, "loss": 0.3257, "step": 25653 }, { "epoch": 0.7015423320936338, "grad_norm": 1.5781525373458862, "learning_rate": 4.320193624159276e-06, "loss": 0.5049, "step": 25654 }, { "epoch": 0.7015696784073507, "grad_norm": 1.2935901880264282, "learning_rate": 4.319464674790149e-06, "loss": 0.3481, "step": 25655 }, { "epoch": 0.7015970247210676, "grad_norm": 1.3299717903137207, "learning_rate": 4.318735769983225e-06, "loss": 0.4958, "step": 25656 }, { "epoch": 0.7016243710347845, "grad_norm": 1.7120429277420044, "learning_rate": 4.318006909744221e-06, "loss": 0.3261, "step": 25657 }, { "epoch": 0.7016517173485014, "grad_norm": 1.4349629878997803, "learning_rate": 4.317278094078851e-06, "loss": 0.5109, "step": 25658 }, { "epoch": 0.7016790636622183, "grad_norm": 1.6317270994186401, "learning_rate": 4.316549322992837e-06, "loss": 0.4654, "step": 25659 }, { "epoch": 0.7017064099759353, "grad_norm": 1.3194165229797363, "learning_rate": 4.315820596491894e-06, "loss": 0.4677, "step": 25660 }, { "epoch": 0.7017337562896522, "grad_norm": 1.3587239980697632, "learning_rate": 4.315091914581736e-06, "loss": 0.4542, "step": 25661 }, { "epoch": 0.7017611026033691, "grad_norm": 1.1689077615737915, "learning_rate": 4.314363277268085e-06, "loss": 0.3386, "step": 25662 }, { "epoch": 0.701788448917086, "grad_norm": 1.406726598739624, "learning_rate": 4.3136346845566545e-06, "loss": 0.4543, "step": 25663 }, { "epoch": 0.7018157952308028, "grad_norm": 1.5217081308364868, "learning_rate": 4.31290613645316e-06, "loss": 0.5063, "step": 25664 }, { "epoch": 0.7018431415445198, "grad_norm": 1.088230848312378, "learning_rate": 4.312177632963312e-06, "loss": 0.3467, "step": 25665 }, { "epoch": 0.7018704878582367, "grad_norm": 1.7114821672439575, "learning_rate": 4.311449174092833e-06, "loss": 0.4687, "step": 25666 }, { "epoch": 0.7018978341719536, "grad_norm": 1.52958083152771, "learning_rate": 4.310720759847436e-06, "loss": 0.4929, "step": 25667 }, { "epoch": 0.7019251804856705, "grad_norm": 1.406348466873169, "learning_rate": 4.309992390232829e-06, "loss": 0.3051, "step": 25668 }, { "epoch": 0.7019525267993875, "grad_norm": 1.1806889772415161, "learning_rate": 4.309264065254733e-06, "loss": 0.4835, "step": 25669 }, { "epoch": 0.7019798731131044, "grad_norm": 1.5093027353286743, "learning_rate": 4.30853578491886e-06, "loss": 0.7492, "step": 25670 }, { "epoch": 0.7020072194268213, "grad_norm": 1.3962557315826416, "learning_rate": 4.307807549230919e-06, "loss": 0.4805, "step": 25671 }, { "epoch": 0.7020345657405381, "grad_norm": 1.6539888381958008, "learning_rate": 4.307079358196628e-06, "loss": 0.4816, "step": 25672 }, { "epoch": 0.702061912054255, "grad_norm": 1.4333595037460327, "learning_rate": 4.306351211821699e-06, "loss": 0.4734, "step": 25673 }, { "epoch": 0.702089258367972, "grad_norm": 1.4023085832595825, "learning_rate": 4.305623110111837e-06, "loss": 0.3288, "step": 25674 }, { "epoch": 0.7021166046816889, "grad_norm": 1.6896692514419556, "learning_rate": 4.304895053072764e-06, "loss": 0.3302, "step": 25675 }, { "epoch": 0.7021439509954058, "grad_norm": 1.3891081809997559, "learning_rate": 4.304167040710186e-06, "loss": 0.7066, "step": 25676 }, { "epoch": 0.7021712973091228, "grad_norm": 1.3309394121170044, "learning_rate": 4.303439073029815e-06, "loss": 0.7133, "step": 25677 }, { "epoch": 0.7021986436228397, "grad_norm": 1.7350510358810425, "learning_rate": 4.302711150037358e-06, "loss": 0.3446, "step": 25678 }, { "epoch": 0.7022259899365566, "grad_norm": 1.4226126670837402, "learning_rate": 4.301983271738535e-06, "loss": 0.4573, "step": 25679 }, { "epoch": 0.7022533362502734, "grad_norm": 1.1909773349761963, "learning_rate": 4.301255438139042e-06, "loss": 0.4623, "step": 25680 }, { "epoch": 0.7022806825639903, "grad_norm": 1.115228295326233, "learning_rate": 4.3005276492446015e-06, "loss": 0.3384, "step": 25681 }, { "epoch": 0.7023080288777073, "grad_norm": 1.1442970037460327, "learning_rate": 4.299799905060918e-06, "loss": 0.3389, "step": 25682 }, { "epoch": 0.7023353751914242, "grad_norm": 1.3598871231079102, "learning_rate": 4.2990722055936955e-06, "loss": 0.4541, "step": 25683 }, { "epoch": 0.7023627215051411, "grad_norm": 1.2391140460968018, "learning_rate": 4.29834455084865e-06, "loss": 0.4689, "step": 25684 }, { "epoch": 0.702390067818858, "grad_norm": 1.5789105892181396, "learning_rate": 4.2976169408314875e-06, "loss": 0.3228, "step": 25685 }, { "epoch": 0.702417414132575, "grad_norm": 1.112969994544983, "learning_rate": 4.2968893755479155e-06, "loss": 0.4402, "step": 25686 }, { "epoch": 0.7024447604462919, "grad_norm": 1.245635986328125, "learning_rate": 4.296161855003638e-06, "loss": 0.4638, "step": 25687 }, { "epoch": 0.7024721067600087, "grad_norm": 1.2716054916381836, "learning_rate": 4.295434379204369e-06, "loss": 0.4387, "step": 25688 }, { "epoch": 0.7024994530737256, "grad_norm": 1.5077900886535645, "learning_rate": 4.294706948155812e-06, "loss": 0.4405, "step": 25689 }, { "epoch": 0.7025267993874426, "grad_norm": 1.134499192237854, "learning_rate": 4.2939795618636684e-06, "loss": 0.4702, "step": 25690 }, { "epoch": 0.7025541457011595, "grad_norm": 1.1423677206039429, "learning_rate": 4.293252220333654e-06, "loss": 0.4864, "step": 25691 }, { "epoch": 0.7025814920148764, "grad_norm": 1.575188398361206, "learning_rate": 4.292524923571469e-06, "loss": 0.4353, "step": 25692 }, { "epoch": 0.7026088383285933, "grad_norm": 1.2799184322357178, "learning_rate": 4.291797671582818e-06, "loss": 0.4368, "step": 25693 }, { "epoch": 0.7026361846423103, "grad_norm": 1.3642714023590088, "learning_rate": 4.29107046437341e-06, "loss": 0.4407, "step": 25694 }, { "epoch": 0.7026635309560272, "grad_norm": 1.3605449199676514, "learning_rate": 4.2903433019489485e-06, "loss": 0.444, "step": 25695 }, { "epoch": 0.702690877269744, "grad_norm": 1.3100203275680542, "learning_rate": 4.289616184315133e-06, "loss": 0.3582, "step": 25696 }, { "epoch": 0.7027182235834609, "grad_norm": 1.3370285034179688, "learning_rate": 4.2888891114776755e-06, "loss": 0.4562, "step": 25697 }, { "epoch": 0.7027455698971778, "grad_norm": 1.2189099788665771, "learning_rate": 4.2881620834422745e-06, "loss": 0.4358, "step": 25698 }, { "epoch": 0.7027729162108948, "grad_norm": 1.2486820220947266, "learning_rate": 4.287435100214635e-06, "loss": 0.4662, "step": 25699 }, { "epoch": 0.7028002625246117, "grad_norm": 1.3967673778533936, "learning_rate": 4.2867081618004565e-06, "loss": 0.4838, "step": 25700 }, { "epoch": 0.7028276088383286, "grad_norm": 1.2838231325149536, "learning_rate": 4.285981268205448e-06, "loss": 0.6902, "step": 25701 }, { "epoch": 0.7028549551520455, "grad_norm": 1.5843626260757446, "learning_rate": 4.285254419435308e-06, "loss": 0.4651, "step": 25702 }, { "epoch": 0.7028823014657624, "grad_norm": 1.4689768552780151, "learning_rate": 4.284527615495735e-06, "loss": 0.4464, "step": 25703 }, { "epoch": 0.7029096477794793, "grad_norm": 1.4398739337921143, "learning_rate": 4.283800856392438e-06, "loss": 0.3623, "step": 25704 }, { "epoch": 0.7029369940931962, "grad_norm": 1.1921292543411255, "learning_rate": 4.283074142131116e-06, "loss": 0.4471, "step": 25705 }, { "epoch": 0.7029643404069131, "grad_norm": 1.2006033658981323, "learning_rate": 4.282347472717464e-06, "loss": 0.4588, "step": 25706 }, { "epoch": 0.7029916867206301, "grad_norm": 1.3466609716415405, "learning_rate": 4.28162084815719e-06, "loss": 0.4314, "step": 25707 }, { "epoch": 0.703019033034347, "grad_norm": 1.2871971130371094, "learning_rate": 4.280894268455992e-06, "loss": 0.4538, "step": 25708 }, { "epoch": 0.7030463793480639, "grad_norm": 1.2121825218200684, "learning_rate": 4.280167733619565e-06, "loss": 0.4714, "step": 25709 }, { "epoch": 0.7030737256617808, "grad_norm": 1.394925832748413, "learning_rate": 4.279441243653616e-06, "loss": 0.4609, "step": 25710 }, { "epoch": 0.7031010719754976, "grad_norm": 1.2963464260101318, "learning_rate": 4.27871479856384e-06, "loss": 0.7016, "step": 25711 }, { "epoch": 0.7031284182892146, "grad_norm": 1.1395949125289917, "learning_rate": 4.277988398355937e-06, "loss": 0.6946, "step": 25712 }, { "epoch": 0.7031557646029315, "grad_norm": 1.2112748622894287, "learning_rate": 4.2772620430356e-06, "loss": 0.4562, "step": 25713 }, { "epoch": 0.7031831109166484, "grad_norm": 1.1905690431594849, "learning_rate": 4.276535732608541e-06, "loss": 0.437, "step": 25714 }, { "epoch": 0.7032104572303653, "grad_norm": 4.040637016296387, "learning_rate": 4.27580946708044e-06, "loss": 0.4499, "step": 25715 }, { "epoch": 0.7032378035440823, "grad_norm": 1.9346562623977661, "learning_rate": 4.275083246457007e-06, "loss": 0.3591, "step": 25716 }, { "epoch": 0.7032651498577992, "grad_norm": 1.332021713256836, "learning_rate": 4.274357070743934e-06, "loss": 0.457, "step": 25717 }, { "epoch": 0.7032924961715161, "grad_norm": 1.6312477588653564, "learning_rate": 4.273630939946916e-06, "loss": 0.7064, "step": 25718 }, { "epoch": 0.7033198424852329, "grad_norm": 1.4056613445281982, "learning_rate": 4.272904854071653e-06, "loss": 0.478, "step": 25719 }, { "epoch": 0.7033471887989499, "grad_norm": 1.55897057056427, "learning_rate": 4.272178813123842e-06, "loss": 0.5004, "step": 25720 }, { "epoch": 0.7033745351126668, "grad_norm": 1.169824481010437, "learning_rate": 4.271452817109175e-06, "loss": 0.4875, "step": 25721 }, { "epoch": 0.7034018814263837, "grad_norm": 1.5677450895309448, "learning_rate": 4.2707268660333454e-06, "loss": 0.453, "step": 25722 }, { "epoch": 0.7034292277401006, "grad_norm": 1.6134337186813354, "learning_rate": 4.270000959902055e-06, "loss": 0.4568, "step": 25723 }, { "epoch": 0.7034565740538176, "grad_norm": 1.528486728668213, "learning_rate": 4.2692750987209945e-06, "loss": 0.4699, "step": 25724 }, { "epoch": 0.7034839203675345, "grad_norm": 1.3529993295669556, "learning_rate": 4.2685492824958545e-06, "loss": 0.47, "step": 25725 }, { "epoch": 0.7035112666812514, "grad_norm": 1.1788806915283203, "learning_rate": 4.2678235112323355e-06, "loss": 0.4427, "step": 25726 }, { "epoch": 0.7035386129949682, "grad_norm": 3.065950393676758, "learning_rate": 4.267097784936129e-06, "loss": 0.7326, "step": 25727 }, { "epoch": 0.7035659593086852, "grad_norm": 1.2946401834487915, "learning_rate": 4.266372103612921e-06, "loss": 0.4736, "step": 25728 }, { "epoch": 0.7035933056224021, "grad_norm": 1.2802842855453491, "learning_rate": 4.265646467268416e-06, "loss": 0.437, "step": 25729 }, { "epoch": 0.703620651936119, "grad_norm": 1.312955379486084, "learning_rate": 4.2649208759083e-06, "loss": 0.4422, "step": 25730 }, { "epoch": 0.7036479982498359, "grad_norm": 1.739648699760437, "learning_rate": 4.264195329538262e-06, "loss": 0.4963, "step": 25731 }, { "epoch": 0.7036753445635529, "grad_norm": 1.3094642162322998, "learning_rate": 4.263469828164001e-06, "loss": 0.4573, "step": 25732 }, { "epoch": 0.7037026908772698, "grad_norm": 1.4258214235305786, "learning_rate": 4.262744371791205e-06, "loss": 0.4766, "step": 25733 }, { "epoch": 0.7037300371909867, "grad_norm": 1.2079020738601685, "learning_rate": 4.262018960425566e-06, "loss": 0.4588, "step": 25734 }, { "epoch": 0.7037573835047035, "grad_norm": 1.2936958074569702, "learning_rate": 4.261293594072768e-06, "loss": 0.4826, "step": 25735 }, { "epoch": 0.7037847298184204, "grad_norm": 1.653035283088684, "learning_rate": 4.2605682727385115e-06, "loss": 0.4218, "step": 25736 }, { "epoch": 0.7038120761321374, "grad_norm": 1.7364925146102905, "learning_rate": 4.25984299642848e-06, "loss": 0.4541, "step": 25737 }, { "epoch": 0.7038394224458543, "grad_norm": 1.28628671169281, "learning_rate": 4.2591177651483625e-06, "loss": 0.3333, "step": 25738 }, { "epoch": 0.7038667687595712, "grad_norm": 1.47946035861969, "learning_rate": 4.258392578903855e-06, "loss": 0.4943, "step": 25739 }, { "epoch": 0.7038941150732881, "grad_norm": 1.3346513509750366, "learning_rate": 4.25766743770064e-06, "loss": 0.4632, "step": 25740 }, { "epoch": 0.7039214613870051, "grad_norm": 1.2354153394699097, "learning_rate": 4.256942341544406e-06, "loss": 0.4561, "step": 25741 }, { "epoch": 0.703948807700722, "grad_norm": 1.3942629098892212, "learning_rate": 4.256217290440846e-06, "loss": 0.4686, "step": 25742 }, { "epoch": 0.7039761540144388, "grad_norm": 1.4446524381637573, "learning_rate": 4.2554922843956446e-06, "loss": 0.4906, "step": 25743 }, { "epoch": 0.7040035003281557, "grad_norm": 1.673959493637085, "learning_rate": 4.254767323414486e-06, "loss": 0.3345, "step": 25744 }, { "epoch": 0.7040308466418727, "grad_norm": 1.2438348531723022, "learning_rate": 4.254042407503065e-06, "loss": 0.4994, "step": 25745 }, { "epoch": 0.7040581929555896, "grad_norm": 1.3432972431182861, "learning_rate": 4.253317536667064e-06, "loss": 0.481, "step": 25746 }, { "epoch": 0.7040855392693065, "grad_norm": 1.227428913116455, "learning_rate": 4.252592710912165e-06, "loss": 0.4771, "step": 25747 }, { "epoch": 0.7041128855830234, "grad_norm": 1.3539814949035645, "learning_rate": 4.251867930244064e-06, "loss": 0.4759, "step": 25748 }, { "epoch": 0.7041402318967404, "grad_norm": 1.8725759983062744, "learning_rate": 4.251143194668444e-06, "loss": 0.3288, "step": 25749 }, { "epoch": 0.7041675782104573, "grad_norm": 1.3905869722366333, "learning_rate": 4.2504185041909816e-06, "loss": 0.4562, "step": 25750 }, { "epoch": 0.7041949245241741, "grad_norm": 1.4095498323440552, "learning_rate": 4.249693858817371e-06, "loss": 0.4585, "step": 25751 }, { "epoch": 0.704222270837891, "grad_norm": 1.8002023696899414, "learning_rate": 4.248969258553294e-06, "loss": 0.4324, "step": 25752 }, { "epoch": 0.7042496171516079, "grad_norm": 1.4884428977966309, "learning_rate": 4.248244703404431e-06, "loss": 0.4426, "step": 25753 }, { "epoch": 0.7042769634653249, "grad_norm": 1.2255117893218994, "learning_rate": 4.247520193376472e-06, "loss": 0.4596, "step": 25754 }, { "epoch": 0.7043043097790418, "grad_norm": 1.2865325212478638, "learning_rate": 4.246795728475099e-06, "loss": 0.4517, "step": 25755 }, { "epoch": 0.7043316560927587, "grad_norm": 1.1522341966629028, "learning_rate": 4.246071308705991e-06, "loss": 0.7055, "step": 25756 }, { "epoch": 0.7043590024064756, "grad_norm": 1.5965967178344727, "learning_rate": 4.245346934074839e-06, "loss": 0.3232, "step": 25757 }, { "epoch": 0.7043863487201926, "grad_norm": 1.1396585702896118, "learning_rate": 4.244622604587319e-06, "loss": 0.7128, "step": 25758 }, { "epoch": 0.7044136950339094, "grad_norm": 1.3388020992279053, "learning_rate": 4.243898320249116e-06, "loss": 0.4904, "step": 25759 }, { "epoch": 0.7044410413476263, "grad_norm": 1.4418998956680298, "learning_rate": 4.243174081065907e-06, "loss": 0.4787, "step": 25760 }, { "epoch": 0.7044683876613432, "grad_norm": 1.5314505100250244, "learning_rate": 4.24244988704338e-06, "loss": 0.4527, "step": 25761 }, { "epoch": 0.7044957339750602, "grad_norm": 1.6236563920974731, "learning_rate": 4.241725738187214e-06, "loss": 0.339, "step": 25762 }, { "epoch": 0.7045230802887771, "grad_norm": 1.2058396339416504, "learning_rate": 4.241001634503085e-06, "loss": 0.4362, "step": 25763 }, { "epoch": 0.704550426602494, "grad_norm": 1.0835639238357544, "learning_rate": 4.240277575996683e-06, "loss": 0.3039, "step": 25764 }, { "epoch": 0.7045777729162109, "grad_norm": 1.4758119583129883, "learning_rate": 4.239553562673681e-06, "loss": 0.4541, "step": 25765 }, { "epoch": 0.7046051192299279, "grad_norm": 1.4342782497406006, "learning_rate": 4.238829594539756e-06, "loss": 0.4335, "step": 25766 }, { "epoch": 0.7046324655436447, "grad_norm": 1.4468662738800049, "learning_rate": 4.2381056716005955e-06, "loss": 0.4232, "step": 25767 }, { "epoch": 0.7046598118573616, "grad_norm": 1.2879759073257446, "learning_rate": 4.2373817938618744e-06, "loss": 0.4554, "step": 25768 }, { "epoch": 0.7046871581710785, "grad_norm": 2.4576942920684814, "learning_rate": 4.236657961329269e-06, "loss": 0.3634, "step": 25769 }, { "epoch": 0.7047145044847954, "grad_norm": 2.3105123043060303, "learning_rate": 4.235934174008463e-06, "loss": 0.3626, "step": 25770 }, { "epoch": 0.7047418507985124, "grad_norm": 1.3077523708343506, "learning_rate": 4.23521043190513e-06, "loss": 0.4551, "step": 25771 }, { "epoch": 0.7047691971122293, "grad_norm": 1.9737132787704468, "learning_rate": 4.23448673502495e-06, "loss": 0.4639, "step": 25772 }, { "epoch": 0.7047965434259462, "grad_norm": 1.2852060794830322, "learning_rate": 4.233763083373594e-06, "loss": 0.4295, "step": 25773 }, { "epoch": 0.7048238897396631, "grad_norm": 1.488674521446228, "learning_rate": 4.2330394769567496e-06, "loss": 0.482, "step": 25774 }, { "epoch": 0.70485123605338, "grad_norm": 2.741849184036255, "learning_rate": 4.232315915780086e-06, "loss": 0.7044, "step": 25775 }, { "epoch": 0.7048785823670969, "grad_norm": 1.239780306816101, "learning_rate": 4.231592399849278e-06, "loss": 0.6832, "step": 25776 }, { "epoch": 0.7049059286808138, "grad_norm": 1.5262192487716675, "learning_rate": 4.230868929170008e-06, "loss": 0.759, "step": 25777 }, { "epoch": 0.7049332749945307, "grad_norm": 1.337514877319336, "learning_rate": 4.230145503747947e-06, "loss": 0.4847, "step": 25778 }, { "epoch": 0.7049606213082477, "grad_norm": 1.4360358715057373, "learning_rate": 4.229422123588767e-06, "loss": 0.4676, "step": 25779 }, { "epoch": 0.7049879676219646, "grad_norm": 1.4224669933319092, "learning_rate": 4.22869878869815e-06, "loss": 0.4487, "step": 25780 }, { "epoch": 0.7050153139356815, "grad_norm": 1.7590374946594238, "learning_rate": 4.227975499081768e-06, "loss": 0.4025, "step": 25781 }, { "epoch": 0.7050426602493984, "grad_norm": 1.8102216720581055, "learning_rate": 4.227252254745289e-06, "loss": 0.474, "step": 25782 }, { "epoch": 0.7050700065631152, "grad_norm": 1.4329018592834473, "learning_rate": 4.226529055694396e-06, "loss": 0.448, "step": 25783 }, { "epoch": 0.7050973528768322, "grad_norm": 1.326507568359375, "learning_rate": 4.2258059019347605e-06, "loss": 0.4783, "step": 25784 }, { "epoch": 0.7051246991905491, "grad_norm": 1.157814383506775, "learning_rate": 4.2250827934720465e-06, "loss": 0.6619, "step": 25785 }, { "epoch": 0.705152045504266, "grad_norm": 1.520146131515503, "learning_rate": 4.224359730311935e-06, "loss": 0.4589, "step": 25786 }, { "epoch": 0.705179391817983, "grad_norm": 1.4841208457946777, "learning_rate": 4.223636712460097e-06, "loss": 0.4611, "step": 25787 }, { "epoch": 0.7052067381316999, "grad_norm": 1.6547919511795044, "learning_rate": 4.2229137399222e-06, "loss": 0.4727, "step": 25788 }, { "epoch": 0.7052340844454168, "grad_norm": 1.36210298538208, "learning_rate": 4.222190812703923e-06, "loss": 0.4265, "step": 25789 }, { "epoch": 0.7052614307591337, "grad_norm": 1.3951517343521118, "learning_rate": 4.22146793081093e-06, "loss": 0.4887, "step": 25790 }, { "epoch": 0.7052887770728505, "grad_norm": 1.5563606023788452, "learning_rate": 4.220745094248895e-06, "loss": 0.504, "step": 25791 }, { "epoch": 0.7053161233865675, "grad_norm": 1.1977940797805786, "learning_rate": 4.22002230302349e-06, "loss": 0.4724, "step": 25792 }, { "epoch": 0.7053434697002844, "grad_norm": 1.2651737928390503, "learning_rate": 4.219299557140384e-06, "loss": 0.5017, "step": 25793 }, { "epoch": 0.7053708160140013, "grad_norm": 1.7614800930023193, "learning_rate": 4.218576856605245e-06, "loss": 0.3427, "step": 25794 }, { "epoch": 0.7053981623277182, "grad_norm": 1.1609151363372803, "learning_rate": 4.217854201423741e-06, "loss": 0.4587, "step": 25795 }, { "epoch": 0.7054255086414352, "grad_norm": 2.8514864444732666, "learning_rate": 4.217131591601547e-06, "loss": 0.7222, "step": 25796 }, { "epoch": 0.7054528549551521, "grad_norm": 1.6215922832489014, "learning_rate": 4.216409027144329e-06, "loss": 0.4376, "step": 25797 }, { "epoch": 0.705480201268869, "grad_norm": 1.3025614023208618, "learning_rate": 4.215686508057751e-06, "loss": 0.4535, "step": 25798 }, { "epoch": 0.7055075475825858, "grad_norm": 1.2202585935592651, "learning_rate": 4.214964034347486e-06, "loss": 0.4807, "step": 25799 }, { "epoch": 0.7055348938963028, "grad_norm": 1.3380117416381836, "learning_rate": 4.214241606019201e-06, "loss": 0.4763, "step": 25800 }, { "epoch": 0.7055622402100197, "grad_norm": 1.1647311449050903, "learning_rate": 4.213519223078559e-06, "loss": 0.7093, "step": 25801 }, { "epoch": 0.7055895865237366, "grad_norm": 1.648015022277832, "learning_rate": 4.212796885531234e-06, "loss": 0.4185, "step": 25802 }, { "epoch": 0.7056169328374535, "grad_norm": 1.8892773389816284, "learning_rate": 4.212074593382889e-06, "loss": 0.3219, "step": 25803 }, { "epoch": 0.7056442791511705, "grad_norm": 1.259237289428711, "learning_rate": 4.211352346639186e-06, "loss": 0.4689, "step": 25804 }, { "epoch": 0.7056716254648874, "grad_norm": 1.2586480379104614, "learning_rate": 4.210630145305799e-06, "loss": 0.4589, "step": 25805 }, { "epoch": 0.7056989717786042, "grad_norm": 1.3312087059020996, "learning_rate": 4.209907989388388e-06, "loss": 0.456, "step": 25806 }, { "epoch": 0.7057263180923211, "grad_norm": 1.3875505924224854, "learning_rate": 4.2091858788926195e-06, "loss": 0.449, "step": 25807 }, { "epoch": 0.705753664406038, "grad_norm": 1.3721277713775635, "learning_rate": 4.208463813824155e-06, "loss": 0.4756, "step": 25808 }, { "epoch": 0.705781010719755, "grad_norm": 1.843802571296692, "learning_rate": 4.207741794188667e-06, "loss": 0.404, "step": 25809 }, { "epoch": 0.7058083570334719, "grad_norm": 1.2764267921447754, "learning_rate": 4.207019819991813e-06, "loss": 0.4737, "step": 25810 }, { "epoch": 0.7058357033471888, "grad_norm": 1.6080516576766968, "learning_rate": 4.206297891239254e-06, "loss": 0.4757, "step": 25811 }, { "epoch": 0.7058630496609057, "grad_norm": 1.256500244140625, "learning_rate": 4.2055760079366625e-06, "loss": 0.4827, "step": 25812 }, { "epoch": 0.7058903959746227, "grad_norm": 1.2365376949310303, "learning_rate": 4.204854170089696e-06, "loss": 0.4936, "step": 25813 }, { "epoch": 0.7059177422883395, "grad_norm": 1.5132735967636108, "learning_rate": 4.204132377704013e-06, "loss": 0.458, "step": 25814 }, { "epoch": 0.7059450886020564, "grad_norm": 1.4009878635406494, "learning_rate": 4.203410630785285e-06, "loss": 0.4759, "step": 25815 }, { "epoch": 0.7059724349157733, "grad_norm": 1.3149299621582031, "learning_rate": 4.20268892933917e-06, "loss": 0.4721, "step": 25816 }, { "epoch": 0.7059997812294903, "grad_norm": 1.7448959350585938, "learning_rate": 4.201967273371325e-06, "loss": 0.319, "step": 25817 }, { "epoch": 0.7060271275432072, "grad_norm": 1.6062476634979248, "learning_rate": 4.201245662887417e-06, "loss": 0.4712, "step": 25818 }, { "epoch": 0.7060544738569241, "grad_norm": 1.4428589344024658, "learning_rate": 4.20052409789311e-06, "loss": 0.4761, "step": 25819 }, { "epoch": 0.706081820170641, "grad_norm": 1.586052417755127, "learning_rate": 4.199802578394052e-06, "loss": 0.3054, "step": 25820 }, { "epoch": 0.706109166484358, "grad_norm": 1.4475656747817993, "learning_rate": 4.199081104395914e-06, "loss": 0.4728, "step": 25821 }, { "epoch": 0.7061365127980748, "grad_norm": 1.3378822803497314, "learning_rate": 4.198359675904352e-06, "loss": 0.4734, "step": 25822 }, { "epoch": 0.7061638591117917, "grad_norm": 1.234271764755249, "learning_rate": 4.1976382929250225e-06, "loss": 0.4673, "step": 25823 }, { "epoch": 0.7061912054255086, "grad_norm": 1.3790837526321411, "learning_rate": 4.196916955463591e-06, "loss": 0.4724, "step": 25824 }, { "epoch": 0.7062185517392255, "grad_norm": 1.2302789688110352, "learning_rate": 4.196195663525712e-06, "loss": 0.4566, "step": 25825 }, { "epoch": 0.7062458980529425, "grad_norm": 1.7208884954452515, "learning_rate": 4.195474417117041e-06, "loss": 0.4641, "step": 25826 }, { "epoch": 0.7062732443666594, "grad_norm": 1.4942845106124878, "learning_rate": 4.194753216243245e-06, "loss": 0.4768, "step": 25827 }, { "epoch": 0.7063005906803763, "grad_norm": 1.390419840812683, "learning_rate": 4.194032060909975e-06, "loss": 0.459, "step": 25828 }, { "epoch": 0.7063279369940932, "grad_norm": 1.3525350093841553, "learning_rate": 4.19331095112289e-06, "loss": 0.486, "step": 25829 }, { "epoch": 0.7063552833078101, "grad_norm": 1.3106964826583862, "learning_rate": 4.1925898868876425e-06, "loss": 0.717, "step": 25830 }, { "epoch": 0.706382629621527, "grad_norm": 1.4050086736679077, "learning_rate": 4.191868868209897e-06, "loss": 0.5039, "step": 25831 }, { "epoch": 0.7064099759352439, "grad_norm": 1.7148611545562744, "learning_rate": 4.191147895095306e-06, "loss": 0.4478, "step": 25832 }, { "epoch": 0.7064373222489608, "grad_norm": 1.1048531532287598, "learning_rate": 4.19042696754952e-06, "loss": 0.4516, "step": 25833 }, { "epoch": 0.7064646685626778, "grad_norm": 1.314074993133545, "learning_rate": 4.189706085578205e-06, "loss": 0.4716, "step": 25834 }, { "epoch": 0.7064920148763947, "grad_norm": 1.4529430866241455, "learning_rate": 4.188985249187009e-06, "loss": 0.4127, "step": 25835 }, { "epoch": 0.7065193611901116, "grad_norm": 1.6284527778625488, "learning_rate": 4.188264458381586e-06, "loss": 0.4531, "step": 25836 }, { "epoch": 0.7065467075038285, "grad_norm": 1.2658621072769165, "learning_rate": 4.187543713167595e-06, "loss": 0.4787, "step": 25837 }, { "epoch": 0.7065740538175453, "grad_norm": 1.4348196983337402, "learning_rate": 4.186823013550688e-06, "loss": 0.4645, "step": 25838 }, { "epoch": 0.7066014001312623, "grad_norm": 1.8717790842056274, "learning_rate": 4.186102359536516e-06, "loss": 0.3526, "step": 25839 }, { "epoch": 0.7066287464449792, "grad_norm": 1.5300383567810059, "learning_rate": 4.185381751130738e-06, "loss": 0.717, "step": 25840 }, { "epoch": 0.7066560927586961, "grad_norm": 1.353885293006897, "learning_rate": 4.184661188339003e-06, "loss": 0.4715, "step": 25841 }, { "epoch": 0.706683439072413, "grad_norm": 1.973812222480774, "learning_rate": 4.183940671166965e-06, "loss": 0.723, "step": 25842 }, { "epoch": 0.70671078538613, "grad_norm": 1.2570165395736694, "learning_rate": 4.183220199620272e-06, "loss": 0.4631, "step": 25843 }, { "epoch": 0.7067381316998469, "grad_norm": 1.2562495470046997, "learning_rate": 4.182499773704583e-06, "loss": 0.4663, "step": 25844 }, { "epoch": 0.7067654780135638, "grad_norm": 1.2027020454406738, "learning_rate": 4.181779393425546e-06, "loss": 0.351, "step": 25845 }, { "epoch": 0.7067928243272806, "grad_norm": 1.3915420770645142, "learning_rate": 4.18105905878881e-06, "loss": 0.4326, "step": 25846 }, { "epoch": 0.7068201706409976, "grad_norm": 1.2763057947158813, "learning_rate": 4.180338769800031e-06, "loss": 0.4674, "step": 25847 }, { "epoch": 0.7068475169547145, "grad_norm": 1.3653970956802368, "learning_rate": 4.179618526464856e-06, "loss": 0.4647, "step": 25848 }, { "epoch": 0.7068748632684314, "grad_norm": 1.28514564037323, "learning_rate": 4.178898328788931e-06, "loss": 0.3328, "step": 25849 }, { "epoch": 0.7069022095821483, "grad_norm": 1.7864189147949219, "learning_rate": 4.1781781767779164e-06, "loss": 0.3089, "step": 25850 }, { "epoch": 0.7069295558958653, "grad_norm": 1.447916865348816, "learning_rate": 4.177458070437454e-06, "loss": 0.4091, "step": 25851 }, { "epoch": 0.7069569022095822, "grad_norm": 1.6315422058105469, "learning_rate": 4.1767380097731915e-06, "loss": 0.3222, "step": 25852 }, { "epoch": 0.7069842485232991, "grad_norm": 1.2529113292694092, "learning_rate": 4.176017994790784e-06, "loss": 0.487, "step": 25853 }, { "epoch": 0.7070115948370159, "grad_norm": 1.7425334453582764, "learning_rate": 4.175298025495876e-06, "loss": 0.4372, "step": 25854 }, { "epoch": 0.7070389411507328, "grad_norm": 1.2332223653793335, "learning_rate": 4.174578101894117e-06, "loss": 0.4841, "step": 25855 }, { "epoch": 0.7070662874644498, "grad_norm": 1.3716908693313599, "learning_rate": 4.173858223991152e-06, "loss": 0.461, "step": 25856 }, { "epoch": 0.7070936337781667, "grad_norm": 1.4151140451431274, "learning_rate": 4.17313839179263e-06, "loss": 0.4662, "step": 25857 }, { "epoch": 0.7071209800918836, "grad_norm": 1.9729689359664917, "learning_rate": 4.172418605304194e-06, "loss": 0.4645, "step": 25858 }, { "epoch": 0.7071483264056005, "grad_norm": 1.3187309503555298, "learning_rate": 4.171698864531497e-06, "loss": 0.4606, "step": 25859 }, { "epoch": 0.7071756727193175, "grad_norm": 1.4778645038604736, "learning_rate": 4.170979169480183e-06, "loss": 0.4706, "step": 25860 }, { "epoch": 0.7072030190330344, "grad_norm": 1.7486070394515991, "learning_rate": 4.170259520155892e-06, "loss": 0.4604, "step": 25861 }, { "epoch": 0.7072303653467512, "grad_norm": 1.7433958053588867, "learning_rate": 4.169539916564281e-06, "loss": 0.477, "step": 25862 }, { "epoch": 0.7072577116604681, "grad_norm": 1.2144125699996948, "learning_rate": 4.168820358710985e-06, "loss": 0.444, "step": 25863 }, { "epoch": 0.7072850579741851, "grad_norm": 1.3846960067749023, "learning_rate": 4.168100846601655e-06, "loss": 0.3602, "step": 25864 }, { "epoch": 0.707312404287902, "grad_norm": 1.4438936710357666, "learning_rate": 4.167381380241927e-06, "loss": 0.432, "step": 25865 }, { "epoch": 0.7073397506016189, "grad_norm": 1.8486946821212769, "learning_rate": 4.166661959637456e-06, "loss": 0.3413, "step": 25866 }, { "epoch": 0.7073670969153358, "grad_norm": 1.2383956909179688, "learning_rate": 4.165942584793879e-06, "loss": 0.7339, "step": 25867 }, { "epoch": 0.7073944432290528, "grad_norm": 1.6271220445632935, "learning_rate": 4.165223255716837e-06, "loss": 0.3419, "step": 25868 }, { "epoch": 0.7074217895427697, "grad_norm": 1.0811762809753418, "learning_rate": 4.164503972411981e-06, "loss": 0.3639, "step": 25869 }, { "epoch": 0.7074491358564865, "grad_norm": 1.5435364246368408, "learning_rate": 4.163784734884948e-06, "loss": 0.4629, "step": 25870 }, { "epoch": 0.7074764821702034, "grad_norm": 1.2425298690795898, "learning_rate": 4.163065543141379e-06, "loss": 0.6756, "step": 25871 }, { "epoch": 0.7075038284839203, "grad_norm": 5.345130443572998, "learning_rate": 4.16234639718692e-06, "loss": 0.6953, "step": 25872 }, { "epoch": 0.7075311747976373, "grad_norm": 1.4903817176818848, "learning_rate": 4.161627297027212e-06, "loss": 0.4671, "step": 25873 }, { "epoch": 0.7075585211113542, "grad_norm": 1.3971532583236694, "learning_rate": 4.160908242667892e-06, "loss": 0.4646, "step": 25874 }, { "epoch": 0.7075858674250711, "grad_norm": 1.2367432117462158, "learning_rate": 4.160189234114605e-06, "loss": 0.4695, "step": 25875 }, { "epoch": 0.707613213738788, "grad_norm": 1.3231134414672852, "learning_rate": 4.159470271372992e-06, "loss": 0.4344, "step": 25876 }, { "epoch": 0.707640560052505, "grad_norm": 1.2488634586334229, "learning_rate": 4.1587513544486855e-06, "loss": 0.4943, "step": 25877 }, { "epoch": 0.7076679063662218, "grad_norm": 1.2597213983535767, "learning_rate": 4.158032483347336e-06, "loss": 0.3332, "step": 25878 }, { "epoch": 0.7076952526799387, "grad_norm": 2.0248782634735107, "learning_rate": 4.157313658074576e-06, "loss": 0.6709, "step": 25879 }, { "epoch": 0.7077225989936556, "grad_norm": 1.6380916833877563, "learning_rate": 4.156594878636048e-06, "loss": 0.3374, "step": 25880 }, { "epoch": 0.7077499453073726, "grad_norm": 1.3346872329711914, "learning_rate": 4.155876145037384e-06, "loss": 0.3699, "step": 25881 }, { "epoch": 0.7077772916210895, "grad_norm": 1.4062232971191406, "learning_rate": 4.155157457284231e-06, "loss": 0.4672, "step": 25882 }, { "epoch": 0.7078046379348064, "grad_norm": 1.0276472568511963, "learning_rate": 4.154438815382222e-06, "loss": 0.3079, "step": 25883 }, { "epoch": 0.7078319842485233, "grad_norm": 1.4200623035430908, "learning_rate": 4.153720219336993e-06, "loss": 0.4479, "step": 25884 }, { "epoch": 0.7078593305622403, "grad_norm": 1.3231408596038818, "learning_rate": 4.153001669154186e-06, "loss": 0.4485, "step": 25885 }, { "epoch": 0.7078866768759571, "grad_norm": 1.288760781288147, "learning_rate": 4.152283164839437e-06, "loss": 0.4823, "step": 25886 }, { "epoch": 0.707914023189674, "grad_norm": 1.6350102424621582, "learning_rate": 4.1515647063983755e-06, "loss": 0.4351, "step": 25887 }, { "epoch": 0.7079413695033909, "grad_norm": 1.3235158920288086, "learning_rate": 4.150846293836648e-06, "loss": 0.4855, "step": 25888 }, { "epoch": 0.7079687158171079, "grad_norm": 1.4498653411865234, "learning_rate": 4.150127927159884e-06, "loss": 0.4686, "step": 25889 }, { "epoch": 0.7079960621308248, "grad_norm": 1.8439658880233765, "learning_rate": 4.149409606373721e-06, "loss": 0.4666, "step": 25890 }, { "epoch": 0.7080234084445417, "grad_norm": 1.471588134765625, "learning_rate": 4.148691331483793e-06, "loss": 0.4507, "step": 25891 }, { "epoch": 0.7080507547582586, "grad_norm": 1.2653203010559082, "learning_rate": 4.1479731024957345e-06, "loss": 0.4755, "step": 25892 }, { "epoch": 0.7080781010719756, "grad_norm": 1.367956280708313, "learning_rate": 4.147254919415177e-06, "loss": 0.4879, "step": 25893 }, { "epoch": 0.7081054473856924, "grad_norm": 1.5030092000961304, "learning_rate": 4.146536782247761e-06, "loss": 0.4357, "step": 25894 }, { "epoch": 0.7081327936994093, "grad_norm": 1.530177116394043, "learning_rate": 4.145818690999116e-06, "loss": 0.2853, "step": 25895 }, { "epoch": 0.7081601400131262, "grad_norm": 1.2128024101257324, "learning_rate": 4.145100645674872e-06, "loss": 0.4694, "step": 25896 }, { "epoch": 0.7081874863268431, "grad_norm": 1.1779106855392456, "learning_rate": 4.14438264628067e-06, "loss": 0.649, "step": 25897 }, { "epoch": 0.7082148326405601, "grad_norm": 1.7109124660491943, "learning_rate": 4.143664692822139e-06, "loss": 0.3767, "step": 25898 }, { "epoch": 0.708242178954277, "grad_norm": 2.0959794521331787, "learning_rate": 4.142946785304905e-06, "loss": 0.3097, "step": 25899 }, { "epoch": 0.7082695252679939, "grad_norm": 1.3130935430526733, "learning_rate": 4.14222892373461e-06, "loss": 0.4859, "step": 25900 }, { "epoch": 0.7082968715817108, "grad_norm": 1.2548494338989258, "learning_rate": 4.141511108116881e-06, "loss": 0.4802, "step": 25901 }, { "epoch": 0.7083242178954277, "grad_norm": 1.3665094375610352, "learning_rate": 4.140793338457348e-06, "loss": 0.4528, "step": 25902 }, { "epoch": 0.7083515642091446, "grad_norm": 1.2497528791427612, "learning_rate": 4.140075614761639e-06, "loss": 0.437, "step": 25903 }, { "epoch": 0.7083789105228615, "grad_norm": 1.2837682962417603, "learning_rate": 4.139357937035391e-06, "loss": 0.4816, "step": 25904 }, { "epoch": 0.7084062568365784, "grad_norm": 1.7274237871170044, "learning_rate": 4.138640305284231e-06, "loss": 0.4641, "step": 25905 }, { "epoch": 0.7084336031502954, "grad_norm": 1.5027108192443848, "learning_rate": 4.137922719513784e-06, "loss": 0.4752, "step": 25906 }, { "epoch": 0.7084609494640123, "grad_norm": 1.4006389379501343, "learning_rate": 4.137205179729688e-06, "loss": 0.4847, "step": 25907 }, { "epoch": 0.7084882957777292, "grad_norm": 1.408710241317749, "learning_rate": 4.136487685937566e-06, "loss": 0.4541, "step": 25908 }, { "epoch": 0.708515642091446, "grad_norm": 1.3104541301727295, "learning_rate": 4.135770238143045e-06, "loss": 0.7008, "step": 25909 }, { "epoch": 0.7085429884051629, "grad_norm": 1.163025975227356, "learning_rate": 4.135052836351761e-06, "loss": 0.489, "step": 25910 }, { "epoch": 0.7085703347188799, "grad_norm": 1.1880172491073608, "learning_rate": 4.134335480569336e-06, "loss": 0.4583, "step": 25911 }, { "epoch": 0.7085976810325968, "grad_norm": 1.858397126197815, "learning_rate": 4.133618170801394e-06, "loss": 0.7174, "step": 25912 }, { "epoch": 0.7086250273463137, "grad_norm": 1.5452797412872314, "learning_rate": 4.1329009070535705e-06, "loss": 0.3216, "step": 25913 }, { "epoch": 0.7086523736600306, "grad_norm": 1.4485028982162476, "learning_rate": 4.132183689331489e-06, "loss": 0.7152, "step": 25914 }, { "epoch": 0.7086797199737476, "grad_norm": 1.3192689418792725, "learning_rate": 4.131466517640775e-06, "loss": 0.3352, "step": 25915 }, { "epoch": 0.7087070662874645, "grad_norm": 1.283463716506958, "learning_rate": 4.130749391987051e-06, "loss": 0.4642, "step": 25916 }, { "epoch": 0.7087344126011813, "grad_norm": 1.3039891719818115, "learning_rate": 4.13003231237595e-06, "loss": 0.3218, "step": 25917 }, { "epoch": 0.7087617589148982, "grad_norm": 1.233193278312683, "learning_rate": 4.129315278813093e-06, "loss": 0.445, "step": 25918 }, { "epoch": 0.7087891052286152, "grad_norm": 1.1665006875991821, "learning_rate": 4.128598291304102e-06, "loss": 0.4327, "step": 25919 }, { "epoch": 0.7088164515423321, "grad_norm": 1.2603399753570557, "learning_rate": 4.127881349854609e-06, "loss": 0.4625, "step": 25920 }, { "epoch": 0.708843797856049, "grad_norm": 1.3810771703720093, "learning_rate": 4.127164454470234e-06, "loss": 0.4518, "step": 25921 }, { "epoch": 0.7088711441697659, "grad_norm": 1.6976277828216553, "learning_rate": 4.1264476051565975e-06, "loss": 0.4888, "step": 25922 }, { "epoch": 0.7088984904834829, "grad_norm": 1.5202932357788086, "learning_rate": 4.12573080191933e-06, "loss": 0.4719, "step": 25923 }, { "epoch": 0.7089258367971998, "grad_norm": 1.3048795461654663, "learning_rate": 4.125014044764051e-06, "loss": 0.458, "step": 25924 }, { "epoch": 0.7089531831109166, "grad_norm": 1.2096797227859497, "learning_rate": 4.124297333696384e-06, "loss": 0.4795, "step": 25925 }, { "epoch": 0.7089805294246335, "grad_norm": 1.712052583694458, "learning_rate": 4.12358066872195e-06, "loss": 0.3369, "step": 25926 }, { "epoch": 0.7090078757383504, "grad_norm": 1.3426657915115356, "learning_rate": 4.122864049846373e-06, "loss": 0.4441, "step": 25927 }, { "epoch": 0.7090352220520674, "grad_norm": 1.1981375217437744, "learning_rate": 4.12214747707527e-06, "loss": 0.7502, "step": 25928 }, { "epoch": 0.7090625683657843, "grad_norm": 1.4847384691238403, "learning_rate": 4.121430950414269e-06, "loss": 0.474, "step": 25929 }, { "epoch": 0.7090899146795012, "grad_norm": 1.3588842153549194, "learning_rate": 4.120714469868987e-06, "loss": 0.4547, "step": 25930 }, { "epoch": 0.7091172609932181, "grad_norm": 1.6055727005004883, "learning_rate": 4.119998035445043e-06, "loss": 0.4963, "step": 25931 }, { "epoch": 0.7091446073069351, "grad_norm": 1.5127818584442139, "learning_rate": 4.119281647148064e-06, "loss": 0.4822, "step": 25932 }, { "epoch": 0.7091719536206519, "grad_norm": 1.2882570028305054, "learning_rate": 4.118565304983664e-06, "loss": 0.4842, "step": 25933 }, { "epoch": 0.7091992999343688, "grad_norm": 1.3502293825149536, "learning_rate": 4.1178490089574595e-06, "loss": 0.4681, "step": 25934 }, { "epoch": 0.7092266462480857, "grad_norm": 1.1931400299072266, "learning_rate": 4.11713275907508e-06, "loss": 0.4575, "step": 25935 }, { "epoch": 0.7092539925618027, "grad_norm": 1.2373632192611694, "learning_rate": 4.116416555342138e-06, "loss": 0.4572, "step": 25936 }, { "epoch": 0.7092813388755196, "grad_norm": 1.537899374961853, "learning_rate": 4.115700397764251e-06, "loss": 0.3376, "step": 25937 }, { "epoch": 0.7093086851892365, "grad_norm": 1.6282660961151123, "learning_rate": 4.114984286347036e-06, "loss": 0.3288, "step": 25938 }, { "epoch": 0.7093360315029534, "grad_norm": 1.2617813348770142, "learning_rate": 4.114268221096116e-06, "loss": 0.4497, "step": 25939 }, { "epoch": 0.7093633778166704, "grad_norm": 1.5610965490341187, "learning_rate": 4.113552202017105e-06, "loss": 0.3191, "step": 25940 }, { "epoch": 0.7093907241303872, "grad_norm": 1.1886558532714844, "learning_rate": 4.112836229115617e-06, "loss": 0.723, "step": 25941 }, { "epoch": 0.7094180704441041, "grad_norm": 1.5495120286941528, "learning_rate": 4.112120302397276e-06, "loss": 0.4804, "step": 25942 }, { "epoch": 0.709445416757821, "grad_norm": 1.7547067403793335, "learning_rate": 4.1114044218676954e-06, "loss": 0.3625, "step": 25943 }, { "epoch": 0.709472763071538, "grad_norm": 1.8312063217163086, "learning_rate": 4.110688587532485e-06, "loss": 0.4157, "step": 25944 }, { "epoch": 0.7095001093852549, "grad_norm": 1.1495952606201172, "learning_rate": 4.109972799397269e-06, "loss": 0.4613, "step": 25945 }, { "epoch": 0.7095274556989718, "grad_norm": 1.504905343055725, "learning_rate": 4.109257057467658e-06, "loss": 0.4042, "step": 25946 }, { "epoch": 0.7095548020126887, "grad_norm": 1.3733885288238525, "learning_rate": 4.108541361749265e-06, "loss": 0.4657, "step": 25947 }, { "epoch": 0.7095821483264056, "grad_norm": 1.3581557273864746, "learning_rate": 4.1078257122477105e-06, "loss": 0.4794, "step": 25948 }, { "epoch": 0.7096094946401225, "grad_norm": 1.5555310249328613, "learning_rate": 4.107110108968605e-06, "loss": 0.2968, "step": 25949 }, { "epoch": 0.7096368409538394, "grad_norm": 1.3312523365020752, "learning_rate": 4.106394551917563e-06, "loss": 0.4473, "step": 25950 }, { "epoch": 0.7096641872675563, "grad_norm": 1.3872392177581787, "learning_rate": 4.105679041100192e-06, "loss": 0.4713, "step": 25951 }, { "epoch": 0.7096915335812732, "grad_norm": 1.2855008840560913, "learning_rate": 4.104963576522115e-06, "loss": 0.4573, "step": 25952 }, { "epoch": 0.7097188798949902, "grad_norm": 1.130128264427185, "learning_rate": 4.104248158188939e-06, "loss": 0.4613, "step": 25953 }, { "epoch": 0.7097462262087071, "grad_norm": 1.0244174003601074, "learning_rate": 4.103532786106275e-06, "loss": 0.3617, "step": 25954 }, { "epoch": 0.709773572522424, "grad_norm": 1.2601902484893799, "learning_rate": 4.102817460279738e-06, "loss": 0.4467, "step": 25955 }, { "epoch": 0.7098009188361409, "grad_norm": 1.426802635192871, "learning_rate": 4.10210218071494e-06, "loss": 0.4789, "step": 25956 }, { "epoch": 0.7098282651498578, "grad_norm": 1.5217723846435547, "learning_rate": 4.101386947417486e-06, "loss": 0.6987, "step": 25957 }, { "epoch": 0.7098556114635747, "grad_norm": 1.4305497407913208, "learning_rate": 4.100671760392996e-06, "loss": 0.5107, "step": 25958 }, { "epoch": 0.7098829577772916, "grad_norm": 1.201496958732605, "learning_rate": 4.0999566196470756e-06, "loss": 0.467, "step": 25959 }, { "epoch": 0.7099103040910085, "grad_norm": 1.5869390964508057, "learning_rate": 4.0992415251853344e-06, "loss": 0.3563, "step": 25960 }, { "epoch": 0.7099376504047255, "grad_norm": 1.325492262840271, "learning_rate": 4.098526477013384e-06, "loss": 0.4917, "step": 25961 }, { "epoch": 0.7099649967184424, "grad_norm": 1.2046688795089722, "learning_rate": 4.097811475136832e-06, "loss": 0.4423, "step": 25962 }, { "epoch": 0.7099923430321593, "grad_norm": 1.685524582862854, "learning_rate": 4.097096519561283e-06, "loss": 0.4682, "step": 25963 }, { "epoch": 0.7100196893458762, "grad_norm": 1.699798345565796, "learning_rate": 4.096381610292356e-06, "loss": 0.3802, "step": 25964 }, { "epoch": 0.710047035659593, "grad_norm": 1.1651636362075806, "learning_rate": 4.095666747335652e-06, "loss": 0.4695, "step": 25965 }, { "epoch": 0.71007438197331, "grad_norm": 1.2649328708648682, "learning_rate": 4.09495193069678e-06, "loss": 0.4606, "step": 25966 }, { "epoch": 0.7101017282870269, "grad_norm": 1.1865156888961792, "learning_rate": 4.09423716038135e-06, "loss": 0.4697, "step": 25967 }, { "epoch": 0.7101290746007438, "grad_norm": 1.3941024541854858, "learning_rate": 4.093522436394968e-06, "loss": 0.4569, "step": 25968 }, { "epoch": 0.7101564209144607, "grad_norm": 1.1989760398864746, "learning_rate": 4.092807758743235e-06, "loss": 0.458, "step": 25969 }, { "epoch": 0.7101837672281777, "grad_norm": 1.4816056489944458, "learning_rate": 4.092093127431769e-06, "loss": 0.4674, "step": 25970 }, { "epoch": 0.7102111135418946, "grad_norm": 1.52689528465271, "learning_rate": 4.091378542466168e-06, "loss": 0.4649, "step": 25971 }, { "epoch": 0.7102384598556115, "grad_norm": 1.553076982498169, "learning_rate": 4.090664003852039e-06, "loss": 0.3367, "step": 25972 }, { "epoch": 0.7102658061693283, "grad_norm": 1.6810470819473267, "learning_rate": 4.089949511594986e-06, "loss": 0.3049, "step": 25973 }, { "epoch": 0.7102931524830453, "grad_norm": 1.4322189092636108, "learning_rate": 4.0892350657006184e-06, "loss": 0.491, "step": 25974 }, { "epoch": 0.7103204987967622, "grad_norm": 1.1682612895965576, "learning_rate": 4.088520666174539e-06, "loss": 0.4418, "step": 25975 }, { "epoch": 0.7103478451104791, "grad_norm": 1.2703279256820679, "learning_rate": 4.087806313022348e-06, "loss": 0.4382, "step": 25976 }, { "epoch": 0.710375191424196, "grad_norm": 1.3756451606750488, "learning_rate": 4.087092006249655e-06, "loss": 0.4787, "step": 25977 }, { "epoch": 0.710402537737913, "grad_norm": 1.452686071395874, "learning_rate": 4.0863777458620614e-06, "loss": 0.3207, "step": 25978 }, { "epoch": 0.7104298840516299, "grad_norm": 1.2353343963623047, "learning_rate": 4.0856635318651676e-06, "loss": 0.4742, "step": 25979 }, { "epoch": 0.7104572303653468, "grad_norm": 1.7280447483062744, "learning_rate": 4.084949364264581e-06, "loss": 0.4628, "step": 25980 }, { "epoch": 0.7104845766790636, "grad_norm": 1.277209997177124, "learning_rate": 4.084235243065902e-06, "loss": 0.4553, "step": 25981 }, { "epoch": 0.7105119229927805, "grad_norm": 1.6285444498062134, "learning_rate": 4.083521168274731e-06, "loss": 0.3108, "step": 25982 }, { "epoch": 0.7105392693064975, "grad_norm": 1.5497585535049438, "learning_rate": 4.082807139896674e-06, "loss": 0.4585, "step": 25983 }, { "epoch": 0.7105666156202144, "grad_norm": 1.3459794521331787, "learning_rate": 4.082093157937329e-06, "loss": 0.4936, "step": 25984 }, { "epoch": 0.7105939619339313, "grad_norm": 1.2620292901992798, "learning_rate": 4.081379222402299e-06, "loss": 0.4418, "step": 25985 }, { "epoch": 0.7106213082476482, "grad_norm": 1.34294593334198, "learning_rate": 4.080665333297179e-06, "loss": 0.4993, "step": 25986 }, { "epoch": 0.7106486545613652, "grad_norm": 1.3437587022781372, "learning_rate": 4.079951490627578e-06, "loss": 0.4761, "step": 25987 }, { "epoch": 0.7106760008750821, "grad_norm": 1.5544427633285522, "learning_rate": 4.079237694399089e-06, "loss": 0.4643, "step": 25988 }, { "epoch": 0.7107033471887989, "grad_norm": 1.6332924365997314, "learning_rate": 4.078523944617313e-06, "loss": 0.4889, "step": 25989 }, { "epoch": 0.7107306935025158, "grad_norm": 1.2159029245376587, "learning_rate": 4.077810241287853e-06, "loss": 0.4778, "step": 25990 }, { "epoch": 0.7107580398162328, "grad_norm": 1.4900522232055664, "learning_rate": 4.0770965844163055e-06, "loss": 0.4719, "step": 25991 }, { "epoch": 0.7107853861299497, "grad_norm": 1.2570444345474243, "learning_rate": 4.076382974008265e-06, "loss": 0.4721, "step": 25992 }, { "epoch": 0.7108127324436666, "grad_norm": 1.231115698814392, "learning_rate": 4.075669410069335e-06, "loss": 0.4597, "step": 25993 }, { "epoch": 0.7108400787573835, "grad_norm": 1.3086696863174438, "learning_rate": 4.074955892605112e-06, "loss": 0.4432, "step": 25994 }, { "epoch": 0.7108674250711005, "grad_norm": 1.449419617652893, "learning_rate": 4.074242421621194e-06, "loss": 0.4714, "step": 25995 }, { "epoch": 0.7108947713848174, "grad_norm": 1.2169421911239624, "learning_rate": 4.073528997123175e-06, "loss": 0.3453, "step": 25996 }, { "epoch": 0.7109221176985342, "grad_norm": 1.3099368810653687, "learning_rate": 4.072815619116654e-06, "loss": 0.4316, "step": 25997 }, { "epoch": 0.7109494640122511, "grad_norm": 1.7276427745819092, "learning_rate": 4.0721022876072225e-06, "loss": 0.2966, "step": 25998 }, { "epoch": 0.710976810325968, "grad_norm": 1.3304755687713623, "learning_rate": 4.071389002600483e-06, "loss": 0.4887, "step": 25999 }, { "epoch": 0.711004156639685, "grad_norm": 1.1322797536849976, "learning_rate": 4.070675764102029e-06, "loss": 0.7262, "step": 26000 } ], "logging_steps": 1.0, "max_steps": 36568, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.28367967922254e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }