diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,11713 @@ +{ + "best_metric": 0.10333551466464996, + "best_model_checkpoint": "checkpoints/mHubert-basque-ASR-30ep/checkpoint-144000", + "epoch": 24.116286752560292, + "eval_steps": 1000, + "global_step": 146000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016518004625041296, + "grad_norm": 11.923916816711426, + "learning_rate": 5.506001541680432e-08, + "loss": 24.4951, + "step": 100 + }, + { + "epoch": 0.03303600925008259, + "grad_norm": 14.201896667480469, + "learning_rate": 1.1012003083360864e-07, + "loss": 24.611, + "step": 200 + }, + { + "epoch": 0.049554013875123884, + "grad_norm": 16.669321060180664, + "learning_rate": 1.6518004625041296e-07, + "loss": 24.4459, + "step": 300 + }, + { + "epoch": 0.06607201850016518, + "grad_norm": 15.999144554138184, + "learning_rate": 2.2024006166721728e-07, + "loss": 23.9418, + "step": 400 + }, + { + "epoch": 0.08259002312520647, + "grad_norm": 15.337122917175293, + "learning_rate": 2.753000770840216e-07, + "loss": 23.6429, + "step": 500 + }, + { + "epoch": 0.09910802775024777, + "grad_norm": 20.767488479614258, + "learning_rate": 3.3036009250082593e-07, + "loss": 22.9927, + "step": 600 + }, + { + "epoch": 0.11562603237528907, + "grad_norm": 25.313589096069336, + "learning_rate": 3.8542010791763027e-07, + "loss": 22.5284, + "step": 700 + }, + { + "epoch": 0.13214403700033037, + "grad_norm": 24.19838523864746, + "learning_rate": 4.4048012333443456e-07, + "loss": 21.2884, + "step": 800 + }, + { + "epoch": 0.14866204162537167, + "grad_norm": 27.155614852905273, + "learning_rate": 4.955401387512389e-07, + "loss": 20.3421, + "step": 900 + }, + { + "epoch": 0.16518004625041294, + "grad_norm": 30.608129501342773, + "learning_rate": 5.506001541680432e-07, + "loss": 19.4542, + "step": 1000 + }, + { + "epoch": 0.16518004625041294, + "eval_cer": 0.9883050441448223, + "eval_loss": 16.246232986450195, + "eval_runtime": 48.061, + "eval_samples_per_second": 35.184, + "eval_steps_per_second": 8.801, + "eval_wer": 0.9999375507400238, + "step": 1000 + }, + { + "epoch": 0.18169805087545424, + "grad_norm": 27.388935089111328, + "learning_rate": 6.056601695848476e-07, + "loss": 18.666, + "step": 1100 + }, + { + "epoch": 0.19821605550049554, + "grad_norm": 35.63807678222656, + "learning_rate": 6.607201850016519e-07, + "loss": 17.1664, + "step": 1200 + }, + { + "epoch": 0.21473406012553684, + "grad_norm": 46.33199691772461, + "learning_rate": 7.157802004184563e-07, + "loss": 15.5691, + "step": 1300 + }, + { + "epoch": 0.23125206475057813, + "grad_norm": 37.326534271240234, + "learning_rate": 7.708402158352605e-07, + "loss": 14.6982, + "step": 1400 + }, + { + "epoch": 0.24777006937561943, + "grad_norm": 32.61579895019531, + "learning_rate": 8.259002312520647e-07, + "loss": 14.1957, + "step": 1500 + }, + { + "epoch": 0.26428807400066073, + "grad_norm": 32.68245315551758, + "learning_rate": 8.809602466688691e-07, + "loss": 13.7794, + "step": 1600 + }, + { + "epoch": 0.280806078625702, + "grad_norm": 36.71652603149414, + "learning_rate": 9.360202620856734e-07, + "loss": 13.1926, + "step": 1700 + }, + { + "epoch": 0.29732408325074333, + "grad_norm": 33.95559310913086, + "learning_rate": 9.910802775024778e-07, + "loss": 12.549, + "step": 1800 + }, + { + "epoch": 0.3138420878757846, + "grad_norm": 34.27357864379883, + "learning_rate": 1.0461402929192822e-06, + "loss": 12.9556, + "step": 1900 + }, + { + "epoch": 0.3303600925008259, + "grad_norm": 29.778766632080078, + "learning_rate": 1.1012003083360864e-06, + "loss": 11.7038, + "step": 2000 + }, + { + "epoch": 0.3303600925008259, + "eval_cer": 0.9883050441448223, + "eval_loss": 10.252079963684082, + "eval_runtime": 47.4771, + "eval_samples_per_second": 35.617, + "eval_steps_per_second": 8.91, + "eval_wer": 0.9999375507400238, + "step": 2000 + }, + { + "epoch": 0.3468780971258672, + "grad_norm": 34.33523941040039, + "learning_rate": 1.1562603237528908e-06, + "loss": 12.5611, + "step": 2100 + }, + { + "epoch": 0.3633961017509085, + "grad_norm": 34.92203140258789, + "learning_rate": 1.2113203391696951e-06, + "loss": 11.634, + "step": 2200 + }, + { + "epoch": 0.3799141063759498, + "grad_norm": 35.76356887817383, + "learning_rate": 1.2663803545864995e-06, + "loss": 12.158, + "step": 2300 + }, + { + "epoch": 0.39643211100099107, + "grad_norm": 33.04209518432617, + "learning_rate": 1.3214403700033037e-06, + "loss": 11.245, + "step": 2400 + }, + { + "epoch": 0.4129501156260324, + "grad_norm": 36.18589782714844, + "learning_rate": 1.3765003854201081e-06, + "loss": 11.5037, + "step": 2500 + }, + { + "epoch": 0.42946812025107367, + "grad_norm": 30.83759307861328, + "learning_rate": 1.4315604008369125e-06, + "loss": 11.6204, + "step": 2600 + }, + { + "epoch": 0.44598612487611494, + "grad_norm": 31.8559627532959, + "learning_rate": 1.4866204162537167e-06, + "loss": 10.6165, + "step": 2700 + }, + { + "epoch": 0.46250412950115627, + "grad_norm": 2.7403502464294434, + "learning_rate": 1.541680431670521e-06, + "loss": 10.7605, + "step": 2800 + }, + { + "epoch": 0.47902213412619754, + "grad_norm": 3.411168098449707, + "learning_rate": 1.5967404470873255e-06, + "loss": 10.6777, + "step": 2900 + }, + { + "epoch": 0.49554013875123887, + "grad_norm": 31.218460083007812, + "learning_rate": 1.6518004625041294e-06, + "loss": 10.5214, + "step": 3000 + }, + { + "epoch": 0.49554013875123887, + "eval_cer": 0.9883050441448223, + "eval_loss": 8.999906539916992, + "eval_runtime": 47.8317, + "eval_samples_per_second": 35.353, + "eval_steps_per_second": 8.844, + "eval_wer": 0.9999375507400238, + "step": 3000 + }, + { + "epoch": 0.5120581433762802, + "grad_norm": 33.284549713134766, + "learning_rate": 1.7068604779209338e-06, + "loss": 10.5589, + "step": 3100 + }, + { + "epoch": 0.5285761480013215, + "grad_norm": 29.90840721130371, + "learning_rate": 1.7619204933377382e-06, + "loss": 10.1695, + "step": 3200 + }, + { + "epoch": 0.5450941526263627, + "grad_norm": 37.552734375, + "learning_rate": 1.8169805087545426e-06, + "loss": 10.1221, + "step": 3300 + }, + { + "epoch": 0.561612157251404, + "grad_norm": 33.36090850830078, + "learning_rate": 1.8720405241713468e-06, + "loss": 9.6691, + "step": 3400 + }, + { + "epoch": 0.5781301618764453, + "grad_norm": 28.224496841430664, + "learning_rate": 1.9271005395881514e-06, + "loss": 9.6393, + "step": 3500 + }, + { + "epoch": 0.5946481665014867, + "grad_norm": 29.11280059814453, + "learning_rate": 1.9821605550049556e-06, + "loss": 9.6595, + "step": 3600 + }, + { + "epoch": 0.6111661711265279, + "grad_norm": 30.484336853027344, + "learning_rate": 2.0372205704217598e-06, + "loss": 9.4354, + "step": 3700 + }, + { + "epoch": 0.6276841757515692, + "grad_norm": 28.713830947875977, + "learning_rate": 2.0922805858385644e-06, + "loss": 9.107, + "step": 3800 + }, + { + "epoch": 0.6442021803766105, + "grad_norm": 28.442527770996094, + "learning_rate": 2.1473406012553685e-06, + "loss": 9.1297, + "step": 3900 + }, + { + "epoch": 0.6607201850016517, + "grad_norm": 30.577152252197266, + "learning_rate": 2.2024006166721727e-06, + "loss": 8.8754, + "step": 4000 + }, + { + "epoch": 0.6607201850016517, + "eval_cer": 0.9883050441448223, + "eval_loss": 7.591804504394531, + "eval_runtime": 48.3109, + "eval_samples_per_second": 35.002, + "eval_steps_per_second": 8.756, + "eval_wer": 0.9999375507400238, + "step": 4000 + }, + { + "epoch": 0.6772381896266931, + "grad_norm": 33.752967834472656, + "learning_rate": 2.2574606320889773e-06, + "loss": 8.5952, + "step": 4100 + }, + { + "epoch": 0.6937561942517344, + "grad_norm": 29.171703338623047, + "learning_rate": 2.3125206475057815e-06, + "loss": 8.363, + "step": 4200 + }, + { + "epoch": 0.7102741988767757, + "grad_norm": 29.746253967285156, + "learning_rate": 2.3675806629225857e-06, + "loss": 8.3281, + "step": 4300 + }, + { + "epoch": 0.726792203501817, + "grad_norm": 31.43389129638672, + "learning_rate": 2.4226406783393903e-06, + "loss": 8.0488, + "step": 4400 + }, + { + "epoch": 0.7433102081268583, + "grad_norm": 26.350412368774414, + "learning_rate": 2.4777006937561945e-06, + "loss": 8.0053, + "step": 4500 + }, + { + "epoch": 0.7598282127518996, + "grad_norm": 24.809051513671875, + "learning_rate": 2.532760709172999e-06, + "loss": 7.8292, + "step": 4600 + }, + { + "epoch": 0.7763462173769409, + "grad_norm": 5.660928726196289, + "learning_rate": 2.587820724589803e-06, + "loss": 7.2419, + "step": 4700 + }, + { + "epoch": 0.7928642220019821, + "grad_norm": 25.451852798461914, + "learning_rate": 2.6428807400066074e-06, + "loss": 7.3732, + "step": 4800 + }, + { + "epoch": 0.8093822266270234, + "grad_norm": 26.097301483154297, + "learning_rate": 2.6979407554234116e-06, + "loss": 7.1173, + "step": 4900 + }, + { + "epoch": 0.8259002312520648, + "grad_norm": 22.98796844482422, + "learning_rate": 2.7530007708402162e-06, + "loss": 6.9653, + "step": 5000 + }, + { + "epoch": 0.8259002312520648, + "eval_cer": 0.9883050441448223, + "eval_loss": 5.927879810333252, + "eval_runtime": 47.9693, + "eval_samples_per_second": 35.252, + "eval_steps_per_second": 8.818, + "eval_wer": 0.9999375507400238, + "step": 5000 + }, + { + "epoch": 0.8424182358771061, + "grad_norm": 22.172231674194336, + "learning_rate": 2.8080607862570204e-06, + "loss": 6.5446, + "step": 5100 + }, + { + "epoch": 0.8589362405021473, + "grad_norm": 20.156213760375977, + "learning_rate": 2.863120801673825e-06, + "loss": 6.5427, + "step": 5200 + }, + { + "epoch": 0.8754542451271886, + "grad_norm": 23.265291213989258, + "learning_rate": 2.9181808170906288e-06, + "loss": 6.1793, + "step": 5300 + }, + { + "epoch": 0.8919722497522299, + "grad_norm": 19.185626983642578, + "learning_rate": 2.9732408325074334e-06, + "loss": 6.0592, + "step": 5400 + }, + { + "epoch": 0.9084902543772713, + "grad_norm": 2.092716693878174, + "learning_rate": 3.0283008479242375e-06, + "loss": 5.7659, + "step": 5500 + }, + { + "epoch": 0.9250082590023125, + "grad_norm": 21.02352523803711, + "learning_rate": 3.083360863341042e-06, + "loss": 5.6754, + "step": 5600 + }, + { + "epoch": 0.9415262636273538, + "grad_norm": 18.27204132080078, + "learning_rate": 3.1384208787578463e-06, + "loss": 5.5381, + "step": 5700 + }, + { + "epoch": 0.9580442682523951, + "grad_norm": 18.369115829467773, + "learning_rate": 3.193480894174651e-06, + "loss": 5.4687, + "step": 5800 + }, + { + "epoch": 0.9745622728774364, + "grad_norm": 19.799074172973633, + "learning_rate": 3.248540909591455e-06, + "loss": 5.2652, + "step": 5900 + }, + { + "epoch": 0.9910802775024777, + "grad_norm": 16.94482421875, + "learning_rate": 3.303600925008259e-06, + "loss": 5.0711, + "step": 6000 + }, + { + "epoch": 0.9910802775024777, + "eval_cer": 0.9883050441448223, + "eval_loss": 4.399995803833008, + "eval_runtime": 48.2208, + "eval_samples_per_second": 35.068, + "eval_steps_per_second": 8.772, + "eval_wer": 0.9999375507400238, + "step": 6000 + }, + { + "epoch": 1.007598282127519, + "grad_norm": 13.568734169006348, + "learning_rate": 3.3586609404250635e-06, + "loss": 4.7523, + "step": 6100 + }, + { + "epoch": 1.0241162867525604, + "grad_norm": 13.136434555053711, + "learning_rate": 3.4137209558418677e-06, + "loss": 4.5941, + "step": 6200 + }, + { + "epoch": 1.0406342913776017, + "grad_norm": 12.08462905883789, + "learning_rate": 3.4687809712586723e-06, + "loss": 4.431, + "step": 6300 + }, + { + "epoch": 1.057152296002643, + "grad_norm": 11.658442497253418, + "learning_rate": 3.5238409866754764e-06, + "loss": 4.4636, + "step": 6400 + }, + { + "epoch": 1.0736703006276842, + "grad_norm": 11.084771156311035, + "learning_rate": 3.578901002092281e-06, + "loss": 4.2883, + "step": 6500 + }, + { + "epoch": 1.0901883052527255, + "grad_norm": 9.543913841247559, + "learning_rate": 3.6339610175090852e-06, + "loss": 4.1244, + "step": 6600 + }, + { + "epoch": 1.1067063098777667, + "grad_norm": 9.388916015625, + "learning_rate": 3.68902103292589e-06, + "loss": 3.9917, + "step": 6700 + }, + { + "epoch": 1.123224314502808, + "grad_norm": 6.666196346282959, + "learning_rate": 3.7440810483426936e-06, + "loss": 3.8466, + "step": 6800 + }, + { + "epoch": 1.1397423191278493, + "grad_norm": 16.961889266967773, + "learning_rate": 3.799141063759498e-06, + "loss": 3.7877, + "step": 6900 + }, + { + "epoch": 1.1562603237528906, + "grad_norm": 5.992101669311523, + "learning_rate": 3.854201079176303e-06, + "loss": 3.6487, + "step": 7000 + }, + { + "epoch": 1.1562603237528906, + "eval_cer": 0.9883050441448223, + "eval_loss": 3.413706064224243, + "eval_runtime": 47.9091, + "eval_samples_per_second": 35.296, + "eval_steps_per_second": 8.829, + "eval_wer": 0.9999375507400238, + "step": 7000 + }, + { + "epoch": 1.172778328377932, + "grad_norm": 6.866231441497803, + "learning_rate": 3.909261094593107e-06, + "loss": 3.5613, + "step": 7100 + }, + { + "epoch": 1.1892963330029733, + "grad_norm": 2.480027675628662, + "learning_rate": 3.964321110009911e-06, + "loss": 3.4725, + "step": 7200 + }, + { + "epoch": 1.2058143376280146, + "grad_norm": 4.010397911071777, + "learning_rate": 4.019381125426716e-06, + "loss": 3.4202, + "step": 7300 + }, + { + "epoch": 1.2223323422530559, + "grad_norm": 5.230499267578125, + "learning_rate": 4.0744411408435195e-06, + "loss": 3.3595, + "step": 7400 + }, + { + "epoch": 1.2388503468780971, + "grad_norm": 3.2052972316741943, + "learning_rate": 4.129501156260324e-06, + "loss": 3.3002, + "step": 7500 + }, + { + "epoch": 1.2553683515031384, + "grad_norm": 1.894927978515625, + "learning_rate": 4.184561171677129e-06, + "loss": 3.1986, + "step": 7600 + }, + { + "epoch": 1.2718863561281797, + "grad_norm": 2.197263240814209, + "learning_rate": 4.239621187093933e-06, + "loss": 3.1351, + "step": 7700 + }, + { + "epoch": 1.288404360753221, + "grad_norm": 2.1114447116851807, + "learning_rate": 4.294681202510737e-06, + "loss": 3.126, + "step": 7800 + }, + { + "epoch": 1.3049223653782622, + "grad_norm": 1.9289065599441528, + "learning_rate": 4.349741217927541e-06, + "loss": 3.0814, + "step": 7900 + }, + { + "epoch": 1.3214403700033035, + "grad_norm": 1.314790964126587, + "learning_rate": 4.4048012333443454e-06, + "loss": 3.061, + "step": 8000 + }, + { + "epoch": 1.3214403700033035, + "eval_cer": 0.9883050441448223, + "eval_loss": 2.9990100860595703, + "eval_runtime": 47.905, + "eval_samples_per_second": 35.299, + "eval_steps_per_second": 8.83, + "eval_wer": 0.9999375507400238, + "step": 8000 + }, + { + "epoch": 1.3379583746283448, + "grad_norm": 1.4136910438537598, + "learning_rate": 4.45986124876115e-06, + "loss": 3.0138, + "step": 8100 + }, + { + "epoch": 1.3544763792533863, + "grad_norm": 1.2747600078582764, + "learning_rate": 4.514921264177955e-06, + "loss": 2.9893, + "step": 8200 + }, + { + "epoch": 1.3709943838784275, + "grad_norm": 1.3073844909667969, + "learning_rate": 4.569981279594758e-06, + "loss": 2.9698, + "step": 8300 + }, + { + "epoch": 1.3875123885034688, + "grad_norm": 1.0685036182403564, + "learning_rate": 4.625041295011563e-06, + "loss": 2.9459, + "step": 8400 + }, + { + "epoch": 1.40403039312851, + "grad_norm": 1.4280977249145508, + "learning_rate": 4.680101310428367e-06, + "loss": 2.9276, + "step": 8500 + }, + { + "epoch": 1.4205483977535514, + "grad_norm": 0.5592168569564819, + "learning_rate": 4.735161325845171e-06, + "loss": 2.9128, + "step": 8600 + }, + { + "epoch": 1.4370664023785926, + "grad_norm": 0.6144903302192688, + "learning_rate": 4.790221341261976e-06, + "loss": 2.89, + "step": 8700 + }, + { + "epoch": 1.453584407003634, + "grad_norm": 0.5364680886268616, + "learning_rate": 4.845281356678781e-06, + "loss": 2.884, + "step": 8800 + }, + { + "epoch": 1.4701024116286754, + "grad_norm": 0.42214369773864746, + "learning_rate": 4.900341372095584e-06, + "loss": 2.8763, + "step": 8900 + }, + { + "epoch": 1.4866204162537167, + "grad_norm": 1.1128541231155396, + "learning_rate": 4.955401387512389e-06, + "loss": 2.8674, + "step": 9000 + }, + { + "epoch": 1.4866204162537167, + "eval_cer": 0.9883050441448223, + "eval_loss": 2.8672590255737305, + "eval_runtime": 47.8727, + "eval_samples_per_second": 35.323, + "eval_steps_per_second": 8.836, + "eval_wer": 0.9999375507400238, + "step": 9000 + }, + { + "epoch": 1.503138420878758, + "grad_norm": 1.4287844896316528, + "learning_rate": 5.0104614029291935e-06, + "loss": 2.8587, + "step": 9100 + }, + { + "epoch": 1.5196564255037992, + "grad_norm": 0.7117812037467957, + "learning_rate": 5.065521418345998e-06, + "loss": 2.8496, + "step": 9200 + }, + { + "epoch": 1.5361744301288405, + "grad_norm": 0.346927285194397, + "learning_rate": 5.120581433762803e-06, + "loss": 2.8843, + "step": 9300 + }, + { + "epoch": 1.5526924347538817, + "grad_norm": 0.28466975688934326, + "learning_rate": 5.175641449179606e-06, + "loss": 2.8361, + "step": 9400 + }, + { + "epoch": 1.569210439378923, + "grad_norm": 0.9709968566894531, + "learning_rate": 5.23070146459641e-06, + "loss": 2.8347, + "step": 9500 + }, + { + "epoch": 1.5857284440039643, + "grad_norm": 0.41504138708114624, + "learning_rate": 5.285761480013215e-06, + "loss": 2.8281, + "step": 9600 + }, + { + "epoch": 1.6022464486290056, + "grad_norm": 0.7209063172340393, + "learning_rate": 5.3408214954300195e-06, + "loss": 2.8216, + "step": 9700 + }, + { + "epoch": 1.6187644532540468, + "grad_norm": 0.17556777596473694, + "learning_rate": 5.395881510846823e-06, + "loss": 2.8191, + "step": 9800 + }, + { + "epoch": 1.635282457879088, + "grad_norm": 0.24542377889156342, + "learning_rate": 5.450941526263628e-06, + "loss": 2.8171, + "step": 9900 + }, + { + "epoch": 1.6518004625041294, + "grad_norm": 0.5793740153312683, + "learning_rate": 5.5060015416804324e-06, + "loss": 2.8139, + "step": 10000 + }, + { + "epoch": 1.6518004625041294, + "eval_cer": 0.9883050441448223, + "eval_loss": 2.8268349170684814, + "eval_runtime": 47.7716, + "eval_samples_per_second": 35.398, + "eval_steps_per_second": 8.855, + "eval_wer": 0.9999375507400238, + "step": 10000 + }, + { + "epoch": 1.6683184671291706, + "grad_norm": 0.5430779457092285, + "learning_rate": 5.561061557097236e-06, + "loss": 2.8118, + "step": 10100 + }, + { + "epoch": 1.6848364717542121, + "grad_norm": 0.16136805713176727, + "learning_rate": 5.616121572514041e-06, + "loss": 2.8084, + "step": 10200 + }, + { + "epoch": 1.7013544763792534, + "grad_norm": 0.180739626288414, + "learning_rate": 5.671181587930845e-06, + "loss": 2.8075, + "step": 10300 + }, + { + "epoch": 1.7178724810042947, + "grad_norm": 0.33964207768440247, + "learning_rate": 5.72624160334765e-06, + "loss": 2.8029, + "step": 10400 + }, + { + "epoch": 1.734390485629336, + "grad_norm": 0.3545405864715576, + "learning_rate": 5.781301618764453e-06, + "loss": 2.8026, + "step": 10500 + }, + { + "epoch": 1.7509084902543772, + "grad_norm": 0.6962557435035706, + "learning_rate": 5.8363616341812575e-06, + "loss": 2.8008, + "step": 10600 + }, + { + "epoch": 1.7674264948794187, + "grad_norm": 0.49546900391578674, + "learning_rate": 5.891421649598062e-06, + "loss": 2.798, + "step": 10700 + }, + { + "epoch": 1.78394449950446, + "grad_norm": 0.10469625890254974, + "learning_rate": 5.946481665014867e-06, + "loss": 2.803, + "step": 10800 + }, + { + "epoch": 1.8004625041295013, + "grad_norm": 0.48037204146385193, + "learning_rate": 6.0015416804316705e-06, + "loss": 2.7972, + "step": 10900 + }, + { + "epoch": 1.8169805087545425, + "grad_norm": 1.4760863780975342, + "learning_rate": 6.056601695848475e-06, + "loss": 2.7957, + "step": 11000 + }, + { + "epoch": 1.8169805087545425, + "eval_cer": 0.9883050441448223, + "eval_loss": 2.815251588821411, + "eval_runtime": 47.7131, + "eval_samples_per_second": 35.441, + "eval_steps_per_second": 8.865, + "eval_wer": 0.9999375507400238, + "step": 11000 + }, + { + "epoch": 1.8334985133795838, + "grad_norm": 0.6480938196182251, + "learning_rate": 6.11166171126528e-06, + "loss": 2.7959, + "step": 11100 + }, + { + "epoch": 1.850016518004625, + "grad_norm": 0.09613073617219925, + "learning_rate": 6.166721726682084e-06, + "loss": 2.795, + "step": 11200 + }, + { + "epoch": 1.8665345226296663, + "grad_norm": 0.7749711275100708, + "learning_rate": 6.221781742098888e-06, + "loss": 2.7929, + "step": 11300 + }, + { + "epoch": 1.8830525272547076, + "grad_norm": 0.3546479046344757, + "learning_rate": 6.276841757515693e-06, + "loss": 2.7918, + "step": 11400 + }, + { + "epoch": 1.899570531879749, + "grad_norm": 1.12019681930542, + "learning_rate": 6.331901772932497e-06, + "loss": 2.7918, + "step": 11500 + }, + { + "epoch": 1.9160885365047902, + "grad_norm": 0.8891560435295105, + "learning_rate": 6.386961788349302e-06, + "loss": 2.7891, + "step": 11600 + }, + { + "epoch": 1.9326065411298314, + "grad_norm": 0.17968548834323883, + "learning_rate": 6.442021803766106e-06, + "loss": 2.7893, + "step": 11700 + }, + { + "epoch": 1.9491245457548727, + "grad_norm": 0.7718554139137268, + "learning_rate": 6.49708181918291e-06, + "loss": 2.7906, + "step": 11800 + }, + { + "epoch": 1.965642550379914, + "grad_norm": 0.20580369234085083, + "learning_rate": 6.552141834599715e-06, + "loss": 2.7855, + "step": 11900 + }, + { + "epoch": 1.9821605550049552, + "grad_norm": 0.12557658553123474, + "learning_rate": 6.607201850016518e-06, + "loss": 2.7821, + "step": 12000 + }, + { + "epoch": 1.9821605550049552, + "eval_cer": 0.9883050441448223, + "eval_loss": 2.801440477371216, + "eval_runtime": 48.1673, + "eval_samples_per_second": 35.107, + "eval_steps_per_second": 8.782, + "eval_wer": 0.9999375507400238, + "step": 12000 + }, + { + "epoch": 1.9986785596299967, + "grad_norm": 0.29796159267425537, + "learning_rate": 6.662261865433322e-06, + "loss": 2.8061, + "step": 12100 + }, + { + "epoch": 2.015196564255038, + "grad_norm": 0.43523645401000977, + "learning_rate": 6.717321880850127e-06, + "loss": 2.7722, + "step": 12200 + }, + { + "epoch": 2.031714568880079, + "grad_norm": 0.8194575905799866, + "learning_rate": 6.7723818962669316e-06, + "loss": 2.7428, + "step": 12300 + }, + { + "epoch": 2.048232573505121, + "grad_norm": 0.5913192629814148, + "learning_rate": 6.827441911683735e-06, + "loss": 2.6796, + "step": 12400 + }, + { + "epoch": 2.064750578130162, + "grad_norm": 1.2272390127182007, + "learning_rate": 6.88250192710054e-06, + "loss": 2.5647, + "step": 12500 + }, + { + "epoch": 2.0812685827552033, + "grad_norm": 0.7809718251228333, + "learning_rate": 6.9375619425173445e-06, + "loss": 2.4445, + "step": 12600 + }, + { + "epoch": 2.0977865873802446, + "grad_norm": 1.4848648309707642, + "learning_rate": 6.992621957934149e-06, + "loss": 2.3472, + "step": 12700 + }, + { + "epoch": 2.114304592005286, + "grad_norm": 0.9019191265106201, + "learning_rate": 7.047681973350953e-06, + "loss": 2.2088, + "step": 12800 + }, + { + "epoch": 2.130822596630327, + "grad_norm": 1.6210988759994507, + "learning_rate": 7.1027419887677575e-06, + "loss": 2.0813, + "step": 12900 + }, + { + "epoch": 2.1473406012553684, + "grad_norm": 0.9672953486442566, + "learning_rate": 7.157802004184562e-06, + "loss": 1.9636, + "step": 13000 + }, + { + "epoch": 2.1473406012553684, + "eval_cer": 0.42876942029977416, + "eval_loss": 1.6961537599563599, + "eval_runtime": 48.2183, + "eval_samples_per_second": 35.07, + "eval_steps_per_second": 8.773, + "eval_wer": 0.9999375507400238, + "step": 13000 + }, + { + "epoch": 2.1638586058804097, + "grad_norm": 1.5448602437973022, + "learning_rate": 7.212862019601367e-06, + "loss": 1.8347, + "step": 13100 + }, + { + "epoch": 2.180376610505451, + "grad_norm": 1.3155843019485474, + "learning_rate": 7.2679220350181704e-06, + "loss": 1.6876, + "step": 13200 + }, + { + "epoch": 2.1968946151304922, + "grad_norm": 1.4173344373703003, + "learning_rate": 7.322982050434975e-06, + "loss": 1.6086, + "step": 13300 + }, + { + "epoch": 2.2134126197555335, + "grad_norm": 1.2968000173568726, + "learning_rate": 7.37804206585178e-06, + "loss": 1.4922, + "step": 13400 + }, + { + "epoch": 2.2299306243805748, + "grad_norm": 1.3589733839035034, + "learning_rate": 7.433102081268584e-06, + "loss": 1.4186, + "step": 13500 + }, + { + "epoch": 2.246448629005616, + "grad_norm": 1.5690885782241821, + "learning_rate": 7.488162096685387e-06, + "loss": 1.3541, + "step": 13600 + }, + { + "epoch": 2.2629666336306573, + "grad_norm": 1.1378659009933472, + "learning_rate": 7.543222112102192e-06, + "loss": 1.2945, + "step": 13700 + }, + { + "epoch": 2.2794846382556986, + "grad_norm": 1.2513694763183594, + "learning_rate": 7.598282127518996e-06, + "loss": 1.1947, + "step": 13800 + }, + { + "epoch": 2.29600264288074, + "grad_norm": 1.4649907350540161, + "learning_rate": 7.6533421429358e-06, + "loss": 1.1518, + "step": 13900 + }, + { + "epoch": 2.312520647505781, + "grad_norm": 1.2642732858657837, + "learning_rate": 7.708402158352606e-06, + "loss": 1.117, + "step": 14000 + }, + { + "epoch": 2.312520647505781, + "eval_cer": 0.13544589692697284, + "eval_loss": 0.8784080147743225, + "eval_runtime": 48.3911, + "eval_samples_per_second": 34.944, + "eval_steps_per_second": 8.741, + "eval_wer": 0.7124836070692562, + "step": 14000 + }, + { + "epoch": 2.329038652130823, + "grad_norm": 1.14898681640625, + "learning_rate": 7.76346217376941e-06, + "loss": 1.0267, + "step": 14100 + }, + { + "epoch": 2.345556656755864, + "grad_norm": 1.1903839111328125, + "learning_rate": 7.818522189186215e-06, + "loss": 0.994, + "step": 14200 + }, + { + "epoch": 2.3620746613809054, + "grad_norm": 1.2429312467575073, + "learning_rate": 7.873582204603017e-06, + "loss": 0.9589, + "step": 14300 + }, + { + "epoch": 2.3785926660059467, + "grad_norm": 1.2845401763916016, + "learning_rate": 7.928642220019822e-06, + "loss": 0.9027, + "step": 14400 + }, + { + "epoch": 2.395110670630988, + "grad_norm": 1.204010009765625, + "learning_rate": 7.983702235436626e-06, + "loss": 0.8773, + "step": 14500 + }, + { + "epoch": 2.411628675256029, + "grad_norm": 1.1621061563491821, + "learning_rate": 8.038762250853432e-06, + "loss": 0.8351, + "step": 14600 + }, + { + "epoch": 2.4281466798810705, + "grad_norm": 1.153045654296875, + "learning_rate": 8.093822266270235e-06, + "loss": 0.8008, + "step": 14700 + }, + { + "epoch": 2.4446646845061117, + "grad_norm": 1.1481854915618896, + "learning_rate": 8.148882281687039e-06, + "loss": 0.7573, + "step": 14800 + }, + { + "epoch": 2.461182689131153, + "grad_norm": 1.1236218214035034, + "learning_rate": 8.203942297103844e-06, + "loss": 0.7381, + "step": 14900 + }, + { + "epoch": 2.4777006937561943, + "grad_norm": 0.9569886326789856, + "learning_rate": 8.259002312520648e-06, + "loss": 0.7118, + "step": 15000 + }, + { + "epoch": 2.4777006937561943, + "eval_cer": 0.1136900280610499, + "eval_loss": 0.5557882189750671, + "eval_runtime": 48.3382, + "eval_samples_per_second": 34.983, + "eval_steps_per_second": 8.751, + "eval_wer": 0.6227440204833573, + "step": 15000 + }, + { + "epoch": 2.4942186983812356, + "grad_norm": 1.3618088960647583, + "learning_rate": 8.314062327937452e-06, + "loss": 0.7074, + "step": 15100 + }, + { + "epoch": 2.510736703006277, + "grad_norm": 0.933047354221344, + "learning_rate": 8.369122343354257e-06, + "loss": 0.6549, + "step": 15200 + }, + { + "epoch": 2.527254707631318, + "grad_norm": 1.266646146774292, + "learning_rate": 8.424182358771061e-06, + "loss": 0.6476, + "step": 15300 + }, + { + "epoch": 2.5437727122563594, + "grad_norm": 1.2776057720184326, + "learning_rate": 8.479242374187867e-06, + "loss": 0.6121, + "step": 15400 + }, + { + "epoch": 2.5602907168814006, + "grad_norm": 0.9074415564537048, + "learning_rate": 8.534302389604669e-06, + "loss": 0.5804, + "step": 15500 + }, + { + "epoch": 2.576808721506442, + "grad_norm": 0.9598638415336609, + "learning_rate": 8.589362405021474e-06, + "loss": 0.5695, + "step": 15600 + }, + { + "epoch": 2.593326726131483, + "grad_norm": 1.142428994178772, + "learning_rate": 8.644422420438278e-06, + "loss": 0.5548, + "step": 15700 + }, + { + "epoch": 2.6098447307565245, + "grad_norm": 1.1081598997116089, + "learning_rate": 8.699482435855082e-06, + "loss": 0.5482, + "step": 15800 + }, + { + "epoch": 2.6263627353815657, + "grad_norm": 1.1400047540664673, + "learning_rate": 8.754542451271887e-06, + "loss": 0.5071, + "step": 15900 + }, + { + "epoch": 2.642880740006607, + "grad_norm": 1.0958024263381958, + "learning_rate": 8.809602466688691e-06, + "loss": 0.4936, + "step": 16000 + }, + { + "epoch": 2.642880740006607, + "eval_cer": 0.09633153103825885, + "eval_loss": 0.38487282395362854, + "eval_runtime": 48.7501, + "eval_samples_per_second": 34.687, + "eval_steps_per_second": 8.677, + "eval_wer": 0.5314432023980515, + "step": 16000 + }, + { + "epoch": 2.6593987446316483, + "grad_norm": 1.1065205335617065, + "learning_rate": 8.864662482105496e-06, + "loss": 0.4899, + "step": 16100 + }, + { + "epoch": 2.6759167492566895, + "grad_norm": 1.138617992401123, + "learning_rate": 8.9197224975223e-06, + "loss": 0.4721, + "step": 16200 + }, + { + "epoch": 2.692434753881731, + "grad_norm": 1.2217905521392822, + "learning_rate": 8.974782512939104e-06, + "loss": 0.4723, + "step": 16300 + }, + { + "epoch": 2.7089527585067725, + "grad_norm": 1.0747772455215454, + "learning_rate": 9.02984252835591e-06, + "loss": 0.4861, + "step": 16400 + }, + { + "epoch": 2.725470763131814, + "grad_norm": 1.0680921077728271, + "learning_rate": 9.084902543772713e-06, + "loss": 0.4374, + "step": 16500 + }, + { + "epoch": 2.741988767756855, + "grad_norm": 1.0042054653167725, + "learning_rate": 9.139962559189517e-06, + "loss": 0.4247, + "step": 16600 + }, + { + "epoch": 2.7585067723818963, + "grad_norm": 0.9904269576072693, + "learning_rate": 9.195022574606322e-06, + "loss": 0.4356, + "step": 16700 + }, + { + "epoch": 2.7750247770069376, + "grad_norm": 1.1831291913986206, + "learning_rate": 9.250082590023126e-06, + "loss": 0.41, + "step": 16800 + }, + { + "epoch": 2.791542781631979, + "grad_norm": 0.973407506942749, + "learning_rate": 9.305142605439931e-06, + "loss": 0.4053, + "step": 16900 + }, + { + "epoch": 2.80806078625702, + "grad_norm": 1.0600470304489136, + "learning_rate": 9.360202620856734e-06, + "loss": 0.4109, + "step": 17000 + }, + { + "epoch": 2.80806078625702, + "eval_cer": 0.08471357196632674, + "eval_loss": 0.3102828562259674, + "eval_runtime": 50.4826, + "eval_samples_per_second": 33.497, + "eval_steps_per_second": 8.379, + "eval_wer": 0.46568413164304, + "step": 17000 + }, + { + "epoch": 2.8245787908820614, + "grad_norm": 1.3490804433822632, + "learning_rate": 9.415262636273539e-06, + "loss": 0.4253, + "step": 17100 + }, + { + "epoch": 2.8410967955071027, + "grad_norm": 0.9531931281089783, + "learning_rate": 9.470322651690343e-06, + "loss": 0.4057, + "step": 17200 + }, + { + "epoch": 2.857614800132144, + "grad_norm": 1.31855046749115, + "learning_rate": 9.525382667107148e-06, + "loss": 0.3935, + "step": 17300 + }, + { + "epoch": 2.8741328047571852, + "grad_norm": 0.9209637641906738, + "learning_rate": 9.580442682523952e-06, + "loss": 0.3822, + "step": 17400 + }, + { + "epoch": 2.8906508093822265, + "grad_norm": 1.0796180963516235, + "learning_rate": 9.635502697940756e-06, + "loss": 0.3829, + "step": 17500 + }, + { + "epoch": 2.907168814007268, + "grad_norm": 0.9043625593185425, + "learning_rate": 9.690562713357561e-06, + "loss": 0.358, + "step": 17600 + }, + { + "epoch": 2.923686818632309, + "grad_norm": 0.956969678401947, + "learning_rate": 9.745622728774365e-06, + "loss": 0.3563, + "step": 17700 + }, + { + "epoch": 2.9402048232573508, + "grad_norm": 0.9611093997955322, + "learning_rate": 9.800682744191169e-06, + "loss": 0.3886, + "step": 17800 + }, + { + "epoch": 2.956722827882392, + "grad_norm": 0.9433591365814209, + "learning_rate": 9.855742759607974e-06, + "loss": 0.3646, + "step": 17900 + }, + { + "epoch": 2.9732408325074333, + "grad_norm": 1.0778321027755737, + "learning_rate": 9.910802775024778e-06, + "loss": 0.3928, + "step": 18000 + }, + { + "epoch": 2.9732408325074333, + "eval_cer": 0.07708233522688386, + "eval_loss": 0.2756275534629822, + "eval_runtime": 48.2974, + "eval_samples_per_second": 35.012, + "eval_steps_per_second": 8.758, + "eval_wer": 0.42696559045775306, + "step": 18000 + }, + { + "epoch": 2.9897588371324746, + "grad_norm": 0.880342423915863, + "learning_rate": 9.965862790441582e-06, + "loss": 0.3485, + "step": 18100 + }, + { + "epoch": 3.006276841757516, + "grad_norm": 0.955066442489624, + "learning_rate": 9.997675243793513e-06, + "loss": 0.3558, + "step": 18200 + }, + { + "epoch": 3.022794846382557, + "grad_norm": 0.9584730863571167, + "learning_rate": 9.991557464302758e-06, + "loss": 0.3374, + "step": 18300 + }, + { + "epoch": 3.0393128510075984, + "grad_norm": 0.9742453694343567, + "learning_rate": 9.985439684812002e-06, + "loss": 0.3238, + "step": 18400 + }, + { + "epoch": 3.0558308556326397, + "grad_norm": 0.9343051910400391, + "learning_rate": 9.979321905321245e-06, + "loss": 0.325, + "step": 18500 + }, + { + "epoch": 3.072348860257681, + "grad_norm": 0.9533226490020752, + "learning_rate": 9.973204125830489e-06, + "loss": 0.3221, + "step": 18600 + }, + { + "epoch": 3.088866864882722, + "grad_norm": 1.0769504308700562, + "learning_rate": 9.967086346339734e-06, + "loss": 0.3212, + "step": 18700 + }, + { + "epoch": 3.1053848695077635, + "grad_norm": 1.0930256843566895, + "learning_rate": 9.960968566848977e-06, + "loss": 0.323, + "step": 18800 + }, + { + "epoch": 3.1219028741328048, + "grad_norm": 1.0789791345596313, + "learning_rate": 9.954850787358221e-06, + "loss": 0.3082, + "step": 18900 + }, + { + "epoch": 3.138420878757846, + "grad_norm": 0.8638594150543213, + "learning_rate": 9.948733007867464e-06, + "loss": 0.3282, + "step": 19000 + }, + { + "epoch": 3.138420878757846, + "eval_cer": 0.07125624529464103, + "eval_loss": 0.25253963470458984, + "eval_runtime": 48.5825, + "eval_samples_per_second": 34.807, + "eval_steps_per_second": 8.707, + "eval_wer": 0.3938050334103541, + "step": 19000 + }, + { + "epoch": 3.1549388833828873, + "grad_norm": 1.095402479171753, + "learning_rate": 9.94261522837671e-06, + "loss": 0.3278, + "step": 19100 + }, + { + "epoch": 3.1714568880079286, + "grad_norm": 0.7922815680503845, + "learning_rate": 9.936497448885953e-06, + "loss": 0.3157, + "step": 19200 + }, + { + "epoch": 3.18797489263297, + "grad_norm": 0.9539555907249451, + "learning_rate": 9.930379669395196e-06, + "loss": 0.3025, + "step": 19300 + }, + { + "epoch": 3.204492897258011, + "grad_norm": 0.8902342915534973, + "learning_rate": 9.92426188990444e-06, + "loss": 0.3065, + "step": 19400 + }, + { + "epoch": 3.2210109018830524, + "grad_norm": 0.8279675841331482, + "learning_rate": 9.918144110413685e-06, + "loss": 0.2993, + "step": 19500 + }, + { + "epoch": 3.2375289065080937, + "grad_norm": 0.8788127899169922, + "learning_rate": 9.912026330922929e-06, + "loss": 0.3052, + "step": 19600 + }, + { + "epoch": 3.254046911133135, + "grad_norm": 1.043555736541748, + "learning_rate": 9.905908551432174e-06, + "loss": 0.3041, + "step": 19700 + }, + { + "epoch": 3.270564915758176, + "grad_norm": 1.0483660697937012, + "learning_rate": 9.899790771941417e-06, + "loss": 0.3075, + "step": 19800 + }, + { + "epoch": 3.2870829203832175, + "grad_norm": 1.1182364225387573, + "learning_rate": 9.89367299245066e-06, + "loss": 0.2896, + "step": 19900 + }, + { + "epoch": 3.303600925008259, + "grad_norm": 0.951245903968811, + "learning_rate": 9.887555212959906e-06, + "loss": 0.2924, + "step": 20000 + }, + { + "epoch": 3.303600925008259, + "eval_cer": 0.06822770515365136, + "eval_loss": 0.23321548104286194, + "eval_runtime": 47.9978, + "eval_samples_per_second": 35.231, + "eval_steps_per_second": 8.813, + "eval_wer": 0.3738837194779242, + "step": 20000 + }, + { + "epoch": 3.3201189296333005, + "grad_norm": 0.8600097894668579, + "learning_rate": 9.88143743346915e-06, + "loss": 0.2907, + "step": 20100 + }, + { + "epoch": 3.3366369342583417, + "grad_norm": 1.1580179929733276, + "learning_rate": 9.875319653978393e-06, + "loss": 0.2864, + "step": 20200 + }, + { + "epoch": 3.353154938883383, + "grad_norm": 0.8637755513191223, + "learning_rate": 9.869201874487638e-06, + "loss": 0.2851, + "step": 20300 + }, + { + "epoch": 3.3696729435084243, + "grad_norm": 0.8564406633377075, + "learning_rate": 9.863084094996881e-06, + "loss": 0.2808, + "step": 20400 + }, + { + "epoch": 3.3861909481334656, + "grad_norm": 0.8136361241340637, + "learning_rate": 9.856966315506125e-06, + "loss": 0.3224, + "step": 20500 + }, + { + "epoch": 3.402708952758507, + "grad_norm": 1.0240442752838135, + "learning_rate": 9.850848536015368e-06, + "loss": 0.2779, + "step": 20600 + }, + { + "epoch": 3.419226957383548, + "grad_norm": 1.09860360622406, + "learning_rate": 9.844730756524613e-06, + "loss": 0.2817, + "step": 20700 + }, + { + "epoch": 3.4357449620085894, + "grad_norm": 1.0666810274124146, + "learning_rate": 9.838612977033857e-06, + "loss": 0.2917, + "step": 20800 + }, + { + "epoch": 3.4522629666336306, + "grad_norm": 0.9965100288391113, + "learning_rate": 9.8324951975431e-06, + "loss": 0.3204, + "step": 20900 + }, + { + "epoch": 3.468780971258672, + "grad_norm": 0.9994562864303589, + "learning_rate": 9.826377418052344e-06, + "loss": 0.2647, + "step": 21000 + }, + { + "epoch": 3.468780971258672, + "eval_cer": 0.06489973307781809, + "eval_loss": 0.22496198117733002, + "eval_runtime": 48.3102, + "eval_samples_per_second": 35.003, + "eval_steps_per_second": 8.756, + "eval_wer": 0.3570224192843315, + "step": 21000 + }, + { + "epoch": 3.485298975883713, + "grad_norm": 0.7928122878074646, + "learning_rate": 9.820259638561589e-06, + "loss": 0.3177, + "step": 21100 + }, + { + "epoch": 3.5018169805087545, + "grad_norm": 0.8977111577987671, + "learning_rate": 9.814141859070832e-06, + "loss": 0.2634, + "step": 21200 + }, + { + "epoch": 3.5183349851337957, + "grad_norm": 0.8508104085922241, + "learning_rate": 9.808024079580076e-06, + "loss": 0.2672, + "step": 21300 + }, + { + "epoch": 3.534852989758837, + "grad_norm": 0.8211712837219238, + "learning_rate": 9.80190630008932e-06, + "loss": 0.2742, + "step": 21400 + }, + { + "epoch": 3.5513709943838783, + "grad_norm": 0.739600658416748, + "learning_rate": 9.795788520598564e-06, + "loss": 0.2756, + "step": 21500 + }, + { + "epoch": 3.56788899900892, + "grad_norm": 1.0395748615264893, + "learning_rate": 9.789670741107808e-06, + "loss": 0.277, + "step": 21600 + }, + { + "epoch": 3.5844070036339613, + "grad_norm": 0.8592670559883118, + "learning_rate": 9.783552961617051e-06, + "loss": 0.2576, + "step": 21700 + }, + { + "epoch": 3.6009250082590025, + "grad_norm": 2.0866379737854004, + "learning_rate": 9.777435182126297e-06, + "loss": 0.2561, + "step": 21800 + }, + { + "epoch": 3.617443012884044, + "grad_norm": 0.7784512042999268, + "learning_rate": 9.77131740263554e-06, + "loss": 0.2649, + "step": 21900 + }, + { + "epoch": 3.633961017509085, + "grad_norm": 0.8441452383995056, + "learning_rate": 9.765199623144783e-06, + "loss": 0.2516, + "step": 22000 + }, + { + "epoch": 3.633961017509085, + "eval_cer": 0.06247861200465403, + "eval_loss": 0.20949821174144745, + "eval_runtime": 48.003, + "eval_samples_per_second": 35.227, + "eval_steps_per_second": 8.812, + "eval_wer": 0.3444701180291014, + "step": 22000 + }, + { + "epoch": 3.6504790221341263, + "grad_norm": 1.5408446788787842, + "learning_rate": 9.759081843654029e-06, + "loss": 0.2725, + "step": 22100 + }, + { + "epoch": 3.6669970267591676, + "grad_norm": 0.9491544961929321, + "learning_rate": 9.752964064163272e-06, + "loss": 0.2484, + "step": 22200 + }, + { + "epoch": 3.683515031384209, + "grad_norm": 0.9039120674133301, + "learning_rate": 9.746846284672517e-06, + "loss": 0.2673, + "step": 22300 + }, + { + "epoch": 3.70003303600925, + "grad_norm": 2.336216926574707, + "learning_rate": 9.74072850518176e-06, + "loss": 0.2588, + "step": 22400 + }, + { + "epoch": 3.7165510406342914, + "grad_norm": 0.889430046081543, + "learning_rate": 9.734610725691004e-06, + "loss": 0.2418, + "step": 22500 + }, + { + "epoch": 3.7330690452593327, + "grad_norm": 0.9641227722167969, + "learning_rate": 9.728492946200248e-06, + "loss": 0.2447, + "step": 22600 + }, + { + "epoch": 3.749587049884374, + "grad_norm": 1.2751914262771606, + "learning_rate": 9.722375166709493e-06, + "loss": 0.2481, + "step": 22700 + }, + { + "epoch": 3.7661050545094152, + "grad_norm": 0.8244801759719849, + "learning_rate": 9.716257387218736e-06, + "loss": 0.281, + "step": 22800 + }, + { + "epoch": 3.7826230591344565, + "grad_norm": 0.8751392364501953, + "learning_rate": 9.71013960772798e-06, + "loss": 0.2613, + "step": 22900 + }, + { + "epoch": 3.799141063759498, + "grad_norm": 0.8482999801635742, + "learning_rate": 9.704021828237223e-06, + "loss": 0.2372, + "step": 23000 + }, + { + "epoch": 3.799141063759498, + "eval_cer": 0.06080179316952981, + "eval_loss": 0.20372046530246735, + "eval_runtime": 48.3892, + "eval_samples_per_second": 34.946, + "eval_steps_per_second": 8.742, + "eval_wer": 0.33447823643289826, + "step": 23000 + }, + { + "epoch": 3.815659068384539, + "grad_norm": 0.7899025082588196, + "learning_rate": 9.697904048746468e-06, + "loss": 0.239, + "step": 23100 + }, + { + "epoch": 3.8321770730095803, + "grad_norm": 1.0239996910095215, + "learning_rate": 9.691786269255712e-06, + "loss": 0.2355, + "step": 23200 + }, + { + "epoch": 3.8486950776346216, + "grad_norm": 1.0043885707855225, + "learning_rate": 9.685668489764955e-06, + "loss": 0.2517, + "step": 23300 + }, + { + "epoch": 3.865213082259663, + "grad_norm": 0.7398520708084106, + "learning_rate": 9.679550710274199e-06, + "loss": 0.2349, + "step": 23400 + }, + { + "epoch": 3.881731086884704, + "grad_norm": 0.8725094199180603, + "learning_rate": 9.673432930783444e-06, + "loss": 0.2357, + "step": 23500 + }, + { + "epoch": 3.8982490915097454, + "grad_norm": 0.9465588927268982, + "learning_rate": 9.667315151292687e-06, + "loss": 0.249, + "step": 23600 + }, + { + "epoch": 3.9147670961347867, + "grad_norm": 0.814136266708374, + "learning_rate": 9.66119737180193e-06, + "loss": 0.229, + "step": 23700 + }, + { + "epoch": 3.931285100759828, + "grad_norm": 0.7686489820480347, + "learning_rate": 9.655079592311174e-06, + "loss": 0.2287, + "step": 23800 + }, + { + "epoch": 3.9478031053848697, + "grad_norm": 1.0001749992370605, + "learning_rate": 9.64896181282042e-06, + "loss": 0.2334, + "step": 23900 + }, + { + "epoch": 3.964321110009911, + "grad_norm": 0.9546142220497131, + "learning_rate": 9.642844033329663e-06, + "loss": 0.2447, + "step": 24000 + }, + { + "epoch": 3.964321110009911, + "eval_cer": 0.058791321607008416, + "eval_loss": 0.1985878199338913, + "eval_runtime": 48.7285, + "eval_samples_per_second": 34.702, + "eval_steps_per_second": 8.681, + "eval_wer": 0.32417410853681383, + "step": 24000 + }, + { + "epoch": 3.980839114634952, + "grad_norm": 0.7841982841491699, + "learning_rate": 9.636726253838908e-06, + "loss": 0.2366, + "step": 24100 + }, + { + "epoch": 3.9973571192599935, + "grad_norm": 0.7850095629692078, + "learning_rate": 9.630608474348151e-06, + "loss": 0.2393, + "step": 24200 + }, + { + "epoch": 4.013875123885034, + "grad_norm": 0.8924582004547119, + "learning_rate": 9.624490694857395e-06, + "loss": 0.2382, + "step": 24300 + }, + { + "epoch": 4.030393128510076, + "grad_norm": 0.7775335907936096, + "learning_rate": 9.61837291536664e-06, + "loss": 0.2317, + "step": 24400 + }, + { + "epoch": 4.046911133135117, + "grad_norm": 0.8482388257980347, + "learning_rate": 9.612255135875884e-06, + "loss": 0.2673, + "step": 24500 + }, + { + "epoch": 4.063429137760158, + "grad_norm": 0.8175519704818726, + "learning_rate": 9.606137356385127e-06, + "loss": 0.2271, + "step": 24600 + }, + { + "epoch": 4.0799471423852, + "grad_norm": 0.6910988688468933, + "learning_rate": 9.600019576894372e-06, + "loss": 0.2321, + "step": 24700 + }, + { + "epoch": 4.096465147010242, + "grad_norm": 0.8976187109947205, + "learning_rate": 9.593901797403616e-06, + "loss": 0.2348, + "step": 24800 + }, + { + "epoch": 4.112983151635283, + "grad_norm": 0.8644862174987793, + "learning_rate": 9.587784017912859e-06, + "loss": 0.2219, + "step": 24900 + }, + { + "epoch": 4.129501156260324, + "grad_norm": 0.7493522763252258, + "learning_rate": 9.581666238422103e-06, + "loss": 0.2304, + "step": 25000 + }, + { + "epoch": 4.129501156260324, + "eval_cer": 0.05679796044076381, + "eval_loss": 0.18797007203102112, + "eval_runtime": 48.2897, + "eval_samples_per_second": 35.018, + "eval_steps_per_second": 8.76, + "eval_wer": 0.3129956910010616, + "step": 25000 + }, + { + "epoch": 4.146019160885365, + "grad_norm": 0.7841621041297913, + "learning_rate": 9.575548458931348e-06, + "loss": 0.2297, + "step": 25100 + }, + { + "epoch": 4.162537165510407, + "grad_norm": 0.8792735934257507, + "learning_rate": 9.569430679440591e-06, + "loss": 0.2305, + "step": 25200 + }, + { + "epoch": 4.179055170135448, + "grad_norm": 1.1668968200683594, + "learning_rate": 9.563312899949835e-06, + "loss": 0.2595, + "step": 25300 + }, + { + "epoch": 4.195573174760489, + "grad_norm": 0.859511137008667, + "learning_rate": 9.557195120459078e-06, + "loss": 0.217, + "step": 25400 + }, + { + "epoch": 4.2120911793855305, + "grad_norm": 0.9139505624771118, + "learning_rate": 9.551077340968323e-06, + "loss": 0.2263, + "step": 25500 + }, + { + "epoch": 4.228609184010572, + "grad_norm": 0.776094377040863, + "learning_rate": 9.544959561477567e-06, + "loss": 0.2208, + "step": 25600 + }, + { + "epoch": 4.245127188635613, + "grad_norm": 0.8811630606651306, + "learning_rate": 9.53884178198681e-06, + "loss": 0.2229, + "step": 25700 + }, + { + "epoch": 4.261645193260654, + "grad_norm": 0.9367398619651794, + "learning_rate": 9.532724002496054e-06, + "loss": 0.2221, + "step": 25800 + }, + { + "epoch": 4.2781631978856955, + "grad_norm": 0.8413158655166626, + "learning_rate": 9.526606223005299e-06, + "loss": 0.2616, + "step": 25900 + }, + { + "epoch": 4.294681202510737, + "grad_norm": 0.9497280120849609, + "learning_rate": 9.520488443514542e-06, + "loss": 0.2828, + "step": 26000 + }, + { + "epoch": 4.294681202510737, + "eval_cer": 0.05571145027718842, + "eval_loss": 0.18053312599658966, + "eval_runtime": 48.2767, + "eval_samples_per_second": 35.027, + "eval_steps_per_second": 8.762, + "eval_wer": 0.30774995316305503, + "step": 26000 + }, + { + "epoch": 4.311199207135778, + "grad_norm": 0.7471584677696228, + "learning_rate": 9.514370664023786e-06, + "loss": 0.217, + "step": 26100 + }, + { + "epoch": 4.327717211760819, + "grad_norm": 0.9198097586631775, + "learning_rate": 9.508252884533031e-06, + "loss": 0.2093, + "step": 26200 + }, + { + "epoch": 4.344235216385861, + "grad_norm": 1.2914992570877075, + "learning_rate": 9.502135105042274e-06, + "loss": 0.2117, + "step": 26300 + }, + { + "epoch": 4.360753221010902, + "grad_norm": 1.1176624298095703, + "learning_rate": 9.496017325551518e-06, + "loss": 0.2213, + "step": 26400 + }, + { + "epoch": 4.377271225635943, + "grad_norm": 0.799958348274231, + "learning_rate": 9.489899546060763e-06, + "loss": 0.2175, + "step": 26500 + }, + { + "epoch": 4.3937892302609844, + "grad_norm": 0.8993442058563232, + "learning_rate": 9.483781766570006e-06, + "loss": 0.2126, + "step": 26600 + }, + { + "epoch": 4.410307234886026, + "grad_norm": 1.000191330909729, + "learning_rate": 9.477663987079252e-06, + "loss": 0.2044, + "step": 26700 + }, + { + "epoch": 4.426825239511067, + "grad_norm": 1.1478374004364014, + "learning_rate": 9.471546207588495e-06, + "loss": 0.2325, + "step": 26800 + }, + { + "epoch": 4.443343244136108, + "grad_norm": 0.7666307091712952, + "learning_rate": 9.465428428097739e-06, + "loss": 0.2086, + "step": 26900 + }, + { + "epoch": 4.4598612487611495, + "grad_norm": 0.9440354108810425, + "learning_rate": 9.459310648606982e-06, + "loss": 0.2054, + "step": 27000 + }, + { + "epoch": 4.4598612487611495, + "eval_cer": 0.05489870645404148, + "eval_loss": 0.17803701758384705, + "eval_runtime": 48.3147, + "eval_samples_per_second": 35.0, + "eval_steps_per_second": 8.755, + "eval_wer": 0.30181727346530945, + "step": 27000 + }, + { + "epoch": 4.476379253386191, + "grad_norm": 0.975606381893158, + "learning_rate": 9.453192869116227e-06, + "loss": 0.2188, + "step": 27100 + }, + { + "epoch": 4.492897258011232, + "grad_norm": 1.0825639963150024, + "learning_rate": 9.44707508962547e-06, + "loss": 0.1951, + "step": 27200 + }, + { + "epoch": 4.509415262636273, + "grad_norm": 0.858279824256897, + "learning_rate": 9.440957310134714e-06, + "loss": 0.2202, + "step": 27300 + }, + { + "epoch": 4.525933267261315, + "grad_norm": 0.8295080661773682, + "learning_rate": 9.434839530643958e-06, + "loss": 0.2143, + "step": 27400 + }, + { + "epoch": 4.542451271886356, + "grad_norm": 1.0606642961502075, + "learning_rate": 9.428721751153203e-06, + "loss": 0.2116, + "step": 27500 + }, + { + "epoch": 4.558969276511397, + "grad_norm": 0.8650080561637878, + "learning_rate": 9.422603971662446e-06, + "loss": 0.202, + "step": 27600 + }, + { + "epoch": 4.575487281136438, + "grad_norm": 0.7315616011619568, + "learning_rate": 9.41648619217169e-06, + "loss": 0.2088, + "step": 27700 + }, + { + "epoch": 4.59200528576148, + "grad_norm": 0.6952201724052429, + "learning_rate": 9.410368412680933e-06, + "loss": 0.2007, + "step": 27800 + }, + { + "epoch": 4.608523290386521, + "grad_norm": 1.3911277055740356, + "learning_rate": 9.404250633190178e-06, + "loss": 0.2242, + "step": 27900 + }, + { + "epoch": 4.625041295011562, + "grad_norm": 0.9672855734825134, + "learning_rate": 9.398132853699422e-06, + "loss": 0.2021, + "step": 28000 + }, + { + "epoch": 4.625041295011562, + "eval_cer": 0.05435117377318459, + "eval_loss": 0.17107851803302765, + "eval_runtime": 48.3911, + "eval_samples_per_second": 34.944, + "eval_steps_per_second": 8.741, + "eval_wer": 0.2986323612065197, + "step": 28000 + }, + { + "epoch": 4.6415592996366035, + "grad_norm": 0.869714617729187, + "learning_rate": 9.392015074208665e-06, + "loss": 0.2054, + "step": 28100 + }, + { + "epoch": 4.658077304261646, + "grad_norm": 0.7512599229812622, + "learning_rate": 9.385897294717909e-06, + "loss": 0.2449, + "step": 28200 + }, + { + "epoch": 4.674595308886687, + "grad_norm": 0.8734112977981567, + "learning_rate": 9.379779515227154e-06, + "loss": 0.2067, + "step": 28300 + }, + { + "epoch": 4.691113313511728, + "grad_norm": 1.0213971138000488, + "learning_rate": 9.373661735736397e-06, + "loss": 0.1941, + "step": 28400 + }, + { + "epoch": 4.7076313181367695, + "grad_norm": 0.8136150240898132, + "learning_rate": 9.367543956245642e-06, + "loss": 0.197, + "step": 28500 + }, + { + "epoch": 4.724149322761811, + "grad_norm": 0.889690637588501, + "learning_rate": 9.361426176754886e-06, + "loss": 0.1887, + "step": 28600 + }, + { + "epoch": 4.740667327386852, + "grad_norm": 0.8246539235115051, + "learning_rate": 9.35530839726413e-06, + "loss": 0.1963, + "step": 28700 + }, + { + "epoch": 4.757185332011893, + "grad_norm": 1.07891845703125, + "learning_rate": 9.349190617773374e-06, + "loss": 0.2013, + "step": 28800 + }, + { + "epoch": 4.773703336636935, + "grad_norm": 0.8867871165275574, + "learning_rate": 9.343072838282618e-06, + "loss": 0.1969, + "step": 28900 + }, + { + "epoch": 4.790221341261976, + "grad_norm": 1.0651185512542725, + "learning_rate": 9.336955058791861e-06, + "loss": 0.1944, + "step": 29000 + }, + { + "epoch": 4.790221341261976, + "eval_cer": 0.05322188761891725, + "eval_loss": 0.16940154135227203, + "eval_runtime": 48.8775, + "eval_samples_per_second": 34.597, + "eval_steps_per_second": 8.654, + "eval_wer": 0.29307437706863176, + "step": 29000 + }, + { + "epoch": 4.806739345887017, + "grad_norm": 0.8285331726074219, + "learning_rate": 9.330837279301107e-06, + "loss": 0.2038, + "step": 29100 + }, + { + "epoch": 4.823257350512058, + "grad_norm": 0.8632585406303406, + "learning_rate": 9.32471949981035e-06, + "loss": 0.1947, + "step": 29200 + }, + { + "epoch": 4.8397753551371, + "grad_norm": 0.7332074046134949, + "learning_rate": 9.318601720319593e-06, + "loss": 0.1929, + "step": 29300 + }, + { + "epoch": 4.856293359762141, + "grad_norm": 0.8279902935028076, + "learning_rate": 9.312483940828837e-06, + "loss": 0.188, + "step": 29400 + }, + { + "epoch": 4.872811364387182, + "grad_norm": 0.887501060962677, + "learning_rate": 9.306366161338082e-06, + "loss": 0.2087, + "step": 29500 + }, + { + "epoch": 4.8893293690122235, + "grad_norm": 0.8915200233459473, + "learning_rate": 9.300248381847326e-06, + "loss": 0.1861, + "step": 29600 + }, + { + "epoch": 4.905847373637265, + "grad_norm": 0.8219689130783081, + "learning_rate": 9.294130602356569e-06, + "loss": 0.1884, + "step": 29700 + }, + { + "epoch": 4.922365378262306, + "grad_norm": 0.8272607326507568, + "learning_rate": 9.288012822865812e-06, + "loss": 0.1991, + "step": 29800 + }, + { + "epoch": 4.938883382887347, + "grad_norm": 2.43432354927063, + "learning_rate": 9.281895043375058e-06, + "loss": 0.196, + "step": 29900 + }, + { + "epoch": 4.955401387512389, + "grad_norm": 0.922092616558075, + "learning_rate": 9.275777263884301e-06, + "loss": 0.1933, + "step": 30000 + }, + { + "epoch": 4.955401387512389, + "eval_cer": 0.0528796796933817, + "eval_loss": 0.1640305370092392, + "eval_runtime": 50.5291, + "eval_samples_per_second": 33.466, + "eval_steps_per_second": 8.371, + "eval_wer": 0.29063885592955724, + "step": 30000 + }, + { + "epoch": 4.97191939213743, + "grad_norm": 0.6855641603469849, + "learning_rate": 9.269659484393545e-06, + "loss": 0.1918, + "step": 30100 + }, + { + "epoch": 4.988437396762471, + "grad_norm": 0.7922428846359253, + "learning_rate": 9.263541704902788e-06, + "loss": 0.2464, + "step": 30200 + }, + { + "epoch": 5.004955401387512, + "grad_norm": 0.9734669923782349, + "learning_rate": 9.257423925412033e-06, + "loss": 0.1909, + "step": 30300 + }, + { + "epoch": 5.021473406012554, + "grad_norm": 0.9491944313049316, + "learning_rate": 9.251306145921277e-06, + "loss": 0.1824, + "step": 30400 + }, + { + "epoch": 5.037991410637595, + "grad_norm": 1.4745386838912964, + "learning_rate": 9.24518836643052e-06, + "loss": 0.1934, + "step": 30500 + }, + { + "epoch": 5.054509415262636, + "grad_norm": 0.8815566897392273, + "learning_rate": 9.239070586939765e-06, + "loss": 0.1844, + "step": 30600 + }, + { + "epoch": 5.0710274198876775, + "grad_norm": 0.741477906703949, + "learning_rate": 9.232952807449009e-06, + "loss": 0.2308, + "step": 30700 + }, + { + "epoch": 5.087545424512719, + "grad_norm": 0.784695029258728, + "learning_rate": 9.226835027958252e-06, + "loss": 0.1852, + "step": 30800 + }, + { + "epoch": 5.10406342913776, + "grad_norm": 0.7334086298942566, + "learning_rate": 9.220717248467497e-06, + "loss": 0.1885, + "step": 30900 + }, + { + "epoch": 5.120581433762801, + "grad_norm": 1.0273959636688232, + "learning_rate": 9.21459946897674e-06, + "loss": 0.1885, + "step": 31000 + }, + { + "epoch": 5.120581433762801, + "eval_cer": 0.05156217918006981, + "eval_loss": 0.16276109218597412, + "eval_runtime": 48.3633, + "eval_samples_per_second": 34.965, + "eval_steps_per_second": 8.746, + "eval_wer": 0.28289514769249985, + "step": 31000 + }, + { + "epoch": 5.1370994383878426, + "grad_norm": 0.8171842694282532, + "learning_rate": 9.208481689485986e-06, + "loss": 0.186, + "step": 31100 + }, + { + "epoch": 5.153617443012884, + "grad_norm": 0.9652734994888306, + "learning_rate": 9.20236390999523e-06, + "loss": 0.1813, + "step": 31200 + }, + { + "epoch": 5.170135447637925, + "grad_norm": 1.062037467956543, + "learning_rate": 9.196246130504473e-06, + "loss": 0.1744, + "step": 31300 + }, + { + "epoch": 5.186653452262966, + "grad_norm": 0.8532341718673706, + "learning_rate": 9.190128351013718e-06, + "loss": 0.1833, + "step": 31400 + }, + { + "epoch": 5.203171456888008, + "grad_norm": 0.7094704508781433, + "learning_rate": 9.184010571522962e-06, + "loss": 0.1827, + "step": 31500 + }, + { + "epoch": 5.219689461513049, + "grad_norm": 0.8176188468933105, + "learning_rate": 9.177892792032205e-06, + "loss": 0.1896, + "step": 31600 + }, + { + "epoch": 5.23620746613809, + "grad_norm": 0.9988218545913696, + "learning_rate": 9.171775012541448e-06, + "loss": 0.1953, + "step": 31700 + }, + { + "epoch": 5.2527254707631315, + "grad_norm": 1.0254257917404175, + "learning_rate": 9.165657233050694e-06, + "loss": 0.1858, + "step": 31800 + }, + { + "epoch": 5.269243475388173, + "grad_norm": 0.7182506918907166, + "learning_rate": 9.159539453559937e-06, + "loss": 0.2522, + "step": 31900 + }, + { + "epoch": 5.285761480013214, + "grad_norm": 0.8318942189216614, + "learning_rate": 9.15342167406918e-06, + "loss": 0.1871, + "step": 32000 + }, + { + "epoch": 5.285761480013214, + "eval_cer": 0.05092909451782903, + "eval_loss": 0.15823741257190704, + "eval_runtime": 48.5699, + "eval_samples_per_second": 34.816, + "eval_steps_per_second": 8.709, + "eval_wer": 0.28108411915318804, + "step": 32000 + }, + { + "epoch": 5.302279484638255, + "grad_norm": 0.6533938050270081, + "learning_rate": 9.147303894578424e-06, + "loss": 0.1801, + "step": 32100 + }, + { + "epoch": 5.3187974892632965, + "grad_norm": 0.8273053169250488, + "learning_rate": 9.141186115087669e-06, + "loss": 0.1815, + "step": 32200 + }, + { + "epoch": 5.335315493888339, + "grad_norm": 0.8062841892242432, + "learning_rate": 9.135068335596913e-06, + "loss": 0.2024, + "step": 32300 + }, + { + "epoch": 5.35183349851338, + "grad_norm": 0.9883460402488708, + "learning_rate": 9.128950556106156e-06, + "loss": 0.176, + "step": 32400 + }, + { + "epoch": 5.368351503138421, + "grad_norm": 1.0027878284454346, + "learning_rate": 9.1228327766154e-06, + "loss": 0.2513, + "step": 32500 + }, + { + "epoch": 5.3848695077634625, + "grad_norm": 0.6766846776008606, + "learning_rate": 9.116714997124645e-06, + "loss": 0.2155, + "step": 32600 + }, + { + "epoch": 5.401387512388504, + "grad_norm": 0.7808175086975098, + "learning_rate": 9.110597217633888e-06, + "loss": 0.188, + "step": 32700 + }, + { + "epoch": 5.417905517013545, + "grad_norm": 0.7467240691184998, + "learning_rate": 9.104479438143132e-06, + "loss": 0.182, + "step": 32800 + }, + { + "epoch": 5.434423521638586, + "grad_norm": 0.6580876708030701, + "learning_rate": 9.098361658652377e-06, + "loss": 0.1771, + "step": 32900 + }, + { + "epoch": 5.450941526263628, + "grad_norm": 0.7481684684753418, + "learning_rate": 9.09224387916162e-06, + "loss": 0.1811, + "step": 33000 + }, + { + "epoch": 5.450941526263628, + "eval_cer": 0.05002224351515981, + "eval_loss": 0.15250813961029053, + "eval_runtime": 49.105, + "eval_samples_per_second": 34.436, + "eval_steps_per_second": 8.614, + "eval_wer": 0.2747142946356086, + "step": 33000 + }, + { + "epoch": 5.467459530888669, + "grad_norm": 0.9323834180831909, + "learning_rate": 9.086126099670864e-06, + "loss": 0.2123, + "step": 33100 + }, + { + "epoch": 5.48397753551371, + "grad_norm": 0.779329776763916, + "learning_rate": 9.080008320180109e-06, + "loss": 0.1807, + "step": 33200 + }, + { + "epoch": 5.500495540138751, + "grad_norm": 1.0125453472137451, + "learning_rate": 9.073890540689352e-06, + "loss": 0.1746, + "step": 33300 + }, + { + "epoch": 5.517013544763793, + "grad_norm": 0.8062576055526733, + "learning_rate": 9.067772761198596e-06, + "loss": 0.1747, + "step": 33400 + }, + { + "epoch": 5.533531549388834, + "grad_norm": 0.817570686340332, + "learning_rate": 9.061654981707841e-06, + "loss": 0.1707, + "step": 33500 + }, + { + "epoch": 5.550049554013875, + "grad_norm": 0.7053462266921997, + "learning_rate": 9.055537202217084e-06, + "loss": 0.1777, + "step": 33600 + }, + { + "epoch": 5.5665675586389165, + "grad_norm": 0.8066178560256958, + "learning_rate": 9.049419422726328e-06, + "loss": 0.171, + "step": 33700 + }, + { + "epoch": 5.583085563263958, + "grad_norm": 0.8742169141769409, + "learning_rate": 9.043301643235573e-06, + "loss": 0.1721, + "step": 33800 + }, + { + "epoch": 5.599603567888999, + "grad_norm": 0.7562609314918518, + "learning_rate": 9.037183863744816e-06, + "loss": 0.2377, + "step": 33900 + }, + { + "epoch": 5.61612157251404, + "grad_norm": 1.084651231765747, + "learning_rate": 9.03106608425406e-06, + "loss": 0.2145, + "step": 34000 + }, + { + "epoch": 5.61612157251404, + "eval_cer": 0.04938060365478064, + "eval_loss": 0.14984501898288727, + "eval_runtime": 48.5125, + "eval_samples_per_second": 34.857, + "eval_steps_per_second": 8.719, + "eval_wer": 0.2715918316367951, + "step": 34000 + }, + { + "epoch": 5.632639577139082, + "grad_norm": 0.8906255960464478, + "learning_rate": 9.024948304763303e-06, + "loss": 0.188, + "step": 34100 + }, + { + "epoch": 5.649157581764123, + "grad_norm": 0.8091058135032654, + "learning_rate": 9.018830525272549e-06, + "loss": 0.1696, + "step": 34200 + }, + { + "epoch": 5.665675586389164, + "grad_norm": 0.9051063656806946, + "learning_rate": 9.012712745781792e-06, + "loss": 0.1744, + "step": 34300 + }, + { + "epoch": 5.682193591014205, + "grad_norm": 1.0009392499923706, + "learning_rate": 9.006594966291035e-06, + "loss": 0.1738, + "step": 34400 + }, + { + "epoch": 5.698711595639247, + "grad_norm": 0.8981117010116577, + "learning_rate": 9.000477186800279e-06, + "loss": 0.1855, + "step": 34500 + }, + { + "epoch": 5.715229600264288, + "grad_norm": 0.9005815386772156, + "learning_rate": 8.994359407309524e-06, + "loss": 0.1784, + "step": 34600 + }, + { + "epoch": 5.731747604889329, + "grad_norm": 0.9097332954406738, + "learning_rate": 8.988241627818768e-06, + "loss": 0.1833, + "step": 34700 + }, + { + "epoch": 5.7482656095143705, + "grad_norm": 0.7952153086662292, + "learning_rate": 8.982123848328011e-06, + "loss": 0.1676, + "step": 34800 + }, + { + "epoch": 5.764783614139412, + "grad_norm": 0.840761661529541, + "learning_rate": 8.976006068837254e-06, + "loss": 0.1744, + "step": 34900 + }, + { + "epoch": 5.781301618764453, + "grad_norm": 0.972186267375946, + "learning_rate": 8.9698882893465e-06, + "loss": 0.1713, + "step": 35000 + }, + { + "epoch": 5.781301618764453, + "eval_cer": 0.049508931626856476, + "eval_loss": 0.14971515536308289, + "eval_runtime": 48.1943, + "eval_samples_per_second": 35.087, + "eval_steps_per_second": 8.777, + "eval_wer": 0.2710297882970087, + "step": 35000 + }, + { + "epoch": 5.797819623389494, + "grad_norm": 0.7469506859779358, + "learning_rate": 8.963770509855743e-06, + "loss": 0.165, + "step": 35100 + }, + { + "epoch": 5.814337628014536, + "grad_norm": 0.8786357045173645, + "learning_rate": 8.957652730364987e-06, + "loss": 0.1715, + "step": 35200 + }, + { + "epoch": 5.830855632639577, + "grad_norm": 0.8044286370277405, + "learning_rate": 8.951534950874232e-06, + "loss": 0.1687, + "step": 35300 + }, + { + "epoch": 5.847373637264618, + "grad_norm": 0.859174370765686, + "learning_rate": 8.945417171383475e-06, + "loss": 0.1658, + "step": 35400 + }, + { + "epoch": 5.863891641889659, + "grad_norm": 0.8090763092041016, + "learning_rate": 8.93929939189272e-06, + "loss": 0.1836, + "step": 35500 + }, + { + "epoch": 5.880409646514701, + "grad_norm": 0.8392448425292969, + "learning_rate": 8.933181612401964e-06, + "loss": 0.1796, + "step": 35600 + }, + { + "epoch": 5.896927651139742, + "grad_norm": 0.8285984396934509, + "learning_rate": 8.927063832911207e-06, + "loss": 0.1703, + "step": 35700 + }, + { + "epoch": 5.913445655764784, + "grad_norm": 0.9240791201591492, + "learning_rate": 8.920946053420452e-06, + "loss": 0.2069, + "step": 35800 + }, + { + "epoch": 5.929963660389825, + "grad_norm": 0.9001137018203735, + "learning_rate": 8.914828273929696e-06, + "loss": 0.1724, + "step": 35900 + }, + { + "epoch": 5.946481665014867, + "grad_norm": 0.8891072273254395, + "learning_rate": 8.90871049443894e-06, + "loss": 0.1766, + "step": 36000 + }, + { + "epoch": 5.946481665014867, + "eval_cer": 0.04906406132366026, + "eval_loss": 0.14797988533973694, + "eval_runtime": 48.6094, + "eval_samples_per_second": 34.787, + "eval_steps_per_second": 8.702, + "eval_wer": 0.2692187597576969, + "step": 36000 + }, + { + "epoch": 5.962999669639908, + "grad_norm": 1.0912429094314575, + "learning_rate": 8.902592714948183e-06, + "loss": 0.1653, + "step": 36100 + }, + { + "epoch": 5.979517674264949, + "grad_norm": 1.203782320022583, + "learning_rate": 8.896474935457428e-06, + "loss": 0.1675, + "step": 36200 + }, + { + "epoch": 5.9960356788899905, + "grad_norm": 0.7841401696205139, + "learning_rate": 8.890357155966671e-06, + "loss": 0.1652, + "step": 36300 + }, + { + "epoch": 6.012553683515032, + "grad_norm": 0.841820478439331, + "learning_rate": 8.884239376475915e-06, + "loss": 0.1677, + "step": 36400 + }, + { + "epoch": 6.029071688140073, + "grad_norm": 0.8913053870201111, + "learning_rate": 8.878121596985158e-06, + "loss": 0.1613, + "step": 36500 + }, + { + "epoch": 6.045589692765114, + "grad_norm": 1.3167953491210938, + "learning_rate": 8.872003817494404e-06, + "loss": 0.1639, + "step": 36600 + }, + { + "epoch": 6.0621076973901555, + "grad_norm": 0.7834457159042358, + "learning_rate": 8.865886038003647e-06, + "loss": 0.1743, + "step": 36700 + }, + { + "epoch": 6.078625702015197, + "grad_norm": 0.790767252445221, + "learning_rate": 8.85976825851289e-06, + "loss": 0.1736, + "step": 36800 + }, + { + "epoch": 6.095143706640238, + "grad_norm": 0.8585237860679626, + "learning_rate": 8.853650479022134e-06, + "loss": 0.1764, + "step": 36900 + }, + { + "epoch": 6.111661711265279, + "grad_norm": 0.9544495344161987, + "learning_rate": 8.847532699531379e-06, + "loss": 0.1662, + "step": 37000 + }, + { + "epoch": 6.111661711265279, + "eval_cer": 0.04808021353774553, + "eval_loss": 0.1443062424659729, + "eval_runtime": 48.3927, + "eval_samples_per_second": 34.943, + "eval_steps_per_second": 8.741, + "eval_wer": 0.2633485293199276, + "step": 37000 + }, + { + "epoch": 6.128179715890321, + "grad_norm": 0.6950782537460327, + "learning_rate": 8.841414920040623e-06, + "loss": 0.1574, + "step": 37100 + }, + { + "epoch": 6.144697720515362, + "grad_norm": 0.7609145641326904, + "learning_rate": 8.835297140549866e-06, + "loss": 0.1699, + "step": 37200 + }, + { + "epoch": 6.161215725140403, + "grad_norm": 0.6571762561798096, + "learning_rate": 8.829179361059111e-06, + "loss": 0.1767, + "step": 37300 + }, + { + "epoch": 6.177733729765444, + "grad_norm": 1.0556763410568237, + "learning_rate": 8.823061581568355e-06, + "loss": 0.1562, + "step": 37400 + }, + { + "epoch": 6.194251734390486, + "grad_norm": 2.556347131729126, + "learning_rate": 8.816943802077598e-06, + "loss": 0.1628, + "step": 37500 + }, + { + "epoch": 6.210769739015527, + "grad_norm": 0.7034619450569153, + "learning_rate": 8.810826022586843e-06, + "loss": 0.1953, + "step": 37600 + }, + { + "epoch": 6.227287743640568, + "grad_norm": 0.7905510067939758, + "learning_rate": 8.804708243096087e-06, + "loss": 0.163, + "step": 37700 + }, + { + "epoch": 6.2438057482656095, + "grad_norm": 0.5802010893821716, + "learning_rate": 8.79859046360533e-06, + "loss": 0.1669, + "step": 37800 + }, + { + "epoch": 6.260323752890651, + "grad_norm": 0.6908354163169861, + "learning_rate": 8.792472684114575e-06, + "loss": 0.1594, + "step": 37900 + }, + { + "epoch": 6.276841757515692, + "grad_norm": 0.5971213579177856, + "learning_rate": 8.786354904623819e-06, + "loss": 0.1629, + "step": 38000 + }, + { + "epoch": 6.276841757515692, + "eval_cer": 0.04748134966805831, + "eval_loss": 0.14269790053367615, + "eval_runtime": 48.0871, + "eval_samples_per_second": 35.165, + "eval_steps_per_second": 8.797, + "eval_wer": 0.2614126022606632, + "step": 38000 + }, + { + "epoch": 6.293359762140733, + "grad_norm": 0.6142451763153076, + "learning_rate": 8.780237125133062e-06, + "loss": 0.1564, + "step": 38100 + }, + { + "epoch": 6.309877766765775, + "grad_norm": 0.6337829232215881, + "learning_rate": 8.774119345642307e-06, + "loss": 0.1605, + "step": 38200 + }, + { + "epoch": 6.326395771390816, + "grad_norm": 0.9899505972862244, + "learning_rate": 8.76800156615155e-06, + "loss": 0.1568, + "step": 38300 + }, + { + "epoch": 6.342913776015857, + "grad_norm": 0.8140648007392883, + "learning_rate": 8.761883786660794e-06, + "loss": 0.1671, + "step": 38400 + }, + { + "epoch": 6.359431780640898, + "grad_norm": 0.9894407987594604, + "learning_rate": 8.755766007170038e-06, + "loss": 0.1635, + "step": 38500 + }, + { + "epoch": 6.37594978526594, + "grad_norm": 0.6572480797767639, + "learning_rate": 8.749648227679283e-06, + "loss": 0.1673, + "step": 38600 + }, + { + "epoch": 6.392467789890981, + "grad_norm": 0.6784759759902954, + "learning_rate": 8.743530448188526e-06, + "loss": 0.1602, + "step": 38700 + }, + { + "epoch": 6.408985794516022, + "grad_norm": 1.053363561630249, + "learning_rate": 8.73741266869777e-06, + "loss": 0.1649, + "step": 38800 + }, + { + "epoch": 6.4255037991410635, + "grad_norm": 0.8504717946052551, + "learning_rate": 8.731294889207013e-06, + "loss": 0.1679, + "step": 38900 + }, + { + "epoch": 6.442021803766105, + "grad_norm": 0.8182229399681091, + "learning_rate": 8.725177109716258e-06, + "loss": 0.1607, + "step": 39000 + }, + { + "epoch": 6.442021803766105, + "eval_cer": 0.047164807336937925, + "eval_loss": 0.139786496758461, + "eval_runtime": 48.4561, + "eval_samples_per_second": 34.898, + "eval_steps_per_second": 8.73, + "eval_wer": 0.2596015737213514, + "step": 39000 + }, + { + "epoch": 6.458539808391146, + "grad_norm": 0.7049907445907593, + "learning_rate": 8.719059330225502e-06, + "loss": 0.1582, + "step": 39100 + }, + { + "epoch": 6.475057813016187, + "grad_norm": 0.928734540939331, + "learning_rate": 8.712941550734745e-06, + "loss": 0.1542, + "step": 39200 + }, + { + "epoch": 6.491575817641229, + "grad_norm": 0.7158243656158447, + "learning_rate": 8.706823771243989e-06, + "loss": 0.2327, + "step": 39300 + }, + { + "epoch": 6.50809382226627, + "grad_norm": 0.7434096336364746, + "learning_rate": 8.700705991753234e-06, + "loss": 0.1557, + "step": 39400 + }, + { + "epoch": 6.524611826891311, + "grad_norm": 0.9645175933837891, + "learning_rate": 8.694588212262477e-06, + "loss": 0.1658, + "step": 39500 + }, + { + "epoch": 6.541129831516352, + "grad_norm": 0.7772352695465088, + "learning_rate": 8.688470432771721e-06, + "loss": 0.1707, + "step": 39600 + }, + { + "epoch": 6.557647836141394, + "grad_norm": 0.7710452675819397, + "learning_rate": 8.682352653280966e-06, + "loss": 0.1546, + "step": 39700 + }, + { + "epoch": 6.574165840766435, + "grad_norm": 0.6807363033294678, + "learning_rate": 8.67623487379021e-06, + "loss": 0.1524, + "step": 39800 + }, + { + "epoch": 6.590683845391476, + "grad_norm": 0.6985335350036621, + "learning_rate": 8.670117094299455e-06, + "loss": 0.2057, + "step": 39900 + }, + { + "epoch": 6.607201850016518, + "grad_norm": 0.6793562173843384, + "learning_rate": 8.663999314808698e-06, + "loss": 0.1616, + "step": 40000 + }, + { + "epoch": 6.607201850016518, + "eval_cer": 0.04697659297789337, + "eval_loss": 0.14157184958457947, + "eval_runtime": 48.5024, + "eval_samples_per_second": 34.864, + "eval_steps_per_second": 8.721, + "eval_wer": 0.25810279148192095, + "step": 40000 + }, + { + "epoch": 6.62371985464156, + "grad_norm": 0.9306012988090515, + "learning_rate": 8.657881535317942e-06, + "loss": 0.1916, + "step": 40100 + }, + { + "epoch": 6.640237859266601, + "grad_norm": 0.7695144414901733, + "learning_rate": 8.651763755827187e-06, + "loss": 0.1581, + "step": 40200 + }, + { + "epoch": 6.656755863891642, + "grad_norm": 0.9468954205513, + "learning_rate": 8.64564597633643e-06, + "loss": 0.1543, + "step": 40300 + }, + { + "epoch": 6.6732738685166835, + "grad_norm": 0.9969133138656616, + "learning_rate": 8.639528196845674e-06, + "loss": 0.2421, + "step": 40400 + }, + { + "epoch": 6.689791873141725, + "grad_norm": 0.7657153606414795, + "learning_rate": 8.633410417354917e-06, + "loss": 0.1498, + "step": 40500 + }, + { + "epoch": 6.706309877766766, + "grad_norm": 0.6543861031532288, + "learning_rate": 8.627292637864162e-06, + "loss": 0.1592, + "step": 40600 + }, + { + "epoch": 6.722827882391807, + "grad_norm": 0.7110750675201416, + "learning_rate": 8.621174858373406e-06, + "loss": 0.1871, + "step": 40700 + }, + { + "epoch": 6.739345887016849, + "grad_norm": 0.6737387776374817, + "learning_rate": 8.61505707888265e-06, + "loss": 0.1634, + "step": 40800 + }, + { + "epoch": 6.75586389164189, + "grad_norm": 0.9051392078399658, + "learning_rate": 8.608939299391893e-06, + "loss": 0.1535, + "step": 40900 + }, + { + "epoch": 6.772381896266931, + "grad_norm": 0.7674338221549988, + "learning_rate": 8.602821519901138e-06, + "loss": 0.1512, + "step": 41000 + }, + { + "epoch": 6.772381896266931, + "eval_cer": 0.046300732324960646, + "eval_loss": 0.13961651921272278, + "eval_runtime": 48.6061, + "eval_samples_per_second": 34.79, + "eval_steps_per_second": 8.703, + "eval_wer": 0.2526697058639855, + "step": 41000 + }, + { + "epoch": 6.788899900891972, + "grad_norm": 0.9694798588752747, + "learning_rate": 8.596703740410381e-06, + "loss": 0.1555, + "step": 41100 + }, + { + "epoch": 6.805417905517014, + "grad_norm": 0.7667569518089294, + "learning_rate": 8.590585960919625e-06, + "loss": 0.1791, + "step": 41200 + }, + { + "epoch": 6.821935910142055, + "grad_norm": 0.8261126279830933, + "learning_rate": 8.584468181428868e-06, + "loss": 0.1554, + "step": 41300 + }, + { + "epoch": 6.838453914767096, + "grad_norm": 0.8309662342071533, + "learning_rate": 8.578350401938113e-06, + "loss": 0.1863, + "step": 41400 + }, + { + "epoch": 6.8549719193921375, + "grad_norm": 0.8523270487785339, + "learning_rate": 8.572232622447357e-06, + "loss": 0.1847, + "step": 41500 + }, + { + "epoch": 6.871489924017179, + "grad_norm": 0.7406333684921265, + "learning_rate": 8.5661148429566e-06, + "loss": 0.1486, + "step": 41600 + }, + { + "epoch": 6.88800792864222, + "grad_norm": 0.6426775455474854, + "learning_rate": 8.559997063465845e-06, + "loss": 0.1485, + "step": 41700 + }, + { + "epoch": 6.904525933267261, + "grad_norm": 0.705653727054596, + "learning_rate": 8.553879283975089e-06, + "loss": 0.1645, + "step": 41800 + }, + { + "epoch": 6.9210439378923025, + "grad_norm": 0.7143226265907288, + "learning_rate": 8.547761504484332e-06, + "loss": 0.1483, + "step": 41900 + }, + { + "epoch": 6.937561942517344, + "grad_norm": 0.6951993107795715, + "learning_rate": 8.541643724993578e-06, + "loss": 0.1497, + "step": 42000 + }, + { + "epoch": 6.937561942517344, + "eval_cer": 0.046266511532407094, + "eval_loss": 0.13769099116325378, + "eval_runtime": 48.8917, + "eval_samples_per_second": 34.587, + "eval_steps_per_second": 8.652, + "eval_wer": 0.2522950103041279, + "step": 42000 + }, + { + "epoch": 6.954079947142385, + "grad_norm": 0.781966507434845, + "learning_rate": 8.535525945502821e-06, + "loss": 0.1469, + "step": 42100 + }, + { + "epoch": 6.970597951767426, + "grad_norm": 0.7758731842041016, + "learning_rate": 8.529408166012065e-06, + "loss": 0.1523, + "step": 42200 + }, + { + "epoch": 6.987115956392468, + "grad_norm": 0.9695360064506531, + "learning_rate": 8.52329038652131e-06, + "loss": 0.1475, + "step": 42300 + }, + { + "epoch": 7.003633961017509, + "grad_norm": 0.8952251672744751, + "learning_rate": 8.517172607030553e-06, + "loss": 0.1509, + "step": 42400 + }, + { + "epoch": 7.02015196564255, + "grad_norm": 0.649591326713562, + "learning_rate": 8.511054827539797e-06, + "loss": 0.1454, + "step": 42500 + }, + { + "epoch": 7.0366699702675914, + "grad_norm": 0.6509200930595398, + "learning_rate": 8.504937048049042e-06, + "loss": 0.1546, + "step": 42600 + }, + { + "epoch": 7.053187974892633, + "grad_norm": 0.7854897975921631, + "learning_rate": 8.498819268558285e-06, + "loss": 0.1519, + "step": 42700 + }, + { + "epoch": 7.069705979517674, + "grad_norm": 0.7244015336036682, + "learning_rate": 8.492701489067529e-06, + "loss": 0.1485, + "step": 42800 + }, + { + "epoch": 7.086223984142715, + "grad_norm": 0.8875409960746765, + "learning_rate": 8.486583709576772e-06, + "loss": 0.1577, + "step": 42900 + }, + { + "epoch": 7.1027419887677565, + "grad_norm": 1.101585030555725, + "learning_rate": 8.480465930086017e-06, + "loss": 0.158, + "step": 43000 + }, + { + "epoch": 7.1027419887677565, + "eval_cer": 0.045624871672027924, + "eval_loss": 0.13763436675071716, + "eval_runtime": 48.2628, + "eval_samples_per_second": 35.037, + "eval_steps_per_second": 8.765, + "eval_wer": 0.24960969212514833, + "step": 43000 + }, + { + "epoch": 7.119259993392798, + "grad_norm": 0.7627003788948059, + "learning_rate": 8.47434815059526e-06, + "loss": 0.1601, + "step": 43100 + }, + { + "epoch": 7.135777998017839, + "grad_norm": 0.9609680771827698, + "learning_rate": 8.468230371104504e-06, + "loss": 0.1512, + "step": 43200 + }, + { + "epoch": 7.15229600264288, + "grad_norm": 0.9379695057868958, + "learning_rate": 8.462112591613748e-06, + "loss": 0.153, + "step": 43300 + }, + { + "epoch": 7.168814007267922, + "grad_norm": 0.6494946479797363, + "learning_rate": 8.455994812122993e-06, + "loss": 0.1518, + "step": 43400 + }, + { + "epoch": 7.185332011892964, + "grad_norm": 1.0411746501922607, + "learning_rate": 8.449877032632236e-06, + "loss": 0.1513, + "step": 43500 + }, + { + "epoch": 7.201850016518005, + "grad_norm": 0.6832746863365173, + "learning_rate": 8.44375925314148e-06, + "loss": 0.146, + "step": 43600 + }, + { + "epoch": 7.218368021143046, + "grad_norm": 0.7576162219047546, + "learning_rate": 8.437641473650723e-06, + "loss": 0.153, + "step": 43700 + }, + { + "epoch": 7.234886025768088, + "grad_norm": 0.7100921273231506, + "learning_rate": 8.431523694159968e-06, + "loss": 0.1504, + "step": 43800 + }, + { + "epoch": 7.251404030393129, + "grad_norm": 1.0959173440933228, + "learning_rate": 8.425405914669212e-06, + "loss": 0.1456, + "step": 43900 + }, + { + "epoch": 7.26792203501817, + "grad_norm": 0.6423998475074768, + "learning_rate": 8.419288135178455e-06, + "loss": 0.1498, + "step": 44000 + }, + { + "epoch": 7.26792203501817, + "eval_cer": 0.04530832934090753, + "eval_loss": 0.13714681565761566, + "eval_runtime": 48.9228, + "eval_samples_per_second": 34.565, + "eval_steps_per_second": 8.646, + "eval_wer": 0.24786111284581278, + "step": 44000 + }, + { + "epoch": 7.284440039643211, + "grad_norm": 0.8662620782852173, + "learning_rate": 8.4131703556877e-06, + "loss": 0.152, + "step": 44100 + }, + { + "epoch": 7.300958044268253, + "grad_norm": 0.6693345308303833, + "learning_rate": 8.407052576196944e-06, + "loss": 0.1499, + "step": 44200 + }, + { + "epoch": 7.317476048893294, + "grad_norm": 1.1424570083618164, + "learning_rate": 8.400934796706189e-06, + "loss": 0.1524, + "step": 44300 + }, + { + "epoch": 7.333994053518335, + "grad_norm": 0.9028821587562561, + "learning_rate": 8.394817017215433e-06, + "loss": 0.1433, + "step": 44400 + }, + { + "epoch": 7.3505120581433765, + "grad_norm": 0.7449358701705933, + "learning_rate": 8.388699237724676e-06, + "loss": 0.1497, + "step": 44500 + }, + { + "epoch": 7.367030062768418, + "grad_norm": 0.8447193503379822, + "learning_rate": 8.382581458233921e-06, + "loss": 0.1531, + "step": 44600 + }, + { + "epoch": 7.383548067393459, + "grad_norm": 0.661593496799469, + "learning_rate": 8.376463678743165e-06, + "loss": 0.1463, + "step": 44700 + }, + { + "epoch": 7.4000660720185, + "grad_norm": 0.9252416491508484, + "learning_rate": 8.370345899252408e-06, + "loss": 0.1459, + "step": 44800 + }, + { + "epoch": 7.416584076643542, + "grad_norm": 0.8766170740127563, + "learning_rate": 8.364228119761652e-06, + "loss": 0.1447, + "step": 44900 + }, + { + "epoch": 7.433102081268583, + "grad_norm": 0.7817343473434448, + "learning_rate": 8.358110340270897e-06, + "loss": 0.1477, + "step": 45000 + }, + { + "epoch": 7.433102081268583, + "eval_cer": 0.04499178700978715, + "eval_loss": 0.13690289855003357, + "eval_runtime": 48.2135, + "eval_samples_per_second": 35.073, + "eval_steps_per_second": 8.773, + "eval_wer": 0.24592518578654843, + "step": 45000 + }, + { + "epoch": 7.449620085893624, + "grad_norm": 0.891497790813446, + "learning_rate": 8.35199256078014e-06, + "loss": 0.1461, + "step": 45100 + }, + { + "epoch": 7.466138090518665, + "grad_norm": 0.7994738221168518, + "learning_rate": 8.345874781289384e-06, + "loss": 0.1498, + "step": 45200 + }, + { + "epoch": 7.482656095143707, + "grad_norm": 0.9430075287818909, + "learning_rate": 8.339757001798627e-06, + "loss": 0.1939, + "step": 45300 + }, + { + "epoch": 7.499174099768748, + "grad_norm": 0.7117358446121216, + "learning_rate": 8.333639222307872e-06, + "loss": 0.149, + "step": 45400 + }, + { + "epoch": 7.515692104393789, + "grad_norm": 0.6447555422782898, + "learning_rate": 8.327521442817116e-06, + "loss": 0.1509, + "step": 45500 + }, + { + "epoch": 7.5322101090188305, + "grad_norm": 0.6948108077049255, + "learning_rate": 8.32140366332636e-06, + "loss": 0.1488, + "step": 45600 + }, + { + "epoch": 7.548728113643872, + "grad_norm": 1.0623235702514648, + "learning_rate": 8.315285883835603e-06, + "loss": 0.135, + "step": 45700 + }, + { + "epoch": 7.565246118268913, + "grad_norm": 0.7654304504394531, + "learning_rate": 8.309168104344848e-06, + "loss": 0.1352, + "step": 45800 + }, + { + "epoch": 7.581764122893954, + "grad_norm": 0.8501843810081482, + "learning_rate": 8.303050324854091e-06, + "loss": 0.1487, + "step": 45900 + }, + { + "epoch": 7.598282127518996, + "grad_norm": 0.7539622783660889, + "learning_rate": 8.296932545363335e-06, + "loss": 0.1449, + "step": 46000 + }, + { + "epoch": 7.598282127518996, + "eval_cer": 0.04526555335021559, + "eval_loss": 0.13489525020122528, + "eval_runtime": 48.4075, + "eval_samples_per_second": 34.933, + "eval_steps_per_second": 8.738, + "eval_wer": 0.24605008430650097, + "step": 46000 + }, + { + "epoch": 7.614800132144037, + "grad_norm": 0.679431140422821, + "learning_rate": 8.29081476587258e-06, + "loss": 0.1418, + "step": 46100 + }, + { + "epoch": 7.631318136769078, + "grad_norm": 0.9577202796936035, + "learning_rate": 8.284696986381823e-06, + "loss": 0.1444, + "step": 46200 + }, + { + "epoch": 7.647836141394119, + "grad_norm": 0.7873533964157104, + "learning_rate": 8.278579206891067e-06, + "loss": 0.1443, + "step": 46300 + }, + { + "epoch": 7.664354146019161, + "grad_norm": 0.9830496907234192, + "learning_rate": 8.272461427400312e-06, + "loss": 0.1447, + "step": 46400 + }, + { + "epoch": 7.680872150644202, + "grad_norm": 0.7039462327957153, + "learning_rate": 8.266343647909555e-06, + "loss": 0.1546, + "step": 46500 + }, + { + "epoch": 7.697390155269243, + "grad_norm": 1.1326159238815308, + "learning_rate": 8.260225868418799e-06, + "loss": 0.1681, + "step": 46600 + }, + { + "epoch": 7.7139081598942845, + "grad_norm": 0.9435326457023621, + "learning_rate": 8.254108088928044e-06, + "loss": 0.1441, + "step": 46700 + }, + { + "epoch": 7.730426164519326, + "grad_norm": 1.0461517572402954, + "learning_rate": 8.247990309437287e-06, + "loss": 0.1438, + "step": 46800 + }, + { + "epoch": 7.746944169144367, + "grad_norm": 0.6983148455619812, + "learning_rate": 8.241872529946531e-06, + "loss": 0.1461, + "step": 46900 + }, + { + "epoch": 7.763462173769408, + "grad_norm": 0.6941544413566589, + "learning_rate": 8.235754750455776e-06, + "loss": 0.1414, + "step": 47000 + }, + { + "epoch": 7.763462173769408, + "eval_cer": 0.044256039969885703, + "eval_loss": 0.13219207525253296, + "eval_runtime": 48.7223, + "eval_samples_per_second": 34.707, + "eval_steps_per_second": 8.682, + "eval_wer": 0.24261537500780617, + "step": 47000 + }, + { + "epoch": 7.7799801783944496, + "grad_norm": 0.7406185865402222, + "learning_rate": 8.22963697096502e-06, + "loss": 0.1462, + "step": 47100 + }, + { + "epoch": 7.796498183019491, + "grad_norm": 0.7421987652778625, + "learning_rate": 8.223519191474263e-06, + "loss": 0.1377, + "step": 47200 + }, + { + "epoch": 7.813016187644532, + "grad_norm": 0.9080411195755005, + "learning_rate": 8.217401411983506e-06, + "loss": 0.1447, + "step": 47300 + }, + { + "epoch": 7.829534192269573, + "grad_norm": 0.9030922651290894, + "learning_rate": 8.211283632492752e-06, + "loss": 0.1488, + "step": 47400 + }, + { + "epoch": 7.846052196894615, + "grad_norm": 0.8082269430160522, + "learning_rate": 8.205165853001995e-06, + "loss": 0.1695, + "step": 47500 + }, + { + "epoch": 7.862570201519657, + "grad_norm": 0.8809986114501953, + "learning_rate": 8.199048073511239e-06, + "loss": 0.144, + "step": 47600 + }, + { + "epoch": 7.879088206144698, + "grad_norm": 0.6915613412857056, + "learning_rate": 8.192930294020482e-06, + "loss": 0.172, + "step": 47700 + }, + { + "epoch": 7.895606210769739, + "grad_norm": 0.8618057370185852, + "learning_rate": 8.186812514529727e-06, + "loss": 0.1362, + "step": 47800 + }, + { + "epoch": 7.912124215394781, + "grad_norm": 0.8087924122810364, + "learning_rate": 8.18069473503897e-06, + "loss": 0.1714, + "step": 47900 + }, + { + "epoch": 7.928642220019822, + "grad_norm": 1.0039881467819214, + "learning_rate": 8.174576955548214e-06, + "loss": 0.148, + "step": 48000 + }, + { + "epoch": 7.928642220019822, + "eval_cer": 0.04386250085551981, + "eval_loss": 0.13279926776885986, + "eval_runtime": 48.5969, + "eval_samples_per_second": 34.796, + "eval_steps_per_second": 8.704, + "eval_wer": 0.24136638980828076, + "step": 48000 + }, + { + "epoch": 7.945160224644863, + "grad_norm": 0.8676062822341919, + "learning_rate": 8.168459176057458e-06, + "loss": 0.1426, + "step": 48100 + }, + { + "epoch": 7.961678229269904, + "grad_norm": 0.7867946028709412, + "learning_rate": 8.162341396566703e-06, + "loss": 0.149, + "step": 48200 + }, + { + "epoch": 7.978196233894946, + "grad_norm": 0.7121617794036865, + "learning_rate": 8.156223617075946e-06, + "loss": 0.1447, + "step": 48300 + }, + { + "epoch": 7.994714238519987, + "grad_norm": 0.7431650757789612, + "learning_rate": 8.15010583758519e-06, + "loss": 0.1352, + "step": 48400 + }, + { + "epoch": 8.011232243145027, + "grad_norm": 1.1219011545181274, + "learning_rate": 8.143988058094435e-06, + "loss": 0.1427, + "step": 48500 + }, + { + "epoch": 8.027750247770069, + "grad_norm": 0.6559448838233948, + "learning_rate": 8.137870278603678e-06, + "loss": 0.1434, + "step": 48600 + }, + { + "epoch": 8.04426825239511, + "grad_norm": 0.7558749318122864, + "learning_rate": 8.131752499112923e-06, + "loss": 0.1396, + "step": 48700 + }, + { + "epoch": 8.060786257020151, + "grad_norm": 0.7132017016410828, + "learning_rate": 8.125634719622167e-06, + "loss": 0.1384, + "step": 48800 + }, + { + "epoch": 8.077304261645192, + "grad_norm": 0.7818734645843506, + "learning_rate": 8.11951694013141e-06, + "loss": 0.1386, + "step": 48900 + }, + { + "epoch": 8.093822266270234, + "grad_norm": 1.4067357778549194, + "learning_rate": 8.113399160640656e-06, + "loss": 0.1356, + "step": 49000 + }, + { + "epoch": 8.093822266270234, + "eval_cer": 0.04489767983026487, + "eval_loss": 0.1310672014951706, + "eval_runtime": 48.4665, + "eval_samples_per_second": 34.89, + "eval_steps_per_second": 8.728, + "eval_wer": 0.2447386498469993, + "step": 49000 + }, + { + "epoch": 8.110340270895275, + "grad_norm": 0.6177778840065002, + "learning_rate": 8.107281381149899e-06, + "loss": 0.1602, + "step": 49100 + }, + { + "epoch": 8.126858275520316, + "grad_norm": 0.6477259993553162, + "learning_rate": 8.101163601659142e-06, + "loss": 0.1356, + "step": 49200 + }, + { + "epoch": 8.14337628014536, + "grad_norm": 0.7413625717163086, + "learning_rate": 8.095045822168388e-06, + "loss": 0.1564, + "step": 49300 + }, + { + "epoch": 8.1598942847704, + "grad_norm": 0.6410078406333923, + "learning_rate": 8.088928042677631e-06, + "loss": 0.1344, + "step": 49400 + }, + { + "epoch": 8.176412289395442, + "grad_norm": 0.7430760860443115, + "learning_rate": 8.082810263186875e-06, + "loss": 0.1423, + "step": 49500 + }, + { + "epoch": 8.192930294020483, + "grad_norm": 0.7136946320533752, + "learning_rate": 8.076692483696118e-06, + "loss": 0.1328, + "step": 49600 + }, + { + "epoch": 8.209448298645524, + "grad_norm": 0.6934331059455872, + "learning_rate": 8.070574704205363e-06, + "loss": 0.1372, + "step": 49700 + }, + { + "epoch": 8.225966303270566, + "grad_norm": 0.9232444167137146, + "learning_rate": 8.064456924714607e-06, + "loss": 0.1333, + "step": 49800 + }, + { + "epoch": 8.242484307895607, + "grad_norm": 0.9037547707557678, + "learning_rate": 8.05833914522385e-06, + "loss": 0.1304, + "step": 49900 + }, + { + "epoch": 8.259002312520648, + "grad_norm": 0.7326195240020752, + "learning_rate": 8.052221365733094e-06, + "loss": 0.1426, + "step": 50000 + }, + { + "epoch": 8.259002312520648, + "eval_cer": 0.043101088221203204, + "eval_loss": 0.1275395154953003, + "eval_runtime": 48.7537, + "eval_samples_per_second": 34.685, + "eval_steps_per_second": 8.676, + "eval_wer": 0.23793168050958596, + "step": 50000 + }, + { + "epoch": 8.27552031714569, + "grad_norm": 0.8698177933692932, + "learning_rate": 8.046103586242339e-06, + "loss": 0.1436, + "step": 50100 + }, + { + "epoch": 8.29203832177073, + "grad_norm": 0.6039656400680542, + "learning_rate": 8.039985806751582e-06, + "loss": 0.1381, + "step": 50200 + }, + { + "epoch": 8.308556326395772, + "grad_norm": 0.6586022973060608, + "learning_rate": 8.033868027260826e-06, + "loss": 0.1419, + "step": 50300 + }, + { + "epoch": 8.325074331020813, + "grad_norm": 0.7226503491401672, + "learning_rate": 8.027750247770069e-06, + "loss": 0.1383, + "step": 50400 + }, + { + "epoch": 8.341592335645855, + "grad_norm": 0.6569647192955017, + "learning_rate": 8.021632468279314e-06, + "loss": 0.14, + "step": 50500 + }, + { + "epoch": 8.358110340270896, + "grad_norm": 0.7345595955848694, + "learning_rate": 8.015514688788558e-06, + "loss": 0.1335, + "step": 50600 + }, + { + "epoch": 8.374628344895937, + "grad_norm": 0.9025945067405701, + "learning_rate": 8.009396909297801e-06, + "loss": 0.1327, + "step": 50700 + }, + { + "epoch": 8.391146349520978, + "grad_norm": 0.693868100643158, + "learning_rate": 8.003279129807046e-06, + "loss": 0.1405, + "step": 50800 + }, + { + "epoch": 8.40766435414602, + "grad_norm": 0.726399302482605, + "learning_rate": 7.99716135031629e-06, + "loss": 0.1412, + "step": 50900 + }, + { + "epoch": 8.424182358771061, + "grad_norm": 0.8612838983535767, + "learning_rate": 7.991043570825533e-06, + "loss": 0.1605, + "step": 51000 + }, + { + "epoch": 8.424182358771061, + "eval_cer": 0.04358873451509137, + "eval_loss": 0.12762609124183655, + "eval_runtime": 48.4653, + "eval_samples_per_second": 34.891, + "eval_steps_per_second": 8.728, + "eval_wer": 0.2394929120089927, + "step": 51000 + }, + { + "epoch": 8.440700363396102, + "grad_norm": 0.7443174719810486, + "learning_rate": 7.984925791334778e-06, + "loss": 0.1586, + "step": 51100 + }, + { + "epoch": 8.457218368021143, + "grad_norm": 0.8640626668930054, + "learning_rate": 7.978808011844022e-06, + "loss": 0.1315, + "step": 51200 + }, + { + "epoch": 8.473736372646185, + "grad_norm": 0.8193531632423401, + "learning_rate": 7.972690232353267e-06, + "loss": 0.1345, + "step": 51300 + }, + { + "epoch": 8.490254377271226, + "grad_norm": 0.922201931476593, + "learning_rate": 7.96657245286251e-06, + "loss": 0.1406, + "step": 51400 + }, + { + "epoch": 8.506772381896267, + "grad_norm": 0.6063607931137085, + "learning_rate": 7.960454673371754e-06, + "loss": 0.1311, + "step": 51500 + }, + { + "epoch": 8.523290386521309, + "grad_norm": 0.6972509622573853, + "learning_rate": 7.954336893880997e-06, + "loss": 0.1313, + "step": 51600 + }, + { + "epoch": 8.53980839114635, + "grad_norm": 0.8324514627456665, + "learning_rate": 7.948219114390243e-06, + "loss": 0.1357, + "step": 51700 + }, + { + "epoch": 8.556326395771391, + "grad_norm": 0.8537706136703491, + "learning_rate": 7.942101334899486e-06, + "loss": 0.1326, + "step": 51800 + }, + { + "epoch": 8.572844400396432, + "grad_norm": 0.7509739398956299, + "learning_rate": 7.93598355540873e-06, + "loss": 0.141, + "step": 51900 + }, + { + "epoch": 8.589362405021474, + "grad_norm": 0.8359591364860535, + "learning_rate": 7.929865775917973e-06, + "loss": 0.1619, + "step": 52000 + }, + { + "epoch": 8.589362405021474, + "eval_cer": 0.04345185134487715, + "eval_loss": 0.128912091255188, + "eval_runtime": 48.8649, + "eval_samples_per_second": 34.606, + "eval_steps_per_second": 8.657, + "eval_wer": 0.23874352088927747, + "step": 52000 + }, + { + "epoch": 8.605880409646515, + "grad_norm": 0.9481146335601807, + "learning_rate": 7.923747996427218e-06, + "loss": 0.1305, + "step": 52100 + }, + { + "epoch": 8.622398414271556, + "grad_norm": 0.7615697979927063, + "learning_rate": 7.917630216936462e-06, + "loss": 0.1575, + "step": 52200 + }, + { + "epoch": 8.638916418896597, + "grad_norm": 0.8259508609771729, + "learning_rate": 7.911512437445705e-06, + "loss": 0.1378, + "step": 52300 + }, + { + "epoch": 8.655434423521639, + "grad_norm": 2.3674380779266357, + "learning_rate": 7.905394657954948e-06, + "loss": 0.1279, + "step": 52400 + }, + { + "epoch": 8.67195242814668, + "grad_norm": 0.6892039179801941, + "learning_rate": 7.899276878464194e-06, + "loss": 0.1387, + "step": 52500 + }, + { + "epoch": 8.688470432771721, + "grad_norm": 0.7626641988754272, + "learning_rate": 7.893159098973437e-06, + "loss": 0.1354, + "step": 52600 + }, + { + "epoch": 8.704988437396763, + "grad_norm": 0.9490280747413635, + "learning_rate": 7.88704131948268e-06, + "loss": 0.1273, + "step": 52700 + }, + { + "epoch": 8.721506442021804, + "grad_norm": 0.7699686884880066, + "learning_rate": 7.880923539991924e-06, + "loss": 0.1277, + "step": 52800 + }, + { + "epoch": 8.738024446646845, + "grad_norm": 0.819313108921051, + "learning_rate": 7.87480576050117e-06, + "loss": 0.1337, + "step": 52900 + }, + { + "epoch": 8.754542451271886, + "grad_norm": 0.7003619074821472, + "learning_rate": 7.868687981010413e-06, + "loss": 0.1594, + "step": 53000 + }, + { + "epoch": 8.754542451271886, + "eval_cer": 0.042955649852850594, + "eval_loss": 0.12719017267227173, + "eval_runtime": 48.6494, + "eval_samples_per_second": 34.759, + "eval_steps_per_second": 8.695, + "eval_wer": 0.237244738649847, + "step": 53000 + }, + { + "epoch": 8.771060455896928, + "grad_norm": 0.6663370728492737, + "learning_rate": 7.862570201519658e-06, + "loss": 0.1376, + "step": 53100 + }, + { + "epoch": 8.787578460521969, + "grad_norm": 0.5805346369743347, + "learning_rate": 7.856452422028901e-06, + "loss": 0.1379, + "step": 53200 + }, + { + "epoch": 8.80409646514701, + "grad_norm": 0.7736044526100159, + "learning_rate": 7.850334642538145e-06, + "loss": 0.1323, + "step": 53300 + }, + { + "epoch": 8.820614469772051, + "grad_norm": 0.9529663324356079, + "learning_rate": 7.84421686304739e-06, + "loss": 0.1289, + "step": 53400 + }, + { + "epoch": 8.837132474397093, + "grad_norm": 0.583885908126831, + "learning_rate": 7.838099083556633e-06, + "loss": 0.1322, + "step": 53500 + }, + { + "epoch": 8.853650479022134, + "grad_norm": 0.7250145673751831, + "learning_rate": 7.831981304065877e-06, + "loss": 0.133, + "step": 53600 + }, + { + "epoch": 8.870168483647175, + "grad_norm": 0.6262508034706116, + "learning_rate": 7.825863524575122e-06, + "loss": 0.1599, + "step": 53700 + }, + { + "epoch": 8.886686488272217, + "grad_norm": 0.7456868290901184, + "learning_rate": 7.819745745084365e-06, + "loss": 0.1389, + "step": 53800 + }, + { + "epoch": 8.903204492897258, + "grad_norm": 0.944975733757019, + "learning_rate": 7.813627965593609e-06, + "loss": 0.1339, + "step": 53900 + }, + { + "epoch": 8.919722497522299, + "grad_norm": 0.7597010731697083, + "learning_rate": 7.807510186102852e-06, + "loss": 0.1247, + "step": 54000 + }, + { + "epoch": 8.919722497522299, + "eval_cer": 0.042852987475189924, + "eval_loss": 0.12474211305379868, + "eval_runtime": 48.5952, + "eval_samples_per_second": 34.798, + "eval_steps_per_second": 8.705, + "eval_wer": 0.23530881159058264, + "step": 54000 + }, + { + "epoch": 8.93624050214734, + "grad_norm": 0.8667477965354919, + "learning_rate": 7.801392406612098e-06, + "loss": 0.1305, + "step": 54100 + }, + { + "epoch": 8.952758506772382, + "grad_norm": 0.7182130813598633, + "learning_rate": 7.795274627121341e-06, + "loss": 0.1315, + "step": 54200 + }, + { + "epoch": 8.969276511397423, + "grad_norm": 0.7797411680221558, + "learning_rate": 7.789156847630584e-06, + "loss": 0.1342, + "step": 54300 + }, + { + "epoch": 8.985794516022464, + "grad_norm": 0.7155345678329468, + "learning_rate": 7.783039068139828e-06, + "loss": 0.128, + "step": 54400 + }, + { + "epoch": 9.002312520647505, + "grad_norm": 0.8948495388031006, + "learning_rate": 7.776921288649073e-06, + "loss": 0.1352, + "step": 54500 + }, + { + "epoch": 9.018830525272547, + "grad_norm": 1.397560954093933, + "learning_rate": 7.770803509158317e-06, + "loss": 0.1362, + "step": 54600 + }, + { + "epoch": 9.035348529897588, + "grad_norm": 0.7347814440727234, + "learning_rate": 7.76468572966756e-06, + "loss": 0.1331, + "step": 54700 + }, + { + "epoch": 9.05186653452263, + "grad_norm": 0.9538244009017944, + "learning_rate": 7.758567950176803e-06, + "loss": 0.1339, + "step": 54800 + }, + { + "epoch": 9.06838453914767, + "grad_norm": 0.7012799382209778, + "learning_rate": 7.752450170686049e-06, + "loss": 0.1572, + "step": 54900 + }, + { + "epoch": 9.084902543772712, + "grad_norm": 0.6902897357940674, + "learning_rate": 7.746332391195292e-06, + "loss": 0.1382, + "step": 55000 + }, + { + "epoch": 9.084902543772712, + "eval_cer": 0.04332352337280131, + "eval_loss": 0.1268453598022461, + "eval_runtime": 48.6908, + "eval_samples_per_second": 34.729, + "eval_steps_per_second": 8.687, + "eval_wer": 0.23674514457003684, + "step": 55000 + }, + { + "epoch": 9.101420548397753, + "grad_norm": 0.785682737827301, + "learning_rate": 7.740214611704536e-06, + "loss": 0.1246, + "step": 55100 + }, + { + "epoch": 9.117938553022794, + "grad_norm": 0.6153995990753174, + "learning_rate": 7.73409683221378e-06, + "loss": 0.1308, + "step": 55200 + }, + { + "epoch": 9.134456557647836, + "grad_norm": 0.9429919719696045, + "learning_rate": 7.727979052723024e-06, + "loss": 0.1297, + "step": 55300 + }, + { + "epoch": 9.150974562272877, + "grad_norm": 0.509573221206665, + "learning_rate": 7.721861273232268e-06, + "loss": 0.1536, + "step": 55400 + }, + { + "epoch": 9.167492566897918, + "grad_norm": 0.8129322528839111, + "learning_rate": 7.715743493741513e-06, + "loss": 0.1335, + "step": 55500 + }, + { + "epoch": 9.18401057152296, + "grad_norm": 0.6652195453643799, + "learning_rate": 7.709625714250756e-06, + "loss": 0.1227, + "step": 55600 + }, + { + "epoch": 9.200528576148, + "grad_norm": 0.9443718791007996, + "learning_rate": 7.703507934760001e-06, + "loss": 0.1327, + "step": 55700 + }, + { + "epoch": 9.217046580773042, + "grad_norm": 1.0480358600616455, + "learning_rate": 7.697390155269245e-06, + "loss": 0.1333, + "step": 55800 + }, + { + "epoch": 9.233564585398083, + "grad_norm": 0.8347544074058533, + "learning_rate": 7.691272375778488e-06, + "loss": 0.1265, + "step": 55900 + }, + { + "epoch": 9.250082590023124, + "grad_norm": 0.9064726233482361, + "learning_rate": 7.685154596287732e-06, + "loss": 0.1263, + "step": 56000 + }, + { + "epoch": 9.250082590023124, + "eval_cer": 0.043246526589555814, + "eval_loss": 0.12526705861091614, + "eval_runtime": 48.6324, + "eval_samples_per_second": 34.771, + "eval_steps_per_second": 8.698, + "eval_wer": 0.23574595641041654, + "step": 56000 + }, + { + "epoch": 9.266600594648166, + "grad_norm": 0.6867343783378601, + "learning_rate": 7.679036816796977e-06, + "loss": 0.1634, + "step": 56100 + }, + { + "epoch": 9.283118599273207, + "grad_norm": 0.5822398662567139, + "learning_rate": 7.67291903730622e-06, + "loss": 0.1232, + "step": 56200 + }, + { + "epoch": 9.299636603898248, + "grad_norm": 0.70078444480896, + "learning_rate": 7.666801257815464e-06, + "loss": 0.1334, + "step": 56300 + }, + { + "epoch": 9.31615460852329, + "grad_norm": 0.7892748117446899, + "learning_rate": 7.660683478324707e-06, + "loss": 0.1364, + "step": 56400 + }, + { + "epoch": 9.33267261314833, + "grad_norm": 0.6174075603485107, + "learning_rate": 7.654565698833952e-06, + "loss": 0.1279, + "step": 56500 + }, + { + "epoch": 9.349190617773372, + "grad_norm": 0.5943549275398254, + "learning_rate": 7.648447919343196e-06, + "loss": 0.1211, + "step": 56600 + }, + { + "epoch": 9.365708622398415, + "grad_norm": 0.7577424645423889, + "learning_rate": 7.64233013985244e-06, + "loss": 0.1282, + "step": 56700 + }, + { + "epoch": 9.382226627023456, + "grad_norm": 0.6361554861068726, + "learning_rate": 7.636212360361683e-06, + "loss": 0.146, + "step": 56800 + }, + { + "epoch": 9.398744631648498, + "grad_norm": 0.7385000586509705, + "learning_rate": 7.630094580870928e-06, + "loss": 0.1218, + "step": 56900 + }, + { + "epoch": 9.415262636273539, + "grad_norm": 0.8645774126052856, + "learning_rate": 7.6239768013801715e-06, + "loss": 0.1296, + "step": 57000 + }, + { + "epoch": 9.415262636273539, + "eval_cer": 0.041988912463212645, + "eval_loss": 0.12412171810865402, + "eval_runtime": 48.7466, + "eval_samples_per_second": 34.69, + "eval_steps_per_second": 8.678, + "eval_wer": 0.23099981265222008, + "step": 57000 + }, + { + "epoch": 9.43178064089858, + "grad_norm": 0.8043680787086487, + "learning_rate": 7.617859021889416e-06, + "loss": 0.1263, + "step": 57100 + }, + { + "epoch": 9.448298645523622, + "grad_norm": 0.6487779021263123, + "learning_rate": 7.611741242398659e-06, + "loss": 0.1409, + "step": 57200 + }, + { + "epoch": 9.464816650148663, + "grad_norm": 0.8494729399681091, + "learning_rate": 7.605623462907904e-06, + "loss": 0.1292, + "step": 57300 + }, + { + "epoch": 9.481334654773704, + "grad_norm": 0.6968827247619629, + "learning_rate": 7.599505683417148e-06, + "loss": 0.1237, + "step": 57400 + }, + { + "epoch": 9.497852659398745, + "grad_norm": 0.7528768181800842, + "learning_rate": 7.593387903926391e-06, + "loss": 0.121, + "step": 57500 + }, + { + "epoch": 9.514370664023787, + "grad_norm": 0.8891571164131165, + "learning_rate": 7.587270124435635e-06, + "loss": 0.1271, + "step": 57600 + }, + { + "epoch": 9.530888668648828, + "grad_norm": 0.60162353515625, + "learning_rate": 7.58115234494488e-06, + "loss": 0.1321, + "step": 57700 + }, + { + "epoch": 9.54740667327387, + "grad_norm": 0.9320480823516846, + "learning_rate": 7.575034565454123e-06, + "loss": 0.136, + "step": 57800 + }, + { + "epoch": 9.56392467789891, + "grad_norm": 0.7406982779502869, + "learning_rate": 7.568916785963367e-06, + "loss": 0.128, + "step": 57900 + }, + { + "epoch": 9.580442682523952, + "grad_norm": 0.753324568271637, + "learning_rate": 7.562799006472611e-06, + "loss": 0.1263, + "step": 58000 + }, + { + "epoch": 9.580442682523952, + "eval_cer": 0.042305454794333036, + "eval_loss": 0.12526877224445343, + "eval_runtime": 48.8065, + "eval_samples_per_second": 34.647, + "eval_steps_per_second": 8.667, + "eval_wer": 0.23212389933179292, + "step": 58000 + }, + { + "epoch": 9.596960687148993, + "grad_norm": 0.6665119528770447, + "learning_rate": 7.5566812269818555e-06, + "loss": 0.1285, + "step": 58100 + }, + { + "epoch": 9.613478691774034, + "grad_norm": 0.6425819993019104, + "learning_rate": 7.5505634474911e-06, + "loss": 0.1303, + "step": 58200 + }, + { + "epoch": 9.629996696399076, + "grad_norm": 0.7088574767112732, + "learning_rate": 7.544445668000343e-06, + "loss": 0.1246, + "step": 58300 + }, + { + "epoch": 9.646514701024117, + "grad_norm": 0.6304578185081482, + "learning_rate": 7.538327888509587e-06, + "loss": 0.1357, + "step": 58400 + }, + { + "epoch": 9.663032705649158, + "grad_norm": 0.9591554403305054, + "learning_rate": 7.532210109018832e-06, + "loss": 0.1352, + "step": 58500 + }, + { + "epoch": 9.6795507102742, + "grad_norm": 0.8646364808082581, + "learning_rate": 7.526092329528075e-06, + "loss": 0.1225, + "step": 58600 + }, + { + "epoch": 9.69606871489924, + "grad_norm": 0.729404866695404, + "learning_rate": 7.519974550037319e-06, + "loss": 0.1241, + "step": 58700 + }, + { + "epoch": 9.712586719524282, + "grad_norm": 0.719990611076355, + "learning_rate": 7.513856770546562e-06, + "loss": 0.1184, + "step": 58800 + }, + { + "epoch": 9.729104724149323, + "grad_norm": 0.7622655630111694, + "learning_rate": 7.507738991055807e-06, + "loss": 0.1281, + "step": 58900 + }, + { + "epoch": 9.745622728774364, + "grad_norm": 0.8316338658332825, + "learning_rate": 7.501621211565051e-06, + "loss": 0.1327, + "step": 59000 + }, + { + "epoch": 9.745622728774364, + "eval_cer": 0.042014578057627816, + "eval_loss": 0.12281199544668198, + "eval_runtime": 48.6883, + "eval_samples_per_second": 34.731, + "eval_steps_per_second": 8.688, + "eval_wer": 0.23018797227252857, + "step": 59000 + }, + { + "epoch": 9.762140733399406, + "grad_norm": 0.6978473663330078, + "learning_rate": 7.495503432074294e-06, + "loss": 0.1218, + "step": 59100 + }, + { + "epoch": 9.778658738024447, + "grad_norm": 0.7994058728218079, + "learning_rate": 7.489385652583539e-06, + "loss": 0.1217, + "step": 59200 + }, + { + "epoch": 9.795176742649488, + "grad_norm": 0.6791940927505493, + "learning_rate": 7.483267873092783e-06, + "loss": 0.1598, + "step": 59300 + }, + { + "epoch": 9.81169474727453, + "grad_norm": 0.7519752383232117, + "learning_rate": 7.477150093602027e-06, + "loss": 0.1267, + "step": 59400 + }, + { + "epoch": 9.82821275189957, + "grad_norm": 0.6616401672363281, + "learning_rate": 7.471032314111271e-06, + "loss": 0.126, + "step": 59500 + }, + { + "epoch": 9.844730756524612, + "grad_norm": 0.8174837231636047, + "learning_rate": 7.464914534620514e-06, + "loss": 0.1287, + "step": 59600 + }, + { + "epoch": 9.861248761149653, + "grad_norm": 0.746444046497345, + "learning_rate": 7.458796755129759e-06, + "loss": 0.1576, + "step": 59700 + }, + { + "epoch": 9.877766765774695, + "grad_norm": 0.9220054745674133, + "learning_rate": 7.452678975639003e-06, + "loss": 0.1396, + "step": 59800 + }, + { + "epoch": 9.894284770399736, + "grad_norm": 0.8045241832733154, + "learning_rate": 7.446561196148246e-06, + "loss": 0.1217, + "step": 59900 + }, + { + "epoch": 9.910802775024777, + "grad_norm": 0.759081244468689, + "learning_rate": 7.44044341665749e-06, + "loss": 0.1262, + "step": 60000 + }, + { + "epoch": 9.910802775024777, + "eval_cer": 0.041911915679967146, + "eval_loss": 0.12326313555240631, + "eval_runtime": 48.7409, + "eval_samples_per_second": 34.694, + "eval_steps_per_second": 8.679, + "eval_wer": 0.23018797227252857, + "step": 60000 + }, + { + "epoch": 9.927320779649818, + "grad_norm": 0.8618572354316711, + "learning_rate": 7.434325637166735e-06, + "loss": 0.1251, + "step": 60100 + }, + { + "epoch": 9.94383878427486, + "grad_norm": 0.6436170935630798, + "learning_rate": 7.428207857675978e-06, + "loss": 0.1201, + "step": 60200 + }, + { + "epoch": 9.960356788899901, + "grad_norm": 0.8590137958526611, + "learning_rate": 7.422090078185223e-06, + "loss": 0.129, + "step": 60300 + }, + { + "epoch": 9.976874793524942, + "grad_norm": 0.7668434381484985, + "learning_rate": 7.415972298694466e-06, + "loss": 0.1255, + "step": 60400 + }, + { + "epoch": 9.993392798149983, + "grad_norm": 0.7597298622131348, + "learning_rate": 7.4098545192037104e-06, + "loss": 0.1231, + "step": 60500 + }, + { + "epoch": 10.009910802775025, + "grad_norm": 0.8070718050003052, + "learning_rate": 7.403736739712955e-06, + "loss": 0.1642, + "step": 60600 + }, + { + "epoch": 10.026428807400066, + "grad_norm": 0.7364042401313782, + "learning_rate": 7.397618960222198e-06, + "loss": 0.1243, + "step": 60700 + }, + { + "epoch": 10.042946812025107, + "grad_norm": 0.8967491984367371, + "learning_rate": 7.391501180731442e-06, + "loss": 0.1321, + "step": 60800 + }, + { + "epoch": 10.059464816650149, + "grad_norm": 0.8420141339302063, + "learning_rate": 7.385383401240687e-06, + "loss": 0.1274, + "step": 60900 + }, + { + "epoch": 10.07598282127519, + "grad_norm": 0.7145309448242188, + "learning_rate": 7.37926562174993e-06, + "loss": 0.1207, + "step": 61000 + }, + { + "epoch": 10.07598282127519, + "eval_cer": 0.041920470878105534, + "eval_loss": 0.12428971379995346, + "eval_runtime": 48.748, + "eval_samples_per_second": 34.689, + "eval_steps_per_second": 8.677, + "eval_wer": 0.22881408855305066, + "step": 61000 + }, + { + "epoch": 10.092500825900231, + "grad_norm": 0.7484616041183472, + "learning_rate": 7.373147842259174e-06, + "loss": 0.1279, + "step": 61100 + }, + { + "epoch": 10.109018830525272, + "grad_norm": 0.7732555270195007, + "learning_rate": 7.367030062768418e-06, + "loss": 0.1251, + "step": 61200 + }, + { + "epoch": 10.125536835150314, + "grad_norm": 0.7943729162216187, + "learning_rate": 7.360912283277662e-06, + "loss": 0.1194, + "step": 61300 + }, + { + "epoch": 10.142054839775355, + "grad_norm": 0.7555480003356934, + "learning_rate": 7.354794503786906e-06, + "loss": 0.1171, + "step": 61400 + }, + { + "epoch": 10.158572844400396, + "grad_norm": 0.6439567804336548, + "learning_rate": 7.34867672429615e-06, + "loss": 0.1203, + "step": 61500 + }, + { + "epoch": 10.175090849025437, + "grad_norm": 0.5505064725875854, + "learning_rate": 7.342558944805394e-06, + "loss": 0.1198, + "step": 61600 + }, + { + "epoch": 10.191608853650479, + "grad_norm": 0.6508448123931885, + "learning_rate": 7.336441165314639e-06, + "loss": 0.1227, + "step": 61700 + }, + { + "epoch": 10.20812685827552, + "grad_norm": 0.6717207431793213, + "learning_rate": 7.330323385823882e-06, + "loss": 0.1258, + "step": 61800 + }, + { + "epoch": 10.224644862900561, + "grad_norm": 0.7035212516784668, + "learning_rate": 7.324205606333126e-06, + "loss": 0.1235, + "step": 61900 + }, + { + "epoch": 10.241162867525603, + "grad_norm": 0.6881560683250427, + "learning_rate": 7.318087826842369e-06, + "loss": 0.1411, + "step": 62000 + }, + { + "epoch": 10.241162867525603, + "eval_cer": 0.04166381493395387, + "eval_loss": 0.12054095417261124, + "eval_runtime": 48.5305, + "eval_samples_per_second": 34.844, + "eval_steps_per_second": 8.716, + "eval_wer": 0.22781490039343033, + "step": 62000 + }, + { + "epoch": 10.257680872150644, + "grad_norm": 0.8385311961174011, + "learning_rate": 7.311970047351614e-06, + "loss": 0.1223, + "step": 62100 + }, + { + "epoch": 10.274198876775685, + "grad_norm": 0.5517755746841431, + "learning_rate": 7.305852267860858e-06, + "loss": 0.1251, + "step": 62200 + }, + { + "epoch": 10.290716881400726, + "grad_norm": 0.736827552318573, + "learning_rate": 7.299734488370101e-06, + "loss": 0.1215, + "step": 62300 + }, + { + "epoch": 10.307234886025768, + "grad_norm": 0.8742785453796387, + "learning_rate": 7.2936167088793455e-06, + "loss": 0.1284, + "step": 62400 + }, + { + "epoch": 10.323752890650809, + "grad_norm": 0.6363995671272278, + "learning_rate": 7.28749892938859e-06, + "loss": 0.1287, + "step": 62500 + }, + { + "epoch": 10.34027089527585, + "grad_norm": 0.8067464232444763, + "learning_rate": 7.281381149897834e-06, + "loss": 0.1206, + "step": 62600 + }, + { + "epoch": 10.356788899900891, + "grad_norm": 0.7671234011650085, + "learning_rate": 7.275263370407078e-06, + "loss": 0.1216, + "step": 62700 + }, + { + "epoch": 10.373306904525933, + "grad_norm": 1.0161293745040894, + "learning_rate": 7.269145590916321e-06, + "loss": 0.1244, + "step": 62800 + }, + { + "epoch": 10.389824909150974, + "grad_norm": 0.7212845683097839, + "learning_rate": 7.263027811425566e-06, + "loss": 0.1298, + "step": 62900 + }, + { + "epoch": 10.406342913776015, + "grad_norm": 0.6176585555076599, + "learning_rate": 7.25691003193481e-06, + "loss": 0.1401, + "step": 63000 + }, + { + "epoch": 10.406342913776015, + "eval_cer": 0.04089384710149887, + "eval_loss": 0.12006353586912155, + "eval_runtime": 48.5157, + "eval_samples_per_second": 34.855, + "eval_steps_per_second": 8.719, + "eval_wer": 0.22494223443452194, + "step": 63000 + }, + { + "epoch": 10.422860918401057, + "grad_norm": 0.724654495716095, + "learning_rate": 7.250792252444053e-06, + "loss": 0.1246, + "step": 63100 + }, + { + "epoch": 10.439378923026098, + "grad_norm": 2.0829966068267822, + "learning_rate": 7.244674472953297e-06, + "loss": 0.1206, + "step": 63200 + }, + { + "epoch": 10.455896927651139, + "grad_norm": 0.7068758606910706, + "learning_rate": 7.238556693462542e-06, + "loss": 0.1214, + "step": 63300 + }, + { + "epoch": 10.47241493227618, + "grad_norm": 0.717832624912262, + "learning_rate": 7.232438913971785e-06, + "loss": 0.1233, + "step": 63400 + }, + { + "epoch": 10.488932936901222, + "grad_norm": 0.7591824531555176, + "learning_rate": 7.226321134481029e-06, + "loss": 0.1223, + "step": 63500 + }, + { + "epoch": 10.505450941526263, + "grad_norm": 0.8358705639839172, + "learning_rate": 7.220203354990273e-06, + "loss": 0.117, + "step": 63600 + }, + { + "epoch": 10.521968946151304, + "grad_norm": 0.7193006277084351, + "learning_rate": 7.214085575499517e-06, + "loss": 0.1229, + "step": 63700 + }, + { + "epoch": 10.538486950776345, + "grad_norm": 0.8279296159744263, + "learning_rate": 7.207967796008762e-06, + "loss": 0.1304, + "step": 63800 + }, + { + "epoch": 10.555004955401387, + "grad_norm": 0.9237922430038452, + "learning_rate": 7.201850016518005e-06, + "loss": 0.121, + "step": 63900 + }, + { + "epoch": 10.571522960026428, + "grad_norm": 0.6493191719055176, + "learning_rate": 7.1957322370272486e-06, + "loss": 0.1165, + "step": 64000 + }, + { + "epoch": 10.571522960026428, + "eval_cer": 0.04139860379166382, + "eval_loss": 0.12036388367414474, + "eval_runtime": 48.7181, + "eval_samples_per_second": 34.71, + "eval_steps_per_second": 8.683, + "eval_wer": 0.2271279585336914, + "step": 64000 + }, + { + "epoch": 10.58804096465147, + "grad_norm": 0.9154326319694519, + "learning_rate": 7.189614457536494e-06, + "loss": 0.1184, + "step": 64100 + }, + { + "epoch": 10.60455896927651, + "grad_norm": 0.8205140829086304, + "learning_rate": 7.183496678045737e-06, + "loss": 0.1243, + "step": 64200 + }, + { + "epoch": 10.621076973901552, + "grad_norm": 0.7464902400970459, + "learning_rate": 7.177378898554981e-06, + "loss": 0.1274, + "step": 64300 + }, + { + "epoch": 10.637594978526593, + "grad_norm": 0.6660764217376709, + "learning_rate": 7.171261119064224e-06, + "loss": 0.1348, + "step": 64400 + }, + { + "epoch": 10.654112983151634, + "grad_norm": 0.8469193577766418, + "learning_rate": 7.165143339573469e-06, + "loss": 0.1273, + "step": 64500 + }, + { + "epoch": 10.670630987776677, + "grad_norm": 0.7771629095077515, + "learning_rate": 7.159025560082713e-06, + "loss": 0.1202, + "step": 64600 + }, + { + "epoch": 10.687148992401719, + "grad_norm": 0.7291647791862488, + "learning_rate": 7.152907780591957e-06, + "loss": 0.12, + "step": 64700 + }, + { + "epoch": 10.70366699702676, + "grad_norm": 0.670477032661438, + "learning_rate": 7.1467900011012005e-06, + "loss": 0.1407, + "step": 64800 + }, + { + "epoch": 10.720185001651801, + "grad_norm": 0.7225449085235596, + "learning_rate": 7.140672221610445e-06, + "loss": 0.1207, + "step": 64900 + }, + { + "epoch": 10.736703006276842, + "grad_norm": 0.6769737601280212, + "learning_rate": 7.134554442119689e-06, + "loss": 0.1145, + "step": 65000 + }, + { + "epoch": 10.736703006276842, + "eval_cer": 0.04139004859352543, + "eval_loss": 0.12175419926643372, + "eval_runtime": 48.7387, + "eval_samples_per_second": 34.695, + "eval_steps_per_second": 8.679, + "eval_wer": 0.2271279585336914, + "step": 65000 + }, + { + "epoch": 10.753221010901884, + "grad_norm": 0.7679712772369385, + "learning_rate": 7.1284366626289326e-06, + "loss": 0.1201, + "step": 65100 + }, + { + "epoch": 10.769739015526925, + "grad_norm": 0.8149623274803162, + "learning_rate": 7.122318883138176e-06, + "loss": 0.1142, + "step": 65200 + }, + { + "epoch": 10.786257020151966, + "grad_norm": 0.6343280076980591, + "learning_rate": 7.116201103647421e-06, + "loss": 0.1392, + "step": 65300 + }, + { + "epoch": 10.802775024777008, + "grad_norm": 0.759663462638855, + "learning_rate": 7.110083324156665e-06, + "loss": 0.1168, + "step": 65400 + }, + { + "epoch": 10.819293029402049, + "grad_norm": 0.9799485206604004, + "learning_rate": 7.103965544665908e-06, + "loss": 0.122, + "step": 65500 + }, + { + "epoch": 10.83581103402709, + "grad_norm": 0.6957365274429321, + "learning_rate": 7.097847765175152e-06, + "loss": 0.1152, + "step": 65600 + }, + { + "epoch": 10.852329038652131, + "grad_norm": 0.8867782950401306, + "learning_rate": 7.091729985684397e-06, + "loss": 0.1175, + "step": 65700 + }, + { + "epoch": 10.868847043277173, + "grad_norm": 0.8960587382316589, + "learning_rate": 7.08561220619364e-06, + "loss": 0.1217, + "step": 65800 + }, + { + "epoch": 10.885365047902214, + "grad_norm": 0.6762132048606873, + "learning_rate": 7.0794944267028845e-06, + "loss": 0.1223, + "step": 65900 + }, + { + "epoch": 10.901883052527255, + "grad_norm": 0.835166335105896, + "learning_rate": 7.073376647212128e-06, + "loss": 0.119, + "step": 66000 + }, + { + "epoch": 10.901883052527255, + "eval_cer": 0.0415611525562932, + "eval_loss": 0.11837079375982285, + "eval_runtime": 48.8246, + "eval_samples_per_second": 34.634, + "eval_steps_per_second": 8.664, + "eval_wer": 0.22619121963404734, + "step": 66000 + }, + { + "epoch": 10.918401057152296, + "grad_norm": 0.6607236862182617, + "learning_rate": 7.067258867721373e-06, + "loss": 0.1195, + "step": 66100 + }, + { + "epoch": 10.934919061777338, + "grad_norm": 0.7014028429985046, + "learning_rate": 7.061141088230617e-06, + "loss": 0.1233, + "step": 66200 + }, + { + "epoch": 10.951437066402379, + "grad_norm": 0.8678550124168396, + "learning_rate": 7.05502330873986e-06, + "loss": 0.127, + "step": 66300 + }, + { + "epoch": 10.96795507102742, + "grad_norm": 0.676571786403656, + "learning_rate": 7.048905529249105e-06, + "loss": 0.1171, + "step": 66400 + }, + { + "epoch": 10.984473075652462, + "grad_norm": 0.8348824381828308, + "learning_rate": 7.042787749758349e-06, + "loss": 0.1209, + "step": 66500 + }, + { + "epoch": 11.000991080277503, + "grad_norm": 1.0055019855499268, + "learning_rate": 7.036669970267592e-06, + "loss": 0.1217, + "step": 66600 + }, + { + "epoch": 11.017509084902544, + "grad_norm": 0.8197912573814392, + "learning_rate": 7.030552190776836e-06, + "loss": 0.12, + "step": 66700 + }, + { + "epoch": 11.034027089527585, + "grad_norm": 0.8832284212112427, + "learning_rate": 7.024434411286081e-06, + "loss": 0.1175, + "step": 66800 + }, + { + "epoch": 11.050545094152627, + "grad_norm": 0.7222535014152527, + "learning_rate": 7.018316631795324e-06, + "loss": 0.1138, + "step": 66900 + }, + { + "epoch": 11.067063098777668, + "grad_norm": 0.6838215589523315, + "learning_rate": 7.012198852304568e-06, + "loss": 0.1155, + "step": 67000 + }, + { + "epoch": 11.067063098777668, + "eval_cer": 0.04099650947915954, + "eval_loss": 0.11776668578386307, + "eval_runtime": 48.4053, + "eval_samples_per_second": 34.934, + "eval_steps_per_second": 8.739, + "eval_wer": 0.22519203147442704, + "step": 67000 + }, + { + "epoch": 11.08358110340271, + "grad_norm": 0.8637756109237671, + "learning_rate": 7.006081072813812e-06, + "loss": 0.1202, + "step": 67100 + }, + { + "epoch": 11.10009910802775, + "grad_norm": 0.9718282222747803, + "learning_rate": 6.999963293323056e-06, + "loss": 0.1203, + "step": 67200 + }, + { + "epoch": 11.116617112652792, + "grad_norm": 0.531356930732727, + "learning_rate": 6.993845513832301e-06, + "loss": 0.1128, + "step": 67300 + }, + { + "epoch": 11.133135117277833, + "grad_norm": 0.5169577598571777, + "learning_rate": 6.987727734341544e-06, + "loss": 0.1232, + "step": 67400 + }, + { + "epoch": 11.149653121902874, + "grad_norm": 0.634283185005188, + "learning_rate": 6.9816099548507875e-06, + "loss": 0.1125, + "step": 67500 + }, + { + "epoch": 11.166171126527916, + "grad_norm": 0.7183799743652344, + "learning_rate": 6.975492175360033e-06, + "loss": 0.1187, + "step": 67600 + }, + { + "epoch": 11.182689131152957, + "grad_norm": 0.5369941592216492, + "learning_rate": 6.969374395869276e-06, + "loss": 0.1161, + "step": 67700 + }, + { + "epoch": 11.199207135777998, + "grad_norm": 0.5835019946098328, + "learning_rate": 6.96325661637852e-06, + "loss": 0.1137, + "step": 67800 + }, + { + "epoch": 11.21572514040304, + "grad_norm": 0.7346104383468628, + "learning_rate": 6.957138836887763e-06, + "loss": 0.1135, + "step": 67900 + }, + { + "epoch": 11.23224314502808, + "grad_norm": 0.6166725754737854, + "learning_rate": 6.951021057397008e-06, + "loss": 0.1224, + "step": 68000 + }, + { + "epoch": 11.23224314502808, + "eval_cer": 0.040833960714530146, + "eval_loss": 0.11831438541412354, + "eval_runtime": 48.7233, + "eval_samples_per_second": 34.706, + "eval_steps_per_second": 8.682, + "eval_wer": 0.22494223443452194, + "step": 68000 + }, + { + "epoch": 11.248761149653122, + "grad_norm": 0.9099162220954895, + "learning_rate": 6.944903277906252e-06, + "loss": 0.1248, + "step": 68100 + }, + { + "epoch": 11.265279154278163, + "grad_norm": 0.5954209566116333, + "learning_rate": 6.938785498415496e-06, + "loss": 0.1184, + "step": 68200 + }, + { + "epoch": 11.281797158903204, + "grad_norm": 0.765312910079956, + "learning_rate": 6.9326677189247395e-06, + "loss": 0.1422, + "step": 68300 + }, + { + "epoch": 11.298315163528246, + "grad_norm": 0.9866732954978943, + "learning_rate": 6.926549939433984e-06, + "loss": 0.1213, + "step": 68400 + }, + { + "epoch": 11.314833168153287, + "grad_norm": 0.6962186694145203, + "learning_rate": 6.920432159943228e-06, + "loss": 0.1178, + "step": 68500 + }, + { + "epoch": 11.331351172778328, + "grad_norm": 0.547361433506012, + "learning_rate": 6.9143143804524715e-06, + "loss": 0.1215, + "step": 68600 + }, + { + "epoch": 11.34786917740337, + "grad_norm": 0.5408198833465576, + "learning_rate": 6.908196600961715e-06, + "loss": 0.122, + "step": 68700 + }, + { + "epoch": 11.36438718202841, + "grad_norm": 0.7383239269256592, + "learning_rate": 6.90207882147096e-06, + "loss": 0.1232, + "step": 68800 + }, + { + "epoch": 11.380905186653452, + "grad_norm": 0.7225533127784729, + "learning_rate": 6.895961041980204e-06, + "loss": 0.1773, + "step": 68900 + }, + { + "epoch": 11.397423191278493, + "grad_norm": 0.7376521825790405, + "learning_rate": 6.889843262489447e-06, + "loss": 0.1146, + "step": 69000 + }, + { + "epoch": 11.397423191278493, + "eval_cer": 0.04105639586612826, + "eval_loss": 0.11837118864059448, + "eval_runtime": 48.8385, + "eval_samples_per_second": 34.624, + "eval_steps_per_second": 8.661, + "eval_wer": 0.22556672703428465, + "step": 69000 + }, + { + "epoch": 11.413941195903535, + "grad_norm": 0.6268288493156433, + "learning_rate": 6.883725482998691e-06, + "loss": 0.141, + "step": 69100 + }, + { + "epoch": 11.430459200528576, + "grad_norm": 0.9457260370254517, + "learning_rate": 6.877607703507936e-06, + "loss": 0.1174, + "step": 69200 + }, + { + "epoch": 11.446977205153617, + "grad_norm": 0.8935351371765137, + "learning_rate": 6.871489924017179e-06, + "loss": 0.1109, + "step": 69300 + }, + { + "epoch": 11.463495209778658, + "grad_norm": 0.6600612998008728, + "learning_rate": 6.8653721445264235e-06, + "loss": 0.1285, + "step": 69400 + }, + { + "epoch": 11.4800132144037, + "grad_norm": 0.6968724727630615, + "learning_rate": 6.859254365035667e-06, + "loss": 0.1327, + "step": 69500 + }, + { + "epoch": 11.496531219028741, + "grad_norm": 0.738458514213562, + "learning_rate": 6.853136585544912e-06, + "loss": 0.1089, + "step": 69600 + }, + { + "epoch": 11.513049223653782, + "grad_norm": 0.8320337533950806, + "learning_rate": 6.8470188060541556e-06, + "loss": 0.1131, + "step": 69700 + }, + { + "epoch": 11.529567228278824, + "grad_norm": 0.6417104005813599, + "learning_rate": 6.840901026563399e-06, + "loss": 0.1219, + "step": 69800 + }, + { + "epoch": 11.546085232903865, + "grad_norm": 0.7197741866111755, + "learning_rate": 6.8347832470726425e-06, + "loss": 0.1165, + "step": 69900 + }, + { + "epoch": 11.562603237528906, + "grad_norm": 0.8726572394371033, + "learning_rate": 6.828665467581888e-06, + "loss": 0.1137, + "step": 70000 + }, + { + "epoch": 11.562603237528906, + "eval_cer": 0.04100506467729793, + "eval_loss": 0.11831272393465042, + "eval_runtime": 48.5609, + "eval_samples_per_second": 34.822, + "eval_steps_per_second": 8.711, + "eval_wer": 0.22487978517454568, + "step": 70000 + }, + { + "epoch": 11.579121242153947, + "grad_norm": 0.6094586849212646, + "learning_rate": 6.822547688091131e-06, + "loss": 0.1233, + "step": 70100 + }, + { + "epoch": 11.595639246778989, + "grad_norm": 0.7053238749504089, + "learning_rate": 6.8164299086003746e-06, + "loss": 0.1207, + "step": 70200 + }, + { + "epoch": 11.61215725140403, + "grad_norm": 0.5145518183708191, + "learning_rate": 6.810312129109619e-06, + "loss": 0.1181, + "step": 70300 + }, + { + "epoch": 11.628675256029071, + "grad_norm": 1.1360536813735962, + "learning_rate": 6.804194349618863e-06, + "loss": 0.1227, + "step": 70400 + }, + { + "epoch": 11.645193260654112, + "grad_norm": 0.7354953289031982, + "learning_rate": 6.7980765701281075e-06, + "loss": 0.1151, + "step": 70500 + }, + { + "epoch": 11.661711265279154, + "grad_norm": 0.6327475309371948, + "learning_rate": 6.791958790637351e-06, + "loss": 0.1132, + "step": 70600 + }, + { + "epoch": 11.678229269904195, + "grad_norm": 0.7320681214332581, + "learning_rate": 6.785841011146594e-06, + "loss": 0.116, + "step": 70700 + }, + { + "epoch": 11.694747274529236, + "grad_norm": 0.7258247137069702, + "learning_rate": 6.7797232316558396e-06, + "loss": 0.1236, + "step": 70800 + }, + { + "epoch": 11.711265279154278, + "grad_norm": 0.7472134232521057, + "learning_rate": 6.773605452165083e-06, + "loss": 0.1148, + "step": 70900 + }, + { + "epoch": 11.727783283779319, + "grad_norm": 0.9377081394195557, + "learning_rate": 6.7674876726743265e-06, + "loss": 0.1173, + "step": 71000 + }, + { + "epoch": 11.727783283779319, + "eval_cer": 0.04082540551639176, + "eval_loss": 0.11736804246902466, + "eval_runtime": 48.644, + "eval_samples_per_second": 34.763, + "eval_steps_per_second": 8.696, + "eval_wer": 0.22294385811528133, + "step": 71000 + }, + { + "epoch": 11.74430128840436, + "grad_norm": 0.638346791267395, + "learning_rate": 6.76136989318357e-06, + "loss": 0.1333, + "step": 71100 + }, + { + "epoch": 11.760819293029401, + "grad_norm": 0.7357622981071472, + "learning_rate": 6.755252113692815e-06, + "loss": 0.1136, + "step": 71200 + }, + { + "epoch": 11.777337297654443, + "grad_norm": 0.801539957523346, + "learning_rate": 6.7491343342020586e-06, + "loss": 0.1162, + "step": 71300 + }, + { + "epoch": 11.793855302279484, + "grad_norm": 0.869429886341095, + "learning_rate": 6.743016554711302e-06, + "loss": 0.112, + "step": 71400 + }, + { + "epoch": 11.810373306904525, + "grad_norm": 0.649721622467041, + "learning_rate": 6.736898775220546e-06, + "loss": 0.1115, + "step": 71500 + }, + { + "epoch": 11.826891311529566, + "grad_norm": 0.8566005229949951, + "learning_rate": 6.730780995729791e-06, + "loss": 0.1178, + "step": 71600 + }, + { + "epoch": 11.84340931615461, + "grad_norm": 0.8232606649398804, + "learning_rate": 6.724663216239035e-06, + "loss": 0.1267, + "step": 71700 + }, + { + "epoch": 11.85992732077965, + "grad_norm": 0.7500156760215759, + "learning_rate": 6.7185454367482784e-06, + "loss": 0.1163, + "step": 71800 + }, + { + "epoch": 11.876445325404692, + "grad_norm": 0.635427713394165, + "learning_rate": 6.712427657257522e-06, + "loss": 0.1555, + "step": 71900 + }, + { + "epoch": 11.892963330029733, + "grad_norm": 0.807422399520874, + "learning_rate": 6.706309877766767e-06, + "loss": 0.1428, + "step": 72000 + }, + { + "epoch": 11.892963330029733, + "eval_cer": 0.04042331120388748, + "eval_loss": 0.11749948561191559, + "eval_runtime": 48.902, + "eval_samples_per_second": 34.579, + "eval_steps_per_second": 8.65, + "eval_wer": 0.2228189595953288, + "step": 72000 + }, + { + "epoch": 11.909481334654775, + "grad_norm": 0.6549407839775085, + "learning_rate": 6.7001920982760105e-06, + "loss": 0.1114, + "step": 72100 + }, + { + "epoch": 11.925999339279816, + "grad_norm": 1.0132852792739868, + "learning_rate": 6.694074318785254e-06, + "loss": 0.1153, + "step": 72200 + }, + { + "epoch": 11.942517343904857, + "grad_norm": 0.5365763306617737, + "learning_rate": 6.6879565392944974e-06, + "loss": 0.1109, + "step": 72300 + }, + { + "epoch": 11.959035348529898, + "grad_norm": 0.6037495732307434, + "learning_rate": 6.681838759803743e-06, + "loss": 0.1144, + "step": 72400 + }, + { + "epoch": 11.97555335315494, + "grad_norm": 0.4775562286376953, + "learning_rate": 6.675720980312986e-06, + "loss": 0.1178, + "step": 72500 + }, + { + "epoch": 11.992071357779981, + "grad_norm": 0.863073468208313, + "learning_rate": 6.66960320082223e-06, + "loss": 0.1168, + "step": 72600 + }, + { + "epoch": 12.008589362405022, + "grad_norm": 2.4738621711730957, + "learning_rate": 6.663485421331474e-06, + "loss": 0.1322, + "step": 72700 + }, + { + "epoch": 12.025107367030063, + "grad_norm": 0.6702748537063599, + "learning_rate": 6.657367641840718e-06, + "loss": 0.1702, + "step": 72800 + }, + { + "epoch": 12.041625371655105, + "grad_norm": 0.9668029546737671, + "learning_rate": 6.6512498623499624e-06, + "loss": 0.1358, + "step": 72900 + }, + { + "epoch": 12.058143376280146, + "grad_norm": 0.6446594595909119, + "learning_rate": 6.645132082859206e-06, + "loss": 0.1128, + "step": 73000 + }, + { + "epoch": 12.058143376280146, + "eval_cer": 0.040833960714530146, + "eval_loss": 0.11493762582540512, + "eval_runtime": 48.8939, + "eval_samples_per_second": 34.585, + "eval_steps_per_second": 8.651, + "eval_wer": 0.22231936551551865, + "step": 73000 + }, + { + "epoch": 12.074661380905187, + "grad_norm": 0.7115280032157898, + "learning_rate": 6.639014303368449e-06, + "loss": 0.1126, + "step": 73100 + }, + { + "epoch": 12.091179385530229, + "grad_norm": 0.6623009443283081, + "learning_rate": 6.6328965238776945e-06, + "loss": 0.1187, + "step": 73200 + }, + { + "epoch": 12.10769739015527, + "grad_norm": 0.7320263981819153, + "learning_rate": 6.626778744386938e-06, + "loss": 0.11, + "step": 73300 + }, + { + "epoch": 12.124215394780311, + "grad_norm": 0.7504459619522095, + "learning_rate": 6.6206609648961814e-06, + "loss": 0.1128, + "step": 73400 + }, + { + "epoch": 12.140733399405352, + "grad_norm": 0.6281275749206543, + "learning_rate": 6.614543185405426e-06, + "loss": 0.1169, + "step": 73500 + }, + { + "epoch": 12.157251404030394, + "grad_norm": 0.6658099889755249, + "learning_rate": 6.60842540591467e-06, + "loss": 0.1093, + "step": 73600 + }, + { + "epoch": 12.173769408655435, + "grad_norm": 0.8157078623771667, + "learning_rate": 6.6023076264239135e-06, + "loss": 0.1128, + "step": 73700 + }, + { + "epoch": 12.190287413280476, + "grad_norm": 0.7392610907554626, + "learning_rate": 6.596189846933158e-06, + "loss": 0.1168, + "step": 73800 + }, + { + "epoch": 12.206805417905517, + "grad_norm": 0.5370469689369202, + "learning_rate": 6.590072067442401e-06, + "loss": 0.115, + "step": 73900 + }, + { + "epoch": 12.223323422530559, + "grad_norm": 0.7587655782699585, + "learning_rate": 6.5839542879516465e-06, + "loss": 0.1063, + "step": 74000 + }, + { + "epoch": 12.223323422530559, + "eval_cer": 0.04020087605228937, + "eval_loss": 0.1159936785697937, + "eval_runtime": 48.8724, + "eval_samples_per_second": 34.6, + "eval_steps_per_second": 8.655, + "eval_wer": 0.22044588771623055, + "step": 74000 + }, + { + "epoch": 12.2398414271556, + "grad_norm": 0.6018242835998535, + "learning_rate": 6.57783650846089e-06, + "loss": 0.1118, + "step": 74100 + }, + { + "epoch": 12.256359431780641, + "grad_norm": 0.6748114228248596, + "learning_rate": 6.571718728970133e-06, + "loss": 0.1094, + "step": 74200 + }, + { + "epoch": 12.272877436405683, + "grad_norm": 0.6757166981697083, + "learning_rate": 6.565600949479377e-06, + "loss": 0.1097, + "step": 74300 + }, + { + "epoch": 12.289395441030724, + "grad_norm": 0.948271632194519, + "learning_rate": 6.559483169988622e-06, + "loss": 0.1186, + "step": 74400 + }, + { + "epoch": 12.305913445655765, + "grad_norm": 0.6468844413757324, + "learning_rate": 6.5533653904978655e-06, + "loss": 0.1206, + "step": 74500 + }, + { + "epoch": 12.322431450280806, + "grad_norm": 0.6049870848655701, + "learning_rate": 6.547247611007109e-06, + "loss": 0.1672, + "step": 74600 + }, + { + "epoch": 12.338949454905848, + "grad_norm": 1.080959677696228, + "learning_rate": 6.541129831516353e-06, + "loss": 0.136, + "step": 74700 + }, + { + "epoch": 12.355467459530889, + "grad_norm": 0.7225471138954163, + "learning_rate": 6.5350120520255975e-06, + "loss": 0.1095, + "step": 74800 + }, + { + "epoch": 12.37198546415593, + "grad_norm": 0.6947051286697388, + "learning_rate": 6.528894272534841e-06, + "loss": 0.1354, + "step": 74900 + }, + { + "epoch": 12.388503468780971, + "grad_norm": 0.6471466422080994, + "learning_rate": 6.522776493044085e-06, + "loss": 0.1051, + "step": 75000 + }, + { + "epoch": 12.388503468780971, + "eval_cer": 0.04020943125042776, + "eval_loss": 0.11492911726236343, + "eval_runtime": 48.7391, + "eval_samples_per_second": 34.695, + "eval_steps_per_second": 8.679, + "eval_wer": 0.2200087428963967, + "step": 75000 + }, + { + "epoch": 12.405021473406013, + "grad_norm": 0.6231672763824463, + "learning_rate": 6.516658713553329e-06, + "loss": 0.1331, + "step": 75100 + }, + { + "epoch": 12.421539478031054, + "grad_norm": 0.49103644490242004, + "learning_rate": 6.510540934062574e-06, + "loss": 0.11, + "step": 75200 + }, + { + "epoch": 12.438057482656095, + "grad_norm": 0.7189831733703613, + "learning_rate": 6.504423154571817e-06, + "loss": 0.115, + "step": 75300 + }, + { + "epoch": 12.454575487281137, + "grad_norm": 0.5822007060050964, + "learning_rate": 6.498305375081061e-06, + "loss": 0.112, + "step": 75400 + }, + { + "epoch": 12.471093491906178, + "grad_norm": 0.6000872254371643, + "learning_rate": 6.492187595590304e-06, + "loss": 0.1088, + "step": 75500 + }, + { + "epoch": 12.487611496531219, + "grad_norm": 0.6508600115776062, + "learning_rate": 6.4860698160995495e-06, + "loss": 0.1111, + "step": 75600 + }, + { + "epoch": 12.50412950115626, + "grad_norm": 0.6574178338050842, + "learning_rate": 6.479952036608793e-06, + "loss": 0.1095, + "step": 75700 + }, + { + "epoch": 12.520647505781302, + "grad_norm": 0.845613956451416, + "learning_rate": 6.473834257118036e-06, + "loss": 0.1139, + "step": 75800 + }, + { + "epoch": 12.537165510406343, + "grad_norm": 0.5848095417022705, + "learning_rate": 6.467716477627281e-06, + "loss": 0.1147, + "step": 75900 + }, + { + "epoch": 12.553683515031384, + "grad_norm": 0.9496851563453674, + "learning_rate": 6.461598698136525e-06, + "loss": 0.1128, + "step": 76000 + }, + { + "epoch": 12.553683515031384, + "eval_cer": 0.040106768872767096, + "eval_loss": 0.112928107380867, + "eval_runtime": 48.6721, + "eval_samples_per_second": 34.743, + "eval_steps_per_second": 8.691, + "eval_wer": 0.22025853993630176, + "step": 76000 + }, + { + "epoch": 12.570201519656425, + "grad_norm": 0.7724167108535767, + "learning_rate": 6.455480918645769e-06, + "loss": 0.1156, + "step": 76100 + }, + { + "epoch": 12.586719524281467, + "grad_norm": 0.753487765789032, + "learning_rate": 6.449363139155013e-06, + "loss": 0.1216, + "step": 76200 + }, + { + "epoch": 12.603237528906508, + "grad_norm": 0.7323099970817566, + "learning_rate": 6.443245359664256e-06, + "loss": 0.1163, + "step": 76300 + }, + { + "epoch": 12.61975553353155, + "grad_norm": 0.5276266932487488, + "learning_rate": 6.437127580173501e-06, + "loss": 0.1261, + "step": 76400 + }, + { + "epoch": 12.63627353815659, + "grad_norm": 0.7041454315185547, + "learning_rate": 6.431009800682745e-06, + "loss": 0.1097, + "step": 76500 + }, + { + "epoch": 12.652791542781632, + "grad_norm": 0.5830830931663513, + "learning_rate": 6.424892021191988e-06, + "loss": 0.1053, + "step": 76600 + }, + { + "epoch": 12.669309547406673, + "grad_norm": 0.8507035970687866, + "learning_rate": 6.418774241701232e-06, + "loss": 0.1157, + "step": 76700 + }, + { + "epoch": 12.685827552031714, + "grad_norm": 0.7934384942054749, + "learning_rate": 6.412656462210477e-06, + "loss": 0.1139, + "step": 76800 + }, + { + "epoch": 12.702345556656756, + "grad_norm": 0.8126075863838196, + "learning_rate": 6.40653868271972e-06, + "loss": 0.1382, + "step": 76900 + }, + { + "epoch": 12.718863561281797, + "grad_norm": 0.7506862282752991, + "learning_rate": 6.400420903228965e-06, + "loss": 0.1108, + "step": 77000 + }, + { + "epoch": 12.718863561281797, + "eval_cer": 0.04023509684484293, + "eval_loss": 0.11516769230365753, + "eval_runtime": 48.6282, + "eval_samples_per_second": 34.774, + "eval_steps_per_second": 8.699, + "eval_wer": 0.2200087428963967, + "step": 77000 + }, + { + "epoch": 12.735381565906838, + "grad_norm": 0.6928473114967346, + "learning_rate": 6.394303123738208e-06, + "loss": 0.1118, + "step": 77100 + }, + { + "epoch": 12.75189957053188, + "grad_norm": 0.7494087815284729, + "learning_rate": 6.3881853442474525e-06, + "loss": 0.1152, + "step": 77200 + }, + { + "epoch": 12.76841757515692, + "grad_norm": 0.7207498550415039, + "learning_rate": 6.382067564756697e-06, + "loss": 0.1074, + "step": 77300 + }, + { + "epoch": 12.784935579781962, + "grad_norm": 0.6607386469841003, + "learning_rate": 6.37594978526594e-06, + "loss": 0.1102, + "step": 77400 + }, + { + "epoch": 12.801453584407003, + "grad_norm": 0.5259993076324463, + "learning_rate": 6.369832005775184e-06, + "loss": 0.1067, + "step": 77500 + }, + { + "epoch": 12.817971589032044, + "grad_norm": 0.7667635679244995, + "learning_rate": 6.363714226284429e-06, + "loss": 0.1079, + "step": 77600 + }, + { + "epoch": 12.834489593657086, + "grad_norm": 0.676259458065033, + "learning_rate": 6.357596446793672e-06, + "loss": 0.1323, + "step": 77700 + }, + { + "epoch": 12.851007598282127, + "grad_norm": 0.6613221168518066, + "learning_rate": 6.351478667302916e-06, + "loss": 0.1104, + "step": 77800 + }, + { + "epoch": 12.867525602907168, + "grad_norm": 0.8658110499382019, + "learning_rate": 6.34536088781216e-06, + "loss": 0.1087, + "step": 77900 + }, + { + "epoch": 12.88404360753221, + "grad_norm": 0.5932702422142029, + "learning_rate": 6.3392431083214044e-06, + "loss": 0.1184, + "step": 78000 + }, + { + "epoch": 12.88404360753221, + "eval_cer": 0.04030353842995004, + "eval_loss": 0.11411629617214203, + "eval_runtime": 48.5234, + "eval_samples_per_second": 34.849, + "eval_steps_per_second": 8.717, + "eval_wer": 0.22019609067632548, + "step": 78000 + }, + { + "epoch": 12.90056161215725, + "grad_norm": 0.7417730689048767, + "learning_rate": 6.333125328830648e-06, + "loss": 0.1262, + "step": 78100 + }, + { + "epoch": 12.917079616782292, + "grad_norm": 0.625182032585144, + "learning_rate": 6.327007549339892e-06, + "loss": 0.1114, + "step": 78200 + }, + { + "epoch": 12.933597621407333, + "grad_norm": 0.9503306746482849, + "learning_rate": 6.320889769849136e-06, + "loss": 0.109, + "step": 78300 + }, + { + "epoch": 12.950115626032375, + "grad_norm": 0.4723988473415375, + "learning_rate": 6.314771990358381e-06, + "loss": 0.1346, + "step": 78400 + }, + { + "epoch": 12.966633630657416, + "grad_norm": 0.5400856137275696, + "learning_rate": 6.308654210867624e-06, + "loss": 0.1162, + "step": 78500 + }, + { + "epoch": 12.983151635282457, + "grad_norm": 0.9495701789855957, + "learning_rate": 6.302536431376868e-06, + "loss": 0.1116, + "step": 78600 + }, + { + "epoch": 12.999669639907498, + "grad_norm": 0.5586131811141968, + "learning_rate": 6.296418651886111e-06, + "loss": 0.1093, + "step": 78700 + }, + { + "epoch": 13.01618764453254, + "grad_norm": 0.7302865386009216, + "learning_rate": 6.290300872395356e-06, + "loss": 0.1095, + "step": 78800 + }, + { + "epoch": 13.032705649157581, + "grad_norm": 0.726801872253418, + "learning_rate": 6.2841830929046e-06, + "loss": 0.1144, + "step": 78900 + }, + { + "epoch": 13.049223653782622, + "grad_norm": 0.6335176825523376, + "learning_rate": 6.278065313413843e-06, + "loss": 0.1099, + "step": 79000 + }, + { + "epoch": 13.049223653782622, + "eval_cer": 0.04023509684484293, + "eval_loss": 0.11533664911985397, + "eval_runtime": 48.8975, + "eval_samples_per_second": 34.583, + "eval_steps_per_second": 8.651, + "eval_wer": 0.2199462936364204, + "step": 79000 + }, + { + "epoch": 13.065741658407664, + "grad_norm": 0.7183253765106201, + "learning_rate": 6.271947533923088e-06, + "loss": 0.1015, + "step": 79100 + }, + { + "epoch": 13.082259663032705, + "grad_norm": 0.8460133075714111, + "learning_rate": 6.265829754432332e-06, + "loss": 0.1147, + "step": 79200 + }, + { + "epoch": 13.098777667657746, + "grad_norm": 0.9035709500312805, + "learning_rate": 6.259711974941575e-06, + "loss": 0.105, + "step": 79300 + }, + { + "epoch": 13.115295672282787, + "grad_norm": 1.1149568557739258, + "learning_rate": 6.25359419545082e-06, + "loss": 0.1173, + "step": 79400 + }, + { + "epoch": 13.131813676907829, + "grad_norm": 0.746825635433197, + "learning_rate": 6.247476415960063e-06, + "loss": 0.1103, + "step": 79500 + }, + { + "epoch": 13.148331681532872, + "grad_norm": 0.5890305638313293, + "learning_rate": 6.241358636469308e-06, + "loss": 0.1075, + "step": 79600 + }, + { + "epoch": 13.164849686157913, + "grad_norm": 0.6706238985061646, + "learning_rate": 6.235240856978552e-06, + "loss": 0.1043, + "step": 79700 + }, + { + "epoch": 13.181367690782954, + "grad_norm": 0.7864231467247009, + "learning_rate": 6.229123077487795e-06, + "loss": 0.1105, + "step": 79800 + }, + { + "epoch": 13.197885695407995, + "grad_norm": 0.7406273484230042, + "learning_rate": 6.223005297997039e-06, + "loss": 0.1046, + "step": 79900 + }, + { + "epoch": 13.214403700033037, + "grad_norm": 0.7028843760490417, + "learning_rate": 6.216887518506284e-06, + "loss": 0.1119, + "step": 80000 + }, + { + "epoch": 13.214403700033037, + "eval_cer": 0.039867223324892204, + "eval_loss": 0.11367151141166687, + "eval_runtime": 48.9472, + "eval_samples_per_second": 34.547, + "eval_steps_per_second": 8.642, + "eval_wer": 0.21919690251670518, + "step": 80000 + }, + { + "epoch": 13.230921704658078, + "grad_norm": 0.6652178168296814, + "learning_rate": 6.210769739015527e-06, + "loss": 0.1118, + "step": 80100 + }, + { + "epoch": 13.24743970928312, + "grad_norm": 0.9752405285835266, + "learning_rate": 6.204651959524771e-06, + "loss": 0.1305, + "step": 80200 + }, + { + "epoch": 13.26395771390816, + "grad_norm": 0.6729234457015991, + "learning_rate": 6.198534180034015e-06, + "loss": 0.1102, + "step": 80300 + }, + { + "epoch": 13.280475718533202, + "grad_norm": 0.7551404237747192, + "learning_rate": 6.192416400543259e-06, + "loss": 0.1131, + "step": 80400 + }, + { + "epoch": 13.296993723158243, + "grad_norm": 0.5141217112541199, + "learning_rate": 6.186298621052504e-06, + "loss": 0.1041, + "step": 80500 + }, + { + "epoch": 13.313511727783284, + "grad_norm": 0.7362185716629028, + "learning_rate": 6.180180841561747e-06, + "loss": 0.1054, + "step": 80600 + }, + { + "epoch": 13.330029732408326, + "grad_norm": 0.6110237240791321, + "learning_rate": 6.174063062070991e-06, + "loss": 0.1067, + "step": 80700 + }, + { + "epoch": 13.346547737033367, + "grad_norm": 0.5987915992736816, + "learning_rate": 6.167945282580236e-06, + "loss": 0.1518, + "step": 80800 + }, + { + "epoch": 13.363065741658408, + "grad_norm": 0.7611739635467529, + "learning_rate": 6.161827503089479e-06, + "loss": 0.1036, + "step": 80900 + }, + { + "epoch": 13.37958374628345, + "grad_norm": 0.5500743389129639, + "learning_rate": 6.155709723598723e-06, + "loss": 0.1102, + "step": 81000 + }, + { + "epoch": 13.37958374628345, + "eval_cer": 0.039858668126753816, + "eval_loss": 0.11399171501398087, + "eval_runtime": 48.7532, + "eval_samples_per_second": 34.685, + "eval_steps_per_second": 8.676, + "eval_wer": 0.21869730843689503, + "step": 81000 + }, + { + "epoch": 13.39610175090849, + "grad_norm": 0.7734577059745789, + "learning_rate": 6.149591944107966e-06, + "loss": 0.109, + "step": 81100 + }, + { + "epoch": 13.412619755533532, + "grad_norm": 0.6689289808273315, + "learning_rate": 6.143474164617211e-06, + "loss": 0.1097, + "step": 81200 + }, + { + "epoch": 13.429137760158573, + "grad_norm": 0.7353644371032715, + "learning_rate": 6.137356385126455e-06, + "loss": 0.1084, + "step": 81300 + }, + { + "epoch": 13.445655764783615, + "grad_norm": 0.6356621384620667, + "learning_rate": 6.131238605635699e-06, + "loss": 0.115, + "step": 81400 + }, + { + "epoch": 13.462173769408656, + "grad_norm": 0.6484361290931702, + "learning_rate": 6.1251208261449426e-06, + "loss": 0.1125, + "step": 81500 + }, + { + "epoch": 13.478691774033697, + "grad_norm": 0.9929621815681458, + "learning_rate": 6.119003046654187e-06, + "loss": 0.1033, + "step": 81600 + }, + { + "epoch": 13.495209778658738, + "grad_norm": 0.7411353588104248, + "learning_rate": 6.112885267163431e-06, + "loss": 0.115, + "step": 81700 + }, + { + "epoch": 13.51172778328378, + "grad_norm": 0.7139526009559631, + "learning_rate": 6.106767487672675e-06, + "loss": 0.1023, + "step": 81800 + }, + { + "epoch": 13.528245787908821, + "grad_norm": 0.6597611904144287, + "learning_rate": 6.100649708181918e-06, + "loss": 0.1063, + "step": 81900 + }, + { + "epoch": 13.544763792533862, + "grad_norm": 0.8007270097732544, + "learning_rate": 6.094531928691163e-06, + "loss": 0.1086, + "step": 82000 + }, + { + "epoch": 13.544763792533862, + "eval_cer": 0.03977311614536993, + "eval_loss": 0.11309035122394562, + "eval_runtime": 48.6041, + "eval_samples_per_second": 34.791, + "eval_steps_per_second": 8.703, + "eval_wer": 0.2185724099169425, + "step": 82000 + }, + { + "epoch": 13.561281797158903, + "grad_norm": 0.8659864068031311, + "learning_rate": 6.088414149200407e-06, + "loss": 0.1084, + "step": 82100 + }, + { + "epoch": 13.577799801783945, + "grad_norm": 0.6871950030326843, + "learning_rate": 6.08229636970965e-06, + "loss": 0.1072, + "step": 82200 + }, + { + "epoch": 13.594317806408986, + "grad_norm": 0.5756420493125916, + "learning_rate": 6.0761785902188945e-06, + "loss": 0.1124, + "step": 82300 + }, + { + "epoch": 13.610835811034027, + "grad_norm": 1.0295737981796265, + "learning_rate": 6.070060810728139e-06, + "loss": 0.1162, + "step": 82400 + }, + { + "epoch": 13.627353815659069, + "grad_norm": 0.5129362940788269, + "learning_rate": 6.063943031237382e-06, + "loss": 0.1103, + "step": 82500 + }, + { + "epoch": 13.64387182028411, + "grad_norm": 0.7439867258071899, + "learning_rate": 6.0578252517466266e-06, + "loss": 0.1061, + "step": 82600 + }, + { + "epoch": 13.660389824909151, + "grad_norm": 0.4660612940788269, + "learning_rate": 6.05170747225587e-06, + "loss": 0.1143, + "step": 82700 + }, + { + "epoch": 13.676907829534192, + "grad_norm": 0.7765456438064575, + "learning_rate": 6.045589692765114e-06, + "loss": 0.1858, + "step": 82800 + }, + { + "epoch": 13.693425834159234, + "grad_norm": 0.793312132358551, + "learning_rate": 6.039471913274359e-06, + "loss": 0.1098, + "step": 82900 + }, + { + "epoch": 13.709943838784275, + "grad_norm": 0.6621662378311157, + "learning_rate": 6.033354133783602e-06, + "loss": 0.1151, + "step": 83000 + }, + { + "epoch": 13.709943838784275, + "eval_cer": 0.03952501539935665, + "eval_loss": 0.11215273290872574, + "eval_runtime": 49.1873, + "eval_samples_per_second": 34.379, + "eval_steps_per_second": 8.6, + "eval_wer": 0.21701117841753575, + "step": 83000 + }, + { + "epoch": 13.726461843409316, + "grad_norm": 0.6841396689414978, + "learning_rate": 6.0272363542928456e-06, + "loss": 0.1094, + "step": 83100 + }, + { + "epoch": 13.742979848034357, + "grad_norm": 0.7111786007881165, + "learning_rate": 6.021118574802091e-06, + "loss": 0.1072, + "step": 83200 + }, + { + "epoch": 13.759497852659399, + "grad_norm": 0.7815682291984558, + "learning_rate": 6.015000795311334e-06, + "loss": 0.1102, + "step": 83300 + }, + { + "epoch": 13.77601585728444, + "grad_norm": 0.8677568435668945, + "learning_rate": 6.008883015820578e-06, + "loss": 0.1062, + "step": 83400 + }, + { + "epoch": 13.792533861909481, + "grad_norm": 0.5680195689201355, + "learning_rate": 6.002765236329822e-06, + "loss": 0.106, + "step": 83500 + }, + { + "epoch": 13.809051866534523, + "grad_norm": 0.9129924178123474, + "learning_rate": 5.996647456839066e-06, + "loss": 0.0995, + "step": 83600 + }, + { + "epoch": 13.825569871159564, + "grad_norm": 0.662200927734375, + "learning_rate": 5.99052967734831e-06, + "loss": 0.1088, + "step": 83700 + }, + { + "epoch": 13.842087875784605, + "grad_norm": 0.887140691280365, + "learning_rate": 5.984411897857554e-06, + "loss": 0.1098, + "step": 83800 + }, + { + "epoch": 13.858605880409646, + "grad_norm": 0.9814369082450867, + "learning_rate": 5.978294118366798e-06, + "loss": 0.1068, + "step": 83900 + }, + { + "epoch": 13.875123885034688, + "grad_norm": 0.761234700679779, + "learning_rate": 5.972176338876043e-06, + "loss": 0.1033, + "step": 84000 + }, + { + "epoch": 13.875123885034688, + "eval_cer": 0.03929402504962015, + "eval_loss": 0.112494558095932, + "eval_runtime": 49.3853, + "eval_samples_per_second": 34.241, + "eval_steps_per_second": 8.565, + "eval_wer": 0.21626178729782053, + "step": 84000 + }, + { + "epoch": 13.891641889659729, + "grad_norm": 0.5570207238197327, + "learning_rate": 5.966058559385286e-06, + "loss": 0.1081, + "step": 84100 + }, + { + "epoch": 13.90815989428477, + "grad_norm": 0.5992655158042908, + "learning_rate": 5.95994077989453e-06, + "loss": 0.1362, + "step": 84200 + }, + { + "epoch": 13.924677898909811, + "grad_norm": 0.4389006197452545, + "learning_rate": 5.953823000403775e-06, + "loss": 0.1122, + "step": 84300 + }, + { + "epoch": 13.941195903534853, + "grad_norm": 0.6106426119804382, + "learning_rate": 5.947705220913018e-06, + "loss": 0.1065, + "step": 84400 + }, + { + "epoch": 13.957713908159894, + "grad_norm": 0.5008405447006226, + "learning_rate": 5.941587441422262e-06, + "loss": 0.1301, + "step": 84500 + }, + { + "epoch": 13.974231912784935, + "grad_norm": 20.616357803344727, + "learning_rate": 5.935469661931505e-06, + "loss": 0.1729, + "step": 84600 + }, + { + "epoch": 13.990749917409977, + "grad_norm": 0.7851992845535278, + "learning_rate": 5.92935188244075e-06, + "loss": 0.1043, + "step": 84700 + }, + { + "epoch": 14.007267922035018, + "grad_norm": 0.8801394104957581, + "learning_rate": 5.923234102949994e-06, + "loss": 0.1074, + "step": 84800 + }, + { + "epoch": 14.023785926660059, + "grad_norm": 0.5735670924186707, + "learning_rate": 5.917116323459238e-06, + "loss": 0.105, + "step": 84900 + }, + { + "epoch": 14.0403039312851, + "grad_norm": 0.6361643671989441, + "learning_rate": 5.9109985439684815e-06, + "loss": 0.1078, + "step": 85000 + }, + { + "epoch": 14.0403039312851, + "eval_cer": 0.03945657381424954, + "eval_loss": 0.1119338721036911, + "eval_runtime": 49.079, + "eval_samples_per_second": 34.455, + "eval_steps_per_second": 8.619, + "eval_wer": 0.2175107724973459, + "step": 85000 + }, + { + "epoch": 14.056821935910142, + "grad_norm": 0.6829052567481995, + "learning_rate": 5.904880764477726e-06, + "loss": 0.1007, + "step": 85100 + }, + { + "epoch": 14.073339940535183, + "grad_norm": 0.5998505353927612, + "learning_rate": 5.89876298498697e-06, + "loss": 0.1058, + "step": 85200 + }, + { + "epoch": 14.089857945160224, + "grad_norm": 0.7161391973495483, + "learning_rate": 5.892645205496214e-06, + "loss": 0.1096, + "step": 85300 + }, + { + "epoch": 14.106375949785265, + "grad_norm": 0.5567154288291931, + "learning_rate": 5.886527426005457e-06, + "loss": 0.1078, + "step": 85400 + }, + { + "epoch": 14.122893954410307, + "grad_norm": 0.9288133978843689, + "learning_rate": 5.880409646514702e-06, + "loss": 0.1075, + "step": 85500 + }, + { + "epoch": 14.139411959035348, + "grad_norm": 0.7576249837875366, + "learning_rate": 5.874291867023946e-06, + "loss": 0.1135, + "step": 85600 + }, + { + "epoch": 14.15592996366039, + "grad_norm": 0.7857004404067993, + "learning_rate": 5.868174087533189e-06, + "loss": 0.1045, + "step": 85700 + }, + { + "epoch": 14.17244796828543, + "grad_norm": 0.962145984172821, + "learning_rate": 5.8620563080424335e-06, + "loss": 0.1, + "step": 85800 + }, + { + "epoch": 14.188965972910472, + "grad_norm": 0.7464323043823242, + "learning_rate": 5.855938528551678e-06, + "loss": 0.1034, + "step": 85900 + }, + { + "epoch": 14.205483977535513, + "grad_norm": 0.8271916508674622, + "learning_rate": 5.849820749060921e-06, + "loss": 0.1082, + "step": 86000 + }, + { + "epoch": 14.205483977535513, + "eval_cer": 0.039174252275682706, + "eval_loss": 0.1129402220249176, + "eval_runtime": 48.9184, + "eval_samples_per_second": 34.568, + "eval_steps_per_second": 8.647, + "eval_wer": 0.21513770061824766, + "step": 86000 + }, + { + "epoch": 14.222001982160554, + "grad_norm": 0.5619252324104309, + "learning_rate": 5.8437029695701655e-06, + "loss": 0.1072, + "step": 86100 + }, + { + "epoch": 14.238519986785596, + "grad_norm": 0.5619592070579529, + "learning_rate": 5.837585190079409e-06, + "loss": 0.1038, + "step": 86200 + }, + { + "epoch": 14.255037991410637, + "grad_norm": 1.2644349336624146, + "learning_rate": 5.831467410588653e-06, + "loss": 0.102, + "step": 86300 + }, + { + "epoch": 14.271555996035678, + "grad_norm": 0.7374313473701477, + "learning_rate": 5.825349631097898e-06, + "loss": 0.1008, + "step": 86400 + }, + { + "epoch": 14.28807400066072, + "grad_norm": 0.8285679221153259, + "learning_rate": 5.819231851607141e-06, + "loss": 0.1024, + "step": 86500 + }, + { + "epoch": 14.30459200528576, + "grad_norm": 0.5749133825302124, + "learning_rate": 5.8131140721163845e-06, + "loss": 0.1078, + "step": 86600 + }, + { + "epoch": 14.321110009910802, + "grad_norm": 0.6757526397705078, + "learning_rate": 5.80699629262563e-06, + "loss": 0.1298, + "step": 86700 + }, + { + "epoch": 14.337628014535843, + "grad_norm": 0.4636983275413513, + "learning_rate": 5.800878513134873e-06, + "loss": 0.1045, + "step": 86800 + }, + { + "epoch": 14.354146019160885, + "grad_norm": 0.6189342737197876, + "learning_rate": 5.794760733644117e-06, + "loss": 0.1335, + "step": 86900 + }, + { + "epoch": 14.370664023785928, + "grad_norm": 0.7641118764877319, + "learning_rate": 5.788642954153361e-06, + "loss": 0.102, + "step": 87000 + }, + { + "epoch": 14.370664023785928, + "eval_cer": 0.038857709944562314, + "eval_loss": 0.11137784272432327, + "eval_runtime": 49.1649, + "eval_samples_per_second": 34.394, + "eval_steps_per_second": 8.604, + "eval_wer": 0.21457565727846126, + "step": 87000 + }, + { + "epoch": 14.387182028410969, + "grad_norm": 0.8745734095573425, + "learning_rate": 5.782525174662605e-06, + "loss": 0.1056, + "step": 87100 + }, + { + "epoch": 14.40370003303601, + "grad_norm": 0.4426126182079315, + "learning_rate": 5.776407395171849e-06, + "loss": 0.1294, + "step": 87200 + }, + { + "epoch": 14.420218037661051, + "grad_norm": 0.7525532841682434, + "learning_rate": 5.770289615681093e-06, + "loss": 0.1139, + "step": 87300 + }, + { + "epoch": 14.436736042286093, + "grad_norm": 0.6336373686790466, + "learning_rate": 5.7641718361903365e-06, + "loss": 0.1023, + "step": 87400 + }, + { + "epoch": 14.453254046911134, + "grad_norm": 0.6930210590362549, + "learning_rate": 5.758054056699582e-06, + "loss": 0.1336, + "step": 87500 + }, + { + "epoch": 14.469772051536175, + "grad_norm": 0.7454831004142761, + "learning_rate": 5.751936277208825e-06, + "loss": 0.1032, + "step": 87600 + }, + { + "epoch": 14.486290056161216, + "grad_norm": 0.7100419998168945, + "learning_rate": 5.7458184977180686e-06, + "loss": 0.1034, + "step": 87700 + }, + { + "epoch": 14.502808060786258, + "grad_norm": 0.6206198334693909, + "learning_rate": 5.739700718227312e-06, + "loss": 0.1073, + "step": 87800 + }, + { + "epoch": 14.519326065411299, + "grad_norm": 0.5653363466262817, + "learning_rate": 5.733582938736557e-06, + "loss": 0.1034, + "step": 87900 + }, + { + "epoch": 14.53584407003634, + "grad_norm": 0.6938855051994324, + "learning_rate": 5.727465159245801e-06, + "loss": 0.1065, + "step": 88000 + }, + { + "epoch": 14.53584407003634, + "eval_cer": 0.03898603791663815, + "eval_loss": 0.11170890182256699, + "eval_runtime": 62.1432, + "eval_samples_per_second": 27.211, + "eval_steps_per_second": 6.807, + "eval_wer": 0.21582464247798663, + "step": 88000 + }, + { + "epoch": 14.552362074661382, + "grad_norm": 0.9062691330909729, + "learning_rate": 5.721347379755044e-06, + "loss": 0.0999, + "step": 88100 + }, + { + "epoch": 14.568880079286423, + "grad_norm": 0.6869949102401733, + "learning_rate": 5.715229600264288e-06, + "loss": 0.1033, + "step": 88200 + }, + { + "epoch": 14.585398083911464, + "grad_norm": 0.6004628539085388, + "learning_rate": 5.709111820773533e-06, + "loss": 0.108, + "step": 88300 + }, + { + "epoch": 14.601916088536505, + "grad_norm": 0.6582931876182556, + "learning_rate": 5.702994041282777e-06, + "loss": 0.1034, + "step": 88400 + }, + { + "epoch": 14.618434093161547, + "grad_norm": 0.5958510637283325, + "learning_rate": 5.6968762617920205e-06, + "loss": 0.1082, + "step": 88500 + }, + { + "epoch": 14.634952097786588, + "grad_norm": 0.8877278566360474, + "learning_rate": 5.690758482301264e-06, + "loss": 0.1051, + "step": 88600 + }, + { + "epoch": 14.65147010241163, + "grad_norm": 0.46800002455711365, + "learning_rate": 5.684640702810509e-06, + "loss": 0.1032, + "step": 88700 + }, + { + "epoch": 14.66798810703667, + "grad_norm": 0.6601079106330872, + "learning_rate": 5.6785229233197526e-06, + "loss": 0.108, + "step": 88800 + }, + { + "epoch": 14.684506111661712, + "grad_norm": 0.6476488709449768, + "learning_rate": 5.672405143828996e-06, + "loss": 0.1049, + "step": 88900 + }, + { + "epoch": 14.701024116286753, + "grad_norm": 0.7255818843841553, + "learning_rate": 5.6662873643382395e-06, + "loss": 0.1322, + "step": 89000 + }, + { + "epoch": 14.701024116286753, + "eval_cer": 0.039174252275682706, + "eval_loss": 0.11011859029531479, + "eval_runtime": 52.7786, + "eval_samples_per_second": 32.04, + "eval_steps_per_second": 8.015, + "eval_wer": 0.21588709173796292, + "step": 89000 + }, + { + "epoch": 14.717542120911794, + "grad_norm": 0.7102627754211426, + "learning_rate": 5.660169584847485e-06, + "loss": 0.1046, + "step": 89100 + }, + { + "epoch": 14.734060125536836, + "grad_norm": 0.6122440099716187, + "learning_rate": 5.654051805356728e-06, + "loss": 0.1012, + "step": 89200 + }, + { + "epoch": 14.750578130161877, + "grad_norm": 0.6586080193519592, + "learning_rate": 5.6479340258659724e-06, + "loss": 0.1021, + "step": 89300 + }, + { + "epoch": 14.767096134786918, + "grad_norm": 0.9857539534568787, + "learning_rate": 5.641816246375216e-06, + "loss": 0.158, + "step": 89400 + }, + { + "epoch": 14.78361413941196, + "grad_norm": 0.8294028043746948, + "learning_rate": 5.63569846688446e-06, + "loss": 0.1115, + "step": 89500 + }, + { + "epoch": 14.800132144037, + "grad_norm": 0.6861185431480408, + "learning_rate": 5.6295806873937045e-06, + "loss": 0.1043, + "step": 89600 + }, + { + "epoch": 14.816650148662042, + "grad_norm": 0.6036092042922974, + "learning_rate": 5.623462907902948e-06, + "loss": 0.1105, + "step": 89700 + }, + { + "epoch": 14.833168153287083, + "grad_norm": 0.778626561164856, + "learning_rate": 5.6173451284121914e-06, + "loss": 0.1032, + "step": 89800 + }, + { + "epoch": 14.849686157912124, + "grad_norm": 0.5784227252006531, + "learning_rate": 5.611227348921437e-06, + "loss": 0.1122, + "step": 89900 + }, + { + "epoch": 14.866204162537166, + "grad_norm": 0.6248123645782471, + "learning_rate": 5.60510956943068e-06, + "loss": 0.1027, + "step": 90000 + }, + { + "epoch": 14.866204162537166, + "eval_cer": 0.0388662651427007, + "eval_loss": 0.11089600622653961, + "eval_runtime": 53.1033, + "eval_samples_per_second": 31.844, + "eval_steps_per_second": 7.966, + "eval_wer": 0.2147005557984138, + "step": 90000 + }, + { + "epoch": 14.882722167162207, + "grad_norm": 0.5955941081047058, + "learning_rate": 5.5989917899399235e-06, + "loss": 0.117, + "step": 90100 + }, + { + "epoch": 14.899240171787248, + "grad_norm": 0.7445477247238159, + "learning_rate": 5.592874010449168e-06, + "loss": 0.1035, + "step": 90200 + }, + { + "epoch": 14.91575817641229, + "grad_norm": 0.4745796024799347, + "learning_rate": 5.586756230958412e-06, + "loss": 0.1042, + "step": 90300 + }, + { + "epoch": 14.93227618103733, + "grad_norm": 0.7581929564476013, + "learning_rate": 5.580638451467656e-06, + "loss": 0.1047, + "step": 90400 + }, + { + "epoch": 14.948794185662372, + "grad_norm": 1.0136349201202393, + "learning_rate": 5.5745206719769e-06, + "loss": 0.105, + "step": 90500 + }, + { + "epoch": 14.965312190287413, + "grad_norm": 0.7604655623435974, + "learning_rate": 5.568402892486143e-06, + "loss": 0.1091, + "step": 90600 + }, + { + "epoch": 14.981830194912455, + "grad_norm": 0.7419881224632263, + "learning_rate": 5.562285112995388e-06, + "loss": 0.1009, + "step": 90700 + }, + { + "epoch": 14.998348199537496, + "grad_norm": 0.571348249912262, + "learning_rate": 5.556167333504632e-06, + "loss": 0.099, + "step": 90800 + }, + { + "epoch": 15.014866204162537, + "grad_norm": 0.7786069512367249, + "learning_rate": 5.5500495540138754e-06, + "loss": 0.1141, + "step": 90900 + }, + { + "epoch": 15.031384208787578, + "grad_norm": 0.6933959722518921, + "learning_rate": 5.543931774523119e-06, + "loss": 0.1029, + "step": 91000 + }, + { + "epoch": 15.031384208787578, + "eval_cer": 0.03914003148312915, + "eval_loss": 0.110771544277668, + "eval_runtime": 55.563, + "eval_samples_per_second": 30.434, + "eval_steps_per_second": 7.613, + "eval_wer": 0.21532504839817648, + "step": 91000 + }, + { + "epoch": 15.04790221341262, + "grad_norm": 0.5616199374198914, + "learning_rate": 5.537813995032364e-06, + "loss": 0.0993, + "step": 91100 + }, + { + "epoch": 15.064420218037661, + "grad_norm": 0.6372345089912415, + "learning_rate": 5.5316962155416075e-06, + "loss": 0.104, + "step": 91200 + }, + { + "epoch": 15.080938222662702, + "grad_norm": 0.7811592817306519, + "learning_rate": 5.525578436050851e-06, + "loss": 0.1002, + "step": 91300 + }, + { + "epoch": 15.097456227287744, + "grad_norm": 0.974866509437561, + "learning_rate": 5.519460656560095e-06, + "loss": 0.1252, + "step": 91400 + }, + { + "epoch": 15.113974231912785, + "grad_norm": 0.705301821231842, + "learning_rate": 5.51334287706934e-06, + "loss": 0.1028, + "step": 91500 + }, + { + "epoch": 15.130492236537826, + "grad_norm": 0.6617374420166016, + "learning_rate": 5.507225097578583e-06, + "loss": 0.1029, + "step": 91600 + }, + { + "epoch": 15.147010241162867, + "grad_norm": 0.6031976342201233, + "learning_rate": 5.501107318087827e-06, + "loss": 0.0994, + "step": 91700 + }, + { + "epoch": 15.163528245787909, + "grad_norm": 0.8132845163345337, + "learning_rate": 5.494989538597071e-06, + "loss": 0.1013, + "step": 91800 + }, + { + "epoch": 15.18004625041295, + "grad_norm": 0.4518735110759735, + "learning_rate": 5.488871759106316e-06, + "loss": 0.1086, + "step": 91900 + }, + { + "epoch": 15.196564255037991, + "grad_norm": 0.6119063496589661, + "learning_rate": 5.4827539796155595e-06, + "loss": 0.0978, + "step": 92000 + }, + { + "epoch": 15.196564255037991, + "eval_cer": 0.03872082677434809, + "eval_loss": 0.11010745912790298, + "eval_runtime": 56.6751, + "eval_samples_per_second": 29.837, + "eval_steps_per_second": 7.464, + "eval_wer": 0.21351401985886467, + "step": 92000 + }, + { + "epoch": 15.213082259663032, + "grad_norm": 1.2135928869247437, + "learning_rate": 5.476636200124803e-06, + "loss": 0.126, + "step": 92100 + }, + { + "epoch": 15.229600264288074, + "grad_norm": 0.7273651361465454, + "learning_rate": 5.470518420634046e-06, + "loss": 0.1026, + "step": 92200 + }, + { + "epoch": 15.246118268913115, + "grad_norm": 0.5206860303878784, + "learning_rate": 5.4644006411432915e-06, + "loss": 0.1061, + "step": 92300 + }, + { + "epoch": 15.262636273538156, + "grad_norm": 0.5830551981925964, + "learning_rate": 5.458282861652535e-06, + "loss": 0.1049, + "step": 92400 + }, + { + "epoch": 15.279154278163197, + "grad_norm": 0.5370995402336121, + "learning_rate": 5.4521650821617785e-06, + "loss": 0.1022, + "step": 92500 + }, + { + "epoch": 15.295672282788239, + "grad_norm": 0.6254607439041138, + "learning_rate": 5.446047302671023e-06, + "loss": 0.1076, + "step": 92600 + }, + { + "epoch": 15.31219028741328, + "grad_norm": 0.7650060057640076, + "learning_rate": 5.439929523180267e-06, + "loss": 0.1061, + "step": 92700 + }, + { + "epoch": 15.328708292038321, + "grad_norm": 0.786281168460846, + "learning_rate": 5.433811743689511e-06, + "loss": 0.1059, + "step": 92800 + }, + { + "epoch": 15.345226296663363, + "grad_norm": 0.7369528412818909, + "learning_rate": 5.427693964198755e-06, + "loss": 0.1025, + "step": 92900 + }, + { + "epoch": 15.361744301288404, + "grad_norm": 0.8376733660697937, + "learning_rate": 5.421576184707998e-06, + "loss": 0.1048, + "step": 93000 + }, + { + "epoch": 15.361744301288404, + "eval_cer": 0.038275956471151874, + "eval_loss": 0.1115972101688385, + "eval_runtime": 60.1921, + "eval_samples_per_second": 28.093, + "eval_steps_per_second": 7.028, + "eval_wer": 0.21095360019983764, + "step": 93000 + }, + { + "epoch": 15.378262305913445, + "grad_norm": 0.6252397298812866, + "learning_rate": 5.4154584052172435e-06, + "loss": 0.1, + "step": 93100 + }, + { + "epoch": 15.394780310538486, + "grad_norm": 0.7326919436454773, + "learning_rate": 5.409340625726487e-06, + "loss": 0.11, + "step": 93200 + }, + { + "epoch": 15.411298315163528, + "grad_norm": 0.6019201874732971, + "learning_rate": 5.40322284623573e-06, + "loss": 0.1065, + "step": 93300 + }, + { + "epoch": 15.427816319788569, + "grad_norm": 0.7445711493492126, + "learning_rate": 5.397105066744974e-06, + "loss": 0.105, + "step": 93400 + }, + { + "epoch": 15.44433432441361, + "grad_norm": 1.068389892578125, + "learning_rate": 5.390987287254219e-06, + "loss": 0.1038, + "step": 93500 + }, + { + "epoch": 15.460852329038651, + "grad_norm": 0.672622561454773, + "learning_rate": 5.3848695077634625e-06, + "loss": 0.104, + "step": 93600 + }, + { + "epoch": 15.477370333663693, + "grad_norm": 0.6717888712882996, + "learning_rate": 5.378751728272707e-06, + "loss": 0.0985, + "step": 93700 + }, + { + "epoch": 15.493888338288734, + "grad_norm": 1.2381566762924194, + "learning_rate": 5.37263394878195e-06, + "loss": 0.1078, + "step": 93800 + }, + { + "epoch": 15.510406342913775, + "grad_norm": 0.6967211365699768, + "learning_rate": 5.3665161692911946e-06, + "loss": 0.1017, + "step": 93900 + }, + { + "epoch": 15.526924347538817, + "grad_norm": 0.7272515892982483, + "learning_rate": 5.360398389800439e-06, + "loss": 0.1027, + "step": 94000 + }, + { + "epoch": 15.526924347538817, + "eval_cer": 0.038635274792964205, + "eval_loss": 0.10984691232442856, + "eval_runtime": 58.0727, + "eval_samples_per_second": 29.119, + "eval_steps_per_second": 7.284, + "eval_wer": 0.21320177355898332, + "step": 94000 + }, + { + "epoch": 15.543442352163858, + "grad_norm": 0.7049939036369324, + "learning_rate": 5.354280610309682e-06, + "loss": 0.1035, + "step": 94100 + }, + { + "epoch": 15.559960356788899, + "grad_norm": 1.3722845315933228, + "learning_rate": 5.348162830818926e-06, + "loss": 0.1002, + "step": 94200 + }, + { + "epoch": 15.57647836141394, + "grad_norm": 0.7943611145019531, + "learning_rate": 5.342045051328171e-06, + "loss": 0.0976, + "step": 94300 + }, + { + "epoch": 15.592996366038982, + "grad_norm": 0.6992027163505554, + "learning_rate": 5.335927271837414e-06, + "loss": 0.1038, + "step": 94400 + }, + { + "epoch": 15.609514370664023, + "grad_norm": 0.9091536998748779, + "learning_rate": 5.329809492346658e-06, + "loss": 0.1059, + "step": 94500 + }, + { + "epoch": 15.626032375289064, + "grad_norm": 0.6817540526390076, + "learning_rate": 5.323691712855902e-06, + "loss": 0.1099, + "step": 94600 + }, + { + "epoch": 15.642550379914105, + "grad_norm": 0.6020603775978088, + "learning_rate": 5.3175739333651465e-06, + "loss": 0.0986, + "step": 94700 + }, + { + "epoch": 15.659068384539147, + "grad_norm": 0.6270213723182678, + "learning_rate": 5.31145615387439e-06, + "loss": 0.1034, + "step": 94800 + }, + { + "epoch": 15.67558638916419, + "grad_norm": 0.7782559990882874, + "learning_rate": 5.305338374383634e-06, + "loss": 0.0969, + "step": 94900 + }, + { + "epoch": 15.692104393789231, + "grad_norm": 0.6843757629394531, + "learning_rate": 5.299220594892878e-06, + "loss": 0.0996, + "step": 95000 + }, + { + "epoch": 15.692104393789231, + "eval_cer": 0.03852405721716515, + "eval_loss": 0.11085934937000275, + "eval_runtime": 60.5345, + "eval_samples_per_second": 27.934, + "eval_steps_per_second": 6.988, + "eval_wer": 0.21270217947917316, + "step": 95000 + }, + { + "epoch": 15.708622398414272, + "grad_norm": 0.5856791138648987, + "learning_rate": 5.293102815402122e-06, + "loss": 0.1043, + "step": 95100 + }, + { + "epoch": 15.725140403039314, + "grad_norm": 1.0118597745895386, + "learning_rate": 5.286985035911366e-06, + "loss": 0.1207, + "step": 95200 + }, + { + "epoch": 15.741658407664355, + "grad_norm": 0.4559677541255951, + "learning_rate": 5.28086725642061e-06, + "loss": 0.0953, + "step": 95300 + }, + { + "epoch": 15.758176412289396, + "grad_norm": 0.7937479615211487, + "learning_rate": 5.274749476929853e-06, + "loss": 0.1018, + "step": 95400 + }, + { + "epoch": 15.774694416914437, + "grad_norm": 0.7912297248840332, + "learning_rate": 5.2686316974390984e-06, + "loss": 0.1013, + "step": 95500 + }, + { + "epoch": 15.791212421539479, + "grad_norm": 0.9011877775192261, + "learning_rate": 5.262513917948342e-06, + "loss": 0.1085, + "step": 95600 + }, + { + "epoch": 15.80773042616452, + "grad_norm": 0.7926939129829407, + "learning_rate": 5.256396138457585e-06, + "loss": 0.0993, + "step": 95700 + }, + { + "epoch": 15.824248430789561, + "grad_norm": 0.9147284626960754, + "learning_rate": 5.25027835896683e-06, + "loss": 0.106, + "step": 95800 + }, + { + "epoch": 15.840766435414602, + "grad_norm": 0.6496513485908508, + "learning_rate": 5.244160579476074e-06, + "loss": 0.101, + "step": 95900 + }, + { + "epoch": 15.857284440039644, + "grad_norm": 0.5024608969688416, + "learning_rate": 5.2380427999853174e-06, + "loss": 0.0959, + "step": 96000 + }, + { + "epoch": 15.857284440039644, + "eval_cer": 0.03859249880227226, + "eval_loss": 0.1082993894815445, + "eval_runtime": 61.2314, + "eval_samples_per_second": 27.617, + "eval_steps_per_second": 6.908, + "eval_wer": 0.21288952725910198, + "step": 96000 + }, + { + "epoch": 15.873802444664685, + "grad_norm": 0.9131097197532654, + "learning_rate": 5.231925020494562e-06, + "loss": 0.1023, + "step": 96100 + }, + { + "epoch": 15.890320449289726, + "grad_norm": 0.7231972813606262, + "learning_rate": 5.225807241003805e-06, + "loss": 0.1057, + "step": 96200 + }, + { + "epoch": 15.906838453914768, + "grad_norm": 0.7179155349731445, + "learning_rate": 5.21968946151305e-06, + "loss": 0.1054, + "step": 96300 + }, + { + "epoch": 15.923356458539809, + "grad_norm": 0.6966670751571655, + "learning_rate": 5.213571682022294e-06, + "loss": 0.0992, + "step": 96400 + }, + { + "epoch": 15.93987446316485, + "grad_norm": 0.7580718398094177, + "learning_rate": 5.207453902531537e-06, + "loss": 0.0978, + "step": 96500 + }, + { + "epoch": 15.956392467789891, + "grad_norm": 0.6020950675010681, + "learning_rate": 5.201336123040781e-06, + "loss": 0.1, + "step": 96600 + }, + { + "epoch": 15.972910472414933, + "grad_norm": 0.7800185680389404, + "learning_rate": 5.195218343550026e-06, + "loss": 0.1041, + "step": 96700 + }, + { + "epoch": 15.989428477039974, + "grad_norm": 0.6527479290962219, + "learning_rate": 5.189100564059269e-06, + "loss": 0.1504, + "step": 96800 + }, + { + "epoch": 16.005946481665013, + "grad_norm": 2.855896472930908, + "learning_rate": 5.182982784568513e-06, + "loss": 0.1247, + "step": 96900 + }, + { + "epoch": 16.022464486290055, + "grad_norm": 0.6793861985206604, + "learning_rate": 5.176865005077757e-06, + "loss": 0.1015, + "step": 97000 + }, + { + "epoch": 16.022464486290055, + "eval_cer": 0.038601054000410646, + "eval_loss": 0.10963103175163269, + "eval_runtime": 57.4338, + "eval_samples_per_second": 29.443, + "eval_steps_per_second": 7.365, + "eval_wer": 0.21257728095922063, + "step": 97000 + }, + { + "epoch": 16.038982490915096, + "grad_norm": 0.5223618149757385, + "learning_rate": 5.1707472255870015e-06, + "loss": 0.0952, + "step": 97100 + }, + { + "epoch": 16.055500495540137, + "grad_norm": 0.6774017810821533, + "learning_rate": 5.164629446096246e-06, + "loss": 0.101, + "step": 97200 + }, + { + "epoch": 16.07201850016518, + "grad_norm": 0.6046952605247498, + "learning_rate": 5.158511666605489e-06, + "loss": 0.0986, + "step": 97300 + }, + { + "epoch": 16.08853650479022, + "grad_norm": 0.5968722701072693, + "learning_rate": 5.152393887114733e-06, + "loss": 0.1172, + "step": 97400 + }, + { + "epoch": 16.10505450941526, + "grad_norm": 0.49890223145484924, + "learning_rate": 5.146276107623978e-06, + "loss": 0.095, + "step": 97500 + }, + { + "epoch": 16.121572514040302, + "grad_norm": 0.5506992936134338, + "learning_rate": 5.140158328133221e-06, + "loss": 0.0983, + "step": 97600 + }, + { + "epoch": 16.138090518665344, + "grad_norm": 0.8042443990707397, + "learning_rate": 5.134040548642465e-06, + "loss": 0.2112, + "step": 97700 + }, + { + "epoch": 16.154608523290385, + "grad_norm": 0.8264985680580139, + "learning_rate": 5.127922769151708e-06, + "loss": 0.1013, + "step": 97800 + }, + { + "epoch": 16.171126527915426, + "grad_norm": 0.5965238809585571, + "learning_rate": 5.121804989660953e-06, + "loss": 0.1457, + "step": 97900 + }, + { + "epoch": 16.187644532540467, + "grad_norm": 0.8089606761932373, + "learning_rate": 5.115687210170197e-06, + "loss": 0.1058, + "step": 98000 + }, + { + "epoch": 16.187644532540467, + "eval_cer": 0.03804496612141537, + "eval_loss": 0.10823166370391846, + "eval_runtime": 58.9445, + "eval_samples_per_second": 28.688, + "eval_steps_per_second": 7.176, + "eval_wer": 0.21132829575969525, + "step": 98000 + }, + { + "epoch": 16.20416253716551, + "grad_norm": 0.5970620512962341, + "learning_rate": 5.109569430679441e-06, + "loss": 0.1046, + "step": 98100 + }, + { + "epoch": 16.22068054179055, + "grad_norm": 0.45412981510162354, + "learning_rate": 5.103451651188685e-06, + "loss": 0.1035, + "step": 98200 + }, + { + "epoch": 16.23719854641559, + "grad_norm": 0.5827893018722534, + "learning_rate": 5.097333871697929e-06, + "loss": 0.0992, + "step": 98300 + }, + { + "epoch": 16.253716551040633, + "grad_norm": 0.6516451239585876, + "learning_rate": 5.091216092207173e-06, + "loss": 0.1023, + "step": 98400 + }, + { + "epoch": 16.270234555665677, + "grad_norm": 0.731946587562561, + "learning_rate": 5.085098312716417e-06, + "loss": 0.0969, + "step": 98500 + }, + { + "epoch": 16.28675256029072, + "grad_norm": 0.70552659034729, + "learning_rate": 5.07898053322566e-06, + "loss": 0.1478, + "step": 98600 + }, + { + "epoch": 16.30327056491576, + "grad_norm": 0.7130141258239746, + "learning_rate": 5.072862753734905e-06, + "loss": 0.1049, + "step": 98700 + }, + { + "epoch": 16.3197885695408, + "grad_norm": 0.9122040867805481, + "learning_rate": 5.066744974244149e-06, + "loss": 0.0976, + "step": 98800 + }, + { + "epoch": 16.336306574165842, + "grad_norm": 0.7150751948356628, + "learning_rate": 5.060627194753392e-06, + "loss": 0.0938, + "step": 98900 + }, + { + "epoch": 16.352824578790884, + "grad_norm": 0.571891188621521, + "learning_rate": 5.0545094152626366e-06, + "loss": 0.0966, + "step": 99000 + }, + { + "epoch": 16.352824578790884, + "eval_cer": 0.03804496612141537, + "eval_loss": 0.1098564937710762, + "eval_runtime": 59.3833, + "eval_samples_per_second": 28.476, + "eval_steps_per_second": 7.123, + "eval_wer": 0.21089115093986135, + "step": 99000 + }, + { + "epoch": 16.369342583415925, + "grad_norm": 0.5009652376174927, + "learning_rate": 5.048391635771881e-06, + "loss": 0.0956, + "step": 99100 + }, + { + "epoch": 16.385860588040966, + "grad_norm": 0.6411744952201843, + "learning_rate": 5.042273856281124e-06, + "loss": 0.1109, + "step": 99200 + }, + { + "epoch": 16.402378592666008, + "grad_norm": 0.7724633812904358, + "learning_rate": 5.036156076790369e-06, + "loss": 0.0996, + "step": 99300 + }, + { + "epoch": 16.41889659729105, + "grad_norm": 0.5513240694999695, + "learning_rate": 5.030038297299612e-06, + "loss": 0.0972, + "step": 99400 + }, + { + "epoch": 16.43541460191609, + "grad_norm": 0.685674786567688, + "learning_rate": 5.023920517808856e-06, + "loss": 0.1259, + "step": 99500 + }, + { + "epoch": 16.45193260654113, + "grad_norm": 0.7051562070846558, + "learning_rate": 5.017802738318101e-06, + "loss": 0.1076, + "step": 99600 + }, + { + "epoch": 16.468450611166173, + "grad_norm": 0.6196284890174866, + "learning_rate": 5.011684958827344e-06, + "loss": 0.1033, + "step": 99700 + }, + { + "epoch": 16.484968615791214, + "grad_norm": 0.6664172410964966, + "learning_rate": 5.005567179336588e-06, + "loss": 0.0988, + "step": 99800 + }, + { + "epoch": 16.501486620416255, + "grad_norm": 0.8101247549057007, + "learning_rate": 4.999449399845832e-06, + "loss": 0.1005, + "step": 99900 + }, + { + "epoch": 16.518004625041296, + "grad_norm": 0.5494738817214966, + "learning_rate": 4.993331620355076e-06, + "loss": 0.0988, + "step": 100000 + }, + { + "epoch": 16.518004625041296, + "eval_cer": 0.03866949558551776, + "eval_loss": 0.10886727273464203, + "eval_runtime": 57.6452, + "eval_samples_per_second": 29.335, + "eval_steps_per_second": 7.338, + "eval_wer": 0.21388871541872229, + "step": 100000 + }, + { + "epoch": 16.534522629666338, + "grad_norm": 0.752741813659668, + "learning_rate": 4.98721384086432e-06, + "loss": 0.0958, + "step": 100100 + }, + { + "epoch": 16.55104063429138, + "grad_norm": 0.6410621404647827, + "learning_rate": 4.981096061373564e-06, + "loss": 0.1042, + "step": 100200 + }, + { + "epoch": 16.56755863891642, + "grad_norm": 0.5088207125663757, + "learning_rate": 4.974978281882808e-06, + "loss": 0.0953, + "step": 100300 + }, + { + "epoch": 16.58407664354146, + "grad_norm": 0.7134143114089966, + "learning_rate": 4.968860502392052e-06, + "loss": 0.1021, + "step": 100400 + }, + { + "epoch": 16.600594648166503, + "grad_norm": 0.9732416272163391, + "learning_rate": 4.962742722901296e-06, + "loss": 0.1004, + "step": 100500 + }, + { + "epoch": 16.617112652791544, + "grad_norm": 0.5259725451469421, + "learning_rate": 4.95662494341054e-06, + "loss": 0.1008, + "step": 100600 + }, + { + "epoch": 16.633630657416585, + "grad_norm": 0.7298964262008667, + "learning_rate": 4.950507163919784e-06, + "loss": 0.1216, + "step": 100700 + }, + { + "epoch": 16.650148662041627, + "grad_norm": 0.6519650816917419, + "learning_rate": 4.944389384429028e-06, + "loss": 0.0989, + "step": 100800 + }, + { + "epoch": 16.666666666666668, + "grad_norm": 0.5744999647140503, + "learning_rate": 4.938271604938272e-06, + "loss": 0.1034, + "step": 100900 + }, + { + "epoch": 16.68318467129171, + "grad_norm": 0.8439196944236755, + "learning_rate": 4.932153825447516e-06, + "loss": 0.0983, + "step": 101000 + }, + { + "epoch": 16.68318467129171, + "eval_cer": 0.03823318048045993, + "eval_loss": 0.10998154431581497, + "eval_runtime": 57.0437, + "eval_samples_per_second": 29.644, + "eval_steps_per_second": 7.415, + "eval_wer": 0.21026665834009867, + "step": 101000 + }, + { + "epoch": 16.69970267591675, + "grad_norm": 0.7444531917572021, + "learning_rate": 4.9260360459567594e-06, + "loss": 0.0986, + "step": 101100 + }, + { + "epoch": 16.71622068054179, + "grad_norm": 0.64404296875, + "learning_rate": 4.919918266466004e-06, + "loss": 0.0977, + "step": 101200 + }, + { + "epoch": 16.732738685166833, + "grad_norm": 0.5869942903518677, + "learning_rate": 4.913800486975248e-06, + "loss": 0.0943, + "step": 101300 + }, + { + "epoch": 16.749256689791874, + "grad_norm": 0.7566863894462585, + "learning_rate": 4.9076827074844915e-06, + "loss": 0.0932, + "step": 101400 + }, + { + "epoch": 16.765774694416915, + "grad_norm": 0.6613245010375977, + "learning_rate": 4.901564927993736e-06, + "loss": 0.094, + "step": 101500 + }, + { + "epoch": 16.782292699041957, + "grad_norm": 0.5942184925079346, + "learning_rate": 4.89544714850298e-06, + "loss": 0.1035, + "step": 101600 + }, + { + "epoch": 16.798810703666998, + "grad_norm": 0.6501281261444092, + "learning_rate": 4.889329369012224e-06, + "loss": 0.0991, + "step": 101700 + }, + { + "epoch": 16.81532870829204, + "grad_norm": 0.5310657024383545, + "learning_rate": 4.883211589521468e-06, + "loss": 0.1031, + "step": 101800 + }, + { + "epoch": 16.83184671291708, + "grad_norm": 0.7567028403282166, + "learning_rate": 4.877093810030712e-06, + "loss": 0.1248, + "step": 101900 + }, + { + "epoch": 16.848364717542122, + "grad_norm": 0.5749494433403015, + "learning_rate": 4.870976030539956e-06, + "loss": 0.1394, + "step": 102000 + }, + { + "epoch": 16.848364717542122, + "eval_cer": 0.03819040448976798, + "eval_loss": 0.10844554007053375, + "eval_runtime": 55.7352, + "eval_samples_per_second": 30.34, + "eval_steps_per_second": 7.589, + "eval_wer": 0.21051645538000374, + "step": 102000 + }, + { + "epoch": 16.864882722167163, + "grad_norm": 0.6915029883384705, + "learning_rate": 4.8648582510492e-06, + "loss": 0.0999, + "step": 102100 + }, + { + "epoch": 16.881400726792204, + "grad_norm": 0.6687765121459961, + "learning_rate": 4.8587404715584434e-06, + "loss": 0.147, + "step": 102200 + }, + { + "epoch": 16.897918731417246, + "grad_norm": 0.6900584697723389, + "learning_rate": 4.852622692067688e-06, + "loss": 0.1014, + "step": 102300 + }, + { + "epoch": 16.914436736042287, + "grad_norm": 0.5827245712280273, + "learning_rate": 4.846504912576931e-06, + "loss": 0.1018, + "step": 102400 + }, + { + "epoch": 16.930954740667328, + "grad_norm": 0.6611018180847168, + "learning_rate": 4.8403871330861755e-06, + "loss": 0.0986, + "step": 102500 + }, + { + "epoch": 16.94747274529237, + "grad_norm": 0.7171707153320312, + "learning_rate": 4.834269353595419e-06, + "loss": 0.1365, + "step": 102600 + }, + { + "epoch": 16.96399074991741, + "grad_norm": 0.7955174446105957, + "learning_rate": 4.828151574104663e-06, + "loss": 0.1115, + "step": 102700 + }, + { + "epoch": 16.980508754542452, + "grad_norm": 0.6726603507995605, + "learning_rate": 4.822033794613908e-06, + "loss": 0.0998, + "step": 102800 + }, + { + "epoch": 16.997026759167493, + "grad_norm": 0.6470670104026794, + "learning_rate": 4.815916015123152e-06, + "loss": 0.1008, + "step": 102900 + }, + { + "epoch": 17.013544763792535, + "grad_norm": 0.8556126952171326, + "learning_rate": 4.809798235632395e-06, + "loss": 0.1134, + "step": 103000 + }, + { + "epoch": 17.013544763792535, + "eval_cer": 0.03776264458284854, + "eval_loss": 0.1081945076584816, + "eval_runtime": 57.2963, + "eval_samples_per_second": 29.513, + "eval_steps_per_second": 7.383, + "eval_wer": 0.20945481796040716, + "step": 103000 + }, + { + "epoch": 17.030062768417576, + "grad_norm": 0.8898919224739075, + "learning_rate": 4.80368045614164e-06, + "loss": 0.1023, + "step": 103100 + }, + { + "epoch": 17.046580773042617, + "grad_norm": 0.4918752908706665, + "learning_rate": 4.797562676650883e-06, + "loss": 0.1012, + "step": 103200 + }, + { + "epoch": 17.06309877766766, + "grad_norm": 0.5613105297088623, + "learning_rate": 4.7914448971601275e-06, + "loss": 0.1064, + "step": 103300 + }, + { + "epoch": 17.0796167822927, + "grad_norm": 0.6109268665313721, + "learning_rate": 4.785327117669371e-06, + "loss": 0.1048, + "step": 103400 + }, + { + "epoch": 17.09613478691774, + "grad_norm": 0.6237761974334717, + "learning_rate": 4.779209338178615e-06, + "loss": 0.1044, + "step": 103500 + }, + { + "epoch": 17.112652791542782, + "grad_norm": 0.6789395213127136, + "learning_rate": 4.773091558687859e-06, + "loss": 0.0973, + "step": 103600 + }, + { + "epoch": 17.129170796167823, + "grad_norm": 0.7461550831794739, + "learning_rate": 4.766973779197103e-06, + "loss": 0.0965, + "step": 103700 + }, + { + "epoch": 17.145688800792865, + "grad_norm": 0.655927836894989, + "learning_rate": 4.760855999706347e-06, + "loss": 0.0962, + "step": 103800 + }, + { + "epoch": 17.162206805417906, + "grad_norm": 0.6233280897140503, + "learning_rate": 4.754738220215591e-06, + "loss": 0.0949, + "step": 103900 + }, + { + "epoch": 17.178724810042947, + "grad_norm": 0.6471518874168396, + "learning_rate": 4.748620440724835e-06, + "loss": 0.0916, + "step": 104000 + }, + { + "epoch": 17.178724810042947, + "eval_cer": 0.038019300527000206, + "eval_loss": 0.10819939523935318, + "eval_runtime": 61.0456, + "eval_samples_per_second": 27.701, + "eval_steps_per_second": 6.929, + "eval_wer": 0.21051645538000374, + "step": 104000 + }, + { + "epoch": 17.19524281466799, + "grad_norm": 0.6647598147392273, + "learning_rate": 4.742502661234079e-06, + "loss": 0.0941, + "step": 104100 + }, + { + "epoch": 17.21176081929303, + "grad_norm": 0.6160650849342346, + "learning_rate": 4.736384881743323e-06, + "loss": 0.0958, + "step": 104200 + }, + { + "epoch": 17.22827882391807, + "grad_norm": 0.7830073833465576, + "learning_rate": 4.730267102252567e-06, + "loss": 0.0988, + "step": 104300 + }, + { + "epoch": 17.244796828543112, + "grad_norm": 0.5620446801185608, + "learning_rate": 4.724149322761811e-06, + "loss": 0.0981, + "step": 104400 + }, + { + "epoch": 17.261314833168154, + "grad_norm": 0.5912889838218689, + "learning_rate": 4.718031543271055e-06, + "loss": 0.1015, + "step": 104500 + }, + { + "epoch": 17.277832837793195, + "grad_norm": 0.8370086550712585, + "learning_rate": 4.711913763780298e-06, + "loss": 0.0953, + "step": 104600 + }, + { + "epoch": 17.294350842418236, + "grad_norm": 0.5910390615463257, + "learning_rate": 4.705795984289543e-06, + "loss": 0.1, + "step": 104700 + }, + { + "epoch": 17.310868847043277, + "grad_norm": 0.6430118083953857, + "learning_rate": 4.699678204798786e-06, + "loss": 0.1022, + "step": 104800 + }, + { + "epoch": 17.32738685166832, + "grad_norm": 0.6246772408485413, + "learning_rate": 4.6935604253080305e-06, + "loss": 0.0989, + "step": 104900 + }, + { + "epoch": 17.34390485629336, + "grad_norm": 0.598013699054718, + "learning_rate": 4.687442645817275e-06, + "loss": 0.1074, + "step": 105000 + }, + { + "epoch": 17.34390485629336, + "eval_cer": 0.038062076517692146, + "eval_loss": 0.11045213788747787, + "eval_runtime": 57.9821, + "eval_samples_per_second": 29.164, + "eval_steps_per_second": 7.295, + "eval_wer": 0.20883032536064447, + "step": 105000 + }, + { + "epoch": 17.3604228609184, + "grad_norm": 1.371077060699463, + "learning_rate": 4.681324866326519e-06, + "loss": 0.0984, + "step": 105100 + }, + { + "epoch": 17.376940865543443, + "grad_norm": 0.9299737811088562, + "learning_rate": 4.6752070868357626e-06, + "loss": 0.1229, + "step": 105200 + }, + { + "epoch": 17.393458870168484, + "grad_norm": 0.7221639156341553, + "learning_rate": 4.669089307345007e-06, + "loss": 0.104, + "step": 105300 + }, + { + "epoch": 17.409976874793525, + "grad_norm": 1.0506755113601685, + "learning_rate": 4.66297152785425e-06, + "loss": 0.0978, + "step": 105400 + }, + { + "epoch": 17.426494879418566, + "grad_norm": 0.6425598859786987, + "learning_rate": 4.656853748363495e-06, + "loss": 0.0983, + "step": 105500 + }, + { + "epoch": 17.443012884043608, + "grad_norm": 0.7174783945083618, + "learning_rate": 4.650735968872738e-06, + "loss": 0.0997, + "step": 105600 + }, + { + "epoch": 17.45953088866865, + "grad_norm": 0.5940792560577393, + "learning_rate": 4.644618189381982e-06, + "loss": 0.1225, + "step": 105700 + }, + { + "epoch": 17.47604889329369, + "grad_norm": 0.47687768936157227, + "learning_rate": 4.638500409891226e-06, + "loss": 0.0972, + "step": 105800 + }, + { + "epoch": 17.49256689791873, + "grad_norm": 0.7543063759803772, + "learning_rate": 4.63238263040047e-06, + "loss": 0.095, + "step": 105900 + }, + { + "epoch": 17.509084902543773, + "grad_norm": 0.6112996339797974, + "learning_rate": 4.6262648509097145e-06, + "loss": 0.095, + "step": 106000 + }, + { + "epoch": 17.509084902543773, + "eval_cer": 0.038027855725138594, + "eval_loss": 0.10872569680213928, + "eval_runtime": 58.3098, + "eval_samples_per_second": 29.0, + "eval_steps_per_second": 7.254, + "eval_wer": 0.20964216574033598, + "step": 106000 + }, + { + "epoch": 17.525602907168814, + "grad_norm": 0.7269030809402466, + "learning_rate": 4.620147071418958e-06, + "loss": 0.1022, + "step": 106100 + }, + { + "epoch": 17.542120911793855, + "grad_norm": 0.6887866258621216, + "learning_rate": 4.614029291928202e-06, + "loss": 0.1003, + "step": 106200 + }, + { + "epoch": 17.558638916418897, + "grad_norm": 0.7257598638534546, + "learning_rate": 4.6079115124374466e-06, + "loss": 0.096, + "step": 106300 + }, + { + "epoch": 17.575156921043938, + "grad_norm": 0.8789656162261963, + "learning_rate": 4.60179373294669e-06, + "loss": 0.0986, + "step": 106400 + }, + { + "epoch": 17.59167492566898, + "grad_norm": 0.6779934167861938, + "learning_rate": 4.595675953455934e-06, + "loss": 0.1376, + "step": 106500 + }, + { + "epoch": 17.60819293029402, + "grad_norm": 0.63017737865448, + "learning_rate": 4.589558173965178e-06, + "loss": 0.1013, + "step": 106600 + }, + { + "epoch": 17.62471093491906, + "grad_norm": 0.6014170050621033, + "learning_rate": 4.583440394474422e-06, + "loss": 0.093, + "step": 106700 + }, + { + "epoch": 17.641228939544103, + "grad_norm": 0.7778664231300354, + "learning_rate": 4.577322614983666e-06, + "loss": 0.1045, + "step": 106800 + }, + { + "epoch": 17.657746944169144, + "grad_norm": 0.6275627017021179, + "learning_rate": 4.57120483549291e-06, + "loss": 0.0915, + "step": 106900 + }, + { + "epoch": 17.674264948794185, + "grad_norm": 0.5987668633460999, + "learning_rate": 4.565087056002153e-06, + "loss": 0.0973, + "step": 107000 + }, + { + "epoch": 17.674264948794185, + "eval_cer": 0.03785675176237081, + "eval_loss": 0.10704370588064194, + "eval_runtime": 57.1479, + "eval_samples_per_second": 29.59, + "eval_steps_per_second": 7.402, + "eval_wer": 0.20826828202085806, + "step": 107000 + }, + { + "epoch": 17.690782953419227, + "grad_norm": 0.6655980944633484, + "learning_rate": 4.558969276511398e-06, + "loss": 0.097, + "step": 107100 + }, + { + "epoch": 17.707300958044268, + "grad_norm": 0.6261619925498962, + "learning_rate": 4.552851497020642e-06, + "loss": 0.0994, + "step": 107200 + }, + { + "epoch": 17.72381896266931, + "grad_norm": 0.6233117580413818, + "learning_rate": 4.546733717529886e-06, + "loss": 0.0967, + "step": 107300 + }, + { + "epoch": 17.74033696729435, + "grad_norm": 0.5294709205627441, + "learning_rate": 4.54061593803913e-06, + "loss": 0.0941, + "step": 107400 + }, + { + "epoch": 17.75685497191939, + "grad_norm": 0.6875845193862915, + "learning_rate": 4.534498158548374e-06, + "loss": 0.1, + "step": 107500 + }, + { + "epoch": 17.773372976544433, + "grad_norm": 0.7154032588005066, + "learning_rate": 4.5283803790576175e-06, + "loss": 0.094, + "step": 107600 + }, + { + "epoch": 17.789890981169474, + "grad_norm": 0.672591507434845, + "learning_rate": 4.522262599566862e-06, + "loss": 0.0971, + "step": 107700 + }, + { + "epoch": 17.806408985794516, + "grad_norm": 0.744452178478241, + "learning_rate": 4.516144820076105e-06, + "loss": 0.092, + "step": 107800 + }, + { + "epoch": 17.822926990419557, + "grad_norm": 0.8155786991119385, + "learning_rate": 4.51002704058535e-06, + "loss": 0.0919, + "step": 107900 + }, + { + "epoch": 17.839444995044598, + "grad_norm": 0.7288583517074585, + "learning_rate": 4.503909261094593e-06, + "loss": 0.0984, + "step": 108000 + }, + { + "epoch": 17.839444995044598, + "eval_cer": 0.03769420299774143, + "eval_loss": 0.10798373073339462, + "eval_runtime": 57.3331, + "eval_samples_per_second": 29.494, + "eval_steps_per_second": 7.378, + "eval_wer": 0.20833073128083432, + "step": 108000 + }, + { + "epoch": 17.85596299966964, + "grad_norm": 0.6306447386741638, + "learning_rate": 4.497791481603837e-06, + "loss": 0.0945, + "step": 108100 + }, + { + "epoch": 17.87248100429468, + "grad_norm": 0.7502180933952332, + "learning_rate": 4.491673702113082e-06, + "loss": 0.0991, + "step": 108200 + }, + { + "epoch": 17.888999008919722, + "grad_norm": 0.6571519374847412, + "learning_rate": 4.485555922622325e-06, + "loss": 0.1046, + "step": 108300 + }, + { + "epoch": 17.905517013544763, + "grad_norm": 0.6141965389251709, + "learning_rate": 4.4794381431315694e-06, + "loss": 0.0982, + "step": 108400 + }, + { + "epoch": 17.922035018169804, + "grad_norm": 0.4874700903892517, + "learning_rate": 4.473320363640814e-06, + "loss": 0.0984, + "step": 108500 + }, + { + "epoch": 17.938553022794846, + "grad_norm": 0.8021253347396851, + "learning_rate": 4.467202584150057e-06, + "loss": 0.0955, + "step": 108600 + }, + { + "epoch": 17.955071027419887, + "grad_norm": 0.6977095603942871, + "learning_rate": 4.4610848046593015e-06, + "loss": 0.1019, + "step": 108700 + }, + { + "epoch": 17.97158903204493, + "grad_norm": 0.649456262588501, + "learning_rate": 4.454967025168545e-06, + "loss": 0.0992, + "step": 108800 + }, + { + "epoch": 17.98810703666997, + "grad_norm": 0.6027668714523315, + "learning_rate": 4.448849245677789e-06, + "loss": 0.0958, + "step": 108900 + }, + { + "epoch": 18.00462504129501, + "grad_norm": 0.7994409203529358, + "learning_rate": 4.442731466187033e-06, + "loss": 0.0972, + "step": 109000 + }, + { + "epoch": 18.00462504129501, + "eval_cer": 0.03761720621449593, + "eval_loss": 0.10664419084787369, + "eval_runtime": 61.6933, + "eval_samples_per_second": 27.41, + "eval_steps_per_second": 6.856, + "eval_wer": 0.20845562980078686, + "step": 109000 + }, + { + "epoch": 18.021143045920052, + "grad_norm": 0.5892287492752075, + "learning_rate": 4.436613686696277e-06, + "loss": 0.1006, + "step": 109100 + }, + { + "epoch": 18.037661050545093, + "grad_norm": 0.5561397075653076, + "learning_rate": 4.4304959072055205e-06, + "loss": 0.1027, + "step": 109200 + }, + { + "epoch": 18.054179055170135, + "grad_norm": 0.6827639937400818, + "learning_rate": 4.424378127714765e-06, + "loss": 0.0924, + "step": 109300 + }, + { + "epoch": 18.070697059795176, + "grad_norm": 0.4659579396247864, + "learning_rate": 4.418260348224009e-06, + "loss": 0.0955, + "step": 109400 + }, + { + "epoch": 18.087215064420217, + "grad_norm": 0.5949708223342896, + "learning_rate": 4.4121425687332535e-06, + "loss": 0.1026, + "step": 109500 + }, + { + "epoch": 18.10373306904526, + "grad_norm": 0.7214411497116089, + "learning_rate": 4.406024789242497e-06, + "loss": 0.0951, + "step": 109600 + }, + { + "epoch": 18.1202510736703, + "grad_norm": 0.7198790311813354, + "learning_rate": 4.399907009751741e-06, + "loss": 0.0958, + "step": 109700 + }, + { + "epoch": 18.13676907829534, + "grad_norm": 0.6852260828018188, + "learning_rate": 4.393789230260985e-06, + "loss": 0.0946, + "step": 109800 + }, + { + "epoch": 18.153287082920382, + "grad_norm": 0.6294082403182983, + "learning_rate": 4.387671450770229e-06, + "loss": 0.0956, + "step": 109900 + }, + { + "epoch": 18.169805087545424, + "grad_norm": 0.6702645421028137, + "learning_rate": 4.3815536712794725e-06, + "loss": 0.1124, + "step": 110000 + }, + { + "epoch": 18.169805087545424, + "eval_cer": 0.037873862158647596, + "eval_loss": 0.10725517570972443, + "eval_runtime": 57.4138, + "eval_samples_per_second": 29.453, + "eval_steps_per_second": 7.368, + "eval_wer": 0.20920502092050208, + "step": 110000 + }, + { + "epoch": 18.186323092170465, + "grad_norm": 0.8165464997291565, + "learning_rate": 4.375435891788717e-06, + "loss": 0.1, + "step": 110100 + }, + { + "epoch": 18.202841096795506, + "grad_norm": 0.5550252199172974, + "learning_rate": 4.36931811229796e-06, + "loss": 0.1161, + "step": 110200 + }, + { + "epoch": 18.219359101420547, + "grad_norm": 0.6268564462661743, + "learning_rate": 4.3632003328072045e-06, + "loss": 0.0868, + "step": 110300 + }, + { + "epoch": 18.23587710604559, + "grad_norm": 0.9998523592948914, + "learning_rate": 4.357082553316449e-06, + "loss": 0.0888, + "step": 110400 + }, + { + "epoch": 18.25239511067063, + "grad_norm": 0.7963108420372009, + "learning_rate": 4.350964773825692e-06, + "loss": 0.0976, + "step": 110500 + }, + { + "epoch": 18.26891311529567, + "grad_norm": 0.6764956712722778, + "learning_rate": 4.344846994334937e-06, + "loss": 0.0945, + "step": 110600 + }, + { + "epoch": 18.285431119920712, + "grad_norm": 0.845342218875885, + "learning_rate": 4.338729214844181e-06, + "loss": 0.1018, + "step": 110700 + }, + { + "epoch": 18.301949124545754, + "grad_norm": 0.519926130771637, + "learning_rate": 4.332611435353424e-06, + "loss": 0.1012, + "step": 110800 + }, + { + "epoch": 18.318467129170795, + "grad_norm": 0.5771397352218628, + "learning_rate": 4.326493655862669e-06, + "loss": 0.0963, + "step": 110900 + }, + { + "epoch": 18.334985133795836, + "grad_norm": 0.6836599707603455, + "learning_rate": 4.320375876371912e-06, + "loss": 0.0961, + "step": 111000 + }, + { + "epoch": 18.334985133795836, + "eval_cer": 0.03749743344055848, + "eval_loss": 0.10766017436981201, + "eval_runtime": 57.0005, + "eval_samples_per_second": 29.666, + "eval_steps_per_second": 7.421, + "eval_wer": 0.20683194904140387, + "step": 111000 + }, + { + "epoch": 18.351503138420878, + "grad_norm": 0.655540943145752, + "learning_rate": 4.3142580968811565e-06, + "loss": 0.0937, + "step": 111100 + }, + { + "epoch": 18.36802114304592, + "grad_norm": 0.7102084755897522, + "learning_rate": 4.3081403173904e-06, + "loss": 0.0982, + "step": 111200 + }, + { + "epoch": 18.38453914767096, + "grad_norm": 0.5947690010070801, + "learning_rate": 4.302022537899644e-06, + "loss": 0.0984, + "step": 111300 + }, + { + "epoch": 18.401057152296, + "grad_norm": 0.5899379253387451, + "learning_rate": 4.295904758408888e-06, + "loss": 0.0897, + "step": 111400 + }, + { + "epoch": 18.417575156921043, + "grad_norm": 0.48331010341644287, + "learning_rate": 4.289786978918132e-06, + "loss": 0.0971, + "step": 111500 + }, + { + "epoch": 18.434093161546084, + "grad_norm": 0.6152350306510925, + "learning_rate": 4.283669199427376e-06, + "loss": 0.1001, + "step": 111600 + }, + { + "epoch": 18.450611166171125, + "grad_norm": 0.5093644857406616, + "learning_rate": 4.277551419936621e-06, + "loss": 0.099, + "step": 111700 + }, + { + "epoch": 18.467129170796166, + "grad_norm": 0.6065688133239746, + "learning_rate": 4.271433640445864e-06, + "loss": 0.1007, + "step": 111800 + }, + { + "epoch": 18.483647175421208, + "grad_norm": 0.7295845746994019, + "learning_rate": 4.265315860955108e-06, + "loss": 0.1, + "step": 111900 + }, + { + "epoch": 18.50016518004625, + "grad_norm": 1.8832347393035889, + "learning_rate": 4.259198081464352e-06, + "loss": 0.0975, + "step": 112000 + }, + { + "epoch": 18.50016518004625, + "eval_cer": 0.03751454383683526, + "eval_loss": 0.10801618546247482, + "eval_runtime": 57.4664, + "eval_samples_per_second": 29.426, + "eval_steps_per_second": 7.361, + "eval_wer": 0.2083931805408106, + "step": 112000 + }, + { + "epoch": 18.51668318467129, + "grad_norm": 0.7237940430641174, + "learning_rate": 4.253080301973596e-06, + "loss": 0.0929, + "step": 112100 + }, + { + "epoch": 18.53320118929633, + "grad_norm": 0.8659229278564453, + "learning_rate": 4.24696252248284e-06, + "loss": 0.0956, + "step": 112200 + }, + { + "epoch": 18.549719193921373, + "grad_norm": 0.6070505380630493, + "learning_rate": 4.240844742992084e-06, + "loss": 0.0942, + "step": 112300 + }, + { + "epoch": 18.566237198546414, + "grad_norm": 0.5244882702827454, + "learning_rate": 4.234726963501327e-06, + "loss": 0.0917, + "step": 112400 + }, + { + "epoch": 18.582755203171455, + "grad_norm": 0.7137103080749512, + "learning_rate": 4.228609184010572e-06, + "loss": 0.0949, + "step": 112500 + }, + { + "epoch": 18.599273207796497, + "grad_norm": 0.5891650319099426, + "learning_rate": 4.222491404519816e-06, + "loss": 0.0953, + "step": 112600 + }, + { + "epoch": 18.615791212421538, + "grad_norm": 0.5612820386886597, + "learning_rate": 4.2163736250290595e-06, + "loss": 0.0966, + "step": 112700 + }, + { + "epoch": 18.63230921704658, + "grad_norm": 0.7165923714637756, + "learning_rate": 4.210255845538304e-06, + "loss": 0.0908, + "step": 112800 + }, + { + "epoch": 18.64882722167162, + "grad_norm": 0.6711476445198059, + "learning_rate": 4.204138066047548e-06, + "loss": 0.1005, + "step": 112900 + }, + { + "epoch": 18.66534522629666, + "grad_norm": 0.5342008471488953, + "learning_rate": 4.198020286556792e-06, + "loss": 0.089, + "step": 113000 + }, + { + "epoch": 18.66534522629666, + "eval_cer": 0.03749743344055848, + "eval_loss": 0.10710610449314117, + "eval_runtime": 62.8493, + "eval_samples_per_second": 26.906, + "eval_steps_per_second": 6.73, + "eval_wer": 0.20783113720102417, + "step": 113000 + }, + { + "epoch": 18.681863230921703, + "grad_norm": 0.49494898319244385, + "learning_rate": 4.191902507066036e-06, + "loss": 0.098, + "step": 113100 + }, + { + "epoch": 18.698381235546744, + "grad_norm": 0.8655831217765808, + "learning_rate": 4.185784727575279e-06, + "loss": 0.1006, + "step": 113200 + }, + { + "epoch": 18.71489924017179, + "grad_norm": 0.7897951006889343, + "learning_rate": 4.179666948084524e-06, + "loss": 0.0914, + "step": 113300 + }, + { + "epoch": 18.73141724479683, + "grad_norm": 0.5988522171974182, + "learning_rate": 4.173549168593767e-06, + "loss": 0.1146, + "step": 113400 + }, + { + "epoch": 18.74793524942187, + "grad_norm": 0.690118134021759, + "learning_rate": 4.1674313891030114e-06, + "loss": 0.096, + "step": 113500 + }, + { + "epoch": 18.764453254046913, + "grad_norm": 0.7711309790611267, + "learning_rate": 4.161313609612255e-06, + "loss": 0.0935, + "step": 113600 + }, + { + "epoch": 18.780971258671954, + "grad_norm": 0.596615195274353, + "learning_rate": 4.155195830121499e-06, + "loss": 0.0973, + "step": 113700 + }, + { + "epoch": 18.797489263296995, + "grad_norm": 0.7073595523834229, + "learning_rate": 4.1490780506307435e-06, + "loss": 0.0941, + "step": 113800 + }, + { + "epoch": 18.814007267922037, + "grad_norm": 0.7061730027198792, + "learning_rate": 4.142960271139988e-06, + "loss": 0.0935, + "step": 113900 + }, + { + "epoch": 18.830525272547078, + "grad_norm": 0.7775730490684509, + "learning_rate": 4.136842491649231e-06, + "loss": 0.0902, + "step": 114000 + }, + { + "epoch": 18.830525272547078, + "eval_cer": 0.03754876462938882, + "eval_loss": 0.10710606724023819, + "eval_runtime": 57.0219, + "eval_samples_per_second": 29.655, + "eval_steps_per_second": 7.418, + "eval_wer": 0.20683194904140387, + "step": 114000 + }, + { + "epoch": 18.84704327717212, + "grad_norm": 0.6550432443618774, + "learning_rate": 4.130724712158476e-06, + "loss": 0.0954, + "step": 114100 + }, + { + "epoch": 18.86356128179716, + "grad_norm": 0.5847755670547485, + "learning_rate": 4.124606932667719e-06, + "loss": 0.0996, + "step": 114200 + }, + { + "epoch": 18.880079286422202, + "grad_norm": 0.6805459260940552, + "learning_rate": 4.118489153176963e-06, + "loss": 0.0948, + "step": 114300 + }, + { + "epoch": 18.896597291047243, + "grad_norm": 0.6957184076309204, + "learning_rate": 4.112371373686207e-06, + "loss": 0.0981, + "step": 114400 + }, + { + "epoch": 18.913115295672284, + "grad_norm": 0.6642732620239258, + "learning_rate": 4.106253594195451e-06, + "loss": 0.0929, + "step": 114500 + }, + { + "epoch": 18.929633300297326, + "grad_norm": 0.6945010423660278, + "learning_rate": 4.100135814704695e-06, + "loss": 0.1019, + "step": 114600 + }, + { + "epoch": 18.946151304922367, + "grad_norm": 0.7043463587760925, + "learning_rate": 4.094018035213939e-06, + "loss": 0.0999, + "step": 114700 + }, + { + "epoch": 18.962669309547408, + "grad_norm": 0.5579137206077576, + "learning_rate": 4.087900255723183e-06, + "loss": 0.0944, + "step": 114800 + }, + { + "epoch": 18.97918731417245, + "grad_norm": 0.6382579803466797, + "learning_rate": 4.081782476232427e-06, + "loss": 0.0975, + "step": 114900 + }, + { + "epoch": 18.99570531879749, + "grad_norm": 0.6321828365325928, + "learning_rate": 4.075664696741671e-06, + "loss": 0.101, + "step": 115000 + }, + { + "epoch": 18.99570531879749, + "eval_cer": 0.03747176784614332, + "eval_loss": 0.10826370120048523, + "eval_runtime": 56.9853, + "eval_samples_per_second": 29.674, + "eval_steps_per_second": 7.423, + "eval_wer": 0.20770623868107163, + "step": 115000 + }, + { + "epoch": 19.012223323422532, + "grad_norm": 0.6674085855484009, + "learning_rate": 4.069546917250915e-06, + "loss": 0.0945, + "step": 115100 + }, + { + "epoch": 19.028741328047573, + "grad_norm": 0.7357644438743591, + "learning_rate": 4.063429137760159e-06, + "loss": 0.0907, + "step": 115200 + }, + { + "epoch": 19.045259332672615, + "grad_norm": 0.6607419848442078, + "learning_rate": 4.057311358269403e-06, + "loss": 0.1015, + "step": 115300 + }, + { + "epoch": 19.061777337297656, + "grad_norm": 0.7660622000694275, + "learning_rate": 4.0511935787786465e-06, + "loss": 0.0989, + "step": 115400 + }, + { + "epoch": 19.078295341922697, + "grad_norm": 0.5479562282562256, + "learning_rate": 4.045075799287891e-06, + "loss": 0.0942, + "step": 115500 + }, + { + "epoch": 19.09481334654774, + "grad_norm": 0.9880116581916809, + "learning_rate": 4.038958019797134e-06, + "loss": 0.0926, + "step": 115600 + }, + { + "epoch": 19.11133135117278, + "grad_norm": 0.9058769941329956, + "learning_rate": 4.032840240306379e-06, + "loss": 0.0956, + "step": 115700 + }, + { + "epoch": 19.12784935579782, + "grad_norm": 0.6099743247032166, + "learning_rate": 4.026722460815622e-06, + "loss": 0.0999, + "step": 115800 + }, + { + "epoch": 19.144367360422862, + "grad_norm": 0.7211606502532959, + "learning_rate": 4.020604681324866e-06, + "loss": 0.093, + "step": 115900 + }, + { + "epoch": 19.160885365047903, + "grad_norm": 0.7449648380279541, + "learning_rate": 4.014486901834111e-06, + "loss": 0.0957, + "step": 116000 + }, + { + "epoch": 19.160885365047903, + "eval_cer": 0.03714667031688454, + "eval_loss": 0.10729096084833145, + "eval_runtime": 57.487, + "eval_samples_per_second": 29.415, + "eval_steps_per_second": 7.358, + "eval_wer": 0.20583276088178354, + "step": 116000 + }, + { + "epoch": 19.177403369672945, + "grad_norm": 0.7683896422386169, + "learning_rate": 4.008369122343355e-06, + "loss": 0.0959, + "step": 116100 + }, + { + "epoch": 19.193921374297986, + "grad_norm": 0.6918036341667175, + "learning_rate": 4.0022513428525985e-06, + "loss": 0.0922, + "step": 116200 + }, + { + "epoch": 19.210439378923027, + "grad_norm": 0.9067769050598145, + "learning_rate": 3.996133563361843e-06, + "loss": 0.1088, + "step": 116300 + }, + { + "epoch": 19.22695738354807, + "grad_norm": 0.5687412023544312, + "learning_rate": 3.990015783871086e-06, + "loss": 0.0955, + "step": 116400 + }, + { + "epoch": 19.24347538817311, + "grad_norm": 0.6453192830085754, + "learning_rate": 3.9838980043803305e-06, + "loss": 0.0942, + "step": 116500 + }, + { + "epoch": 19.25999339279815, + "grad_norm": 0.6212557554244995, + "learning_rate": 3.977780224889574e-06, + "loss": 0.0972, + "step": 116600 + }, + { + "epoch": 19.276511397423192, + "grad_norm": 0.7683126926422119, + "learning_rate": 3.971662445398818e-06, + "loss": 0.0943, + "step": 116700 + }, + { + "epoch": 19.293029402048234, + "grad_norm": 0.9485934972763062, + "learning_rate": 3.965544665908062e-06, + "loss": 0.0954, + "step": 116800 + }, + { + "epoch": 19.309547406673275, + "grad_norm": 0.5345008373260498, + "learning_rate": 3.959426886417306e-06, + "loss": 0.0951, + "step": 116900 + }, + { + "epoch": 19.326065411298316, + "grad_norm": 0.5996564626693726, + "learning_rate": 3.95330910692655e-06, + "loss": 0.094, + "step": 117000 + }, + { + "epoch": 19.326065411298316, + "eval_cer": 0.03736910546848265, + "eval_loss": 0.10748081654310226, + "eval_runtime": 57.0717, + "eval_samples_per_second": 29.629, + "eval_steps_per_second": 7.412, + "eval_wer": 0.20670705052145133, + "step": 117000 + }, + { + "epoch": 19.342583415923357, + "grad_norm": 0.7661871910095215, + "learning_rate": 3.947191327435794e-06, + "loss": 0.092, + "step": 117100 + }, + { + "epoch": 19.3591014205484, + "grad_norm": 0.5788329839706421, + "learning_rate": 3.941073547945038e-06, + "loss": 0.0949, + "step": 117200 + }, + { + "epoch": 19.37561942517344, + "grad_norm": 0.6844035983085632, + "learning_rate": 3.9349557684542825e-06, + "loss": 0.0948, + "step": 117300 + }, + { + "epoch": 19.39213742979848, + "grad_norm": 0.5855575203895569, + "learning_rate": 3.928837988963526e-06, + "loss": 0.0932, + "step": 117400 + }, + { + "epoch": 19.408655434423522, + "grad_norm": 0.500566840171814, + "learning_rate": 3.92272020947277e-06, + "loss": 0.0984, + "step": 117500 + }, + { + "epoch": 19.425173439048564, + "grad_norm": 0.7234964370727539, + "learning_rate": 3.916602429982014e-06, + "loss": 0.1, + "step": 117600 + }, + { + "epoch": 19.441691443673605, + "grad_norm": 0.6670413613319397, + "learning_rate": 3.910484650491258e-06, + "loss": 0.0903, + "step": 117700 + }, + { + "epoch": 19.458209448298646, + "grad_norm": 1.1672645807266235, + "learning_rate": 3.9043668710005015e-06, + "loss": 0.0941, + "step": 117800 + }, + { + "epoch": 19.474727452923688, + "grad_norm": 0.8242597579956055, + "learning_rate": 3.898249091509746e-06, + "loss": 0.1007, + "step": 117900 + }, + { + "epoch": 19.49124545754873, + "grad_norm": 0.6898741722106934, + "learning_rate": 3.892131312018989e-06, + "loss": 0.094, + "step": 118000 + }, + { + "epoch": 19.49124545754873, + "eval_cer": 0.03757443022380398, + "eval_loss": 0.10757853835821152, + "eval_runtime": 59.6316, + "eval_samples_per_second": 28.357, + "eval_steps_per_second": 7.094, + "eval_wer": 0.2075188909011428, + "step": 118000 + }, + { + "epoch": 19.50776346217377, + "grad_norm": 1.0092437267303467, + "learning_rate": 3.8860135325282336e-06, + "loss": 0.0977, + "step": 118100 + }, + { + "epoch": 19.52428146679881, + "grad_norm": 0.5592585802078247, + "learning_rate": 3.879895753037478e-06, + "loss": 0.0908, + "step": 118200 + }, + { + "epoch": 19.540799471423853, + "grad_norm": 0.7661089301109314, + "learning_rate": 3.873777973546722e-06, + "loss": 0.0894, + "step": 118300 + }, + { + "epoch": 19.557317476048894, + "grad_norm": 0.6303833723068237, + "learning_rate": 3.867660194055966e-06, + "loss": 0.0938, + "step": 118400 + }, + { + "epoch": 19.573835480673935, + "grad_norm": 0.6270598769187927, + "learning_rate": 3.86154241456521e-06, + "loss": 0.0963, + "step": 118500 + }, + { + "epoch": 19.590353485298976, + "grad_norm": 0.7540850639343262, + "learning_rate": 3.855424635074453e-06, + "loss": 0.0941, + "step": 118600 + }, + { + "epoch": 19.606871489924018, + "grad_norm": 0.6837806701660156, + "learning_rate": 3.849306855583698e-06, + "loss": 0.0965, + "step": 118700 + }, + { + "epoch": 19.62338949454906, + "grad_norm": 0.5979442000389099, + "learning_rate": 3.843189076092942e-06, + "loss": 0.0934, + "step": 118800 + }, + { + "epoch": 19.6399074991741, + "grad_norm": 0.5547999143600464, + "learning_rate": 3.8370712966021855e-06, + "loss": 0.0954, + "step": 118900 + }, + { + "epoch": 19.65642550379914, + "grad_norm": 0.7073753476142883, + "learning_rate": 3.83095351711143e-06, + "loss": 0.0912, + "step": 119000 + }, + { + "epoch": 19.65642550379914, + "eval_cer": 0.037488878242420094, + "eval_loss": 0.10558834671974182, + "eval_runtime": 57.5545, + "eval_samples_per_second": 29.381, + "eval_steps_per_second": 7.35, + "eval_wer": 0.20639480422156997, + "step": 119000 + }, + { + "epoch": 19.672943508424183, + "grad_norm": 0.6621033549308777, + "learning_rate": 3.824835737620673e-06, + "loss": 0.096, + "step": 119100 + }, + { + "epoch": 19.689461513049224, + "grad_norm": 0.6240584254264832, + "learning_rate": 3.818717958129918e-06, + "loss": 0.0925, + "step": 119200 + }, + { + "epoch": 19.705979517674265, + "grad_norm": 0.834823489189148, + "learning_rate": 3.8126001786391615e-06, + "loss": 0.0904, + "step": 119300 + }, + { + "epoch": 19.722497522299307, + "grad_norm": 0.5534527897834778, + "learning_rate": 3.8064823991484058e-06, + "loss": 0.0864, + "step": 119400 + }, + { + "epoch": 19.739015526924348, + "grad_norm": 0.5191404819488525, + "learning_rate": 3.8003646196576492e-06, + "loss": 0.0943, + "step": 119500 + }, + { + "epoch": 19.75553353154939, + "grad_norm": 0.7800885438919067, + "learning_rate": 3.7942468401668936e-06, + "loss": 0.096, + "step": 119600 + }, + { + "epoch": 19.77205153617443, + "grad_norm": 0.624622106552124, + "learning_rate": 3.7881290606761374e-06, + "loss": 0.1021, + "step": 119700 + }, + { + "epoch": 19.78856954079947, + "grad_norm": 0.448397159576416, + "learning_rate": 3.7820112811853813e-06, + "loss": 0.0934, + "step": 119800 + }, + { + "epoch": 19.805087545424513, + "grad_norm": 0.6804871559143066, + "learning_rate": 3.775893501694625e-06, + "loss": 0.0907, + "step": 119900 + }, + { + "epoch": 19.821605550049554, + "grad_norm": 0.8749545216560364, + "learning_rate": 3.7697757222038695e-06, + "loss": 0.0914, + "step": 120000 + }, + { + "epoch": 19.821605550049554, + "eval_cer": 0.03736910546848265, + "eval_loss": 0.1068761870265007, + "eval_runtime": 61.4436, + "eval_samples_per_second": 27.521, + "eval_steps_per_second": 6.884, + "eval_wer": 0.20639480422156997, + "step": 120000 + }, + { + "epoch": 19.838123554674596, + "grad_norm": 0.6852620244026184, + "learning_rate": 3.763657942713113e-06, + "loss": 0.0893, + "step": 120100 + }, + { + "epoch": 19.854641559299637, + "grad_norm": 0.48279762268066406, + "learning_rate": 3.7575401632223573e-06, + "loss": 0.0929, + "step": 120200 + }, + { + "epoch": 19.871159563924678, + "grad_norm": 0.9051792025566101, + "learning_rate": 3.751422383731601e-06, + "loss": 0.1167, + "step": 120300 + }, + { + "epoch": 19.88767756854972, + "grad_norm": 0.5648931264877319, + "learning_rate": 3.7453046042408455e-06, + "loss": 0.0946, + "step": 120400 + }, + { + "epoch": 19.90419557317476, + "grad_norm": 0.6829082369804382, + "learning_rate": 3.739186824750089e-06, + "loss": 0.0914, + "step": 120500 + }, + { + "epoch": 19.920713577799802, + "grad_norm": 0.8707834482192993, + "learning_rate": 3.7330690452593333e-06, + "loss": 0.0919, + "step": 120600 + }, + { + "epoch": 19.937231582424843, + "grad_norm": 0.6333926916122437, + "learning_rate": 3.7269512657685767e-06, + "loss": 0.0991, + "step": 120700 + }, + { + "epoch": 19.953749587049884, + "grad_norm": 0.5039823055267334, + "learning_rate": 3.720833486277821e-06, + "loss": 0.0899, + "step": 120800 + }, + { + "epoch": 19.970267591674926, + "grad_norm": 0.5696250200271606, + "learning_rate": 3.714715706787065e-06, + "loss": 0.0934, + "step": 120900 + }, + { + "epoch": 19.986785596299967, + "grad_norm": 0.6956700682640076, + "learning_rate": 3.7085979272963092e-06, + "loss": 0.0933, + "step": 121000 + }, + { + "epoch": 19.986785596299967, + "eval_cer": 0.03744610225172815, + "eval_loss": 0.10595033317804337, + "eval_runtime": 57.0726, + "eval_samples_per_second": 29.629, + "eval_steps_per_second": 7.412, + "eval_wer": 0.20664460126147505, + "step": 121000 + }, + { + "epoch": 20.00330360092501, + "grad_norm": 0.615598201751709, + "learning_rate": 3.7024801478055527e-06, + "loss": 0.0954, + "step": 121100 + }, + { + "epoch": 20.01982160555005, + "grad_norm": 0.4726826250553131, + "learning_rate": 3.696362368314797e-06, + "loss": 0.1088, + "step": 121200 + }, + { + "epoch": 20.03633961017509, + "grad_norm": 0.554155170917511, + "learning_rate": 3.690244588824041e-06, + "loss": 0.0934, + "step": 121300 + }, + { + "epoch": 20.052857614800132, + "grad_norm": 0.8273882269859314, + "learning_rate": 3.6841268093332848e-06, + "loss": 0.0982, + "step": 121400 + }, + { + "epoch": 20.069375619425173, + "grad_norm": 0.6084610819816589, + "learning_rate": 3.6780090298425287e-06, + "loss": 0.096, + "step": 121500 + }, + { + "epoch": 20.085893624050215, + "grad_norm": 0.42655861377716064, + "learning_rate": 3.671891250351773e-06, + "loss": 0.0929, + "step": 121600 + }, + { + "epoch": 20.102411628675256, + "grad_norm": 0.7716320753097534, + "learning_rate": 3.6657734708610164e-06, + "loss": 0.0925, + "step": 121700 + }, + { + "epoch": 20.118929633300297, + "grad_norm": 0.5255216360092163, + "learning_rate": 3.6596556913702607e-06, + "loss": 0.0929, + "step": 121800 + }, + { + "epoch": 20.13544763792534, + "grad_norm": 0.8503526449203491, + "learning_rate": 3.6535379118795046e-06, + "loss": 0.0963, + "step": 121900 + }, + { + "epoch": 20.15196564255038, + "grad_norm": 0.5264951586723328, + "learning_rate": 3.6474201323887485e-06, + "loss": 0.1132, + "step": 122000 + }, + { + "epoch": 20.15196564255038, + "eval_cer": 0.03729210868523715, + "eval_loss": 0.10676946491003036, + "eval_runtime": 57.951, + "eval_samples_per_second": 29.18, + "eval_steps_per_second": 7.299, + "eval_wer": 0.20701929682133266, + "step": 122000 + }, + { + "epoch": 20.16848364717542, + "grad_norm": 0.6232183575630188, + "learning_rate": 3.6413023528979924e-06, + "loss": 0.0964, + "step": 122100 + }, + { + "epoch": 20.185001651800462, + "grad_norm": 0.6945717334747314, + "learning_rate": 3.6351845734072367e-06, + "loss": 0.0967, + "step": 122200 + }, + { + "epoch": 20.201519656425504, + "grad_norm": 0.7937995195388794, + "learning_rate": 3.62906679391648e-06, + "loss": 0.0896, + "step": 122300 + }, + { + "epoch": 20.218037661050545, + "grad_norm": 0.7246369123458862, + "learning_rate": 3.6229490144257245e-06, + "loss": 0.0867, + "step": 122400 + }, + { + "epoch": 20.234555665675586, + "grad_norm": 0.5831708908081055, + "learning_rate": 3.6168312349349684e-06, + "loss": 0.091, + "step": 122500 + }, + { + "epoch": 20.251073670300627, + "grad_norm": 0.7024865746498108, + "learning_rate": 3.6107134554442127e-06, + "loss": 0.0935, + "step": 122600 + }, + { + "epoch": 20.26759167492567, + "grad_norm": 0.8907782435417175, + "learning_rate": 3.604595675953456e-06, + "loss": 0.0956, + "step": 122700 + }, + { + "epoch": 20.28410967955071, + "grad_norm": 0.6609148383140564, + "learning_rate": 3.5984778964627004e-06, + "loss": 0.0933, + "step": 122800 + }, + { + "epoch": 20.30062768417575, + "grad_norm": 0.8460065722465515, + "learning_rate": 3.592360116971944e-06, + "loss": 0.0891, + "step": 122900 + }, + { + "epoch": 20.317145688800792, + "grad_norm": 0.6879094839096069, + "learning_rate": 3.586242337481188e-06, + "loss": 0.0888, + "step": 123000 + }, + { + "epoch": 20.317145688800792, + "eval_cer": 0.03731777427965232, + "eval_loss": 0.10576903820037842, + "eval_runtime": 55.0332, + "eval_samples_per_second": 30.727, + "eval_steps_per_second": 7.686, + "eval_wer": 0.20670705052145133, + "step": 123000 + }, + { + "epoch": 20.333663693425834, + "grad_norm": 0.7055560946464539, + "learning_rate": 3.580124557990432e-06, + "loss": 0.098, + "step": 123100 + }, + { + "epoch": 20.350181698050875, + "grad_norm": 0.5633586049079895, + "learning_rate": 3.5740067784996764e-06, + "loss": 0.0926, + "step": 123200 + }, + { + "epoch": 20.366699702675916, + "grad_norm": 0.6035296320915222, + "learning_rate": 3.56788899900892e-06, + "loss": 0.0939, + "step": 123300 + }, + { + "epoch": 20.383217707300957, + "grad_norm": 0.6581436395645142, + "learning_rate": 3.561771219518164e-06, + "loss": 0.094, + "step": 123400 + }, + { + "epoch": 20.399735711926, + "grad_norm": 0.6265963912010193, + "learning_rate": 3.555653440027408e-06, + "loss": 0.0901, + "step": 123500 + }, + { + "epoch": 20.41625371655104, + "grad_norm": 0.6421579718589783, + "learning_rate": 3.549535660536652e-06, + "loss": 0.0916, + "step": 123600 + }, + { + "epoch": 20.43277172117608, + "grad_norm": 0.5874105095863342, + "learning_rate": 3.543417881045896e-06, + "loss": 0.0899, + "step": 123700 + }, + { + "epoch": 20.449289725801123, + "grad_norm": 0.7892938256263733, + "learning_rate": 3.53730010155514e-06, + "loss": 0.0943, + "step": 123800 + }, + { + "epoch": 20.465807730426164, + "grad_norm": 0.5755423903465271, + "learning_rate": 3.5311823220643836e-06, + "loss": 0.0895, + "step": 123900 + }, + { + "epoch": 20.482325735051205, + "grad_norm": 0.5386433005332947, + "learning_rate": 3.525064542573628e-06, + "loss": 0.0942, + "step": 124000 + }, + { + "epoch": 20.482325735051205, + "eval_cer": 0.03739477106289782, + "eval_loss": 0.10733035951852798, + "eval_runtime": 53.425, + "eval_samples_per_second": 31.652, + "eval_steps_per_second": 7.918, + "eval_wer": 0.205707862361831, + "step": 124000 + }, + { + "epoch": 20.498843739676246, + "grad_norm": 0.6741786003112793, + "learning_rate": 3.518946763082872e-06, + "loss": 0.1323, + "step": 124100 + }, + { + "epoch": 20.515361744301288, + "grad_norm": 0.5971143245697021, + "learning_rate": 3.5128289835921157e-06, + "loss": 0.1021, + "step": 124200 + }, + { + "epoch": 20.53187974892633, + "grad_norm": 0.6608400344848633, + "learning_rate": 3.5067112041013596e-06, + "loss": 0.0947, + "step": 124300 + }, + { + "epoch": 20.54839775355137, + "grad_norm": 0.7231072187423706, + "learning_rate": 3.500593424610604e-06, + "loss": 0.1009, + "step": 124400 + }, + { + "epoch": 20.56491575817641, + "grad_norm": 0.6929687857627869, + "learning_rate": 3.4944756451198473e-06, + "loss": 0.0947, + "step": 124500 + }, + { + "epoch": 20.581433762801453, + "grad_norm": 0.7624922394752502, + "learning_rate": 3.4883578656290917e-06, + "loss": 0.1048, + "step": 124600 + }, + { + "epoch": 20.597951767426494, + "grad_norm": 0.6456249356269836, + "learning_rate": 3.4822400861383355e-06, + "loss": 0.0898, + "step": 124700 + }, + { + "epoch": 20.614469772051535, + "grad_norm": 0.5414004921913147, + "learning_rate": 3.47612230664758e-06, + "loss": 0.0919, + "step": 124800 + }, + { + "epoch": 20.630987776676577, + "grad_norm": 0.6581851840019226, + "learning_rate": 3.4700045271568233e-06, + "loss": 0.0933, + "step": 124900 + }, + { + "epoch": 20.647505781301618, + "grad_norm": 0.7606977820396423, + "learning_rate": 3.4638867476660676e-06, + "loss": 0.09, + "step": 125000 + }, + { + "epoch": 20.647505781301618, + "eval_cer": 0.037086783929915816, + "eval_loss": 0.10901934653520584, + "eval_runtime": 53.6092, + "eval_samples_per_second": 31.543, + "eval_steps_per_second": 7.89, + "eval_wer": 0.2050833697620683, + "step": 125000 + }, + { + "epoch": 20.66402378592666, + "grad_norm": 0.8043733239173889, + "learning_rate": 3.457768968175311e-06, + "loss": 0.0946, + "step": 125100 + }, + { + "epoch": 20.6805417905517, + "grad_norm": 0.5810668468475342, + "learning_rate": 3.4516511886845554e-06, + "loss": 0.1174, + "step": 125200 + }, + { + "epoch": 20.69705979517674, + "grad_norm": 0.5117190480232239, + "learning_rate": 3.4455334091937993e-06, + "loss": 0.087, + "step": 125300 + }, + { + "epoch": 20.713577799801783, + "grad_norm": 0.6740157604217529, + "learning_rate": 3.4394156297030436e-06, + "loss": 0.0974, + "step": 125400 + }, + { + "epoch": 20.730095804426824, + "grad_norm": 0.7044069170951843, + "learning_rate": 3.433297850212287e-06, + "loss": 0.0983, + "step": 125500 + }, + { + "epoch": 20.746613809051865, + "grad_norm": 0.7274563908576965, + "learning_rate": 3.4271800707215314e-06, + "loss": 0.0893, + "step": 125600 + }, + { + "epoch": 20.763131813676907, + "grad_norm": 0.6939309239387512, + "learning_rate": 3.4210622912307752e-06, + "loss": 0.0905, + "step": 125700 + }, + { + "epoch": 20.779649818301948, + "grad_norm": 0.841923713684082, + "learning_rate": 3.414944511740019e-06, + "loss": 0.0919, + "step": 125800 + }, + { + "epoch": 20.79616782292699, + "grad_norm": 0.6695510149002075, + "learning_rate": 3.408826732249263e-06, + "loss": 0.0996, + "step": 125900 + }, + { + "epoch": 20.81268582755203, + "grad_norm": 0.5963577628135681, + "learning_rate": 3.4027089527585073e-06, + "loss": 0.1104, + "step": 126000 + }, + { + "epoch": 20.81268582755203, + "eval_cer": 0.03736910546848265, + "eval_loss": 0.10715510696172714, + "eval_runtime": 53.3765, + "eval_samples_per_second": 31.681, + "eval_steps_per_second": 7.925, + "eval_wer": 0.20608255792168864, + "step": 126000 + }, + { + "epoch": 20.829203832177072, + "grad_norm": 0.5884077548980713, + "learning_rate": 3.396591173267751e-06, + "loss": 0.0941, + "step": 126100 + }, + { + "epoch": 20.845721836802113, + "grad_norm": 0.7187851071357727, + "learning_rate": 3.390473393776995e-06, + "loss": 0.0951, + "step": 126200 + }, + { + "epoch": 20.862239841427154, + "grad_norm": 0.8274132609367371, + "learning_rate": 3.384355614286239e-06, + "loss": 0.0926, + "step": 126300 + }, + { + "epoch": 20.878757846052196, + "grad_norm": 0.7908247113227844, + "learning_rate": 3.378237834795483e-06, + "loss": 0.0929, + "step": 126400 + }, + { + "epoch": 20.895275850677237, + "grad_norm": 0.7135681509971619, + "learning_rate": 3.3721200553047268e-06, + "loss": 0.0896, + "step": 126500 + }, + { + "epoch": 20.911793855302278, + "grad_norm": 0.5181264877319336, + "learning_rate": 3.366002275813971e-06, + "loss": 0.0903, + "step": 126600 + }, + { + "epoch": 20.92831185992732, + "grad_norm": 0.6559743285179138, + "learning_rate": 3.3598844963232145e-06, + "loss": 0.091, + "step": 126700 + }, + { + "epoch": 20.94482986455236, + "grad_norm": 0.498858243227005, + "learning_rate": 3.353766716832459e-06, + "loss": 0.0873, + "step": 126800 + }, + { + "epoch": 20.961347869177402, + "grad_norm": 0.6528595089912415, + "learning_rate": 3.3476489373417027e-06, + "loss": 0.0944, + "step": 126900 + }, + { + "epoch": 20.977865873802443, + "grad_norm": 0.4162144064903259, + "learning_rate": 3.341531157850947e-06, + "loss": 0.0865, + "step": 127000 + }, + { + "epoch": 20.977865873802443, + "eval_cer": 0.037052563137362264, + "eval_loss": 0.10633628815412521, + "eval_runtime": 53.5789, + "eval_samples_per_second": 31.561, + "eval_steps_per_second": 7.895, + "eval_wer": 0.2053331668019734, + "step": 127000 + }, + { + "epoch": 20.994383878427485, + "grad_norm": 0.8712024092674255, + "learning_rate": 3.3354133783601905e-06, + "loss": 0.0937, + "step": 127100 + }, + { + "epoch": 21.010901883052526, + "grad_norm": 0.8483607172966003, + "learning_rate": 3.329295598869435e-06, + "loss": 0.0879, + "step": 127200 + }, + { + "epoch": 21.027419887677567, + "grad_norm": 0.9952839016914368, + "learning_rate": 3.3231778193786783e-06, + "loss": 0.095, + "step": 127300 + }, + { + "epoch": 21.04393789230261, + "grad_norm": 0.4832421541213989, + "learning_rate": 3.3170600398879226e-06, + "loss": 0.109, + "step": 127400 + }, + { + "epoch": 21.06045589692765, + "grad_norm": 0.6822460889816284, + "learning_rate": 3.3109422603971665e-06, + "loss": 0.0897, + "step": 127500 + }, + { + "epoch": 21.07697390155269, + "grad_norm": 0.6260835528373718, + "learning_rate": 3.3048244809064108e-06, + "loss": 0.0892, + "step": 127600 + }, + { + "epoch": 21.093491906177732, + "grad_norm": 0.6604743003845215, + "learning_rate": 3.2987067014156542e-06, + "loss": 0.0957, + "step": 127700 + }, + { + "epoch": 21.110009910802773, + "grad_norm": 0.5889437794685364, + "learning_rate": 3.2925889219248985e-06, + "loss": 0.0923, + "step": 127800 + }, + { + "epoch": 21.126527915427815, + "grad_norm": 0.6197744011878967, + "learning_rate": 3.2864711424341424e-06, + "loss": 0.0936, + "step": 127900 + }, + { + "epoch": 21.143045920052856, + "grad_norm": 0.6159409880638123, + "learning_rate": 3.2803533629433863e-06, + "loss": 0.0901, + "step": 128000 + }, + { + "epoch": 21.143045920052856, + "eval_cer": 0.03699267675039354, + "eval_loss": 0.10548759251832962, + "eval_runtime": 53.1647, + "eval_samples_per_second": 31.807, + "eval_steps_per_second": 7.956, + "eval_wer": 0.20320989196278025, + "step": 128000 + }, + { + "epoch": 21.159563924677897, + "grad_norm": 0.4305579960346222, + "learning_rate": 3.27423558345263e-06, + "loss": 0.0962, + "step": 128100 + }, + { + "epoch": 21.17608192930294, + "grad_norm": 0.8560436367988586, + "learning_rate": 3.2681178039618745e-06, + "loss": 0.1129, + "step": 128200 + }, + { + "epoch": 21.19259993392798, + "grad_norm": 0.824738085269928, + "learning_rate": 3.262000024471118e-06, + "loss": 0.095, + "step": 128300 + }, + { + "epoch": 21.20911793855302, + "grad_norm": 0.7285254597663879, + "learning_rate": 3.2558822449803623e-06, + "loss": 0.0967, + "step": 128400 + }, + { + "epoch": 21.225635943178062, + "grad_norm": 0.8130694627761841, + "learning_rate": 3.249764465489606e-06, + "loss": 0.0952, + "step": 128500 + }, + { + "epoch": 21.242153947803104, + "grad_norm": 0.640154242515564, + "learning_rate": 3.24364668599885e-06, + "loss": 0.0911, + "step": 128600 + }, + { + "epoch": 21.258671952428145, + "grad_norm": 0.5193492770195007, + "learning_rate": 3.237528906508094e-06, + "loss": 0.085, + "step": 128700 + }, + { + "epoch": 21.27518995705319, + "grad_norm": 0.7556692957878113, + "learning_rate": 3.2314111270173382e-06, + "loss": 0.0904, + "step": 128800 + }, + { + "epoch": 21.29170796167823, + "grad_norm": 0.6025686860084534, + "learning_rate": 3.2252933475265817e-06, + "loss": 0.0969, + "step": 128900 + }, + { + "epoch": 21.308225966303272, + "grad_norm": 0.6533762812614441, + "learning_rate": 3.219175568035826e-06, + "loss": 0.0962, + "step": 129000 + }, + { + "epoch": 21.308225966303272, + "eval_cer": 0.037095339128054204, + "eval_loss": 0.10510192811489105, + "eval_runtime": 53.7455, + "eval_samples_per_second": 31.463, + "eval_steps_per_second": 7.87, + "eval_wer": 0.20539561606194967, + "step": 129000 + }, + { + "epoch": 21.324743970928314, + "grad_norm": 0.7926602363586426, + "learning_rate": 3.21305778854507e-06, + "loss": 0.0847, + "step": 129100 + }, + { + "epoch": 21.341261975553355, + "grad_norm": 0.5735198855400085, + "learning_rate": 3.2069400090543142e-06, + "loss": 0.0949, + "step": 129200 + }, + { + "epoch": 21.357779980178396, + "grad_norm": 0.4958854615688324, + "learning_rate": 3.2008222295635577e-06, + "loss": 0.0916, + "step": 129300 + }, + { + "epoch": 21.374297984803437, + "grad_norm": 0.9454948306083679, + "learning_rate": 3.194704450072802e-06, + "loss": 0.1177, + "step": 129400 + }, + { + "epoch": 21.39081598942848, + "grad_norm": 0.6658288240432739, + "learning_rate": 3.1885866705820454e-06, + "loss": 0.0931, + "step": 129500 + }, + { + "epoch": 21.40733399405352, + "grad_norm": 0.6774040460586548, + "learning_rate": 3.1824688910912898e-06, + "loss": 0.0971, + "step": 129600 + }, + { + "epoch": 21.42385199867856, + "grad_norm": 0.5878866910934448, + "learning_rate": 3.1763511116005336e-06, + "loss": 0.1129, + "step": 129700 + }, + { + "epoch": 21.440370003303602, + "grad_norm": 0.6971415281295776, + "learning_rate": 3.170233332109778e-06, + "loss": 0.0937, + "step": 129800 + }, + { + "epoch": 21.456888007928644, + "grad_norm": 0.7083709239959717, + "learning_rate": 3.1641155526190214e-06, + "loss": 0.088, + "step": 129900 + }, + { + "epoch": 21.473406012553685, + "grad_norm": 0.7533379197120667, + "learning_rate": 3.1579977731282657e-06, + "loss": 0.0979, + "step": 130000 + }, + { + "epoch": 21.473406012553685, + "eval_cer": 0.036872903976456095, + "eval_loss": 0.10504589229822159, + "eval_runtime": 53.1803, + "eval_samples_per_second": 31.797, + "eval_steps_per_second": 7.954, + "eval_wer": 0.20427152938237683, + "step": 130000 + }, + { + "epoch": 21.489924017178726, + "grad_norm": 0.7101976275444031, + "learning_rate": 3.1518799936375096e-06, + "loss": 0.0909, + "step": 130100 + }, + { + "epoch": 21.506442021803768, + "grad_norm": 0.6949977874755859, + "learning_rate": 3.1457622141467535e-06, + "loss": 0.0878, + "step": 130200 + }, + { + "epoch": 21.52296002642881, + "grad_norm": 0.584557831287384, + "learning_rate": 3.1396444346559974e-06, + "loss": 0.101, + "step": 130300 + }, + { + "epoch": 21.53947803105385, + "grad_norm": 1.0865356922149658, + "learning_rate": 3.1335266551652417e-06, + "loss": 0.0925, + "step": 130400 + }, + { + "epoch": 21.55599603567889, + "grad_norm": 0.6468126177787781, + "learning_rate": 3.127408875674485e-06, + "loss": 0.0855, + "step": 130500 + }, + { + "epoch": 21.572514040303933, + "grad_norm": 0.6762518286705017, + "learning_rate": 3.1212910961837295e-06, + "loss": 0.1107, + "step": 130600 + }, + { + "epoch": 21.589032044928974, + "grad_norm": 0.7978318333625793, + "learning_rate": 3.1151733166929734e-06, + "loss": 0.0897, + "step": 130700 + }, + { + "epoch": 21.605550049554015, + "grad_norm": 0.8376022577285767, + "learning_rate": 3.1090555372022172e-06, + "loss": 0.0902, + "step": 130800 + }, + { + "epoch": 21.622068054179056, + "grad_norm": 0.702617347240448, + "learning_rate": 3.102937757711461e-06, + "loss": 0.0937, + "step": 130900 + }, + { + "epoch": 21.638586058804098, + "grad_norm": 0.5407445430755615, + "learning_rate": 3.0968199782207054e-06, + "loss": 0.0912, + "step": 131000 + }, + { + "epoch": 21.638586058804098, + "eval_cer": 0.03681301758948737, + "eval_loss": 0.10601798444986343, + "eval_runtime": 52.6946, + "eval_samples_per_second": 32.091, + "eval_steps_per_second": 8.027, + "eval_wer": 0.20352213826266158, + "step": 131000 + }, + { + "epoch": 21.65510406342914, + "grad_norm": 0.48049646615982056, + "learning_rate": 3.090702198729949e-06, + "loss": 0.1007, + "step": 131100 + }, + { + "epoch": 21.67162206805418, + "grad_norm": 0.8997465372085571, + "learning_rate": 3.084584419239193e-06, + "loss": 0.0901, + "step": 131200 + }, + { + "epoch": 21.68814007267922, + "grad_norm": 0.6192366480827332, + "learning_rate": 3.078466639748437e-06, + "loss": 0.0892, + "step": 131300 + }, + { + "epoch": 21.704658077304263, + "grad_norm": 0.7299876809120178, + "learning_rate": 3.0723488602576814e-06, + "loss": 0.0929, + "step": 131400 + }, + { + "epoch": 21.721176081929304, + "grad_norm": 0.6832283735275269, + "learning_rate": 3.066231080766925e-06, + "loss": 0.0936, + "step": 131500 + }, + { + "epoch": 21.737694086554345, + "grad_norm": 0.5136446952819824, + "learning_rate": 3.060113301276169e-06, + "loss": 0.0911, + "step": 131600 + }, + { + "epoch": 21.754212091179387, + "grad_norm": 0.6710427403450012, + "learning_rate": 3.0539955217854126e-06, + "loss": 0.0833, + "step": 131700 + }, + { + "epoch": 21.770730095804428, + "grad_norm": 0.6596719026565552, + "learning_rate": 3.047877742294657e-06, + "loss": 0.0921, + "step": 131800 + }, + { + "epoch": 21.78724810042947, + "grad_norm": 0.5548281669616699, + "learning_rate": 3.041759962803901e-06, + "loss": 0.0903, + "step": 131900 + }, + { + "epoch": 21.80376610505451, + "grad_norm": 0.5049402713775635, + "learning_rate": 3.035642183313145e-06, + "loss": 0.1321, + "step": 132000 + }, + { + "epoch": 21.80376610505451, + "eval_cer": 0.03675313120251865, + "eval_loss": 0.10501556098461151, + "eval_runtime": 52.4304, + "eval_samples_per_second": 32.252, + "eval_steps_per_second": 8.068, + "eval_wer": 0.20283519640292264, + "step": 132000 + }, + { + "epoch": 21.82028410967955, + "grad_norm": 0.7295696139335632, + "learning_rate": 3.0295244038223886e-06, + "loss": 0.0888, + "step": 132100 + }, + { + "epoch": 21.836802114304593, + "grad_norm": 0.5694403648376465, + "learning_rate": 3.023406624331633e-06, + "loss": 0.1097, + "step": 132200 + }, + { + "epoch": 21.853320118929634, + "grad_norm": 0.5931413769721985, + "learning_rate": 3.017288844840877e-06, + "loss": 0.0895, + "step": 132300 + }, + { + "epoch": 21.869838123554675, + "grad_norm": 0.715791642665863, + "learning_rate": 3.0111710653501207e-06, + "loss": 0.0926, + "step": 132400 + }, + { + "epoch": 21.886356128179717, + "grad_norm": 0.7110834717750549, + "learning_rate": 3.0050532858593646e-06, + "loss": 0.0892, + "step": 132500 + }, + { + "epoch": 21.902874132804758, + "grad_norm": 0.8973935842514038, + "learning_rate": 2.998935506368609e-06, + "loss": 0.0917, + "step": 132600 + }, + { + "epoch": 21.9193921374298, + "grad_norm": 0.6259893178939819, + "learning_rate": 2.9928177268778523e-06, + "loss": 0.0919, + "step": 132700 + }, + { + "epoch": 21.93591014205484, + "grad_norm": 0.6321418881416321, + "learning_rate": 2.9866999473870966e-06, + "loss": 0.0985, + "step": 132800 + }, + { + "epoch": 21.952428146679882, + "grad_norm": 0.690564751625061, + "learning_rate": 2.9805821678963405e-06, + "loss": 0.0857, + "step": 132900 + }, + { + "epoch": 21.968946151304923, + "grad_norm": 0.5872605443000793, + "learning_rate": 2.9744643884055844e-06, + "loss": 0.0954, + "step": 133000 + }, + { + "epoch": 21.968946151304923, + "eval_cer": 0.03696701115597837, + "eval_loss": 0.10601279884576797, + "eval_runtime": 52.5874, + "eval_samples_per_second": 32.156, + "eval_steps_per_second": 8.044, + "eval_wer": 0.20445887716230562, + "step": 133000 + }, + { + "epoch": 21.985464155929964, + "grad_norm": 0.5473292469978333, + "learning_rate": 2.9683466089148283e-06, + "loss": 0.0858, + "step": 133100 + }, + { + "epoch": 22.001982160555006, + "grad_norm": 0.7367832660675049, + "learning_rate": 2.9622288294240726e-06, + "loss": 0.0909, + "step": 133200 + }, + { + "epoch": 22.018500165180047, + "grad_norm": 1.0184003114700317, + "learning_rate": 2.956111049933316e-06, + "loss": 0.0901, + "step": 133300 + }, + { + "epoch": 22.035018169805088, + "grad_norm": 0.7270667552947998, + "learning_rate": 2.9499932704425604e-06, + "loss": 0.0942, + "step": 133400 + }, + { + "epoch": 22.05153617443013, + "grad_norm": 0.6220849752426147, + "learning_rate": 2.9438754909518043e-06, + "loss": 0.0892, + "step": 133500 + }, + { + "epoch": 22.06805417905517, + "grad_norm": 0.6055799126625061, + "learning_rate": 2.9377577114610486e-06, + "loss": 0.0895, + "step": 133600 + }, + { + "epoch": 22.084572183680212, + "grad_norm": 0.5487551689147949, + "learning_rate": 2.931639931970292e-06, + "loss": 0.0894, + "step": 133700 + }, + { + "epoch": 22.101090188305253, + "grad_norm": 0.6704040765762329, + "learning_rate": 2.9255221524795364e-06, + "loss": 0.0945, + "step": 133800 + }, + { + "epoch": 22.117608192930295, + "grad_norm": 0.5721579194068909, + "learning_rate": 2.91940437298878e-06, + "loss": 0.0959, + "step": 133900 + }, + { + "epoch": 22.134126197555336, + "grad_norm": 0.6543858051300049, + "learning_rate": 2.913286593498024e-06, + "loss": 0.1333, + "step": 134000 + }, + { + "epoch": 22.134126197555336, + "eval_cer": 0.03689001437273287, + "eval_loss": 0.10688560456037521, + "eval_runtime": 52.1166, + "eval_samples_per_second": 32.446, + "eval_steps_per_second": 8.116, + "eval_wer": 0.2038968338225192, + "step": 134000 + }, + { + "epoch": 22.150644202180377, + "grad_norm": 0.6130584478378296, + "learning_rate": 2.907168814007268e-06, + "loss": 0.0962, + "step": 134100 + }, + { + "epoch": 22.16716220680542, + "grad_norm": 0.7324750423431396, + "learning_rate": 2.9010510345165123e-06, + "loss": 0.0903, + "step": 134200 + }, + { + "epoch": 22.18368021143046, + "grad_norm": 0.6277410984039307, + "learning_rate": 2.8949332550257558e-06, + "loss": 0.0818, + "step": 134300 + }, + { + "epoch": 22.2001982160555, + "grad_norm": 0.5178551077842712, + "learning_rate": 2.888815475535e-06, + "loss": 0.1053, + "step": 134400 + }, + { + "epoch": 22.216716220680542, + "grad_norm": 0.6540612578392029, + "learning_rate": 2.8826976960442436e-06, + "loss": 0.0866, + "step": 134500 + }, + { + "epoch": 22.233234225305583, + "grad_norm": 0.5932282209396362, + "learning_rate": 2.876579916553488e-06, + "loss": 0.0927, + "step": 134600 + }, + { + "epoch": 22.249752229930625, + "grad_norm": 0.6185062527656555, + "learning_rate": 2.8704621370627317e-06, + "loss": 0.089, + "step": 134700 + }, + { + "epoch": 22.266270234555666, + "grad_norm": 0.8983421921730042, + "learning_rate": 2.864344357571976e-06, + "loss": 0.0861, + "step": 134800 + }, + { + "epoch": 22.282788239180707, + "grad_norm": 0.3891274034976959, + "learning_rate": 2.8582265780812195e-06, + "loss": 0.0944, + "step": 134900 + }, + { + "epoch": 22.29930624380575, + "grad_norm": 0.7119171023368835, + "learning_rate": 2.852108798590464e-06, + "loss": 0.089, + "step": 135000 + }, + { + "epoch": 22.29930624380575, + "eval_cer": 0.03690712476900965, + "eval_loss": 0.10521671921014786, + "eval_runtime": 52.7579, + "eval_samples_per_second": 32.052, + "eval_steps_per_second": 8.018, + "eval_wer": 0.20402173234247173, + "step": 135000 + }, + { + "epoch": 22.31582424843079, + "grad_norm": 0.5368226766586304, + "learning_rate": 2.8459910190997077e-06, + "loss": 0.0905, + "step": 135100 + }, + { + "epoch": 22.33234225305583, + "grad_norm": 0.6488823890686035, + "learning_rate": 2.8398732396089516e-06, + "loss": 0.1179, + "step": 135200 + }, + { + "epoch": 22.348860257680872, + "grad_norm": 0.6369620561599731, + "learning_rate": 2.8337554601181955e-06, + "loss": 0.0939, + "step": 135300 + }, + { + "epoch": 22.365378262305914, + "grad_norm": 0.6993893384933472, + "learning_rate": 2.82763768062744e-06, + "loss": 0.0944, + "step": 135400 + }, + { + "epoch": 22.381896266930955, + "grad_norm": 0.8022906184196472, + "learning_rate": 2.8215199011366833e-06, + "loss": 0.0832, + "step": 135500 + }, + { + "epoch": 22.398414271555996, + "grad_norm": 0.5833423733711243, + "learning_rate": 2.8154021216459276e-06, + "loss": 0.0944, + "step": 135600 + }, + { + "epoch": 22.414932276181037, + "grad_norm": 0.72309410572052, + "learning_rate": 2.8092843421551715e-06, + "loss": 0.093, + "step": 135700 + }, + { + "epoch": 22.43145028080608, + "grad_norm": 0.5882470011711121, + "learning_rate": 2.8031665626644158e-06, + "loss": 0.1, + "step": 135800 + }, + { + "epoch": 22.44796828543112, + "grad_norm": 0.6774691343307495, + "learning_rate": 2.7970487831736592e-06, + "loss": 0.0954, + "step": 135900 + }, + { + "epoch": 22.46448629005616, + "grad_norm": 0.9647297263145447, + "learning_rate": 2.7909310036829035e-06, + "loss": 0.094, + "step": 136000 + }, + { + "epoch": 22.46448629005616, + "eval_cer": 0.03667613441927315, + "eval_loss": 0.10520410537719727, + "eval_runtime": 52.5832, + "eval_samples_per_second": 32.159, + "eval_steps_per_second": 8.044, + "eval_wer": 0.20314744270280397, + "step": 136000 + }, + { + "epoch": 22.481004294681203, + "grad_norm": 0.6736404895782471, + "learning_rate": 2.784813224192147e-06, + "loss": 0.0912, + "step": 136100 + }, + { + "epoch": 22.497522299306244, + "grad_norm": 0.4658312499523163, + "learning_rate": 2.7786954447013913e-06, + "loss": 0.0874, + "step": 136200 + }, + { + "epoch": 22.514040303931285, + "grad_norm": 0.8794094920158386, + "learning_rate": 2.7725776652106356e-06, + "loss": 0.1005, + "step": 136300 + }, + { + "epoch": 22.530558308556326, + "grad_norm": 0.6956797242164612, + "learning_rate": 2.7664598857198795e-06, + "loss": 0.0895, + "step": 136400 + }, + { + "epoch": 22.547076313181368, + "grad_norm": 0.4646037220954895, + "learning_rate": 2.7603421062291234e-06, + "loss": 0.0869, + "step": 136500 + }, + { + "epoch": 22.56359431780641, + "grad_norm": 0.8446247577667236, + "learning_rate": 2.7542243267383673e-06, + "loss": 0.0958, + "step": 136600 + }, + { + "epoch": 22.58011232243145, + "grad_norm": 0.47825750708580017, + "learning_rate": 2.7481065472476116e-06, + "loss": 0.0918, + "step": 136700 + }, + { + "epoch": 22.59663032705649, + "grad_norm": 0.8411787152290344, + "learning_rate": 2.741988767756855e-06, + "loss": 0.0886, + "step": 136800 + }, + { + "epoch": 22.613148331681533, + "grad_norm": 0.5080142021179199, + "learning_rate": 2.7358709882660994e-06, + "loss": 0.0982, + "step": 136900 + }, + { + "epoch": 22.629666336306574, + "grad_norm": 0.7875675559043884, + "learning_rate": 2.7297532087753432e-06, + "loss": 0.0909, + "step": 137000 + }, + { + "epoch": 22.629666336306574, + "eval_cer": 0.03681301758948737, + "eval_loss": 0.10519874840974808, + "eval_runtime": 52.8842, + "eval_samples_per_second": 31.976, + "eval_steps_per_second": 7.999, + "eval_wer": 0.20264784862299381, + "step": 137000 + }, + { + "epoch": 22.646184340931615, + "grad_norm": 0.7804688215255737, + "learning_rate": 2.7236354292845875e-06, + "loss": 0.1158, + "step": 137100 + }, + { + "epoch": 22.662702345556657, + "grad_norm": 0.49170786142349243, + "learning_rate": 2.717517649793831e-06, + "loss": 0.0981, + "step": 137200 + }, + { + "epoch": 22.679220350181698, + "grad_norm": 0.649940550327301, + "learning_rate": 2.7113998703030753e-06, + "loss": 0.0892, + "step": 137300 + }, + { + "epoch": 22.69573835480674, + "grad_norm": 0.7027512192726135, + "learning_rate": 2.7052820908123188e-06, + "loss": 0.1086, + "step": 137400 + }, + { + "epoch": 22.71225635943178, + "grad_norm": 0.7389455437660217, + "learning_rate": 2.699164311321563e-06, + "loss": 0.0899, + "step": 137500 + }, + { + "epoch": 22.72877436405682, + "grad_norm": 0.7065523862838745, + "learning_rate": 2.693046531830807e-06, + "loss": 0.0851, + "step": 137600 + }, + { + "epoch": 22.745292368681863, + "grad_norm": 0.768282949924469, + "learning_rate": 2.6869287523400513e-06, + "loss": 0.0869, + "step": 137700 + }, + { + "epoch": 22.761810373306904, + "grad_norm": 0.6381931900978088, + "learning_rate": 2.6808109728492948e-06, + "loss": 0.0894, + "step": 137800 + }, + { + "epoch": 22.778328377931945, + "grad_norm": 0.6711616516113281, + "learning_rate": 2.674693193358539e-06, + "loss": 0.0932, + "step": 137900 + }, + { + "epoch": 22.794846382556987, + "grad_norm": 0.8620249629020691, + "learning_rate": 2.668575413867783e-06, + "loss": 0.0946, + "step": 138000 + }, + { + "epoch": 22.794846382556987, + "eval_cer": 0.036624803230442815, + "eval_loss": 0.1052507609128952, + "eval_runtime": 52.4513, + "eval_samples_per_second": 32.239, + "eval_steps_per_second": 8.065, + "eval_wer": 0.20314744270280397, + "step": 138000 + }, + { + "epoch": 22.811364387182028, + "grad_norm": 0.4814409911632538, + "learning_rate": 2.662457634377027e-06, + "loss": 0.0948, + "step": 138100 + }, + { + "epoch": 22.82788239180707, + "grad_norm": 1.151419997215271, + "learning_rate": 2.6563398548862707e-06, + "loss": 0.1138, + "step": 138200 + }, + { + "epoch": 22.84440039643211, + "grad_norm": 0.6814967393875122, + "learning_rate": 2.650222075395515e-06, + "loss": 0.114, + "step": 138300 + }, + { + "epoch": 22.860918401057152, + "grad_norm": 0.8873021602630615, + "learning_rate": 2.6441042959047585e-06, + "loss": 0.0866, + "step": 138400 + }, + { + "epoch": 22.877436405682193, + "grad_norm": 0.6129996180534363, + "learning_rate": 2.637986516414003e-06, + "loss": 0.0902, + "step": 138500 + }, + { + "epoch": 22.893954410307234, + "grad_norm": 0.8606892228126526, + "learning_rate": 2.6318687369232467e-06, + "loss": 0.0953, + "step": 138600 + }, + { + "epoch": 22.910472414932276, + "grad_norm": 0.6854122281074524, + "learning_rate": 2.6257509574324906e-06, + "loss": 0.0963, + "step": 138700 + }, + { + "epoch": 22.926990419557317, + "grad_norm": 0.7230859398841858, + "learning_rate": 2.6196331779417345e-06, + "loss": 0.0866, + "step": 138800 + }, + { + "epoch": 22.943508424182358, + "grad_norm": 0.4967285692691803, + "learning_rate": 2.6135153984509788e-06, + "loss": 0.0875, + "step": 138900 + }, + { + "epoch": 22.9600264288074, + "grad_norm": 0.6331928372383118, + "learning_rate": 2.6073976189602222e-06, + "loss": 0.0897, + "step": 139000 + }, + { + "epoch": 22.9600264288074, + "eval_cer": 0.03681301758948737, + "eval_loss": 0.10469213128089905, + "eval_runtime": 52.8869, + "eval_samples_per_second": 31.974, + "eval_steps_per_second": 7.998, + "eval_wer": 0.20383438456254294, + "step": 139000 + }, + { + "epoch": 22.97654443343244, + "grad_norm": 0.68625807762146, + "learning_rate": 2.6012798394694665e-06, + "loss": 0.103, + "step": 139100 + }, + { + "epoch": 22.993062438057482, + "grad_norm": 0.7166194915771484, + "learning_rate": 2.5951620599787104e-06, + "loss": 0.0937, + "step": 139200 + }, + { + "epoch": 23.009580442682523, + "grad_norm": 0.7146703600883484, + "learning_rate": 2.5890442804879547e-06, + "loss": 0.0936, + "step": 139300 + }, + { + "epoch": 23.026098447307564, + "grad_norm": 0.5112409591674805, + "learning_rate": 2.582926500997198e-06, + "loss": 0.0863, + "step": 139400 + }, + { + "epoch": 23.042616451932606, + "grad_norm": 0.5813011527061462, + "learning_rate": 2.5768087215064425e-06, + "loss": 0.0827, + "step": 139500 + }, + { + "epoch": 23.059134456557647, + "grad_norm": 0.6480150818824768, + "learning_rate": 2.570690942015686e-06, + "loss": 0.0866, + "step": 139600 + }, + { + "epoch": 23.07565246118269, + "grad_norm": 1.00325608253479, + "learning_rate": 2.5645731625249303e-06, + "loss": 0.0927, + "step": 139700 + }, + { + "epoch": 23.09217046580773, + "grad_norm": 0.5901710391044617, + "learning_rate": 2.558455383034174e-06, + "loss": 0.0978, + "step": 139800 + }, + { + "epoch": 23.10868847043277, + "grad_norm": 0.6397861242294312, + "learning_rate": 2.5523376035434185e-06, + "loss": 0.0869, + "step": 139900 + }, + { + "epoch": 23.125206475057812, + "grad_norm": 0.4879724085330963, + "learning_rate": 2.546219824052662e-06, + "loss": 0.0876, + "step": 140000 + }, + { + "epoch": 23.125206475057812, + "eval_cer": 0.03693279036342482, + "eval_loss": 0.10442952066659927, + "eval_runtime": 52.9301, + "eval_samples_per_second": 31.948, + "eval_steps_per_second": 7.992, + "eval_wer": 0.2038968338225192, + "step": 140000 + }, + { + "epoch": 23.141724479682853, + "grad_norm": 0.7894465327262878, + "learning_rate": 2.5401020445619062e-06, + "loss": 0.1086, + "step": 140100 + }, + { + "epoch": 23.158242484307895, + "grad_norm": 0.7804042100906372, + "learning_rate": 2.53398426507115e-06, + "loss": 0.0881, + "step": 140200 + }, + { + "epoch": 23.174760488932936, + "grad_norm": 0.5835601091384888, + "learning_rate": 2.527866485580394e-06, + "loss": 0.0909, + "step": 140300 + }, + { + "epoch": 23.191278493557977, + "grad_norm": 0.7063116431236267, + "learning_rate": 2.521748706089638e-06, + "loss": 0.0875, + "step": 140400 + }, + { + "epoch": 23.20779649818302, + "grad_norm": 0.66155606508255, + "learning_rate": 2.515630926598882e-06, + "loss": 0.0859, + "step": 140500 + }, + { + "epoch": 23.22431450280806, + "grad_norm": 0.5779556035995483, + "learning_rate": 2.5095131471081257e-06, + "loss": 0.0879, + "step": 140600 + }, + { + "epoch": 23.2408325074331, + "grad_norm": 0.5715177655220032, + "learning_rate": 2.50339536761737e-06, + "loss": 0.0914, + "step": 140700 + }, + { + "epoch": 23.257350512058142, + "grad_norm": 0.5225812792778015, + "learning_rate": 2.497277588126614e-06, + "loss": 0.0902, + "step": 140800 + }, + { + "epoch": 23.273868516683184, + "grad_norm": 0.8125872015953064, + "learning_rate": 2.4911598086358578e-06, + "loss": 0.1079, + "step": 140900 + }, + { + "epoch": 23.290386521308225, + "grad_norm": 0.7094987034797668, + "learning_rate": 2.4850420291451016e-06, + "loss": 0.0863, + "step": 141000 + }, + { + "epoch": 23.290386521308225, + "eval_cer": 0.03659913763602765, + "eval_loss": 0.10513997077941895, + "eval_runtime": 52.6735, + "eval_samples_per_second": 32.103, + "eval_steps_per_second": 8.031, + "eval_wer": 0.20246050084306502, + "step": 141000 + }, + { + "epoch": 23.306904525933266, + "grad_norm": 0.5953539609909058, + "learning_rate": 2.4789242496543455e-06, + "loss": 0.0941, + "step": 141100 + }, + { + "epoch": 23.323422530558307, + "grad_norm": 0.6508031487464905, + "learning_rate": 2.4728064701635894e-06, + "loss": 0.0951, + "step": 141200 + }, + { + "epoch": 23.33994053518335, + "grad_norm": 0.6292299032211304, + "learning_rate": 2.4666886906728333e-06, + "loss": 0.0838, + "step": 141300 + }, + { + "epoch": 23.35645853980839, + "grad_norm": 0.7818630337715149, + "learning_rate": 2.4605709111820776e-06, + "loss": 0.0869, + "step": 141400 + }, + { + "epoch": 23.37297654443343, + "grad_norm": 0.8426799774169922, + "learning_rate": 2.4544531316913215e-06, + "loss": 0.0852, + "step": 141500 + }, + { + "epoch": 23.389494549058472, + "grad_norm": 0.5545341968536377, + "learning_rate": 2.4483353522005654e-06, + "loss": 0.0827, + "step": 141600 + }, + { + "epoch": 23.406012553683514, + "grad_norm": 1.2653288841247559, + "learning_rate": 2.4422175727098093e-06, + "loss": 0.0841, + "step": 141700 + }, + { + "epoch": 23.422530558308555, + "grad_norm": 0.7402147650718689, + "learning_rate": 2.436099793219053e-06, + "loss": 0.0836, + "step": 141800 + }, + { + "epoch": 23.439048562933596, + "grad_norm": 0.5832458734512329, + "learning_rate": 2.4299820137282975e-06, + "loss": 0.1247, + "step": 141900 + }, + { + "epoch": 23.455566567558638, + "grad_norm": 0.6517156958580017, + "learning_rate": 2.4238642342375413e-06, + "loss": 0.0871, + "step": 142000 + }, + { + "epoch": 23.455566567558638, + "eval_cer": 0.03671035521182671, + "eval_loss": 0.10508172959089279, + "eval_runtime": 52.7407, + "eval_samples_per_second": 32.063, + "eval_steps_per_second": 8.02, + "eval_wer": 0.20246050084306502, + "step": 142000 + }, + { + "epoch": 23.47208457218368, + "grad_norm": 0.6041878461837769, + "learning_rate": 2.4177464547467852e-06, + "loss": 0.0912, + "step": 142100 + }, + { + "epoch": 23.48860257680872, + "grad_norm": 0.5178912878036499, + "learning_rate": 2.411628675256029e-06, + "loss": 0.0925, + "step": 142200 + }, + { + "epoch": 23.50512058143376, + "grad_norm": 0.6299303770065308, + "learning_rate": 2.405510895765273e-06, + "loss": 0.088, + "step": 142300 + }, + { + "epoch": 23.521638586058803, + "grad_norm": 0.6988112926483154, + "learning_rate": 2.399393116274517e-06, + "loss": 0.1043, + "step": 142400 + }, + { + "epoch": 23.538156590683844, + "grad_norm": 0.5607922077178955, + "learning_rate": 2.393275336783761e-06, + "loss": 0.089, + "step": 142500 + }, + { + "epoch": 23.554674595308885, + "grad_norm": 0.4817243218421936, + "learning_rate": 2.387157557293005e-06, + "loss": 0.0874, + "step": 142600 + }, + { + "epoch": 23.571192599933926, + "grad_norm": 0.6620100140571594, + "learning_rate": 2.381039777802249e-06, + "loss": 0.0878, + "step": 142700 + }, + { + "epoch": 23.587710604558968, + "grad_norm": 0.9131438732147217, + "learning_rate": 2.374921998311493e-06, + "loss": 0.0879, + "step": 142800 + }, + { + "epoch": 23.60422860918401, + "grad_norm": 0.5091140270233154, + "learning_rate": 2.3688042188207367e-06, + "loss": 0.0941, + "step": 142900 + }, + { + "epoch": 23.62074661380905, + "grad_norm": 0.6191192865371704, + "learning_rate": 2.362686439329981e-06, + "loss": 0.0932, + "step": 143000 + }, + { + "epoch": 23.62074661380905, + "eval_cer": 0.036659024022996374, + "eval_loss": 0.10472416132688522, + "eval_runtime": 52.4751, + "eval_samples_per_second": 32.225, + "eval_steps_per_second": 8.061, + "eval_wer": 0.2030849934428277, + "step": 143000 + }, + { + "epoch": 23.63726461843409, + "grad_norm": 0.5774977803230286, + "learning_rate": 2.356568659839225e-06, + "loss": 0.1072, + "step": 143100 + }, + { + "epoch": 23.653782623059133, + "grad_norm": 0.6127384901046753, + "learning_rate": 2.350450880348469e-06, + "loss": 0.0856, + "step": 143200 + }, + { + "epoch": 23.670300627684174, + "grad_norm": 0.5244102478027344, + "learning_rate": 2.3443331008577127e-06, + "loss": 0.0845, + "step": 143300 + }, + { + "epoch": 23.686818632309215, + "grad_norm": 0.8045458197593689, + "learning_rate": 2.3382153213669566e-06, + "loss": 0.0887, + "step": 143400 + }, + { + "epoch": 23.703336636934257, + "grad_norm": 0.5768733024597168, + "learning_rate": 2.3320975418762005e-06, + "loss": 0.0997, + "step": 143500 + }, + { + "epoch": 23.7198546415593, + "grad_norm": 0.7417640089988708, + "learning_rate": 2.3259797623854448e-06, + "loss": 0.0952, + "step": 143600 + }, + { + "epoch": 23.736372646184343, + "grad_norm": 0.6068658232688904, + "learning_rate": 2.3198619828946887e-06, + "loss": 0.0928, + "step": 143700 + }, + { + "epoch": 23.752890650809384, + "grad_norm": 0.8562188148498535, + "learning_rate": 2.3137442034039326e-06, + "loss": 0.0868, + "step": 143800 + }, + { + "epoch": 23.769408655434425, + "grad_norm": 0.6002250909805298, + "learning_rate": 2.3076264239131764e-06, + "loss": 0.0883, + "step": 143900 + }, + { + "epoch": 23.785926660059467, + "grad_norm": 0.6457869410514832, + "learning_rate": 2.3015086444224203e-06, + "loss": 0.0871, + "step": 144000 + }, + { + "epoch": 23.785926660059467, + "eval_cer": 0.03666757922113476, + "eval_loss": 0.10333551466464996, + "eval_runtime": 53.1345, + "eval_samples_per_second": 31.825, + "eval_steps_per_second": 7.961, + "eval_wer": 0.20296009492287517, + "step": 144000 + }, + { + "epoch": 23.802444664684508, + "grad_norm": 0.6609480381011963, + "learning_rate": 2.2953908649316646e-06, + "loss": 0.0935, + "step": 144100 + }, + { + "epoch": 23.81896266930955, + "grad_norm": 0.5107303261756897, + "learning_rate": 2.2892730854409085e-06, + "loss": 0.0887, + "step": 144200 + }, + { + "epoch": 23.83548067393459, + "grad_norm": 0.6314355134963989, + "learning_rate": 2.2831553059501524e-06, + "loss": 0.0903, + "step": 144300 + }, + { + "epoch": 23.85199867855963, + "grad_norm": 0.49561741948127747, + "learning_rate": 2.2770375264593963e-06, + "loss": 0.0859, + "step": 144400 + }, + { + "epoch": 23.868516683184673, + "grad_norm": 0.7324890494346619, + "learning_rate": 2.27091974696864e-06, + "loss": 0.0924, + "step": 144500 + }, + { + "epoch": 23.885034687809714, + "grad_norm": 0.5809805393218994, + "learning_rate": 2.264801967477884e-06, + "loss": 0.0917, + "step": 144600 + }, + { + "epoch": 23.901552692434755, + "grad_norm": 0.6561674475669861, + "learning_rate": 2.2586841879871284e-06, + "loss": 0.0921, + "step": 144700 + }, + { + "epoch": 23.918070697059797, + "grad_norm": 0.618030846118927, + "learning_rate": 2.2525664084963723e-06, + "loss": 0.0954, + "step": 144800 + }, + { + "epoch": 23.934588701684838, + "grad_norm": 0.6414436101913452, + "learning_rate": 2.2464486290056166e-06, + "loss": 0.0881, + "step": 144900 + }, + { + "epoch": 23.95110670630988, + "grad_norm": 0.9026370644569397, + "learning_rate": 2.2403308495148605e-06, + "loss": 0.091, + "step": 145000 + }, + { + "epoch": 23.95110670630988, + "eval_cer": 0.036701800013688314, + "eval_loss": 0.1043509840965271, + "eval_runtime": 52.6333, + "eval_samples_per_second": 32.128, + "eval_steps_per_second": 8.037, + "eval_wer": 0.20320989196278025, + "step": 145000 + }, + { + "epoch": 23.96762471093492, + "grad_norm": 0.4682947099208832, + "learning_rate": 2.2342130700241043e-06, + "loss": 0.0892, + "step": 145100 + }, + { + "epoch": 23.984142715559962, + "grad_norm": 0.6425852179527283, + "learning_rate": 2.2280952905333482e-06, + "loss": 0.0877, + "step": 145200 + }, + { + "epoch": 24.000660720185003, + "grad_norm": 0.4977991282939911, + "learning_rate": 2.221977511042592e-06, + "loss": 0.0855, + "step": 145300 + }, + { + "epoch": 24.017178724810044, + "grad_norm": 0.68033766746521, + "learning_rate": 2.2158597315518364e-06, + "loss": 0.0887, + "step": 145400 + }, + { + "epoch": 24.033696729435086, + "grad_norm": 0.8233726024627686, + "learning_rate": 2.2097419520610803e-06, + "loss": 0.0926, + "step": 145500 + }, + { + "epoch": 24.050214734060127, + "grad_norm": 0.6886569261550903, + "learning_rate": 2.203624172570324e-06, + "loss": 0.0959, + "step": 145600 + }, + { + "epoch": 24.066732738685168, + "grad_norm": 0.5320963263511658, + "learning_rate": 2.197506393079568e-06, + "loss": 0.0928, + "step": 145700 + }, + { + "epoch": 24.08325074331021, + "grad_norm": 0.6369372010231018, + "learning_rate": 2.191388613588812e-06, + "loss": 0.0952, + "step": 145800 + }, + { + "epoch": 24.09976874793525, + "grad_norm": 0.6117287874221802, + "learning_rate": 2.1852708340980563e-06, + "loss": 0.1055, + "step": 145900 + }, + { + "epoch": 24.116286752560292, + "grad_norm": 0.7260856032371521, + "learning_rate": 2.1791530546073e-06, + "loss": 0.0978, + "step": 146000 + }, + { + "epoch": 24.116286752560292, + "eval_cer": 0.03682157278762576, + "eval_loss": 0.10561419278383255, + "eval_runtime": 52.1637, + "eval_samples_per_second": 32.417, + "eval_steps_per_second": 8.109, + "eval_wer": 0.20333479048273279, + "step": 146000 + } + ], + "logging_steps": 100, + "max_steps": 181620, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.861432368096291e+20, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +}